百度获取下拉词和相关词
- 编程
- 2023-02-26
- 1290
import re
import time

import requests
from parsel import Selector

# Headers sent with every request to Baidu.
# NOTE(review): 'ck' looks like a placeholder cookie value -- presumably it
# must be replaced with a real cookie string; confirm before use.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56',
    'Cookie': 'ck',
}

# Seconds to wait for a response. requests applies no timeout by default,
# so without this a stalled connection would block the caller forever.
_TIMEOUT = 10

# Captures the text between the brackets of the ``s:[...]`` field in the
# suggestion response.
_SUGGESTION_LIST_RE = re.compile(r's:\[(.*?)\]')


# Fetch related search terms
def get_xg(word):
    """Return the related-search keywords found on Baidu's result page.

    Requests the search result page for *word* and collects the text of
    every link located under the element whose id is ``rs_new``.

    Args:
        word: The search term. It is passed through ``params`` so that
            requests URL-encodes it (spaces, ``&``, ``#`` and similar
            characters no longer corrupt the query string).

    Returns:
        A list of keyword strings with surrounding whitespace removed.
        The list is empty when no matching links are present.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    resp = requests.get(
        'http://www.baidu.com/s',
        params={'wd': word},
        headers=_HEADERS,
        timeout=_TIMEOUT,
    )
    # Force UTF-8 decoding instead of relying on the guessed encoding.
    resp.encoding = 'utf-8'
    selector = Selector(resp.text)
    tbody = selector.xpath('//*[@id="rs_new"]/table/tbody')
    return [text.strip() for text in tbody.xpath('.//a/text()').getall()]


def _parse_suggestions(text):
    """Extract the suggestion strings from a raw suggestion response body."""
    match = _SUGGESTION_LIST_RE.search(text)
    if match is None:
        return []
    items = (item.strip('"') for item in match.group(1).split(','))
    # An empty ``s:[]`` field splits into [''], so drop blank entries.
    return [item for item in items if item]


# Fetch drop-down (autocomplete) suggestions
def get_xl(word):
    """Return the drop-down suggestions Baidu offers for *word*.

    Requests the suggestion endpoint and parses the ``s:[...]`` field of
    the response text.

    Args:
        word: The search term. It is passed through ``params`` so that
            requests URL-encodes it.

    Returns:
        A list of suggestion strings. The list is empty when the response
        contains no ``s:[...]`` field or the field holds no entries, so
        the result can always be iterated.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    resp = requests.get(
        'http://suggestion.baidu.com/su',
        params={'wd': word},
        headers=_HEADERS,
        timeout=_TIMEOUT,
    )
    return _parse_suggestions(resp.text)
注意：爬取百度时，请求地址请使用 http，不要使用 https。
发表评论