百度获取下拉词和相关词
- 编程
- 2023-02-26
- 1426
Python
import requests
from parsel import Selector
import re
import time
# Fetch Baidu "related search" keywords
def get_xg(word, timeout=10):
    """Return the related-search keywords Baidu shows for *word*.

    Requests the Baidu search result page for *word* and collects the text
    of every ``<a>`` found under ``//*[@id="rs_new"]/table/tbody``.

    Args:
        word: The search keyword. It is passed through ``params`` so that
            characters such as ``&``, ``#``, ``+`` or spaces are URL-encoded
            instead of corrupting the query string.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        A list of whitespace-stripped keyword strings; empty when the XPath
        matches nothing.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56',
        'Cookie': 'ck'
    }
    url = 'http://www.baidu.com/s'
    resp = requests.get(url, params={'wd': word}, headers=headers, timeout=timeout)
    # Force UTF-8 decoding rather than relying on the guessed encoding.
    resp.encoding = 'utf-8'
    html = resp.text
    selector = Selector(html)
    # NOTE(review): the path requires a literal <tbody> in the served HTML;
    # browsers insert that element in their DOM even when the markup omits
    # it -- confirm against a raw response if this ever returns nothing.
    tbody = selector.xpath('//*[@id="rs_new"]/table/tbody')
    ks = tbody.xpath('.//a/text()').getall()
    keywords = [key.strip() for key in ks]
    return keywords
# Fetch Baidu drop-down (autocomplete) suggestions
def get_xl(word, timeout=10):
    """Return the drop-down suggestion keywords Baidu offers for *word*.

    Calls the Baidu suggestion endpoint and extracts the contents of the
    first ``s:[...]`` array found in the response text.

    Args:
        word: The search keyword. It is passed through ``params`` so that
            characters such as ``&``, ``#``, ``+`` or spaces are URL-encoded
            instead of corrupting the query string.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        A list of suggestion strings. Always a list: empty when the response
        has no ``s:[...]`` array or the array is empty.
    """
    url = 'http://suggestion.baidu.com/su'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56',
        'Cookie': 'ck'
    }
    resp = requests.get(url, params={'wd': word}, headers=headers, timeout=timeout)
    html = resp.text
    # Raw string so that \[ and \] reach the regex engine unchanged.
    content = re.findall(r's:\[(.*?)\]', html)
    if not content:
        return []
    # Items are comma-separated and double-quoted; drop surrounding
    # whitespace and quotes, then discard empties (an empty array would
    # otherwise yield ['']).
    # NOTE(review): a suggestion containing ',' or ']' would still be split
    # or truncated here, as in the original implementation.
    kws = (kw.strip().strip('"') for kw in content[0].split(','))
    return [kw for kw in kws if kw]
注意：爬取百度时请使用 http 链接，不要使用 https。
发表评论