一、爬取微博热搜关键词需要的第三方库
1、requests
2、BeautifulSoup 美味汤
3、wordcloud 词云
4、jieba 中文分词
5、matplotlib 绘图
二、爬取微博热搜关键词代码示例
import requests
import wordcloud
import jieba
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from pylab import mpl
from collections import Counter

# Page that lists Weibo's current hot-search entries.
url = 'https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6'
# Font file handed to the word cloud so Chinese glyphs can be drawn.
# NOTE: this path only exists on Windows; point it at a local CJK font elsewhere.
FONT_PATH = r'C:\Windows\Fonts\simhei.ttf'
# Output image written by the word cloud step.
CLOUD_FILE = '微博热搜关键词词云.png'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def fetch_hot_titles(page_url=url, timeout=REQUEST_TIMEOUT):
    """Download the hot-search page and return the text of its entry links.

    Args:
        page_url: Address of the page to scrape.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A list with the text of the page's ``<a>`` elements, minus the first
        4 and the last 11.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    # Let requests guess the charset from the body rather than the headers.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'html.parser')
    anchor_texts = [anchor.text for anchor in soup.find_all('a')]
    # NOTE(review): the slice presumably strips navigation/footer links that
    # surround the hot-search list; it depends on the page layout -- verify.
    return anchor_texts[4:-11]


def tokenize(titles):
    """Segment every title with jieba and keep tokens longer than one character.

    Args:
        titles: Iterable of strings to segment.

    Returns:
        A flat list of tokens in encounter order; an empty list when
        ``titles`` is empty.
    """
    tokens = []
    for title in titles:
        tokens.extend(tok for tok in jieba.cut(title) if len(tok) > 1)
    return tokens


def count_repeated(tokens):
    """Return ``(token, count)`` pairs for tokens that occur more than once.

    The pairs are sorted by ascending count (ties broken by token) so every
    token stays attached to its own count. Sorting the tokens and the counts
    as two separate lists would pair words with the wrong frequencies.

    Args:
        tokens: Iterable of hashable tokens.

    Returns:
        A list of ``(token, count)`` tuples; empty when nothing repeats.
    """
    frequencies = Counter(tokens)
    repeated = [(tok, n) for tok, n in frequencies.items() if n > 1]
    repeated.sort(key=lambda pair: (pair[1], pair[0]))
    return repeated


def main():
    """Scrape the hot-search page, save a word cloud and plot repeated keywords.

    Errors are printed rather than raised, so the script finishes with a
    message instead of a traceback.
    """
    # Set the font: SimHei as matplotlib's sans-serif font, and disable the
    # Unicode minus sign.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    try:
        # Fetch the data and segment it into words.
        tokens = tokenize(fetch_hot_titles())
        if not tokens:
            # Nothing to render (e.g. the page layout changed or a login
            # wall was served); stop before the word cloud is generated.
            print('No keywords found on the page; nothing to draw.')
            return

        # Write the word cloud image.
        cloud = wordcloud.WordCloud(font_path=FONT_PATH, width=600, height=400)
        cloud.generate(' '.join(tokens))
        cloud.to_file(CLOUD_FILE)

        # Plot only the keywords that appear more than once.
        pairs = count_repeated(tokens)
        x = [tok for tok, _ in pairs]
        y = [n for _, n in pairs]
        plt.plot(x, y)
        plt.show()
    except Exception as e:
        # Top-level boundary of a best-effort script: report and exit quietly.
        print(e)


if __name__ == '__main__':
    main()
以上就是Python爬虫爬取微博热搜关键词的代码示例,大家可以套入代码直接使用哦~更多Python爬虫推荐:Python爬虫教程。