import html
def get_article_content(url, timeout=10):
    """Download an article page and return its HTML with escapes resolved.

    :param url: address of the page to fetch.
    :param timeout: seconds ``requests`` waits on the server before giving
        up; forwarded unchanged to ``requests.get``. Without a timeout the
        call can block indefinitely on an unresponsive host.
    :return: the response body decoded as UTF-8, after ``html.unescape``
        has replaced character references (e.g. ``&amp;``, ``&#39;``) with
        the characters they stand for.
    :raises UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    # NOTE(review): `requests` is not imported in the visible part of this
    # file -- confirm it is imported at module level.
    # Identify the client as a desktop Firefox browser.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0'}
    html_content = requests.get(url, headers=header, timeout=timeout).content
    real_html = html_content.decode('utf8')
    # The html module provides unescape(), which handles the escaped
    # characters (character references) found in HTML text.
    return html.unescape(real_html)
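# 版权声明:本文为qq_33733970原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。
# Copyright notice: this is an original article by qq_33733970, released under
# the CC 4.0 BY-SA license; when reposting, include a link to the original
# source and this notice.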