本节介绍如何将爬虫获取的数据保存到txt文本文件中,仍以排行榜为例。上一节可以参见:《爬虫之数据保存到csv,xlsx等Excel文件中》
数据获取代码如下:
import requests
from fake_useragent import UserAgent
from lxml import html
import pandas as pd
etree = html.etree
from bs4 import BeautifulSoup
# Fetch the 17k.com ranking page and split its link texts into four parallel
# lists (table0..table3), one entry per book.
url = 'https://www.17k.com/top/refactor/top100/14_recommend/14_recommend_top_100_pc.html'
# Send a randomly chosen browser User-Agent with the request.
headers = {'User-Agent': str(UserAgent().random)}
# Proxy mappings have the form {'scheme': 'scheme://IP:port'}; pick the entry
# whose key matches the scheme of the URL being fetched.
ips = {"https": "https://58.20.232.245:9091"}
ip = {"http": "http://58.20.232.245:9091"}
# NOTE(review): `url` is https but only the "http" mapping is passed below, so
# the proxy is presumably not applied to this request -- confirm whether `ips`
# was intended.
# The timeout keeps the script from hanging forever on an unresponsive server.
get = requests.get(url, proxies=ip, headers=headers, timeout=10)
# Stop on 4xx/5xx responses instead of trying to parse an error page.
get.raise_for_status()
get.encoding = 'UTF-8'
soup = BeautifulSoup(get.text, 'lxml')
div = soup.find(name='div', attrs={'class': "TYPE"})
if div is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise RuntimeError('ranking container <div class="TYPE"> not found in the response')
a = div.find_all(name='a')
# get_text() always returns a str, whereas .string is None for a link that
# does not have exactly one text child (which would later break .replace()).
table = [link.get_text() for link in a]
# Assumes the links repeat in groups of four per book -- TODO confirm against
# the live page layout.
# First link of each group, with the surrounding square brackets removed.
table0 = [text.replace('[', '').replace(']', '') for text in table[0::4]]
table1 = table[1::4]
table2 = table[2::4]
table3 = table[3::4]
将数据保存到txt文本的代码如下:
# Write one record per book to 排行榜.txt, each followed by a separator line.
# The `with` block closes the file even if a write raises part-way through.
with open('排行榜.txt', 'w', encoding='utf-8') as file:
    # zip() walks the four lists in lockstep and stops at the shortest one,
    # so an incomplete trailing record is skipped instead of raising
    # IndexError after a partial write.
    for category, title, chapter, author in zip(table0, table1, table2, table3):
        file.write(f'书名:{title}\n')
        file.write(f'作者:{author}\n')
        file.write(f'类型:{category}\n')
        file.write(f'最新章节:{chapter}\n')
        file.write(f'{"=" * 30}\n')
print('ok')
效果如下:
最终代码如下:
import requests
from fake_useragent import UserAgent
from lxml import html
import pandas as pd
etree = html.etree
from bs4 import BeautifulSoup
# Fetch the 17k.com ranking page, split its link texts into four parallel
# lists (one entry per book), and save the records to 排行榜.txt.
url = 'https://www.17k.com/top/refactor/top100/14_recommend/14_recommend_top_100_pc.html'
# Send a randomly chosen browser User-Agent with the request.
headers = {'User-Agent': str(UserAgent().random)}
# Proxy mappings have the form {'scheme': 'scheme://IP:port'}; pick the entry
# whose key matches the scheme of the URL being fetched.
ips = {"https": "https://58.20.232.245:9091"}
ip = {"http": "http://58.20.232.245:9091"}
# NOTE(review): `url` is https but only the "http" mapping is passed below, so
# the proxy is presumably not applied to this request -- confirm whether `ips`
# was intended.
# The timeout keeps the script from hanging forever on an unresponsive server.
get = requests.get(url, proxies=ip, headers=headers, timeout=10)
# Stop on 4xx/5xx responses instead of trying to parse an error page.
get.raise_for_status()
get.encoding = 'UTF-8'
soup = BeautifulSoup(get.text, 'lxml')
div = soup.find(name='div', attrs={'class': "TYPE"})
if div is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise RuntimeError('ranking container <div class="TYPE"> not found in the response')
a = div.find_all(name='a')
# get_text() always returns a str, whereas .string is None for a link that
# does not have exactly one text child (which would later break .replace()).
table = [link.get_text() for link in a]
# Assumes the links repeat in groups of four per book -- TODO confirm against
# the live page layout.
# First link of each group, with the surrounding square brackets removed.
table0 = [text.replace('[', '').replace(']', '') for text in table[0::4]]
table1 = table[1::4]
table2 = table[2::4]
table3 = table[3::4]
# The `with` block closes the file even if a write raises part-way through.
with open('排行榜.txt', 'w', encoding='utf-8') as file:
    # zip() walks the four lists in lockstep and stops at the shortest one,
    # so an incomplete trailing record is skipped instead of raising
    # IndexError after a partial write.
    for category, title, chapter, author in zip(table0, table1, table2, table3):
        file.write(f'书名:{title}\n')
        file.write(f'作者:{author}\n')
        file.write(f'类型:{category}\n')
        file.write(f'最新章节:{chapter}\n')
        file.write(f'{"=" * 30}\n')
print('ok')
版权声明:本文为weixin_43788986原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。