# Scrape the 2020 Chinese university ranking (overall list).
# -*- coding: utf-8 -*-
from idlelib.multicall import r
import requests
import xlwt
from lxml import etree
# Workbook and worksheet that receive the scraped ranking table; the workbook
# is written to disk by the f.save(...) call at the end of the script.
f = xlwt.Workbook()
sheet = f.add_sheet('3D')

# Column titles for worksheet row 0 (rank, school name, province/city, type,
# total score, school level).  These are runtime strings and stay as-is.
row0 = ['排名', '学校名称', '省市', '类型', '总分', '办学层次']
print(len(row0))
for i, title in enumerate(row0):
    sheet.write(0, i, title)
# Index of the next worksheet row to fill.  Row 0 holds the column titles,
# so data starts at row 1; wi() advances this counter through `global j`.
j = 1

url = 'http://www.shanghairanking.cn/rankings/bcur/2020'
# Without a timeout requests may wait indefinitely on an unresponsive server.
response = requests.get(url=url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
response.raise_for_status()
# Force the decoding used by response.text rather than relying on detection.
response.encoding = 'UTF-8'
def wi(response):
    """Copy the table rows of the fetched page into the worksheet.

    Parses ``response.text`` as HTML, visits every ``<tr>`` element and writes
    the text of its first six ``<td>`` cells (the second one taken from the
    nested ``<a>``) into columns 0-5 of the module-level ``sheet``.  The
    module-level row counter ``j`` selects the target row and is advanced by
    one for each row written.

    Rows that yield no ``<td>`` text at all are skipped so they do not leave
    blank lines in the sheet.

    Args:
        response: Object exposing the page HTML through a ``text`` attribute
            (the ``requests`` response fetched by this script).
    """
    global j

    # One XPath per output column, in column order.
    cell_paths = (
        './td[1]/text()',
        './td[2]/a/text()',
        './td[3]/text()',
        './td[4]/text()',
        './td[5]/text()',
        './td[6]/text()',
    )

    element = etree.HTML(response.text)
    for node in element.xpath('//tr'):
        # xpath() returns a list of text nodes; join them into a single string
        # and trim the surrounding whitespace so each cell gets plain text.
        cells = [''.join(node.xpath(path)).strip() for path in cell_paths]
        if not any(cells):
            # Nothing extracted from this row (e.g. a row without <td> cells).
            continue
        for col, text in enumerate(cells):
            sheet.write(j, col, text)
        j += 1
# Fill the worksheet from the downloaded page, then write the workbook to disk.
wi(response)
f.save('3D.xls')