代码日记

爬取2020中国大学排名(总榜)

# -*- coding: utf-8 -*-

from idlelib.multicall import r

import requests
import xlwt
from lxml import etree

# Workbook and the single worksheet that receives the ranking table.
f = xlwt.Workbook()
sheet = f.add_sheet('3D')

# Column headings, written to worksheet row 0 in this order.
row0 = ['排名', '学校名称', '省市', '类型', '总分', '办学层次']
print(len(row0))
for i, title in enumerate(row0):
    sheet.write(0, i, title)

# Next worksheet row to fill; row 0 is taken by the headings above.
j = 1

# Download the ranking page. The timeout keeps an unresponsive server from
# blocking the script indefinitely, and raise_for_status() stops on an HTTP
# error instead of handing an error page to the parser.
url = 'http://www.shanghairanking.cn/rankings/bcur/2020'
response = requests.get(url=url, timeout=10)
response.raise_for_status()
# Decode response.text as UTF-8 rather than relying on the detected charset.
response.encoding = 'UTF-8'

def wi(response):
    """Copy the table rows found in *response* into the module-level sheet.

    The HTML in ``response.text`` is parsed and every ``<tr>`` element is
    visited. For each row, the text of its first six ``<td>`` cells is
    written to columns 0-5 of ``sheet`` at row ``j``, after which the
    module-level counter ``j`` is advanced by one.

    Rows without any ``<td>`` child (for example a header row built from
    ``<th>`` cells) are skipped so they do not leave a blank worksheet row.
    Each cell's text nodes are joined and stripped of surrounding whitespace
    so a plain string is stored instead of the raw XPath result list.

    Args:
        response: Object exposing the page markup as a ``text`` attribute
            (the script passes a ``requests`` response).
    """
    global j

    # One XPath per output column; column 1 reads the text of the <a>
    # element inside the second cell, the others read the cell text directly.
    column_paths = (
        './td[1]/text()',
        './td[2]/a/text()',
        './td[3]/text()',
        './td[4]/text()',
        './td[5]/text()',
        './td[6]/text()',
    )

    element = etree.HTML(response.text)
    for node in element.xpath('//tr'):
        if not node.xpath('./td'):
            continue
        for col, path in enumerate(column_paths):
            # xpath() returns a list of text nodes; collapse it to one string.
            sheet.write(j, col, ''.join(node.xpath(path)).strip())
        j += 1

# Fill the worksheet from the downloaded page, then write the workbook to disk.
wi(response)
f.save('3D.xls')


版权声明:本文为weixin_42603276原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。