Python 获取熊猫 TV 专区的人气数据

# coding:utf-8
from urllib import request
import re


class Spider():
    """Scrape a panda.tv category page and rank its streamers by popularity.

    The page is fetched with ``urllib``, each ``video-info`` card is located
    with regular expressions, and the (name, popularity) pairs are collected
    in the class-level ``anchors`` list, then sorted and printed.
    """

    # Category page to scrape; point this at another category URL to switch.
    url = 'https://www.panda.tv/cate/kingglory'
    # Non-greedy on purpose: a greedy match would run to the page's last </div>.
    root_pattern = '<div class="video-info">(.*?)</div>'
    name_pattern = '<span class="video-nickname" title="(.*?)">'
    # Raw string so that \s and \S are regex escapes, not string escapes.
    number_pattern = r'<span class="video-number">[\s\S]*?</i>([\s\S]*?)</span>'
    # Shared result list of {'name': ..., 'number': ...} dicts; reset by run().
    anchors = []

    def __get_content(self):
        """Download ``Spider.url`` and return the body decoded as UTF-8."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            raw = response.read()
        return str(raw, encoding='utf-8')

    def __handle_content(self, htmls):
        """Extract every streamer card from *htmls* into ``Spider.anchors``.

        Cards that lack either the nickname or the popularity element are
        skipped so that one malformed card does not abort the whole scrape.
        """
        # re.S lets '.' span newlines inside a card.
        for card in re.findall(Spider.root_pattern, htmls, re.S):
            name_match = re.search(Spider.name_pattern, card)
            number_match = re.search(Spider.number_pattern, card)
            if name_match is None or number_match is None:
                continue
            Spider.anchors.append({
                'name': name_match.group(1),
                'number': number_match.group(1),
            })

    def sort(self, anchors):
        """Return *anchors* as a new list ordered by popularity, highest first.

        The ``'number'`` values of *anchors* are converted to floats in place
        (see ``keey_seed``) before sorting.
        """
        anchors = self.keey_seed(anchors)
        return sorted(anchors, key=lambda anchor: anchor['number'], reverse=True)

    def keey_seed(self, anchors):
        """Convert each anchor's ``'number'`` to a float, in place.

        A value containing '万' (ten thousand) is scaled by 10000, keeping any
        decimal part ("1.5万" -> 15000.0). Values that are already numeric are
        accepted, so the method can safely be applied more than once.

        Returns the same *anchors* object that was passed in.
        Raises ValueError when a value holds no parsable number.
        """
        for anchor in anchors:
            raw = anchor['number']
            if not isinstance(raw, str):
                # Already converted by an earlier call.
                anchor['number'] = float(raw)
            elif '万' in raw:
                # Match the full decimal literal; r'\d*' would stop at the
                # dot and could match an empty string at leading whitespace.
                match = re.search(r'\d+(?:\.\d+)?', raw)
                if match is None:
                    raise ValueError(
                        'no numeric value found in {!r}'.format(raw))
                anchor['number'] = float(match.group()) * 10000
            else:
                # float() tolerates surrounding whitespace.
                anchor['number'] = float(raw)
        return anchors

    def show(self, r):
        """Print one "name : number" line per anchor in *r*."""
        for a in r:
            print(a['name'], ': ', a['number'])

    def run(self):
        """Fetch the page, parse it, and print the popularity ranking."""
        contents = self.__get_content()
        # Start from a clean list so repeated runs do not duplicate entries.
        del Spider.anchors[:]
        self.__handle_content(contents)
        rank_list = self.sort(Spider.anchors)
        self.show(rank_list)


# Run the scrape only when this file is executed as a script, so that merely
# importing the module does not trigger a network request.
if __name__ == '__main__':
    spider = Spider()
    spider.run()

输出:(原文此处为一张图片,未能保留)


版权声明:本文为StrongbyTime原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。