正方教务爬虫

基于selenium的正方教务成绩爬虫

写在最前


这个爬虫是一个半成品,可以自动跳到成绩页面并自动保存成绩,后续功能正在开发中。

仓库地址:https://github.com/cnhkbbs/ZhengFangSpider

使用方法

step 1 下载


直接复制代码或下载仓库里的文件到本地

step 2 安装最新版Chrome浏览器(已有请跳过)


https://www.google.cn/intl/zh-CN/chrome/

step 3 下载对应的驱动


下载地址:http://chromedriver.storage.googleapis.com/index.html ;驱动安装教程:https://blog.csdn.net/m0_67575344/article/details/126142295

step 4 安装所需模块


  1. ddddocr

  2. selenium

  3. openpyxl

命令行安装
pip install ddddocr
pip install selenium
pip install openpyxl
自动安装

使用pycharm自动安装

step 5 运行


直接在 Python 解释器或 IDE(如 PyCharm)中运行脚本即可

已知bug


1.没有捕获验证码识别错误后抛出的异常


完整代码

# -*- coding: utf-8 -*-
import time
import datetime
import ddddocr
import openpyxl
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains

# Build the Chrome WebDriver used by every function below. The driver is
# created at module load, so a browser window opens as soon as the file runs.
service = ChromeService(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')  # path to the chromedriver executable
chrome = webdriver.Chrome(service=service)

# Configuration

name = '123456'  # account name typed into the login form
pwd = '132456l'  # password typed into the login form
safe_time = 3  # pause between retries, in seconds (passed to time.sleep)
servernum = 0  # index into `servers` selecting which server to use: 0, 1, 2 or 3
retry = True  # retry after a failed login or failed query
mode = 1  # operating mode: 1 = score query, 2 = one-click course evaluation, 3 = timetable query
servers = ['http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/']


def print_INFO(message):
    """Print ``message`` prefixed with the current time as ``[HH:MM:SS]``."""
    stamp = datetime.datetime.now().strftime('%H:%M:%S')
    prefix = '[{}]'.format(stamp)
    print(prefix + message)


def print_ERROR(error):
    """Print ``error`` in bold red (ANSI escapes) after a ``[HH:MM:SS]`` prefix."""
    stamp = datetime.datetime.now().strftime('%H:%M:%S')
    red_on = "\033[1;31m"
    red_off = " \033[0m"  # the leading space is part of the emitted text
    print(''.join(['[', stamp, ']', red_on, error, red_off]))


def print_Exception(e):
    """Print a red exception banner followed by a blank line and ``e`` itself."""
    banner = "\033[1;31m异常!\033[0m\n"
    # The banner already ends in a newline, so the separator yields a blank line.
    print(banner, e, sep='\n')


def recognize():
    """Screenshot the captcha element and return the text ddddocr reads from it.

    The element with id ``icode`` is saved to ``img.png`` in the working
    directory, and that file is then passed to ``ddddocr`` for classification.

    Returns:
        The recognised captcha string, or an empty string when the screenshot
        could not be saved. The original fell off the end and returned
        ``None``, which callers cannot type into the form.
    """
    captcha = chrome.find_element(By.ID, 'icode')
    if not captcha.screenshot('img.png'):
        print_ERROR('验证码截图失败')
        return ''
    with open('img.png', 'rb') as f:
        img = f.read()
    ocr = ddddocr.DdddOcr()
    result = ocr.classification(img)
    # Reuse the shared logger instead of rebuilding the timestamp prefix here.
    print_INFO("验证码" + result)
    return result


def auto_Login():
    """Open the configured server and submit the login form once.

    Loads ``servers[servernum]``, fills in the account name, password and the
    captcha text returned by ``recognize()``, clicks the login button, and
    then checks the page title.

    Returns:
        True when the title after submitting is neither the error page nor
        the login page; False on any failure. Browser errors are logged, not
        propagated.
    """
    error_title = 'ERROR - 出错啦!'
    login_title = '欢迎使用正方教务管理系统!请登录'
    try:
        chrome.get(servers[servernum])
        print_INFO("尝试登录" + str(servernum) + '号服务器')
        if chrome.title == error_title:
            return False
        chrome.find_element(By.ID, 'txtUserName').send_keys(name)
        chrome.find_element(By.ID, 'TextBox2').send_keys(pwd)
        code = recognize()
        if not code:
            # No usable captcha text: give up on this attempt without typing.
            print_ERROR('验证码识别失败')
            return False
        chrome.find_element(By.ID, 'txtSecretCode').send_keys(code)
        chrome.find_element(By.ID, 'Button1').click()
        # The title check stays inside the try because reading the title can
        # itself raise (presumably an unexpected alert after a wrong captcha,
        # the "known bug" of the original script -- TODO confirm).
        if chrome.title in (error_title, login_title):
            print_ERROR('跳转失败')
            return False
    except Exception as exc:
        # Log the caught instance; the original passed the Exception class.
        print_Exception(exc)
        return False
    return True


def _write_rows_to_excel(rows, filename='score.xlsx'):
    """Write the direct ``td`` cells of each table row to a new workbook and save it."""
    work_book = openpyxl.Workbook()
    sheet = work_book.worksheets[0]
    for row_idx, row in enumerate(rows, start=1):
        # Take however many cells the row has, not a fixed count of 19.
        cells = row.find_elements(By.XPATH, './td')
        for col_idx, cell in enumerate(cells, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=cell.text)
    work_book.save(filename)


def get_score():
    """Navigate to the score page and save the score table to ``score.xlsx``.

    Hovers over the fifth top-level menu entry, clicks its fourth sub-entry,
    switches into the ``zhuti`` frame, clicks ``btn_zcj`` and writes every
    table row to an Excel file. If the session has dropped back to the login
    or error page, it logs in again and reports failure so the caller can
    retry.

    Returns:
        True when the workbook was saved; False otherwise. Browser errors are
        logged, not propagated.
    """
    logged_out_titles = ('ERROR - 出错啦!', '欢迎使用正方教务管理系统!请登录')
    try:
        hovertarget = chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/a/span')
        ActionChains(chrome).move_to_element(hovertarget).perform()
        chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/ul/li[4]/a').click()
    except Exception as exc:
        print_Exception(exc)
        print_ERROR('成绩查询按钮点击失败')
        return False
    if chrome.title in logged_out_titles:
        auto_Login()
        return False
    time.sleep(5)  # fixed wait for the score page to load
    try:
        chrome.switch_to.frame('zhuti')
        try:
            chrome.find_element(By.ID, 'btn_zcj').click()
            if chrome.title in logged_out_titles:
                chrome.switch_to.default_content()
                # Pause between attempts so the re-login loop does not spin.
                while not auto_Login():
                    time.sleep(safe_time)
                return False
            rows = chrome.find_elements(By.XPATH, '/html/body/form/div[2]/div/span/div[1]/table[1]/tbody/tr')
            _write_rows_to_excel(rows)
        finally:
            # Always leave the frame; otherwise a retry would look for the
            # menu inside ``zhuti`` and fail every time.
            chrome.switch_to.default_content()
    except Exception as exc:
        print_Exception(exc)
        print_ERROR('成绩获取错误')
        return False
    return True


def _login_with_retry():
    """Log in, repeating while ``retry`` is enabled; return True once logged in."""
    attempts = 0
    while True:
        attempts += 1
        if auto_Login():
            print_INFO('登录成功')
            return True
        print_ERROR('尝试登录失败')
        if not retry:
            return False
        if attempts > 10:
            print('\033[0;32m已经为你尝试了' + str(
                attempts) + '次登录, 全部登录失败。建议更换服务器或检查你的账号密码是否正确。\033[0m')
        time.sleep(safe_time)


def _score_attempts_exhausted(max_attempts=5):
    """Try ``get_score`` up to ``max_attempts`` times; return True only if all attempts failed."""
    for _ in range(max_attempts):
        if get_score():
            print_INFO('查询成功')
            return False
        print_ERROR('查询失败')
        if not retry:
            return False
        time.sleep(safe_time)
    return True


def main():
    """Run the action selected by the module-level ``mode`` setting.

    Mode 1 logs in and exports the scores, then waits for Enter before
    starting another round. When every query attempt fails, it logs in again
    immediately instead of waiting. Modes 2 and 3 are not implemented yet.
    """
    if mode == 1:
        print_INFO('开始查询成绩')
        chrome.maximize_window()
        while True:
            # Query only after a successful login; the original went on to
            # query even when login had failed with retry disabled.
            if _login_with_retry():
                time.sleep(1)
                if _score_attempts_exhausted():
                    continue
            input()  # pause until the user presses Enter
    elif mode in (2, 3):
        print_INFO('该功能正在开发')
    else:
        print_ERROR('未知的查询模式')



# Script entry point: main() runs unconditionally when this file is executed or imported.
main()

版权声明:本文为qq_62284064原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。