# -*- coding: utf-8 -*-
import re
from selenium.webdriver import ActionChains
from selenium import webdriver
from PIL import Image
import json
import time
from BaiDu_SDK.aip.ocr import AipOcr
from PIL import Image
import pytesseract
from urllib.request import urlretrieve
class crack_text_captcha(object):
    """Automate the click-the-characters captcha on the patent query login form.

    The flow driven by ``main`` is: fill in the login form and screenshot the
    page, crop the captcha area to a greyscale image, OCR that image with the
    Baidu ``AipOcr`` client, and click the requested characters in order
    through Selenium.
    """

    def __init__(self, driver):
        """Store the Selenium driver and build the Baidu OCR client.

        Args:
            driver: Selenium WebDriver used for every page interaction.
        """
        super(crack_text_captcha, self).__init__()
        # Selenium driver.
        self.driver = driver
        # NOTE(review): the Baidu OCR credentials below are committed in source;
        # they should be rotated and loaded from configuration instead.
        # Baidu OCR app id.
        self.APP_ID = '17809029'
        # Baidu OCR app key.
        self.APP_KEY = 'XyoghidG1NXFuGu9jlkG9YmC'
        # Baidu OCR secret key.
        self.SECRET_KEY = '1RcuwsLprEKiYBkuGL9ef127MZc4jpeX'
        # Baidu OCR SDK client.
        self.CLIENT = AipOcr(self.APP_ID, self.APP_KEY, self.SECRET_KEY)

    def fill_form(self,):
        """Fill in the login form, click the captcha prompt and screenshot the page.

        The screenshot is written to ``web_image.png`` in the working directory.
        """
        # NOTE(review): the account name and password are hard-coded here.
        user_element = self.driver.find_element_by_id('username1')
        user_element.clear()
        user_element.send_keys('13104966565')
        pass_element = self.driver.find_element_by_id('password1')
        pass_element.clear()
        pass_element.send_keys('@Ahjm456')
        # Presumably clicking the prompt makes the captcha image appear - confirm.
        valid_button = self.driver.find_element_by_id('selectyzm_text')
        valid_button.click()
        # Give the page a moment to render before taking the screenshot.
        time.sleep(1)
        self.driver.save_screenshot('web_image.png')

    def image_to_binary(self,):
        """Crop the captcha out of the screenshot and save it as greyscale.

        Reads ``./web_image.png`` and writes ``grey_code.png``. Despite the
        name, only a greyscale ("L") conversion is applied; no thresholding is
        performed. Failures are printed and swallowed (best effort).
        """
        try:
            # NOTE(review): fixed pixel box - presumably tuned for a maximized
            # window on the author's screen; verify on other resolutions.
            left = 1079
            top = 344
            right = 1540
            bottom = 668
            img = Image.open('./web_image.png')
            img = img.crop((left, top, right, bottom))
            pic = img.convert("L")
            pic.save("grey_code.png")
            time.sleep(3)
        except Exception as e:
            print('图片二值化失败:%s' % str(e))

    def get_file(self,):
        """Return the raw bytes of ``./grey_code.png``."""
        file_path = "./grey_code.png"
        with open(file_path, 'rb') as pic:
            return pic.read()

    def target_words(self,):
        """Return the quoted text found in the captcha prompt element.

        Returns:
            The list produced by ``findall`` on the prompt text, or ``None``
            when the element cannot be read (the error is printed).
        """
        try:
            text_element = self.driver.find_element_by_id('selectyzm_text')
            # NOTE(review): the pattern is greedy, so a prompt with several
            # quoted parts yields a single match spanning all of them.
            match = re.compile('"(.*)"')
            return match.findall(text_element.text)
        except Exception as e:
            print("获取小图文字失败%s" % str(e))
            return None

    @staticmethod
    def _match_targets(words, chars):
        """Pick, in the order given by ``words``, the OCR entries for each word.

        Args:
            words: Iterable of target characters, or ``None``.
            chars: List of OCR character dicts, each with a ``'char'`` key.

        Returns:
            A new list of the matching entries; ``chars`` is not modified.
        """
        matched = []
        for word in words or ():
            for item in chars:
                if item.get('char') == word:
                    matched.append(item)
        return matched

    def position_info(self, text):
        """OCR the greyscale captcha and locate the characters to click.

        Args:
            text: Target characters in click order. When ``None`` the prompt
                is read again through ``target_words``.

        Returns:
            The OCR character entries matching the targets, in target order,
            or ``None`` when the OCR request or parsing fails (the error is
            printed).
        """
        try:
            op = {'language_type': 'CHN_ENG', 'recognize_granularity': 'small'}
            # NOTE(review): presumably a pause to respect the OCR service's
            # rate limit - confirm whether 20 seconds is really required.
            time.sleep(20)
            # General-purpose recognition (the SDK's `accurate` call was the
            # previously tried alternative).
            res = self.CLIENT.general(self.get_file(), options=op)
            print("接口返回的位置信息:%s" % res)
            # Flatten the per-line results into one list of character entries.
            all_pos_info = []
            for item in res['words_result']:
                all_pos_info.extend(item['chars'])
            words = text if text is not None else self.target_words()
            # Matches go into a separate list: appending to the list being
            # iterated never terminates once a character matches.
            return self._match_targets(words, all_pos_info)
        except Exception as e:
            print("获取文字位置信息失败%s" % str(e))
            return None

    def click_words(self, location_info):
        """Click each located character on the captcha image, in order.

        Args:
            location_info: List of OCR entries carrying a ``'location'`` dict
                with ``'left'`` and ``'top'``. ``None`` or an empty list is a
                no-op.
        """
        if not location_info:
            # Nothing was recognised; avoid touching the page at all.
            return
        img_element = self.driver.find_element_by_xpath(
            "/html/body/div[2]/div[11]/div[2]/div[1]/form/div[1]/div[2]/img")
        for info in location_info:
            location = info['location']
            # NOTE(review): the fixed +20 px presumably moves the click from
            # the box corner into the glyph - confirm.
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=img_element,
                xoffset=location['left'] + 20,
                yoffset=location['top'] + 20).click().perform()
            time.sleep(1)

    def pic_download(self,):
        """Download the arithmetic captcha image to ``./verify_code.png``."""
        IMAGE_URL = "http://cpquery.sipo.gov.cn/freeze.main?txn-code=createImgServlet&freshStept=1"
        urlretrieve(IMAGE_URL, './verify_code.png')

    @staticmethod
    def _evaluate_expression(text):
        """Evaluate OCR text of the form ``<digit><symbol><digit>``.

        Whitespace is ignored and anything after the third character is
        disregarded. ``'+'`` adds; any other symbol subtracts.

        Returns:
            The integer result, or ``-99`` when the text cannot be parsed.
        """
        compact = ''.join(text.split())
        if len(compact) < 3:
            return -99
        try:
            left = int(compact[0])
            right = int(compact[2])
        except ValueError:
            return -99
        if compact[1] == '+':
            return left + right
        return left - right

    def crack_image_code(self,):
        """Download and solve the two-digit add/subtract captcha.

        Returns:
            The computed result, or ``-99`` when the OCR text is unusable.
        """
        self.pic_download()
        result = pytesseract.image_to_string(Image.open('./verify_code.png'))
        print(result)
        # Work on the text itself: indexing encoded bytes yields integers, so
        # the operator comparison and digit conversion would both be wrong.
        return self._evaluate_expression(result)

    def main(self,):
        """Run the whole flow: fill the form, OCR the captcha, click the targets."""
        self.fill_form()
        self.image_to_binary()
        time.sleep(3)
        text = self.target_words()
        print("目标文字为:%s" % text)
        words_location = self.position_info(text)
        print("目标文字位置信息:%s" % words_location)
        if not words_location:
            # Report the failure instead of announcing success and clicking nothing.
            print("文字识别失败")
            return
        print("文字识别成功")
        self.click_words(words_location)
        # TODO: remaining steps that are not implemented yet:
        #   - retry recognition while fewer than the expected characters are found
        #   - click the login button (id 'publiclogin')
        #   - accept the statement (id 'agreeid') and continue (id 'goBtn')
        #   - enter the patent number (id 'select-key:shenqingh')
        #   - fill the arithmetic captcha result (id 'very-code') and click
        #     the query button (id 'query')
        #   - save the query result and pass it on through the interface
def unlock():
    """Open the patent query site in Chrome and run the captcha solver on it."""
    # The Chrome driver binary has to be available; the original note says to
    # place it in the current folder.
    browser = webdriver.Chrome()
    browser.maximize_window()
    browser.get('http://cpquery.sipo.gov.cn/')
    # Hand the live browser session to the solver and run its full flow.
    crack_text_captcha(browser).main()


if __name__ == '__main__':
    unlock()
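# Copyright notice: this is an original article by weixin_43866211, published under the CC 4.0 BY-SA license; when reposting, include a link to the original source and this notice.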