aiohttp 多任务异步协程式爬虫爬取某站图片

爬取的是图片的原图地址(即点进图片后看到的高清大图),
而不是预览图。
#版权原因 不能发图
代码如下
只需改变文件储存路径即可

import requests
import os
import asyncio
import aiohttp


# Host prefix that is prepended to every 'originalPath' value.
_IMAGE_HOST = 'https://img2.huashi6.com/'
# Deletion table for characters removed from titles before they are used as
# file names.
_FORBIDDEN_CHARS = str.maketrans('', '', '/\\:*?"<>|')


def _safe_filename(title):
    """Return *title* without forbidden characters and outer whitespace."""
    cleaned = title.translate(_FORBIDDEN_CHARS).strip()
    # A title made only of forbidden characters would otherwise yield '.jpg'.
    return cleaned or 'untitled'


def _collect_works(data):
    """Return the work dicts (each holding 'coverImage' and 'title') in *data*."""
    if 'coverImage' in data:
        return [data]
    if 'worksList' in data:
        return list(data['worksList'])
    return []


async def _save_image(session, work):
    """Download the original image of one work and store it under ``picPath``."""
    url = _IMAGE_HOST + work['coverImage']['originalPath']
    filename = _safe_filename(work['title'])
    async with session.get(url) as response:
        if response.status != 200:
            # Do not write an error page to disk as if it were an image.
            print(filename, '下载失败', response.status)
            return
        picdata = await response.read()
    cpicPath = os.path.join(picPath, filename + '.jpg')
    with open(cpicPath, 'wb') as f:
        f.write(picdata)
    print(filename, '下载成功')


async def getpic(oneof_datalist):
    """Download every original image referenced by one listing entry.

    ``oneof_datalist['data']`` either describes a single work (it has a
    'coverImage' key) or a collection of works under 'worksList'. Each work's
    image is saved as ``<title>.jpg`` inside the module-level ``picPath``
    directory; entries with neither key are ignored.

    Args:
        oneof_datalist: One element of the listing response; must contain a
            'data' mapping.

    Raises:
        KeyError: If 'data', or a work's 'coverImage'/'originalPath'/'title',
            is missing.
    """
    works = _collect_works(oneof_datalist['data'])
    if not works:
        # Nothing to download, so do not open a session at all.
        return
    async with aiohttp.ClientSession() as session:
        for work in works:
            await _save_image(session, work)

# Browser-like User-Agent sent with every listing request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
}

# Number of listing pages to fetch and the directory the images are saved to.
page = int(input('enter  pages'))
picPath = input('plz input a  filepath ')

os.makedirs(picPath, exist_ok=True)

# Collect the entries of every requested listing page. The upper bound is
# inclusive so that entering N really fetches N pages; a separate loop
# variable keeps the page count from being overwritten.
datalist = []
for page_no in range(1, page + 1):
    url = 'https://rt.huashi6.com/front/index/load_pc_data?_ts_=1636363387274&cursor={}-1636361232855'.format(page_no)
    # The timeout prevents a stalled connection from hanging the script.
    response_data = requests.post(url=url, headers=headers, timeout=30).json()
    datalist.extend(response_data['data']['datas'])
print(len(datalist))


async def _download_all(items):
    """Run ``getpic`` for every item concurrently and report failed items."""
    # gather() accepts an empty argument list, and return_exceptions=True
    # keeps one failing item from hiding or cancelling the others.
    results = await asyncio.gather(
        *(getpic(item) for item in items),
        return_exceptions=True,
    )
    for result in results:
        if isinstance(result, BaseException):
            print('下载出错', repr(result))


asyncio.run(_download_all(datalist))


运行后输入页数和文件存储路径,然后等待下载完毕即可。

https://www.huashi6.com/


版权声明:本文为weixin_45059947原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。