python 原生ebay(易贝网)搜索页码爬虫

近期有业务涉及到易贝网的爬虫,写了一个 demo 拿出来供大家参考。淘宝、闲鱼、1688、速卖通、京东、苏宁、国美、当当、网易、微店、小红书、拼多多、唯品会、亚马逊、一号店的爬虫后面也会慢慢放出来。


#  -*-coding:utf8 -*-
import requests
from lxml import html

from utils import user_agent


def run(q, page):
    """Scrape one page of eBay search results for a keyword.

    Args:
        q: Search keyword, sent as the ``_nkw`` query parameter.
        page: 1-based results page number (an int or a numeric string).

    Returns:
        A list of dicts, one per ``<li>`` found under the element with id
        ``ListViewInner``. Each dict has the keys ``img`` (main image URL),
        ``url`` (item link), ``table`` (title text) and ``jiage`` (price
        text). A value is ``None`` when the listing has no matching node.

    Raises:
        ValueError: If ``page`` cannot be converted to an integer.
        requests.RequestException: If the HTTP request fails or times out.
    """
    page_number = int(page)

    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "accept-encoding": "gzip, deflate",
        "accept-language": "zh-CN,zh;q=0.8",
        "connection": "keep-alive",
        "User-Agent": user_agent.userAgent(),
        "X-Requested-With": "XMLHttpRequest",
    }

    # Passing the query as ``params`` lets requests URL-encode the keyword,
    # so spaces, '&' or non-ASCII characters cannot corrupt the query string.
    params = {
        "_sacat": 0,
        "_nkw": q,
        # The page number must follow ``page``; it used to be fixed at 4.
        "_pgn": page_number,
        # Offset of 50 results per page, kept from the original request.
        "_skc": 50 * (page_number - 1),
        "rt": "nc",
        "_dmd": 1,
    }

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        "https://www.ebay.com/sch/i.html",
        params=params,
        headers=headers,
        timeout=30,
    )
    dom_tree = html.etree.HTML(response.content.decode('utf-8'))
    if dom_tree is None:
        # Nothing parseable came back, so there are no listings to report.
        return []

    def first_or_none(nodes):
        # First match, or None when the listing lacks this field.
        return nodes[0] if nodes else None

    def last_or_none(nodes):
        # Last text node, or None when the listing lacks this field.
        return nodes[-1] if nodes else None

    results = []
    # Query relative to each <li> instead of re-walking the whole document
    # with an index-based absolute XPath for every field.
    for item in dom_tree.xpath('//*[@id="ListViewInner"]/li'):
        results.append({
            "img": first_or_none(item.xpath('./div[1]/div/a/img/@src')),  # main image
            "url": first_or_none(item.xpath('./div[1]/div/a/@href')),  # item link
            "table": last_or_none(item.xpath('./h3/a/text()')),  # title
            "jiage": last_or_none(item.xpath('./ul[1]/li[1]/span/text()')),  # price
        })
    return results

if __name__ == "__main__":
    # Demo entry point: fetch the first results page for "iphone" and
    # print the scraped listings.
    demo_results = run("iphone", 1)
    print(demo_results)



 


版权声明:本文为qq_31977367原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。