# 爬取全民K歌主页上的歌 -- download the songs listed on a WeSing (全民K歌) user profile page.
import urllib.request
import requests
import os
import re
# Download every song linked from a WeSing (全民K歌) profile page.
#
# Flow: fetch the profile page, extract (song page link, title) pairs, fetch
# each song page, extract the audio stream URL from it and save the audio as
# "<title>.mp3" in the download directory.  When the target file already
# exists the user is asked whether to overwrite it or to type another name.

# Download directory and profile page to scrape.  Both are hard-coded; swap
# them for input() calls to make the script interactive.
path = "C:\\Users\\HUAWEI\\Desktop\\spider\\kg"
url = "https://kg.qq.com/node/personal?uid=6a9d9a81222830833c"

# Header sent with every page request so the request looks like a browser.
# NOTE(review): the value starts with a stray "User-Agent:" prefix.  It is
# kept as-is because the regexes below were presumably written against the
# page served for exactly this header -- confirm before cleaning it up.
headers = {'User-Agent':'User-Agent:Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'}

# Fetch and decode the profile page; the context manager closes the response.
req = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(req) as res:
    html = res.read().decode("utf-8")

# Each match is a (song page link, song title) tuple.
pat = '<a href="(.*?)" .*? target="_blank">(.*?)</a>'
dlist = re.findall(pat, html)

# Audio stream URL as it appears in a song page.  The pattern has no capture
# group and ends with the double quote that follows the URL, so every match
# carries that trailing quote; it is stripped before the URL is requested.
pat_music = 'http://[a-z][a-z].stream.kg.qq.com.*.m4a.*?"'

# Characters that cannot appear in a Windows file name.
invalid_name_chars = r'[\\/:*?"<>|]'

# Make sure the download directory exists so that open() below cannot fail
# just because the folder has not been created yet.
os.makedirs(path, exist_ok=True)

# Print what is currently stored under the download directory.
for root, dirs, files in os.walk(path):
    print('root:', root)
    print('dirs', dirs)
    print('files', files)

for link, title in dlist:
    # Fetch the individual song page and look for the audio stream URL.
    music = urllib.request.Request(link, headers=headers)
    with urllib.request.urlopen(music) as res:
        music_html = res.read().decode("utf-8")
    mus = re.findall(pat_music, music_html)
    if not mus:
        # No stream URL on this page: nothing to download.
        continue
    stream_url = mus[0].rstrip('"')

    # Build the target file name from the title, replacing characters that
    # would make the path invalid.
    mus_name = re.sub(invalid_name_chars, "_", title) + ".mp3"
    path_url = os.path.join(path, mus_name)

    # Check the real target path rather than a directory listing taken before
    # the loop, so files written earlier in this run are detected as well.
    if os.path.exists(path_url):
        yesorno = input("是否需要覆盖:")
        if yesorno != 'y':
            # Any answer other than 'y' means: save under a user-chosen name.
            mus_name = input("重命名:")
            path_url = os.path.join(path, mus_name)

    # Download only after the target is settled, and never store an HTTP
    # error body as if it were audio.
    MP3 = requests.get(stream_url)
    if not MP3.ok:
        print(title + ":" + link + " download failed (HTTP " + str(MP3.status_code) + ")")
        continue
    with open(path_url, "wb") as f:
        f.write(MP3.content)
# 版权声明:本文为matafeiyanll原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。