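# 1. Read the HTML file saved on the local disk.
# 2. Extract the image links (<img> src attributes) from the page.
# 3. Display the extracted links on screen.
# 4. Save the extracted links to a file.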
def DuQuWenJian(inputfile: str) -> list:
    """Read a UTF-8 text file and return its lines.

    Args:
        inputfile: Path of the file to read.

    Returns:
        A list with one string per line; line terminators are kept,
        exactly as ``file.readlines()`` returns them.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle even when reading fails.
    with open(inputfile, 'r', encoding='utf-8') as f:
        return f.readlines()
def TiQuImgLianJei(filetxt):
    """Extract image URLs from lines of HTML text.

    Only lines containing the substring ``'img'`` are examined. On such a
    line every quoted ``src`` attribute value (single or double quotes) is
    collected, and values containing ``'http'`` are kept.

    Compared with naive ``split`` parsing this does not raise
    ``IndexError`` on lines that mention ``img`` without a double-quoted
    value, it finds every image when several share one line, and it never
    returns an unrelated quoted attribute from a line that has no ``src``.

    Args:
        filetxt: Iterable of strings, one per line of the HTML document.

    Returns:
        A list of URL strings in the order they appear in the input.
    """
    # Local import: the file has no top-level import block to extend.
    import re

    # Group 1 is the opening quote, group 2 the attribute value; the
    # back-reference makes the closing quote match the opening one.
    src_pattern = re.compile(r'''src\s*=\s*(["'])(.*?)\1''')

    urls = []
    for line in filetxt:
        if 'img' not in line:
            continue
        for match in src_pattern.finditer(line):
            url = match.group(2)
            # Relative paths and data placeholders are skipped.
            if 'http' in url:
                urls.append(url)
    return urls
def XianShiLianJei(TiQuDaoDeLianJei):
    """Print each extracted URL on its own line, numbered from 1.

    Args:
        TiQuDaoDeLianJei: Iterable of URL strings to display.
    """
    # enumerate(..., 1) replaces the manually incremented counter.
    for index, url in enumerate(TiQuDaoDeLianJei, 1):
        print('第{}张图片URL:{}'.format(index, url))
def ShuChuDaoWenJian(outputfile, TiQuDaoDeLianJei):
    """Write the URLs to a UTF-8 text file, one per line, and report it.

    The target file is created or truncated. After writing, a separator
    line and a confirmation message naming the file are printed.

    Args:
        outputfile: Path of the file to write.
        TiQuDaoDeLianJei: Iterable of URL strings to save.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The context manager closes the handle even when a write fails.
    with open(outputfile, 'w', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in TiQuDaoDeLianJei)
    print('\n-------------------------------------------')
    print("\n链接已保存至{}".format(outputfile))
def main(inputfile='/Users/denglinzhe/Documents/html.txt',
         outputfile='/Users/denglinzhe/Documents/pic.txt'):
    """Run the pipeline: read the HTML, extract, display and save links.

    Args:
        inputfile: Path of the saved HTML file to read. Defaults to the
            location this script was originally written for.
        outputfile: Path of the text file that receives the extracted
            links. Defaults to the original location as well.
    """
    filetxt = DuQuWenJian(inputfile)
    TiQuDaoDeLianJei = TiQuImgLianJei(filetxt)
    XianShiLianJei(TiQuDaoDeLianJei)
    ShuChuDaoWenJian(outputfile, TiQuDaoDeLianJei)
# Run the pipeline only when executed as a script, so importing this
# module for its functions does not touch the file system.
if __name__ == "__main__":
    main()