Python爬虫详解(六)——爬取王者荣耀英雄图片(包括皮肤)
之前都是爬取文本,今天爬取图片,内容简单,看看就会
点个赞留个关注吧!!
上代码:
# -*- coding: utf-8 -*-
import requests
import re
import os
from bs4 import BeautifulSoup
# Download every skin image of every hero listed on the official hero-list
# page, saving them as "<hero name><skin index>.jpg" inside OUTPUT_DIR.

OUTPUT_DIR = '王者荣耀图片爬取'
BASE_URL = 'https://pvp.qq.com/web201605/'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# Captures everything between the leading "//" and the trailing "<N>.jpg" of
# the skin image URL, i.e. the part shared by all skins of one hero.
SKIN_URL_RE = re.compile(r"//(game\.gtimg\.cn/[^'\")\s]*?)\d+\.jpg")


def skin_url_prefix(style):
    """Return the https URL prefix shared by all skin images of one hero.

    ``style`` is the ``style`` attribute of the hero page's skin container.
    NOTE(review): assumes it embeds a protocol-relative URL such as
    ``url('//game.gtimg.cn/.../105-bigskin-1.jpg')`` -- confirm against the
    live page.  The returned prefix stops right before the skin number so the
    caller can append ``"<n>.jpg"``.

    The previous expression (``'game.gtimg.cn.*?.'`` followed by ``[:-2]``)
    always produced ``game.gtimg.c`` and therefore an unusable URL.

    Raises:
        ValueError: if no skin image URL is found in ``style``.
    """
    match = SKIN_URL_RE.search(style)
    if match is None:
        raise ValueError(f'no skin image URL found in style: {style!r}')
    return 'https://' + match.group(1)


def skin_names(img_names):
    """Split a ``data-imgname`` value into the list of skin names.

    Entries are separated by ``|``; a trailing ``&<digits>`` marker on an
    entry is dropped.  Only that trailing marker is removed, so digits that
    are part of a skin name are preserved.
    """
    return [re.sub(r'&\d*$', '', entry) for entry in img_names.split('|')]


def fetch_soup(url):
    """Download ``url`` and return it parsed with BeautifulSoup.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser', from_encoding="utf-8")


def download_hero_skins(hero_url):
    """Save all skin images of the hero whose detail page is ``hero_url``."""
    # The detail page is fetched and parsed once; every field is read from
    # this single parse tree.
    soup = fetch_soup(hero_url)
    hero_name = soup.find('h2', attrs={'class': 'cover-name'}).get_text()
    print(hero_name, hero_url)

    skin_box = soup.find('div', attrs={'class': 'zk-con1 zk-con'})
    prefix = skin_url_prefix(skin_box['style'])
    skin_list = skin_box.find('ul', attrs={'class': 'pic-pf-list pic-pf-list3'})
    names = skin_names(skin_list['data-imgname'])
    print(' '.join(f'[{name}]' for name in names))

    # Skin images are numbered from 1, one per entry in data-imgname.
    for index in range(1, len(names) + 1):
        picture_url = f'{prefix}{index}.jpg'
        picture = requests.get(picture_url, timeout=REQUEST_TIMEOUT)
        if not picture.ok:
            # Skip instead of writing an error page to disk as a .jpg.
            print(f'skipped {picture_url}: HTTP {picture.status_code}')
            continue
        target = os.path.join(OUTPUT_DIR, f'{hero_name}{index}.jpg')
        with open(target, 'wb') as fp:
            fp.write(picture.content)


def main():
    """Create the output directory and download the skins of every hero."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    hero_list = fetch_soup(BASE_URL + 'herolist.shtml').find(
        'ul', attrs={'class': 'herolist clearfix'})
    for item in hero_list.find_all('li'):
        download_hero_skins(BASE_URL + item.find('a')['href'])


if __name__ == '__main__':
    main()
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
二维码