Python爬虫详解（六）——爬取王者荣耀英雄图片（包括皮肤）

admin • 2022-01-08 08:12 • Python

之前都是爬取文本，今天爬取图片，内容简单，看看就会

点个赞留个关注吧！！

上代码：

# -*- coding: utf-8 -*-
import requests
import re
import os
from bs4 import BeautifulSoup

# Make sure the download directory exists before any image is written.
# exist_ok=True turns this into a no-op when the directory is already there
# and removes the check-then-create race of a separate os.path.exists() test.
os.makedirs('王者荣耀图片爬取', exist_ok=True)

# Download the hero list page.  The timeout keeps the script from hanging
# forever on a stalled connection (requests waits indefinitely by default),
# and raise_for_status() turns an HTTP error into an immediate, readable
# failure instead of parsing an error page.
URL = requests.get('https://pvp.qq.com/web201605/herolist.shtml', timeout=10)
URL.raise_for_status()
html = URL.content
soup = BeautifulSoup(html, 'html.parser', from_encoding="utf-8")  # built-in HTML parser

# The <ul class="herolist clearfix"> element; the download loop walks its <li> items.
div_people_list = soup.find('ul', attrs={'class': 'herolist clearfix'})
if div_people_list is None:
    # Fail here with a clear message rather than with an AttributeError on None later.
    raise RuntimeError('hero list <ul class="herolist clearfix"> not found in page')

# Seconds to wait for each HTTP request before giving up.
_REQUEST_TIMEOUT = 10

# Captures the skin-image URL (without scheme) up to, but not including, the
# skin index, e.g. "game.gtimg.cn/.../105-bigskin-" from
# "url('//game.gtimg.cn/.../105-bigskin-1.jpg')".  The dots are escaped: the
# previous pattern ended in an unescaped "." and therefore stopped one
# character after the host name, producing unusable image URLs.
_SKIN_URL_RE = re.compile(r'//(game\.gtimg\.cn.*?)\d+\.jpg')


def _skin_url_prefix(style):
    """Return the https URL prefix shared by all skin images of one hero.

    *style* is the inline ``style`` attribute of the skin panel; it is
    expected to reference the first skin image on game.gtimg.cn.  Appending
    ``"<index>.jpg"`` to the returned prefix yields the URL of a skin image.

    Raises:
        ValueError: if no game.gtimg.cn ``.jpg`` URL is found in *style*.
    """
    found = _SKIN_URL_RE.search(style)
    if found is None:
        raise ValueError(f'no skin image URL found in style: {style!r}')
    return 'https://' + found.group(1)


for hero_item in div_people_list.find_all('li'):
    hero_link = hero_item.find('a')
    hero_url = 'https://pvp.qq.com/web201605/' + hero_link['href']  # hero detail page

    # Fetch and parse the detail page once; every field below comes from it.
    hero_response = requests.get(hero_url, timeout=_REQUEST_TIMEOUT)
    hero_response.raise_for_status()
    hero_soup = BeautifulSoup(hero_response.content, 'html.parser', from_encoding="utf-8")

    hero_name = hero_soup.find('h2', attrs={'class': 'cover-name'}).get_text()
    print(hero_name, hero_url)

    skin_panel = hero_soup.find('div', attrs={'class': 'zk-con1 zk-con'})
    url_prefix = _skin_url_prefix(skin_panel['style'])

    # data-imgname holds the skins as "|"-separated entries; "&" and digits
    # are stripped below, leaving only the names for display.
    skin_list = skin_panel.find('ul', attrs={'class': 'pic-pf-list pic-pf-list3'})
    imgname = skin_list['data-imgname']
    labels = imgname.replace("|", "]  [").replace("&", "")
    labels = ''.join(ch for ch in labels if not ch.isdigit())
    print('[' + labels + ']')

    # One image per "|"-separated entry; image indices start at 1.
    skin_count = imgname.count('|') + 1
    for index in range(1, skin_count + 1):
        image_url = f'{url_prefix}{index}.jpg'
        picture = requests.get(image_url, timeout=_REQUEST_TIMEOUT)
        if not picture.ok:
            # Do not save an HTTP error page under a .jpg name.
            print(f'skipped {image_url}: HTTP {picture.status_code}')
            continue
        with open(f'王者荣耀图片爬取/{hero_name}{index}.jpg', 'wb') as fp:
            fp.write(picture.content)