分享36个C源码,总有一款适合您

C源码

分享36个C源码,总有一款适合您

下面是文件的名字。我放了一些图片,但文章里放不下所有的图,大家下载后可以看到。

源码下载链接:https://pan.baidu.com/s/1WTLgtQ2J5gfZdj-LMEYnEA?pwd=nimr 
提取码:nimr

 

import os
import shutil
import time
from time import sleep

import requests
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Inches

from framework.base.BaseFrame import BaseFrame
from sprider.business.DownLoad import DownLoad
from sprider.business.SeleniumTools import SeleniumTools
from sprider.business.SpriderTools import SpriderTools

from selenium import webdriver
from selenium.webdriver.common.by import By
from sprider.model.SpriderEntity import SpriderEntity
from sprider.access.SpriderAccess import SpriderAccess


class HuaJunCode:
    """Download source-code archives from down.chinaz.com with headless Chrome.

    ``sprider()`` walks the listing pages of one category, and
    ``sprider_detail()`` opens every entry of a page, clicks its download
    button and moves the finished archive into the output directory.

    NOTE: ``sprider()`` rewrites ``self.save_path`` and the download counters
    are never reset, so an instance is meant to be used for a single run.
    ``builder_word`` and ``builder_filter_file`` are called on ``self`` but
    are not defined in this part of the file.
    """

    base_url = "https://down.chinaz.com"  # site being scraped
    # Backslashes must be escaped: a literal ending in \" never terminates.
    save_path = "D:\\Freedom\\Sprider\\ChinaZ\\"
    sprider_count = 111  # stop once this many archives were downloaded
    # Number of listing entries to skip; converted below into a start page
    # (count // max_pager + 1) and a start index on that page (count % max_pager).
    sprider_start_count = 100

    # NOTE: these two lists are class attributes and therefore shared by
    # every instance of the class.
    word_content_list = []  # [title, image path] of each successful download
    filter_down_file = []  # file names recorded by builder_filter_file
    folder_name = ""
    page_end_number = 0
    max_pager = 15  # entries per listing page
    haved_sprider_count = 0  # archives downloaded so far
    page_count = 1  # listing page currently being processed

    # title_name -> (output folder name, category id used in the listing URL),
    # e.g. "C" -> https://down.chinaz.com/class/594_572_1.htm
    _COLUMNS = {
        "PHP": ("PHP源码", "572_5"),
        "Go": ("Go源码", "606_572"),
        "NET": ("NET源码", "572_4"),
        "ASP": ("ASP源码", "572_3"),
        "Python": ("Python源码", "604_572"),
        "JavaScript": ("JavaScript源码", "602_572"),
        "Java": ("Java源码", "572_517"),
        "HTML": ("HTML-CSS源码", "608_572"),
        "TypeScript": ("TypeScript源码", "772_572"),
        "微信小程序": ("微信小程序源码", "610_572"),
        "Ruby": ("Ruby源码", "622_572"),
        "NodeJs": ("NodeJs源码", "626_572"),
        "C++": ("C++源码", "596_572"),
        "C": ("C源码", "594_572"),
    }

    # Characters that are not allowed in Windows file names.
    _FORBIDDEN_CHARS = '\\/:*?"<>|'

    def __init__(self):
        pass

    @classmethod
    def _resolve_column(cls, title_name):
        """Return ``(folder_name, column_id)`` for *title_name*.

        :raises ValueError: if *title_name* is not a supported category.
        """
        try:
            return cls._COLUMNS[title_name]
        except KeyError:
            raise ValueError(
                "unsupported title_name: {0!r}".format(title_name)) from None

    @staticmethod
    def _sanitize_title(title):
        """Return *title* without the characters Windows rejects in file names."""
        return "".join(
            ch for ch in str(title) if ch not in HuaJunCode._FORBIDDEN_CHARS)

    def sprider(self, title_name="NET"):
        """Scrape the listing pages of one category.

        Starts at the page derived from ``sprider_start_count`` and stops at
        the last page of the pager, or as soon as ``sprider_count`` archives
        were downloaded.

        :param title_name: category key, one of the keys of ``_COLUMNS``
        :raises ValueError: if *title_name* is not a supported category
        :return: None
        """
        # Fail early on an unknown category instead of hitting an
        # AttributeError on second_column_name further down.
        self.folder_name, self.second_column_name = self._resolve_column(title_name)

        first_column_name = title_name  # first-level directory
        second_folder_name = str(self.sprider_count) + "个" + self.folder_name  # second-level directory
        self.sprider_type = second_folder_name
        # Listing page that contains the first entry to download.
        self.merchant = int(self.sprider_start_count) // int(self.max_pager) + 1
        self.file_path = self.save_path + os.sep + "Code" + os.sep + first_column_name + os.sep + second_folder_name
        self.save_path = self.save_path + os.sep + "Code" + os.sep + first_column_name + os.sep + second_folder_name + os.sep + self.folder_name
        BaseFrame().debug("开始采集ChinaZCode" + self.folder_name + "...")
        sprider_url = self.base_url + "/class/{0}_1.htm".format(self.second_column_name)
        # Temporary directory Chrome downloads into.
        down_path = ("D:\\Freedom\\Sprider\\ChinaZ\\Code\\" + first_column_name
                     + "\\" + second_folder_name + "\\Temp\\")

        # Start from empty download and output directories.
        for directory in (down_path, self.save_path):
            if os.path.exists(directory):
                shutil.rmtree(directory)
            os.makedirs(directory)

        chrome_options = webdriver.ChromeOptions()
        diy_prefs = {'profile.default_content_settings.popups': 0,
                     'download.default_directory': '{0}'.format(down_path)}
        # Register the download directory with Chrome.
        chrome_options.add_experimental_option('prefs', diy_prefs)
        chrome_options.add_argument('--headless')  # run without a window

        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.set_window_size(1280, 800)

            # The first listing page provides the pager and thus the page count.
            driver.get(sprider_url)
            div_elem = driver.find_element(By.CLASS_NAME, "main")  # listing container
            element_list = div_elem.find_elements(By.CLASS_NAME, 'item')

            laster_pager_ul = driver.find_element(By.CLASS_NAME, "el-pager")
            laster_pager_li = laster_pager_ul.find_elements(By.CLASS_NAME, 'number')
            laster_pager_url = laster_pager_li[len(laster_pager_li) - 1]
            page_end_number = int(laster_pager_url.text)

            self.page_count = self.merchant
            while self.page_count <= int(page_end_number):  # stop after the last page
                try:
                    if self.page_count == 1:
                        # Page 1 is already loaded, reuse its elements.
                        self.sprider_detail(driver, element_list, self.page_count, page_end_number, down_path)
                    else:
                        if self.haved_sprider_count == self.sprider_count:
                            BaseFrame().debug("采集到达数量采集停止...")
                            BaseFrame().debug("开始写文章...")
                            self.builder_word(self.folder_name, self.save_path, self.word_content_list)
                            BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                            break

                        next_url = self.base_url + "/class/{0}_{1}.htm".format(self.second_column_name, self.page_count)
                        driver.get(next_url)

                        div_elem = driver.find_element(By.CLASS_NAME, "main")  # listing container
                        element_list = div_elem.find_elements(By.CLASS_NAME, 'item')
                        self.sprider_detail(driver, element_list, self.page_count, page_end_number, down_path)
                    self.page_count = self.page_count + 1  # next page
                except Exception as e:
                    # The page number is not advanced here, so the same page
                    # is tried again after a short pause.
                    print("sprider()执行过程出现错误:" + str(e))
                    sleep(1)
        finally:
            # Always shut the browser down, otherwise every run leaves a
            # headless Chrome process behind.
            driver.quit()

    def sprider_detail(self, driver, element_list, page_count, max_page, down_path):
        """Download the archives of the entries on one listing page.

        :param driver: Selenium driver currently showing the listing page
        :param element_list: the ``item`` elements of the listing page
        :param page_count: number of the listing page (used for logging and
            to detect the last page)
        :param max_page: number of the last listing page
        :param down_path: directory Chrome downloads into
        :return: None
        """
        # Collect title and URL first: the elements become stale as soon as
        # the driver leaves the listing page. Tuples are used so that a "$"
        # (or any other character) in a title cannot corrupt the pair.
        entries = []
        for element in element_list:
            link = element.find_element(By.CLASS_NAME, 'name-text')
            entries.append((link.get_attribute("title"), link.get_attribute("href")))
        element_length = len(element_list)

        index = 0
        if int(self.page_count) == int(self.merchant):
            # On the start page skip the entries before the start position.
            self.sprider_start_index = int(self.sprider_start_count) % int(self.max_pager)
            index = self.sprider_start_index

        while index < element_length:
            if os.path.exists(down_path) is False:
                os.makedirs(down_path)

            if self.haved_sprider_count == self.sprider_count:
                BaseFrame().debug("采集到达数量采集停止...")
                break

            coder_title, next_url = entries[index]
            time.sleep(1)

            index = index + 1
            sprider_info = "正在采集第" + str(page_count) + "页的第" + str(index) + "个资源,共" + str(max_page) + "页资源"
            BaseFrame().debug(sprider_info)
            driver.get(next_url)  # open the detail page
            try:
                # NOTE(review): 3 presumably means "element not found" - confirm
                # against SeleniumTools.judeg_element_isexist.
                if SeleniumTools.judeg_element_isexist(driver, "CLASS_NAME", "download-item") == 3:
                    driver.back()
                    BaseFrame().debug(coder_title + "不存在源码是soft因此跳过哦....")
                    continue
                print("准备点击下载按钮...")
                driver.find_element(By.CLASS_NAME, "download-item").click()  # start the download
                sleep(1)
                # NOTE(review): presumably waits for an archive with one of the
                # listed extensions to appear in down_path - confirm the
                # meaning of the positional arguments in SpriderTools.
                result, message = SpriderTools.judge_file_exist(True, 240, 1, down_path, "zip|rar|gz|tgz")
                if result is True:
                    sprider_content = [coder_title, self.save_path + os.sep + "image" + os.sep + coder_title + ".jpg"]  # record of a successful download
                    self.word_content_list.append(sprider_content)
                    self.haved_sprider_count = self.haved_sprider_count + 1
                    BaseFrame().debug("已经采集完成第" + str(self.haved_sprider_count) + "个")
                    time.sleep(1)
                    driver.back()

                    coder_title = self._sanitize_title(coder_title)
                    files = os.listdir(down_path)
                    file_name = files[0]  # default: first file in the directory
                    if len(self.filter_down_file) > 0:
                        for file in files:
                            for filter_file in self.filter_down_file:
                                if str(file) in str(filter_file):
                                    BaseFrame().error(filter_file + "文件被过滤...")
                                else:
                                    file_name = file

                    srcFile = down_path + os.sep + file_name
                    file_ext = os.path.splitext(srcFile)[-1]

                    # Rename to the entry title, then move to the output directory.
                    dstFile = down_path + os.sep + coder_title + file_ext
                    os.rename(srcFile, dstFile)
                    srcFile = dstFile
                    dstFile = self.save_path + os.sep + coder_title + file_ext

                    shutil.move(srcFile, dstFile)
                else:
                    files = os.listdir(down_path)  # everything currently in the download directory
                    coder_title = self._sanitize_title(coder_title)
                    try:
                        if str(message) == "0个文件认定是False":
                            BaseFrame().error(coder_title + "文件不存在...")
                            shutil.rmtree(down_path)  # fails while a download is still in progress
                        else:
                            BaseFrame().error("检测下载文件出错可能原因是等待时间不够已经超时,再等待60秒...")
                            time.sleep(60)
                            shutil.rmtree(down_path)  # fails while a download is still in progress
                            self.filter_down_file.clear()
                    except Exception as e:
                        # The directory could not be removed: remember the
                        # file names so they are filtered out when moving.
                        self.builder_filter_file(files)
            except Exception as e:
                BaseFrame().error("sprider_detail()执行过程出现错误:" + str(e))
                BaseFrame().error("sprider_detail()记录下载的文件名")
                # Remember the file names so they are filtered out when moving.
                files = os.listdir(down_path)
                self.builder_filter_file(files)

        if int(page_count) == int(max_page):
            # Last listing page processed: write the summary document.
            self.builder_word(self.folder_name, self.save_path, self.word_content_list)
            BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")

Advanced Web Statistics(AWStats) v7.1

Cetus MySQL数据库中间件 v2.3.9

clumsy网络环境模拟工具 v0.3 rc4 源码包

dnnmmp开发环境 v1.4.0

EasyFlash嵌入式Flash存储器库 v4.1.0

FlashDB超轻量级数据库 v1.1.2

Freeside v2.3.0

Friso中文分词器 v1.6.4

Gear-Lib通用的C基础库 v1.1.21

HarmonyOS华为鸿蒙系统 v1.1.4 LTS版本

HarmonyOS华为鸿蒙系统 v2.0 Canary

hetao Web 服务器软件 v0.9.0

htop交互式进程查看器 v3.2.1

ip2region地址定位库 v2.11.0

IP地址数据库 v1.0

iSulad容器解决方案 v2.1.0

LCUI 图形界面开发库 v2.2.0

libhv跨平台网络库 v1.3.0

LuatOS实时操作系统 v0007

Markdown编辑器MacDown v0.6

Movable Type 博客平台 v4.38

Netdata性能实时监测工具 v1.37.1

obs-studio实时流媒体和屏幕录制软件 v28.1.2

OSPod.CMS专业建站平台 v3.0.2

PHP7框架Phalcon7 v1.2.3

rtty终端管理工具 v8.1.0

SeasLog v2.2.0

starrtc-server免费IM系统 v1.0

TBOX跨平台开发库 v1.7.2

tengine淘宝Web服务器 v2.3.4

Ventoy创建可启动U盘的工具 v1.0.86 源码包

Wireshark网络流量分析器 v4.0.2

xlswriter v1.5.2

双鱼林vb图书信息管理demo系统 v1.0

多功能信息管理 v3.0 共享版 v3.0 共享版

雷傲极酷超级论坛LeoBBS X Build 090206  简体正式版

最后送大家一首诗:

山高路远坑深,
大军纵横驰奔,
谁敢横刀立马?
惟有点赞加关注大军。
 

本图文内容由网友从网络收集整理提供,仅作为学习参考使用,版权属于原作者。
THE END
分享
二维码
<<上一篇
下一篇>>