資源簡介
這是一個基於 Python Scrapy 的專利爬蟲
代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on 2017/3/19
@author: will4906
"""
import os
import sys
import time

from scrapy import cmdline

from config.baseConfig import baseConfig
from util.excel.ExcelUtil import ExcelUtil
def initProgress():
    """Create the ``output`` and ``log`` working directories.

    Both directories are created relative to the current working
    directory. A directory that already exists is left untouched, so the
    function can safely be called on every start-up.

    Raises:
        OSError: If a directory cannot be created for a reason other than
            already existing (for example, insufficient permissions).
    """
    for dir_name in ("output", "log"):
        # exist_ok makes repeated runs a no-op while real failures
        # (permissions, invalid path) still surface to the caller.
        os.makedirs(dir_name, exist_ok=True)
def init_excel_config():
    """Write the header row of the result spreadsheet.

    Opens the workbook named by ``baseConfig.FILE_NAME`` through
    ``ExcelUtil``, writes one column title per cell into row 0 of the
    first sheet (index 0), and commits the edit.
    """
    title_list = [
        "專利類型",
        "專利名稱",
        "法律狀態",
        "法律狀態最后修改日期",
        "公布號",
        "申請公布日/授權公告日",
        "申請號",
        "申請日",
        "申請人/專利權人",
        "發明人",
        "IPC分類號",
        "代理人",
        "代理機構",
        "外觀設計洛迦諾分類號",
    ]
    editor = ExcelUtil(baseConfig.FILE_NAME).edit()
    sheet = editor.getSheet(0)
    # Row 0 is the header row; the column index follows the list order.
    for column, title in enumerate(title_list):
        sheet.write(0, column, title)
    editor.commit()
# Sample pager text kept for reference: "第頁 共 10 頁 721 條數據"
if __name__ == '__main__':
    # Script entry point: print the banner, prepare the working
    # directories and the spreadsheet header, then start the spider.
    print("程序開始")
    print(
        "* 使用說明:https://github.com/will4906/PatentCrawler/wiki\n"
        "* 代碼更新:https://github.com/will4906/PatentCrawler\n"
        "* bug反饋、交流建議:\n"
        "郵箱:553105821@qq.com\n"
        "github:https://github.com/will4906/PatentCrawler/issues"
    )
    initProgress()
    init_excel_config()
    # Pass argv as an explicit list instead of splitting a concatenated
    # string, so a log file name containing whitespace stays one setting.
    cmdline.execute(
        ["scrapy", "crawl", "Patent", "-s", "LOG_FILE=" + baseConfig.LOG_FILE_NAME]
    )
    # NOTE: sample pager strings kept from earlier experiments:
    #   "共 1 頁   5條數據" and "第頁 共 10 頁 721 條數據".
    # The record count is the number between the second "頁" and "條".
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-05-22 11:45  PatentCrawler\
     目錄           0  2017-05-22 11:45  PatentCrawler\.git\
     文件          16  2017-05-21 17:38  PatentCrawler\.git\COMMIT_EDITMSG
     文件         308  2017-05-21 13:39  PatentCrawler\.git\config
     文件          73  2017-05-21 13:35  PatentCrawler\.git\desc
     文件          23  2017-05-21 13:39  PatentCrawler\.git\HEAD
     目錄           0  2017-05-21 13:35  PatentCrawler\.git\hooks\
     文件         478  2017-05-21 13:35  PatentCrawler\.git\hooks\applypatch-msg.sample
     文件         896  2017-05-21 13:35  PatentCrawler\.git\hooks\commit-msg.sample
     文件         189  2017-05-21 13:35  PatentCrawler\.git\hooks\post-update.sample
     文件         424  2017-05-21 13:35  PatentCrawler\.git\hooks\pre-applypatch.sample
     文件        1642  2017-05-21 13:35  PatentCrawler\.git\hooks\pre-commit.sample
     文件        1348  2017-05-21 13:35  PatentCrawler\.git\hooks\pre-push.sample
     文件        4951  2017-05-21 13:35  PatentCrawler\.git\hooks\pre-reba
     文件         544  2017-05-21 13:35  PatentCrawler\.git\hooks\pre-receive.sample
     文件        1239  2017-05-21 13:35  PatentCrawler\.git\hooks\prepare-commit-msg.sample
     文件        3610  2017-05-21 13:35  PatentCrawler\.git\hooks\update.sample
     文件        3206  2017-05-21 17:38  PatentCrawler\.git\index
     目錄           0  2017-05-21 13:35  PatentCrawler\.git\info\
     文件         240  2017-05-21 13:35  PatentCrawler\.git\info\exclude
     目錄           0  2017-05-21 13:39  PatentCrawler\.git\logs\
     文件         336  2017-05-21 17:38  PatentCrawler\.git\logs\HEAD
     目錄           0  2017-05-21 13:39  PatentCrawler\.git\logs\refs\
     目錄           0  2017-05-21 13:39  PatentCrawler\.git\logs\refs\heads\
     文件         336  2017-05-21 17:38  PatentCrawler\.git\logs\refs\heads\master
     目錄           0  2017-05-21 13:39  PatentCrawler\.git\logs\refs\remotes\
     目錄           0  2017-05-21 13:39  PatentCrawler\.git\logs\refs\remotes\origin\
     文件         185  2017-05-21 13:39  PatentCrawler\.git\logs\refs\remotes\origin\HEAD
     文件         142  2017-05-21 17:39  PatentCrawler\.git\logs\refs\remotes\origin\master
     目錄           0  2017-05-21 17:38  PatentCrawler\.git\ob
     目錄           0  2017-05-21 17:38  PatentCrawler\.git\ob
............此處省略84個文件信息
- 上一篇:Lua語言程序設計合集8本
- 下一篇:python3.7 官方中文手冊文檔全套
評論
共有 條評論