資源簡介
利用Python的Flask框架和爬蟲相關技術,基於“中國裁判文書網”,構建一個內部專用網站。網站前端接受用戶輸入的查詢條件,服務器端根據此條件下載“中國裁判文書網”上對應的裁判文書,並提供給用戶下載。

代碼片段和文件信息
import?re
import?requests
import?validationService
# Get the total number of cases that match the given query condition.
# `deep` is the number of re-crawl attempts: when the response contains
# "remind" the captcha is solved and the request repeated; after `deep`
# unsuccessful retries the function gives up.
def getCaseTotalNumber(con, deep=2):
    """Return the total number of cases matching ``con``.

    Posts the query for the first result page to the list endpoint and
    takes the first run of digits in the response text as the case count.

    Args:
        con: Query-condition object; only its ``toParam()`` method is used,
            and the result is sent as the ``Param`` form field.
        deep: Remaining captcha retries. Each retry calls
            ``validationService.valid()`` and re-issues the request.

    Returns:
        The case count as an ``int``, or ``None`` when the response still
        contains "remind" after all retries, or contains no digits at all.
    """
    index = 1  # result page being requested
    direction = "asc"  # sent as "Direction"; original author noted its meaning was unclear
    # NOTE(review): the Chinese literals in this file look transcoded to
    # traditional characters -- confirm they match what the server expects.
    order = "法院層級"  # field the result list is ordered by
    page = 20  # number of records per result page
    param = con.toParam()
    para = {
        "Param": param,
        "Index": index,
        "Page": page,
        "Order": order,
        "Direction": direction,
    }
    url = "http://192.0.101.71/List/ListContent"
    txt = requests.post(url, data=para).text

    if "remind" in txt:
        # "remind" in the response means a captcha is being demanded.
        if deep > 0:
            validationService.valid()
            return getCaseTotalNumber(con, deep - 1)
        print("驗證碼識別失敗")
        return None

    # The first number in the response is the total case count.
    match = re.search(r'\d+', txt)
    if match is None:
        # No digits at all: the count cannot be determined from this response.
        return None
    return int(match.group())
# Return the case IDs together with the other list fields in one pass,
# which is more efficient than fetching them separately.
def getCaseContentList(con, deep=2):
    """Collect the list-page fields of every case matching ``con``.

    Asks ``getCaseTotalNumber`` for the total, walks every result page, and
    gathers six fields from each record. If fewer records than the reported
    total were collected, the whole crawl is repeated up to ``deep`` times;
    after that the shortfall is printed and the partial result is returned.

    Args:
        con: Query-condition object; only its ``toParam()`` method is used.
        deep: Remaining full re-crawl attempts when records are missing.

    Returns:
        A dict with the keys ``caseIds``, ``caseDates``, ``ajmcList``,
        ``fymcList``, ``ahList`` and ``spcxList``, each mapping to a list
        with one entry per collected record. All lists are empty when the
        total case count could not be read.
    """
    direction = "asc"  # sent as "Direction"; original author noted its meaning was unclear
    # NOTE(review): the Chinese literals in this file look transcoded to
    # traditional characters -- confirm they match what the server expects.
    order = "法院層級"  # field the result list is ordered by
    page = 20  # number of records per result page
    param = con.toParam()
    url = "http://192.0.101.71/List/ListContent"

    # (result key, record field) pairs, in the order of the returned dict.
    fields = (
        ("caseIds", "文書ID"),
        ("caseDates", "裁判日期"),
        ("ajmcList", "案件名稱"),
        ("fymcList", "法院名稱"),
        ("ahList", "案號"),
        ("spcxList", "審判程序"),
    )
    result = {key: [] for key, _ in fields}

    totalnunber = getCaseTotalNumber(con)
    if totalnunber is None:
        # The total could not be read (captcha recognition failed), so
        # there is nothing to page through.
        return result

    # Ceiling division: an exact multiple of `page` needs no extra page.
    totalPages = (totalnunber + page - 1) // page

    for index in range(1, totalPages + 1):
        para = {
            "Param": param,
            "Index": index,
            "Page": page,
            "Order": order,
            "Direction": direction,
        }
        txt = requests.post(url, data=para).text
        if "remind" in txt:
            # Captcha demanded: solve it, then fetch the current page once more.
            validationService.valid()
            txt = requests.post(url, data=para).text
            if "remind" in txt:
                # Still blocked; skip this page and let the shortfall check
                # below trigger a re-crawl.
                continue

        # SECURITY: eval() executes whatever the remote server returns.
        # Left as-is to preserve parsing behavior; replace with a safe parser
        # once the exact response format has been confirmed.
        js = eval(eval(txt))
        # The first element is skipped -- presumably a count/summary entry
        # rather than a case record; TODO confirm.
        for record in js[1:]:
            for key, field in fields:
                result[key].append(record[field])

    fetched = len(result["caseIds"])
    if totalnunber > fetched:
        if deep > 0:
            return getCaseContentList(con, deep - 1)
        print("在此條件{}下,已爬取的案件ID數是{}實際案件ID數是{}發生缺少".format(param, fetched, totalnunber))
    return result
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        378  2017-08-20 14:50  ChinaJudgementsOnline\.gitattributes
     文件        649  2017-08-20 14:50  ChinaJudgementsOnline\.gitignore
     文件        674  2018-12-03 00:36  ChinaJudgementsOnline\.idea\ChinaJudgementsOnline-master.iml
     文件        294  2018-12-03 00:30  ChinaJudgementsOnline\.idea\misc.xml
     文件        315  2018-12-03 00:30  ChinaJudgementsOnline\.idea\modules.xml
     文件      14171  2018-12-07 14:57  ChinaJudgementsOnline\.idea\workspace.xml
     文件        342  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\.idea\China_Judgements_Online_Spider.iml
     文件        159  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\.idea\encodings.xml
     文件        975  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\.idea\misc.xml
     文件        553  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\.idea\modules.xml
     文件      44980  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\.idea\workspace.xml
     文件       3035  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\caseListProcess.py
     文件       1205  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170727092032.jpg
     文件       1186  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170727092052.jpg
     文件       1238  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170727110047.jpg
     文件       1237  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170727150507.jpg
     文件       1199  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170727174642.jpg
     文件       1247  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728085141.jpg
     文件       1231  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728102152.jpg
     文件       1224  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728163408.jpg
     文件       1263  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728163536.jpg
     文件       1226  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728164025.jpg
     文件       1240  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170728164812.jpg
     文件       1223  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170729095507.jpg
     文件       1197  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170729095608.jpg
     文件       1250  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170729100503.jpg
     文件       1252  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170729102416.jpg
     文件       1251  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170729121104.jpg
     文件       1208  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170810164320.jpg
     文件       1247  2017-08-20 14:50  ChinaJudgementsOnline\China_Judgements_Online_Spider\code\20170810164500.jpg
............此處省略789個文件信息
評論
共有 條評論