資源簡介
Python 爬蟲:爬取站長之家(sc.chinaz.com)簡歷模板的腳本。畢業求職需要簡歷模板的朋友可以看一看。
代碼片段和文件信息
# -*- coding: UTF-8 -*-
import os
import re

import requests
class Resume(object):
    """Crawl resume templates from sc.chinaz.com and save them to disk.

    The crawl runs in three stages that hand data to each other through
    instance lists: listing pages fill ``next_url_list``, detail pages fill
    ``Download_list`` with ``[download_url, name]`` pairs, and each pair is
    then written to ``test/<name>.rar``.
    """

    # Seconds to wait on a single HTTP request. Without a timeout,
    # ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set up the request headers and the empty work queues."""
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/67.0.3396.99 Safari/537.36"
            )
        }
        self.next_url_list = []
        self.Download_list = []

    @staticmethod
    def _listing_url(page):
        """Return the listing-page URL for a page number (page 1 has no suffix)."""
        if page == 1:
            return "http://sc.chinaz.com/jianli/free.html"
        return "http://sc.chinaz.com/jianli/free_%s.html" % page

    @staticmethod
    def _safe_filename(name):
        """Replace path separators and characters Windows forbids in file names."""
        return re.sub(r'[\\/:*?"<>|]', "_", name)

    def get_page(self, url):
        """Fetch one listing page and queue every match found in it.

        Args:
            url: Address of the listing page to fetch.

        Matches are appended to ``self.next_url_list``. A non-200 response
        is skipped without queuing anything.
        """
        response = requests.get(
            url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            return
        print("請求成功")
        # NOTE(review): as written this pattern matches single whitespace
        # characters, not URLs. The regex literal appears to have lost its
        # HTML markup before reaching this file; restore the real pattern
        # against the live page markup -- TODO confirm.
        self.next_url_list.extend(re.findall(r'\s', response.text))

    def parse_page(self, next_url):
        """Fetch one detail page and record its download link and title.

        Args:
            next_url: Address of a template detail page.

        On success ``[download_url, name]`` is appended to
        ``self.Download_list``. Pages that return a non-200 status, or on
        which either pattern finds nothing, are skipped.
        """
        response = requests.get(
            url=next_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        response.encoding = "UTF-8"  # decode the body as UTF-8 so Chinese text reads correctly
        if response.status_code != 200:
            return
        print("請求成功")
        html = response.text
        # NOTE(review): this pattern has no capture group, so each match is
        # the literal link text rather than an href. It looks truncated in
        # the same way as the listing-page pattern -- TODO confirm.
        Download_url = re.findall("福建電信下載 ", html)
        name = re.findall('title": "(.*?)"', html)
        print('----------------------------------------------------')
        print(Download_url)
        print(name)
        print('----------------------------------------------------')
        # Either list may be empty when the page layout differs; indexing
        # [0] unguarded would raise IndexError and abort the whole crawl.
        if not Download_url or not name:
            return
        self.Download_list.append([Download_url[0], name[0]])
        print(Download_url, name)

    def Download(self, download_url, name):
        """Download one archive and write it to ``test/<name>.rar``.

        Args:
            download_url: Address of the archive to fetch.
            name: Template title used as the file name. Characters that are
                unsafe in a file name are replaced with ``_``.

        Nothing is written when the response status is not 200.
        """
        response = requests.get(
            url=download_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            return
        # open() does not create missing directories.
        os.makedirs("test", exist_ok=True)
        # The name comes from scraped HTML, so keep it from escaping test/.
        target = "test/%s.rar" % self._safe_filename(name)
        with open(target, 'wb') as f:
            f.write(response.content)

    def main(self, start_page=2, end_page=3):
        """Run the whole crawl: listing pages, detail pages, then downloads.

        Args:
            start_page: First listing page to fetch (inclusive).
            end_page: Listing page to stop before (exclusive).

        The defaults fetch only page 2, as the hard-coded range did before.
        """
        for page in range(start_page, end_page):
            url = self._listing_url(page)
            print(url)
            self.get_page(url)
        for next_url in self.next_url_list:
            self.parse_page(next_url)
        for download_url, name in self.Download_list:
            self.Download(download_url, name)
if __name__ == "__main__":
    # Run the crawl only when executed as a script, not when imported.
    resume = Resume()
    resume.main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件     240993  2016-01-13 14:44  jianli8782\1.jpg
----------- ---------  ---------- -----  ----
               240993                    1
- 上一篇:基于python的數據分析論文集
- 下一篇:電影推薦系統
評論
共有 條評論