資源簡介
爬蟲1.py
代碼片段和文件信息
# -*- coding: UTF-8 -*-
import datetime
import os
import re
import urllib.request
from urllib import request

import xlwt
def function4(url="http://yumiqianyue.com", attempts=100, timeout=1):
    """Request *url* repeatedly and print the byte length of each response.

    Every attempt is independent: when one fails, the reason is printed and
    the loop continues with the next attempt.

    Args:
        url: Address requested on every attempt.
        attempts: How many requests to make.
        timeout: Seconds handed to ``urlopen`` as the response timeout.

    Returns:
        None. Results are reported on standard output only.
    """
    # Other hosts tried in the original experiments (with 1 s and 0.5 s
    # timeouts): http://www.baidun.com and http://www.hellobi.com
    for _ in range(attempts):
        try:
            # The context manager closes the connection even when read() fails.
            with urllib.request.urlopen(url, timeout=timeout) as response:
                body = response.read()
            print(len(body))
        except Exception as e:
            # Deliberately broad: this is a best-effort probe that reports any
            # failure (timeout, DNS, HTTP error, ...) and keeps going.
            print("異常的原因:" + str(e))
def function3(
    url="https://read.douban.com/provider/all",
    pat='(.*?)',
    pat1='(.*?)',
    out_path='./爬蟲練習1.txt',
):
    """Download a page, extract two sets of regex matches, and save them.

    The page is decoded as UTF-8. All matches of *pat* are joined with commas,
    all matches of *pat1* are concatenated directly after them, and the
    resulting string is written to *out_path* (UTF-8, overwriting the file).

    Args:
        url: Page to download.
        pat: Regular expression whose matches are comma-joined.
        pat1: Regular expression whose matches are appended without separator.
        out_path: Destination text file.

    Returns:
        None.
    """
    # NOTE(review): both default patterns are the bare group ``(.*?)``, which
    # only yields empty matches. They look like they lost their surrounding
    # HTML delimiters before reaching this file -- confirm the intended
    # patterns and pass them explicitly (or restore the defaults).
    with urllib.request.urlopen(url) as response:
        data = response.read().decode('utf-8')

    mydata = re.findall(pat, data)
    mydata1 = re.findall(pat1, data)
    savedata = ','.join(mydata) + ''.join(mydata1)

    # Write the result to the .txt file; ``with`` closes it on exit.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(savedata)

    # TODO: writing the results to an Excel file was announced here in the
    # original but never implemented.
def function2(
    page_url="http://www.youku.com/",
    filename="./網頁/數據挖掘實例網頁.html",
    probe_url="http://www.hellobi.com",
):
    """Save one page to disk, then print response metadata for another.

    Args:
        page_url: Page downloaded to *filename*.
        filename: Local path the downloaded page is written to.
        probe_url: Page whose headers, status code and final URL are printed.

    Returns:
        None.
    """
    # urlretrieve does not create missing directories, so make sure the
    # target folder exists before downloading.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Download the web page into a local file.
    urllib.request.urlretrieve(page_url, filename=filename)
    # Clean up temporary files that urlretrieve may have left behind.
    urllib.request.urlcleanup()

    # The context manager closes the connection once the metadata is printed.
    with urllib.request.urlopen(probe_url) as response:
        print("環境信息:", response.info())
        # getcode() gives the status code of the response.
        print(response.getcode())
        # geturl() gives the URL that was actually retrieved.
        print(response.geturl())
def?function():
????“““官方實例“““
????html?=?request.urlopen(“https://read.douban.com/provider/all“).read()??#?讀取網頁源代碼內容
????wzgz?=?“
(.*?)(.*?)?部作品在售“
????xx?=?re.compile(wzgz).findall(str(html?“utf-8“))??#?通過正則表達式匹配在網頁源代碼中提取所需內容
????#?print(xx)
????#?創(chuàng)建workbook和sheet對象
????workbook?=?xlwt.Workboo
- 上一篇:用Python實現域用戶與企業微信通訊錄同步
- 下一篇:運動物體的檢測和識別
評論
共有 條評論