91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

爬取百度新聞,並可進行主題搜索,搜索結果按照主題相關度進行排序。

資源截圖

代碼片段和文件信息

#?-*-?coding:utf-8?-*-


import?urllib????
import?re????
import?time
import?urllib
import?os
from?urllib?import?request


#創建爬取類,并申明相關屬性和方法

class?News:
????#class具體的屬性及函數
????def?__init__(selfurl):
????????httpobj?=?request.urlopen(url)
????????#read()讀取url信息
????????html?=?httpobj.read()
????????#網頁編碼格式為gb2312
????????htmltext?=?html.decode(‘gb2312‘‘ignore‘)?
????????#正則表達式匹配新聞標題
????????pat?=?‘]*)“?mon=“[^>“]*“?target=“[^>“]*“>([^>#]*)‘?
????????result?=?re.findall(pathtmltext)
????????pat?=‘]*)“?target=“[^>“]*“?class=“[^>“]*“?mon=“[^>“]*“>([^>#]*)‘
????????added?=?re.findall(pathtmltext)
????????result.extend(added)
????????pat?=‘]*)“?target=“[^>“]*“??mon=“[^>“]*“>([^>#]*)‘
????????added?=?re.findall(pathtmltext)
????????result.extend(added)
????????se?=?set(result)
????????result?=?list(se)
????????#replace()將文章標題中的quot取消
????????#將匹配出的標題和URL加入到data列表中
????????self.data?=?[[item[0]item[1].replace(‘"‘‘“‘)]?for?item?in?result]
??????
??????
????#提取關鍵字,并將包含關鍵字的標題加入到target列表當中????
????def?key(self):
????????self.key?=input(“輸入關鍵字:“)
????????self.target?=?[]
????????for?item?in?self.data?:
????????????if?self.key?in?item[1]:
????????????????self.target.append(item)
????????#返回與關鍵字相關的新聞個數????????
????????return?len(self.target)
????
????
????#訪問匹配到的新聞信息
????def?visit(self):
????????for?i?in?range(len(self.target)):
????????????url?=?self.target[i][0]
????????????httpobj?=?request.urlopen(url)
????????????html?=?httpobj.read()
????????????#百度新聞網頁的編碼格式為gb2312
????????????htmltext?=?html.decode(‘gb2312‘‘ignore‘)?
????????????#相關度匹配
????????????#在新聞全文當中匹配關鍵

評論

共有 條評論