資源簡介
請確保 D 盤有一個 image 文件夾,用於存放採集到的圖片。
代碼片段和文件信息
import?requests
from?lxml?import?etree
import?time
?
?
class?Baiduspider(object):
????def?__init__(self):
????????self.baseurl?=?‘https://tieba.baidu.com/‘
????????self.url?=?‘https://tieba.baidu.com/f?‘
????????self.headers?=?{‘User-Agent‘:‘Mozilla/5.0‘}
?????
????#獲取帖子鏈接
????def?getPageUrl(selfurl):
????????res?=?requests.get(urlheaders=self.headers)
????????res.encoding=?‘utf-8‘
????????html?=?res.text
????????parseHtml?=?etree.HTML(html)
????????#?2.?解析對象調用xpath
????????r1?=?parseHtml.xpath(‘//div[@class=“t_con?cleafix“]/div/div/div/a/@href‘)
????????for?t?in?r1:
????????????self.getImaUrl(t)
?????????
????#獲取帖子中圖片的連接
????def?getImaUrl(selft):
????????res?=?requests.get(self.baseurl+theaders=self.headers)
????????res.encoding=?‘utf-8‘
????????html?=?res.text
????????parseHtml?=?etree.HTML(html)
????????#?2.?解析對象調用xpath.??兩個解析式不確定?第一個不能用就換第二個
????????r2?=?parseHtml.xpath(‘//div[
評論
共有 條評論