資源簡介
給定任意關鍵詞,即可通過新浪微博搜索,爬取相關微博的博主 id、微博正文、轉發數、評論數、點贊數和發布時間。
代碼片段和文件信息
from?urllib.parse?import?urlencode
import?requests
from?pyquery?import?PyQuery?as?pq
import?os
import?re
import?xlwt
import?pandas?as?pd
# Directory containing this script, with a trailing path separator so that
# callers can append a bare file name (e.g. current_Path + 'test.txt').
# os.path.join(..., '') appends the platform's own separator instead of a
# hard-coded Windows backslash.
current_Path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '')

# Root URL of the Weibo search site.
base_url = 'https://s.weibo.com/'

# Default HTTP request headers.
# NOTE(review): 'Refer' is probably meant to be the standard 'Referer'
# header, and 'Host' names m.weibo.cn while base_url points at s.weibo.com.
# Confirm against the code that actually sends these headers before changing.
headers = {
    'Host': 'm.weibo.cn',
    'Refer': 'https://weibo.com/zzk1996?is_all=1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.87 Safari/537.36 Edg/80.0.361.48'
    ),
}
# Search
def get_Research(research_Words, page, timeout=10):
    """Fetch one page of Weibo search results for a keyword.

    Args:
        research_Words: Keyword to search for.
        page: Result page number; converted with str() for the query string.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawler indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200;
        otherwise None (non-200 status, connection failure or timeout).
    """
    params = {
        'q': research_Words,
        'Refer': 'index',
        'page': str(page),
    }
    url = 'https://s.weibo.com/weibo?' + urlencode(params)
    try:
        response = requests.get(url, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout):
        # Best-effort fetch: a network failure is reported as "no page".
        return None
    if response.status_code == 200:
        return response.text
    return None
def?get_Information(research_Wordspage):
????res?=?[]
????html?=?get_Research(research_Wordspage)
????doc?=?pq(html)
????#print(doc)
????with?open(current_Path?+?‘test.txt‘‘w+‘encoding?=?‘utf8‘)?as?f:
????????f.write(html)
????#?items?=?doc(“.content“).items()
????items?=?doc(“div[class=‘card‘]“).items()
????
????for?li?in?items:
????????temp_Info_Dict?=?{}
????????
????????###抽取昵稱
????????info?=?li.find(‘div‘)(‘.name‘)
????????nick_Name?=?info.attr(‘nick-name‘)
????????temp_Info_Dict[‘博主id‘]?=?nick_Name
????????###抽取內容
????????#?text?=?li(‘.txt‘)
????????text?=?li(“p[node-type=‘feed_list_content_full‘]>a“)
????????temp_Info_Dict[‘微博正文‘]?=?text.text()
????????if?temp_Info_Dict[‘微博正文‘]?==?‘‘:
- 上一篇:算法的python實現代碼、測試數據集及結果
- 下一篇:bow python實現
評論
共有 條評論