Python 微博爬蟲 — 依關鍵字爬取微博內容

  • 大小: 7KB
    文件類型: .py
    金幣: 1
    下載: 1 次
    發布日期: 2021-05-12
  • 語言: Python
  • 標簽: python 微博

資源簡介

利用python爬取微博內容,能夠做到爬取任意關鍵字下的全部微博內容

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
# Standard library
import datetime
import json
import re
import sys
import time

# Third-party
import requests
from bs4 import BeautifulSoup

# Python 2 only: make UTF-8 the implicit str/unicode conversion codec.
# ``reload`` is not a builtin and ``sys.setdefaultencoding`` does not exist on
# Python 3 (where text is already unicode), so the hack is skipped there
# instead of raising NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

class weibo():
    # HTTP request headers sent with every page fetch; they mimic a desktop
    # Chrome session.
    # NOTE(review): the Cookie value is a hard-coded login session. It is a
    # credential and will expire -- it should be loaded from configuration or
    # the environment rather than committed with the source.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cookie': '_T_WM=dbeb65f9c841bd67a4f32cab3ddbf7ec; ALF=1514628952; SCF=AuIwFvQ5M6uY3uNIsY0hghDyz7lZI2hJXLAmQfi-kZSxz7rrfhFd-xg1a49sZCflizdhw72KVhkyNCabj6L-AfQ.; SUB=_2A253G6oIDeRhGeBN41oR9ynFwjuIHXVU5zZArDV6PUNbktBeLWPykW1NRAPsHIqkTzNa_zyi5uR2WyUO7jV8VH6z; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWjY5CN2FR7gfTeS2hHSFmV5JpX5KMhUgL.Foq01hn7S0M41KM2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMRSh2pShM0Sh-p; SUHB=0z1wvP8FOQCXXR; SSOLoginState=1512036952',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Upgrade-Insecure-Requests': '1',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
    }
????
????def?_init_(self):
????????pass
????????
????def?get_soup(selfurl):#返回soup
????????time.sleep(1.5)
????????req?=?requests.get(urlheaders=self.header)
????????html=req.text
????????soup=BeautifulSoup(html“lxml“)
????????return?soup
????
????
????
????def?create_url(selfpagestarttime):#構造連續url
????????#https://weibo.cn/search/mblog?hideSearchframe=&keyword=%E4%B8%AD%E5%8D%B0%E5%AF%B9%E5%B3%99&advancedfilter=1&starttime=20170630&endtime=20170701&sort=time&page=2
????????url=“https://weibo.cn/search/mblog/?keyword=中印對峙&sort=time&advancedfilter=1&“
????????x=datetime.timedelta(days=0)
????????endtime=starttime+x
????????endtime=endtime.strftime(‘%Y%m%d‘)
????????starttime=starttime.strftime(‘%Y%m%d‘)
????????url=url+‘starttime=‘+starttime+‘&endtime=‘+endtime+‘&sort=time&page=‘+str(page)
????????return?url
????????
        # Record format: {'time': time, 'name': name, 'text': text, 'zan': zan, 'ping': ping, 'zhuan': zhuan, 'comment': [{'name': name, 'text': text}, {'name': name, 'text': text}]}
????def?comment_url(selfurlnum):#評論url
????????url=re.sub(‘#cmtfrm‘‘‘url)
????????url=url+‘&page=‘+str(num)
????????return?url
????def?get_commentsoup(selfurl):#評論soup
????????time.sleep(2)
????????req?=?requests.get(urlheaders=self.header)
????????html=req.text
????????soup=BeautifulSoup(html“lxml“)
????????return?soup
????
????def?get_comment(selflistping_url):#解析評論
????????ping_url=re.sub(‘http‘?‘https‘?ping_url)
????????soup=self.get_soup(ping_url)
????????con_pagenum=soup.find(‘input‘?attrs={“name“:“mp“})
????????if?con_pagenum==None:
????????????con_pagenum=1
????????else:
????????????con_pagenum=con_pagenum.attrs[“value“]
????????for?i?in?range(1int(con_pagenum)+1):
????????????url=self.comment_url(ping_urli)
????????????soup=self.get_commentsoup(url)
????????????for?div?in?soup.find_all(“div“attrs?=?{“id“:re.compile(“C_.*?“)}):
????????????????name=div.find(“a“)
????????????????name=name.get_text()

評論

共有 條評論