91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 5KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2022-09-06
  • 語言: Python
  • 標簽: python??

資源簡介

python 實現股吧評論抓取及分析

資源截圖

代碼片段和文件信息

import codecs
import random
import re
import time

import jieba
import jieba.analyse
import requests
import tushare
from lxml import html

# Optional proxy mapping handed to requests; None means connect directly.
# Example: proxies = {"http": "123.53.86.133:61234"}
proxies = None

# Request headers sent with every page fetch (browser-like User-Agent).
headers = {
    'Host': 'guba.eastmoney.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X metaSr 1.0',
}

def get_url(stocknum, page):
    """Fetch one listing page of a stock board and return its post links.

    Args:
        stocknum: Stock code; converted with ``str()`` and placed in the URL.
        page: Page number of the listing; converted with ``str()``.

    Returns:
        The ``href`` values matched by the XPath below (a list of strings
        as returned by lxml), or the empty string ``''`` when the request
        or the parsing raised. Both results iterate as "no links".
    """
    # The comma after "list" belongs to the site's URL scheme
    # (list,<code>_<page>.html).
    url = 'http://guba.eastmoney.com/list,' + str(stocknum) + '_' + str(page) + '.html'
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=20)
        # Kept from the original; note it is assigned only after the
        # request above has already been issued.
        requests.adapters.DEFAULT_RETRIES = 5
        tree = html.fromstring(response.text)
        urls = tree.xpath('//div[@id="articlelistnew"]//div[@class="articleh normal_post"]/span[3]/a/@href')
    except Exception:
        # Best effort: back off for 1-4 seconds and report no links.
        time.sleep(random.random() + random.randint(1, 3))
        urls = ''
    return urls


def get_comments(urls):
    """Download each post page and hand its date -> comment mapping to save_to_file.

    Args:
        urls: Iterable of site-relative post paths; each one is appended to
            ``http://guba.eastmoney.com``.

    Returns:
        None. For every page that yields at least one non-empty comment
        list, ``save_to_file`` is called with a dict built from
        ``zip(times, comments)``. Pages that raise are reported with a
        printed message and followed by a short random pause.
    """
    # Strips the "posted at" label (simplified and traditional spelling)
    # and spaces, so the first 10 characters left are the date part.
    label_re = re.compile('发表于|發表于| ')
    for newurl in urls:
        newurl1 = 'http://guba.eastmoney.com' + newurl
        try:
            response = requests.get(newurl1, headers=headers, proxies=proxies, timeout=20)
            # Kept from the original; assigned after the request was issued.
            requests.adapters.DEFAULT_RETRIES = 5
            tree = html.fromstring(response.text)
            times1 = tree.xpath('//div[@class="zwfbtime"]/text()|//div[@class="zwli clearfix"]/div[4]/div/div[2]/text()')
            # Build the lists directly instead of joining on '!' and
            # splitting again, which broke apart any text containing '!'.
            # An empty match still becomes [''] so zip() pairs items the
            # same way as before.
            times = [label_re.sub('', x)[:10] for x in times1] or ['']
            comments1 = tree.xpath('//div[@class="stockcodec .xeditor"]/text()|//div[@class="zwli clearfix"]/div[4]/div/div[3]/div/text()')
            comments = [w.strip() for w in comments1] or ['']
            if comments == ['']:
                continue
            # NOTE(review): keys are 10-character dates, so several comments
            # sharing a date collapse to the last one - confirm this is what
            # save_to_file expects.
            dic = dict(zip(times, comments))
            save_to_file(dic)
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still stop the crawl.
            print('error!!!!')
            time.sleep(random.random() + random.randint(0, 3))

    # if times and comments:
        # dic.append({'time':times,'comment':comments})
    # re

評論

共有 條評論