91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

此為python實現的基于網路爬蟲的電影評論爬取和分析系統。其中包括源代碼、完整文檔。本系統主要由熱門電影排名、影評內容詞云、觀眾滿意度餅圖等模塊組成。其中代碼有bug(我去年可以運行,不知道今年為什么不可了嗚嗚嗚),介意勿下載!!!

資源截圖

代碼片段和文件信息

# Stage 1: download Douban's "now playing" page for Hangzhou and print the
# movies listed on it, numbered from 1.
from urllib import request

# Browser-like User-Agent sent with every request to Douban.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'}

# Wrap the URL in a Request so the headers are actually transmitted; calling
# urlopen() on the bare URL string left `headers` unused.
# NOTE(review): presumably the missing User-Agent is why the script stopped
# working against Douban -- confirm against a live response.
nowplaying_request = request.Request(
    'https://movie.douban.com/nowplaying/hangzhou/', headers=headers)
with request.urlopen(nowplaying_request) as resp:
    html_data = resp.read().decode('utf-8')

from bs4 import BeautifulSoup as bs
soup = bs(html_data, 'html.parser')
nowplaying_movie = soup.find_all('div', id='nowplaying')
nowplaying_movie_list = nowplaying_movie[0].find_all('li', class_='list-item')

# One {'id': ..., 'name': ...} dict per movie: the id comes from the <li>'s
# data-subject attribute, the name from the alt text of its <img>.
nowplaying_list = []
for item in nowplaying_movie_list:
    nowplaying_dict = {}
    nowplaying_dict['id'] = item['data-subject']
    for tag_img_item in item.find_all('img'):
        nowplaying_dict['name'] = tag_img_item['alt']
        # Appended inside the <img> loop, so an entry is only recorded for
        # list items that contain an image.
        nowplaying_list.append(nowplaying_dict)

print('豆瓣排行榜中名列前茅的影片為:')
for rank, movie in enumerate(nowplaying_list, start=1):
    print('NO.', rank, '\t', movie['name'])
# Stage 2: download the first page of short comments for the second movie in
# nowplaying_list (index 1), then print each comment with its author and date.
import requests

requrl = 'https://movie.douban.com/subject/' + nowplaying_list[1]['id'] + '/comments' + '?' + 'start=0' + '&limit=20'
# headers must be passed by keyword: the second positional argument of
# requests.get() is `params`, which would put the dict into the query string
# instead of the HTTP headers.
resp = requests.get(requrl, headers=headers)
html_data = resp.text
soup = bs(html_data, 'html.parser')
comment_div_lits = soup.find_all('div', class_='comment')

# Author name: text of the first <a> with an empty class inside each comment.
eachAudiList = [
    div.find_all('a', class_='')[0].string for div in comment_div_lits
]

# Comment date: first whitespace-separated token of the comment-time span
# (any time-of-day part after it is dropped).
eachTimeList = [
    div.find_all('span', class_='comment-time')[0].text.split()[0]
    for div in comment_div_lits
]

# Comment body: text of the first <p> inside each comment.
eachCommentList = [div.find_all('p')[0].text for div in comment_div_lits]

# All comment bodies stripped and concatenated with no separator; join()
# avoids rebuilding the string on every iteration.
comments = ''.join(str(text).strip() for text in eachCommentList)

print('------------------以下為各路神仙的留言-----------------------------------------')
for author, text, posted_on in zip(eachAudiList, eachCommentList, eachTimeList):
    print(author + '  的留言為:')
    print(text)
    print('\t\t\t', posted_on)


# Stage 3: segment the concatenated comment text with jieba and display it as
# a word cloud.
from wordcloud import WordCloud
import jieba
import matplotlib.pyplot as plt

# cut_all=True selects jieba's full mode; the resulting tokens are joined
# with single spaces before being handed to WordCloud.generate().
wordlist_after_jieba = jieba.cut(comments, cut_all=True)
wl_space_split = " ".join(wordlist_after_jieba)

# font_path points at the font.ttf file in the working directory.
# NOTE(review): presumably needed so Chinese glyphs render -- confirm.
my_wordcloud = WordCloud(
    background_color="white",
    width=1000,
    height=860,
    font_path="font.ttf",
).generate(wl_space_split)
plt.imshow(my_wordcloud)
plt.axis("off")
plt.show()



# Stage 4: download the detail page of the same movie (index 1 of
# nowplaying_list) and locate the rating-breakdown rows on it.
import requests

requrl = 'https://movie.douban.com/subject/' + nowplaying_list[1]['id'] + '/' + '?' + 'from=showing'

# Send the same browser-like headers as the comments request; this call
# previously went out with the library's default User-Agent.
resp = requests.get(requrl, headers=headers)
html_data = resp.text
soup = bs(html_data, 'html.parser')
assess = soup.find_all('div', class_='ratings-on-weight')

assess_dit = {}
# x ends up holding the 'item' rows of the LAST ratings-on-weight block.
# It is pre-bound to an empty list so the code that iterates over x
# afterwards does not hit a NameError when the page has no such block.
x = []
for rating_block in assess:
    x = rating_block.find_all('div', class_='item')
star = []
percent = []
for?y?in?x:
????z=y.find_all(‘span‘)
????star.append(z[0].string.split

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件     5828044  2019-06-11 13:35  python程序設計\font.ttf
     文件        3707  2020-04-01 16:53  python程序設計\python語言程序設計.py
     文件      529408  2020-04-01 16:58  python程序設計\文檔.doc
     文件         211  2020-04-01 17:01  python程序設計\附錄.txt
     目錄           0  2020-04-01 17:08  python程序設計\

評論

共有 條評論