91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 5KB
    文件類型: .py
    金幣: 2
    下載: 1 次
    發布日期: 2021-06-07
  • 語言: Python
  • 標籤: 爬蟲

資源簡介

利用 Python 爬蟲抓取豆瓣和 IMDb 上的電影評價信息,並最終通過圖像可視化將 IMDb Top 250 和豆瓣 Top 250 的電影進行了比較,大致可以看出兩個平臺用戶的電影審美水平。

資源截圖

代碼片段和文件信息

import urllib.request

import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
def readhtml(url):
    """Download *url* and return the response body decoded as UTF-8 text.

    The request carries a browser-like ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    head = {
        "User-Agent": (
            "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; "
            "Acoo Browser 1.98.744; .NET CLR 3.5.30729)"
        ),
    }
    # The original passed an empty dict as ``data``. Any non-None ``data``
    # turns the request into a POST, and a dict is not a valid body type;
    # leaving it as None issues a plain GET.
    req = urllib.request.Request(url, data=None, headers=head)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode("utf-8")
def analizeimdb(html, imdbmovie, imdbrating,
                out_path=r"C:\Users\Administrator\Desktop\3.csv", limit=250):
    """Parse IMDb chart HTML and write one "title + rating" line per movie.

    Every ``<tr>`` in *html* is scanned twice: once for
    ``<td class="titleColumn">`` cells and once for
    ``<td class="ratingColumn imdbRating">`` cells. Rows without a matching
    cell are skipped. The text of the matching cells is appended, as a list
    per row, to the caller-supplied *imdbmovie* and *imdbrating* lists.

    Args:
        html: HTML text of the chart page.
        imdbmovie: List extended in place with per-row title-cell texts.
        imdbrating: List extended in place with per-row rating-cell texts.
        out_path: File to write; defaults to the path used by the original
            script.
        limit: Maximum number of rows written (the original hard-coded 250).

    Returns:
        None. Results are delivered through the two lists and the file.
    """
    soup = BeautifulSoup(html, "html.parser")
    rows = soup.find_all("tr")

    for row in rows:
        title_cells = row.find_all("td", class_="titleColumn")
        if title_cells:
            imdbmovie.append([cell.get_text() for cell in title_cells])

    for row in rows:
        rating_cells = row.find_all("td", class_="ratingColumn imdbRating")
        if rating_cells:
            imdbrating.append([cell.get_text() for cell in rating_cells])

    # zip() over the truncated lists stops at the shorter one, so a page
    # with fewer than ``limit`` parsed rows no longer raises IndexError.
    lines = [
        movie[0] + rating[0] + "\n"
        for movie, rating in zip(imdbmovie[:limit], imdbrating[:limit])
    ]

    # ``with`` guarantees the file is closed even if writing fails.
    with open(out_path, "w", encoding="utf-8") as file:
        file.writelines(lines)
def analizedou(html, doumovie, dourating,
               out_path=r"C:\Users\Administrator\Desktop\4.txt", limit=250):
    """Parse Douban chart HTML and write one tab-separated line per movie.

    Every ``<li>`` in *html* is scanned twice: once for
    ``<span class="title">`` elements and once for
    ``<span class="rating_num">`` elements. Items without a matching span
    are skipped. The text of the matching spans is appended, as a list per
    item, to the caller-supplied *doumovie* and *dourating* lists.

    Each output line is ``rank<TAB>first title<TAB>try_(titles)<TAB>rating``,
    where rank counts from 1.

    Args:
        html: HTML text of the chart page(s).
        doumovie: List extended in place with per-item title texts.
        dourating: List extended in place with per-item rating texts.
        out_path: File to write; defaults to the path used by the original
            script.
        limit: Maximum number of items written (the original hard-coded 250).

    Returns:
        None. Results are delivered through the two lists and the file.
    """
    soup = BeautifulSoup(html, "html.parser")
    items = soup.find_all("li")

    for item in items:
        title_spans = item.find_all("span", class_="title")
        if title_spans:
            doumovie.append([span.get_text() for span in title_spans])

    for item in items:
        rating_spans = item.find_all("span", class_="rating_num")
        if rating_spans:
            dourating.append([span.get_text() for span in rating_spans])

    # zip() over the truncated lists stops at the shorter one, so fewer than
    # ``limit`` parsed items no longer raises IndexError.
    lines = [
        str(rank) + "\t" + titles[0] + "\t" + try_(titles) + "\t" + rating[0] + "\n"
        for rank, (titles, rating) in enumerate(
            zip(doumovie[:limit], dourating[:limit]), start=1
        )
    ]

    # ``with`` guarantees the file is closed even if writing fails.
    with open(out_path, "w", encoding="utf-8") as file:
        file.writelines(lines)
def try_(x):
    """Return the second entry of *x*, or a placeholder when there is none.

    Used for Douban titles, where some movies have no foreign-language name
    and the title list therefore holds a single entry.

    Args:
        x: Sequence of title strings for one movie.

    Returns:
        ``x[1]`` when it exists, otherwise the placeholder string.
    """
    try:
        # The original guarded this with ``if x[1] in x``, which is always
        # true once x[1] exists, so the element is returned directly.
        return x[1]
    except IndexError:
        # NOTE(review): the placeholder is reconstructed as two spaces + "/N"
        # from a garbled listing — confirm against the original script.
        return "  /N"
def?douban(doumoviedourating):???
????htmldatas=““;
????for?i?in?range(10):
????????url=“https://movie.douban.com/top250?start=“+str(25*i)+“&filter=“;
????????html=readhtml(url);
????????htmldatas=htmldatas

評論

共有 條評論

相關(guān)資源