91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 641KB
    文件類型: .rar
    金幣: 2
    下載: 0 次
    發布日期: 2024-01-30
  • 語言: Python
  • 標簽: python 爬蟲

資源簡介

初學python爬蟲小練-------爬取豆瓣排行榜上的電影,并分類存儲到excel表

資源截圖

代碼片段和文件信息

# _*_ coding: UTF-8 _*-
# study number 15210220308
# name Vision
import re
import requests
import xlwt
from bs4 import BeautifulSoup


# Scrape the movie ranking list.
def getMovie(url):
    """Fetch one ranking page and return one record per listed movie.

    Args:
        url: Address of a page whose markup holds a ``grid_view`` element
            containing ``item`` entries.

    Returns:
        A list with one six-element list per movie, in page order:
        ``[title, rank, rating, number of ratings, one-line summary,
        director/cast/year/region text]``. The summary is ``'無'`` when the
        entry has none; the rating count and the detail text are ``''`` when
        their pattern is not found in the entry. An empty list is returned
        when the page has no ``grid_view`` element.
    """
    # Many sites refuse requests that do not carry a browser-like User-Agent.
    header = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 '
                       'Safari/537.36')}
    # A timeout keeps an unresponsive server from blocking the script forever.
    html = requests.get(url, headers=header, timeout=10)
    soup = BeautifulSoup(html.text, 'html.parser')

    ol_grid_view = soup.find(class_='grid_view')
    if ol_grid_view is None:
        # Nothing to parse (e.g. an error page was served instead).
        return []

    # Number of people who rated the movie.
    # NOTE(review): the label is matched in both Traditional and Simplified
    # script because the served page may use either - confirm against the site.
    findJudge = re.compile(r'(\d*)人(?:評價|评价)')
    # Detail paragraph: director, cast, year, region, genre.
    # NOTE(review): the tags in this pattern and in the <br> alternative below
    # were reconstructed - confirm against the page markup.
    findBd = re.compile(r'<p class="">(.*?)</p>', re.S)
    # Noise to strip from the detail paragraph: the 28-space indentation,
    # newlines, line-break tags in any spelling, and runs of dots.
    remove = re.compile(r' {28}|\n|</?br\s*/?>|\.*')

    movies = []
    for item in ol_grid_view.find_all(class_='item'):
        markup = str(item)
        votes = findJudge.findall(markup)
        details = findBd.findall(markup)
        summary = item.find(class_='inq')
        movies.append([
            item.find(class_='title').get_text(),
            item.find('em').get_text(),
            item.find(class_='rating_num').get_text(),
            # Guard the [0] lookups: an entry without a match must not abort
            # the whole page.
            votes[0] if votes else '',
            # Some entries have no one-line summary.
            summary.get_text() if summary is not None else '無',
            remove.sub('', details[0]) if details else '',
        ])
    return movies

# Write the collected data to an Excel workbook.
def saveData(datalist, savepath, name):
    """Save movie records to ``savepath + name + '.xls'``.

    Args:
        datalist: Sequence of records; each record is a sequence whose values
            are written to consecutive columns of one row.
        savepath: Prefix that is concatenated directly with ``name`` to form
            the output path, so a directory must end with its separator.
        name: Base name of the file, also used as the sheet title.
    """
    savepath += name + '.xls'
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(name, cell_overwrite_ok=True)

    # Column headings, written to row 0.
    col = ('電影名字', '豆瓣排行',
           '評分', '評價數', '概況', '導演/主演/地區')
    for column, heading in enumerate(col):
        sheet.write(0, column, heading)

    # Data rows start at row 1, directly below the headings.
    for row, data in enumerate(datalist, 1):
        for column, value in enumerate(data):
            sheet.write(row, column, value)

    book.save(savepath)

def?cityClass(list):
????civilMovie?=?[]
????foreignMovie?=[]
????civil?=?‘中國|香港|臺灣|大陸|China|china‘
????for?i?in?range(len(list)):
????????bd?=?list[i][5]
????????b?=?re.search(civilbd)
????????if?not?b?is?None:
????????????print?‘發現國內電影---排名‘list[i][1]‘的《‘list[i][0]‘》‘
????????????civilMovie.append(list[i])
????????else?:
????????????foreignMovie.append(list[i])
????saveData(civilMovie‘‘u‘豆瓣國內電影排行‘)
????s

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件       4012  2018-06-05 10:33  pc.py

     文件     475460  2018-05-21 15:50  網絡爬蟲.docx

     文件     228456  2018-05-21 15:50  正則表達式.docx

----------- ---------  ---------- -----  ----

               707928                    3


評論

共有 條評論