91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 42.48MB
    文件類型: .zip
    金幣: 1
    下載: 0 次
    發布日期: 2023-07-17
  • 語言: 其他
  • 標簽: 數據集

資源簡介

豆瓣電影film數據集10萬!

資源截圖

代碼片段和文件信息

import logging
import random
import re
import string
import time
from collections import deque
from urllib import parse

import pandas as pd
import requests

from settings import User_Agents, Agent_IP


class?DoubanSpider(object):
????“““豆瓣爬蟲“““
????def?__init__(selfformTypecountrygenres):
????????#?基本的URL
????????self.base_url?=?‘https://movie.douban.com/j/new_search_subjects?sort=T&range=010&‘
????????self.full_url?=?self.base_url?+?‘{query_params}‘
????????#?從User-Agents中選擇一個(gè)User-Agent
????????self.headers?=?{‘User-Agent‘:random.choice(User_Agents)}??
????????self.proxies?=?{‘http‘:random.choice(Agent_IP)}?
????????#?可選參數(shù)?
????????self.form_tag?=?form??#?影視形式
????????self.type_tag?=?Type??#?類型
????????self.countries_tag?=??country?#?地區(qū)
????????self.genres_tag?=?genres?#特色
????????#默認(rèn)參數(shù)
????????self.sort?=?‘T‘??#?排序方式默認(rèn)是T表示熱度
????????self.range?=?0?10??#?評(píng)分范圍

????def?encode_query_data(self):
????????“““對(duì)輸入信息進(jìn)行編碼處理“““
????????
????????if?not?(self.form_tag?and?self.type_tag?and?self.countries_tag?and?self.genres_tag):
????????????all_tags?=?‘‘
????????else:
????????????all_tags?=?[self.form_tag?self.type_tag?self.countries_tag?self.genres_tag]
????????query_param?=?{
????????????‘sort‘:?self.sort
????????????‘range‘:?self.range
????????????‘tags‘:?all_tags
????????}

????????#?string.printable:表示ASCII字符就不用編碼了
????????query_params?=?parse.urlencode(query_param?safe=string.printable)
????????#?去除查詢參數(shù)中無(wú)效的字符
????????invalid_chars?=?[‘(‘?‘)‘?‘[‘?‘]‘?‘+‘?‘\‘‘]
????????for?char?in?invalid_chars:
????????????if?char?in?query_params:
????????????????query_params?=?query_params.replace(char?‘‘)
????????#?把查詢參數(shù)和base_url組合起來(lái)形成完整的url
????????self.full_url?=?self.full_url.format(query_params=query_params)?+?‘&start={start}‘
????????‘‘‘
????????query_params?=?‘tags=‘+str(self.form_tag)+‘‘+str(self.type_tag)+‘‘+str(self.countries_tag)+‘‘+\
????????????str(self.genres_tag)
????????self.full_url?=?self.full_url.format(query_params=query_params)?+?‘&start={start}‘
????????‘‘‘

????def?download_movies(self?offset):
????????“““下載電影信息
????????:param?offset:?控制一次請(qǐng)求的影視數(shù)量
????????:return?resp:請(qǐng)求得到的響應(yīng)體“““
????????full_url?=?self.full_url.format(start=offset)
????????print(full_url)
????????resp?=?None
????????try:
????????????#方法1.USER_AGENT配置仿造瀏覽器訪問(wèn)?headers
????????????#方法2.偽造Cookie,解封豆瓣IP?cookies?=?jar
????????????#jar?=?requests.cookies.RequestsCookieJar()??
????????????#jar.set(‘bid‘?‘ehjk9OLdwha‘?domain=‘.douban.com‘?path=‘/‘)
????????????#jar.set(‘11‘?‘25678‘?domain=‘.douban.com‘?path=‘/‘)
????????????#方法3.使用代理IP?proxies
????????????resp=requests.get(full_urlheaders=self.headersproxies=self.proxies)
????????except?Exception?as?e:
????????????print(resp)
????????????logging.error(e)
????????return?resp

????def?get_movies(self?resp):
????????“““獲取電影信息
????????:param?resp:?響應(yīng)體
????????:return?movies:爬取到的電影信息“““
????????if?resp:
????????????if?resp.status_code?==?200:
????????????????#?獲取響應(yīng)文件中的電影數(shù)據(jù)
????????????????movies?=?dict(resp.

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2020-01-05 21:57  新建文件夾\
     文件        7373  2018-08-07 23:25  新建文件夾\Crawler program.py
     文件    78450688  2018-08-06 16:22  新建文件夾\douban.db
     文件    11426666  2018-08-08 20:38  新建文件夾\movie_10.csv
     文件    12239589  2018-08-10 00:10  新建文件夾\use_10.csv
     目錄           0  2019-07-03 22:32  新建文件夾\__MACOSX\
     文件         814  2018-08-06 16:22  新建文件夾\__MACOSX\._douban.db
     文件         210  2018-08-07 23:25  新建文件夾\__MACOSX\._douban.py

評論

共有 條評論

相關(guān)資源