91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介


資源截圖

代碼片段和文件信息

import?logging
import?random
import?string
import?requests
import?time
from?collections?import?deque
from?urllib?import?parse
from?bs4?import?BeautifulSoup?as?beaut_soup
import?pandas?as?pd
import?re


#?from?settings?import?User_AgentsAgent_IP


class?DoubanSpider(object):
????“““豆瓣爬蟲“““

????def?__init__(self?form?Type?country?genres):
????????#?基本的URL
????????#?self.base_url?=?‘https://movie.douban.com/tag/#/?sort=S&range=010&‘
????????self.base_url?=?‘https://movie.douban.com/j/new_search_subjects?sort=T&range=010&‘
????????self.full_url?=?self.base_url?+?‘{query_params}‘
????????#?從User-Agents中選擇一個User-Agent
????????#?self.headers?=?{‘User-Agent‘:random.choice(User_Agents)}
????????self.headers?=?{‘User-Agent‘:?‘Mozilla/4.0‘}
????????#?self.proxies?=?{‘http‘:random.choice(Agent_IP)}
????????#?可選參數(shù)
????????self.form_tag?=?form??#?影視形式
????????self.type_tag?=?Type??#?類型
????????self.countries_tag?=?country??#?地區(qū)
????????self.genres_tag?=?genres??#?特色
????????#?默認參數(shù)
????????self.sort?=?‘T‘??#?排序方式默認是T表示熱度
????????self.range?=?0?10??#?評分范圍

????def?encode_query_data(self):
????????“““對輸入信息進行編碼處理“““

????????if?not?(self.form_tag?and?self.type_tag?and?self.countries_tag?and?self.genres_tag):
????????????all_tags?=?‘‘
????????else:
????????????all_tags?=?[self.form_tag?self.type_tag?self.countries_tag?self.genres_tag]
????????query_param?=?{
????????????‘sort‘:?self.sort
????????????‘range‘:?self.range
????????????‘tags‘:?all_tags
????????}

????????#?string.printable:表示ASCII字符就不用編碼了
????????query_params?=?parse.urlencode(query_param?safe=string.printable)
????????#?去除查詢參數(shù)中無效的字符
????????invalid_chars?=?[‘(‘?‘)‘?‘[‘?‘]‘?‘+‘?‘\‘‘]
????????for?char?in?invalid_chars:
????????????if?char?in?query_params:
????????????????query_params?=?query_params.replace(char?‘‘)
????????#?把查詢參數(shù)和base_url組合起來形成完整的url
????????self.full_url?=?self.full_url.format(query_params=query_params)?+?‘&start={start}‘
????????‘‘‘
????????query_params?=?‘tags=‘+str(self.form_tag)+‘‘+str(self.type_tag)+‘‘+str(self.countries_tag)+‘‘+\
????????????str(self.genres_tag)
????????self.full_url?=?self.full_url.format(query_params=query_params)?+?‘&start={start}‘
????????‘‘‘

????def?download_movies(self?offset):
????????“““下載電影信息
????????:param?offset:?控制一次請求的影視數(shù)量
????????:return?resp:請求得到的響應體“““
????????full_url?=?self.full_url.format(start=offset)
????????print(full_url)
????????resp?=?None
????????try:
????????????#?方法1.USER_AGENT配置仿造瀏覽器訪問?headers
????????????#?方法2.偽造Cookie,解封豆瓣IP?cookies?=?jar
????????????#?jar?=?requests.cookies.RequestsCookieJar()
????????????#?jar.set(‘bid‘?‘ehjk9OLdwha‘?domain=‘.douban.com‘?path=‘/‘)
????????????#?jar.set(‘11‘?‘25678‘?domain=‘.douban.com‘?path=‘/‘)
????????????#?方法3.使用代理IP?proxies
????????????resp?=?requests.get(full_url?headers=self.headers)??#?proxies=self.proxies
????????except?Exception?as?e:
????????????print(resp)
????????????

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        8006  2020-12-13 16:38  试验1.0.py
     文件         755  2020-12-13 16:38  __MACOSX\._试验1.0.py
     目錄           0  2020-12-13 16:46  data\
     文件         212  2020-12-13 16:46  __MACOSX\._data
     文件        6148  2020-12-13 16:46  data\.DS_Store
     文件         120  2020-12-13 16:46  __MACOSX\data\._.DS_Store
     文件       15079  2020-12-13 16:41  data\movie.csv
     文件         235  2020-12-13 16:41  __MACOSX\data\._movie.csv
     文件       15079  2020-12-13 16:41  data\movie.csv
     文件         235  2020-12-13 16:41  __MACOSX\data\._movie.csv

評論

共有 條評論