91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 2 次
    發布日期: 2021-09-04
  • 語言: Python
  • 標簽: python爬蟲

資源簡介

爬取豆瓣電視劇《天盛長歌》的影評,去掉其中的停用詞,並生成詞雲。

資源截圖

代碼片段和文件信息

#coding=utf-8
import random
import re

import jieba.analyse
import matplotlib.pyplot as plt
import numpy as np
import pymysql
import requests
from lxml import etree
from PIL import Image, ImageSequence
from wordcloud import WordCloud, ImageColorGenerator
# from scipy.misc import imread
# from wordcloud import WordCloud
# from wordcloud import ImageColorGenerator
# import matplotlib.pyplot as plt
# from os import path


def geturl(url, IP_pools, retries=3):
    """Fetch ``url`` through a randomly chosen proxy and parse it as HTML.

    One User-Agent is picked at random for the whole call; a fresh proxy is
    picked at random from ``IP_pools`` for every attempt. An attempt counts
    as failed when the request raises, the status code is not 200, or the
    body cannot be parsed by lxml.

    Args:
        url: Address of the page to download.
        IP_pools: Sequence of proxy addresses as ``"host:port"`` strings.
        retries: Maximum number of attempts (defaults to 3, matching the
            original three nested tries).

    Returns:
        The parsed document as returned by ``etree.HTML``, or ``None`` when
        every attempt failed or ``IP_pools`` is empty.
    """
    USER_AGENTS = [
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    ]
    headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "Host": "movie.douban.com",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    }
    failure_banner = "**" * 20 + "出現錯誤!" + "**" * 20

    # random.choice() raises IndexError on an empty pool; report the failure
    # the same way as exhausted retries instead of relying on a catch-all.
    if not IP_pools:
        print(failure_banner)
        return None

    for _ in range(retries):
        ip_one = random.choice(IP_pools)
        print(ip_one)
        # NOTE(review): only the "http" scheme is mapped to the proxy, so an
        # https:// url would be fetched directly - confirm this is intended.
        proxies1 = {"http": "http://" + ip_one}
        print(url)
        try:
            r = requests.get(url=url, headers=headers, proxies=proxies1, timeout=5)
            print(r.status_code)
            # Explicit check instead of `assert`, which is removed under -O.
            if r.status_code != 200:
                continue
            return etree.HTML(r.content)
        except (requests.RequestException, etree.LxmlError):
            # Dead proxy, timeout or unparsable body: try another proxy.
            continue

    print(failure_banner)
    return None

def?get_IP():
????con?=?pymysql.connect(host=‘192.168.0.136‘?user=‘root‘?passwd=‘oysm=K8cV6eldcv‘?db=‘lh‘?port=3306
??????????????????????????charset=‘utf8‘)
????if?con:
????????print(“ok“)
????????cur?=?con.cursor()
????????if?cur:
????????????sql_read?=?“select?IPport?from?ip_pool?where?score??=?%s?“
????????????cur.execute(sql_read?“T“)
????????????con.commit()
????????????lines?=?cur.fetchall()
????????????a_list?=?[]
????????????for?i?in?lines:
????????????????li?=?i[0]?+?“:“?+?i[1]
????????????????#?print(li)
????????????????a_lis

評論

共有 條評論