91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 8KB
    文件類型: .py
    金幣: 2
    下載: 1 次
    發布日期: 2021-06-17
  • 語言: Python
  • 標簽: python 今日頭條

資源簡介

爬取今日頭條列表以及今日頭條詳情內容并存儲到數據庫。

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
#設置utf-8編碼
import os
import random
import time
from hashlib import md5
from multiprocessing.pool import Pool
from urllib.parse import urlencode

import pymysql
import requests
# First and last group numbers for the crawl.
# NOTE(review): presumably the range of result pages to fetch; the code that
# consumes these constants is not visible here -- confirm against main().
GROUP_START = 1
GROUP_END = 20

#爬取今日頭條列表頁
#由于今日頭條爬取頻繁會封ip，推薦使用牛魔ip代理或者太陽代理等自動切換代理ip的軟件
def get_page(offset, keyword):
    """Fetch one page of Toutiao search results as decoded JSON.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        keyword: Search term sent as the ``keyword`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` for any other status code or when the connection fails.
    """
    params = {
        'offset': offset,
        'format': 'json',
        'keyword': keyword,
        'autoload': 'true',
        'count': '20',
        'cur_tab': '1',
        'from': 'news',
    }
    url = 'https://www.toutiao.com/search_content/?' + urlencode(params)
    try:
        response = requests.get(url)
    except requests.ConnectionError:
        # The exception class is ``ConnectionError``; the previous spelling
        # (``Connectionerror``) does not exist on the requests module and
        # would itself raise AttributeError while handling the failure.
        return None
    if response.status_code == 200:
        return response.json()
    return None


def get_images(json):
    """Yield one flat record per entry of a search-result payload.

    Args:
        json: Decoded response mapping; its ``'data'`` entry is expected to
            be an iterable of item mappings.

    Yields:
        A dict with the keys ``title``, ``media_name``, ``datetime``,
        ``image01``, ``image02``, ``image03`` and ``tag_id``. The three
        image fields hold the URLs of the first three entries of the item's
        ``image_list`` in order; unused slots are empty strings. ``tag_id``
        is always a string (``'None'`` when the item has no ``tag_id``).
        Nothing is yielded when ``'data'`` is missing or empty.
    """
    data = json.get('data')
    if not data:
        return
    for item in data:
        image_list = item.get('image_list') or []
        # Each slot gets its own image. Previously every slot was overwritten
        # with the same (last) URL, and lists longer than three left all
        # slots empty.
        urls = [image.get('url') or '' for image in image_list[:3]]
        urls += [''] * (3 - len(urls))
        image01, image02, image03 = urls
        yield {
            'title': item.get('title'),
            'media_name': item.get('media_name'),
            'datetime': item.get('datetime'),
            'image01': image01,
            'image02': image02,
            'image03': image03,
            'tag_id': str(item.get('tag_id')),
        }


#保存圖片到本地
def save_image(item):
    """Download the image referenced by ``item`` into a folder named after its title.

    The file is stored as ``<title>/<md5 of content>.jpg``; a file that
    already exists at that path is not rewritten.

    Args:
        item: Mapping that provides ``'title'`` (used as the directory name)
            and ``'image'`` (the image URL).
            NOTE(review): the URL is prefixed with ``'http:'``, so it is
            presumably protocol-relative (``//host/path``) -- confirm
            against the caller.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    title = item.get('title')
    local_image_url = item.get('image')
    if not local_image_url:
        # Without a URL there is nothing to download; previously this case
        # crashed with AttributeError on ``None.replace``.
        print('Failed to save image')
        return

    # Create the directory EAFP-style so concurrent workers that target the
    # same title cannot race between an existence check and the mkdir call.
    try:
        os.mkdir(title)
    except FileExistsError:
        pass

    try:
        # Swap the 'list' path segment for 'large' before downloading.
        new_image_url = local_image_url.replace('list', 'large')
        response = requests.get('http:' + new_image_url)
        if response.status_code == 200:
            file_path = '{0}/{1}.{2}'.format(
                title, md5(response.content).hexdigest(), 'jpg')
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(response.content)
            else:
                print('Already Downloaded', file_path)
    except requests.ConnectionError:
        # Correct class name; ``requests.Connectionerror`` does not exist.
        print('Failed to save image')


def?main(offset):
????#?創建連接
????conn?=?pymysql.connect(host=‘127.0.0.1‘?port=3306?user=‘root‘?passwd=‘123456‘?db=‘today_news‘?c

評論

共有 條評論