91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 1 次
    發布日期: 2021-08-02
  • 語言: Python
  • 標簽: python 爬蟲

資源簡介

可用的谷歌圖片爬蟲,默認的關鍵詞是心情,如angry、sad

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
# @Author: wlc
# @Date:   2017-09-25 23:54:24
# @Last Modified by:   Henry
# @Last Modified time: 2018-7-11 22:40:11


####################################################################################################################
# Download images from Google with specified keywords for searching
# search query is created by "main_keyword + supplemented_keyword"
# if there are multiple keywords, each main_keyword will join with each supplemented_keyword
# mainly uses urllib, and each search query will download at most 100 images due to page source code limited by Google
# allows single process or multiple processes for downloading
####################################################################################################################


import logging
import os
import re
import time
import urllib.error
import urllib.request
from multiprocessing import Pool

from user_agent import generate_user_agent


# All download errors and diagnostics are appended to this file.
log_file = 'download.log'
# "a+" keeps the log of earlier runs instead of truncating it on every start.
logging.basicConfig(
    level=logging.DEBUG,
    filename=log_file,
    filemode="a+",
    format="%(asctime)-15s %(levelname)-8s  %(message)s",
)


def download_page(url):
    """Download the raw content of a page.

    The request is sent with a generated User-Agent and a Google Referer
    header. Any failure is reported on stdout and in the log, and is
    signalled to the caller by a ``None`` return instead of an exception.

    Args:
        url (str): url of the page

    Returns:
        The decoded text of the page, or None if the download failed.
    """
    try:
        headers = {
            'User-Agent': generate_user_agent(),
            'Referer': 'https://www.google.com',
        }
        req = urllib.request.Request(url, headers=headers)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req) as resp:
            charset = resp.headers.get_content_charset() or 'utf-8'
            # Decode the payload: str() on bytes would return the "b'...'"
            # repr with doubled backslashes rather than the page text.
            return resp.read().decode(charset, errors='replace')
    except Exception:
        # Best-effort download: report the failure and let the caller go on.
        print('error while downloading page {0}'.format(url))
        # exc_info keeps the underlying cause in the log file.
        logging.error('error while downloading page {0}'.format(url), exc_info=True)
        return None


def parse_page(url):
    """Parse the page and collect the image links it contains.

    The page is fetched with ``download_page`` and scanned for
    ``"ou":"<link>"`` entries. The original author notes that Google limits
    a result page to at most 100 links.

    Args:
        url (str): url of the page

    Returns:
        A set containing the urls of images; empty if the page could not be
        downloaded or contained no links.
    """
    page_content = download_page(url)
    if not page_content:
        # Download failed (already reported by download_page) or empty page.
        return set()

    link_list = re.findall(r'"ou":"(.*?)"', page_content)
    if not link_list:
        print('get 0 links from page {0}'.format(url))
        logging.info('get 0 links from page {0}'.format(url))
    # A set removes duplicate links found on the same page.
    return set(link_list)


def?download_images(main_keyword?supplemented_keywords?download_dir):
????“““download?images?with?one?main?keyword?and?multiple?supplemented?keywords
????
????Args:
????????main_keyword?(str):?main?keyword
????????supplemented_keywords?(list[str]):?list?of?supplemented?keywords
????
????Returns:
????????None
????“““??
????image_links?=?set()
????print(‘Process?{0}?Main?keyword:?{1}‘.format(os.getpid()?main_keyword))

????#?create?a?directory?for?a?main?keyword
????img_dir?=??download_dir?+?main_keyword?+?‘/‘
????if?not?os.path.exists(img_dir)

評論

共有 條評論