資源簡介
運行程序,輸入搜索關鍵詞,然後輸入文件名稱並輸入需要爬取的數量,所需圖片就會下載到當前所建的目錄裡。

代碼片段和文件信息
import os
import re
from urllib import error

import requests
from bs4 import BeautifulSoup
# Shared state used by the functions below and by the script entry point.
num = 0         # number of pictures saved so far (incremented by dowmloadPicture)
numPicture = 0  # number of pictures requested; set from user input in __main__
file = ''       # name of the output directory; set from user input in __main__
List = []       # one list of image URLs per result page, filled in by Find
def Find(url):
    """Count the image URLs found across the paged search results.

    Pages are fetched from ``url + str(offset)`` with the offset advancing in
    steps of 60 while it is below 1000. Every non-empty page's list of
    ``objURL`` matches is appended to the module-level ``List``.

    Args:
        url: Search URL prefix; the page offset is appended to it.

    Returns:
        The total number of ``objURL`` matches seen on all fetched pages.
    """
    print('正在檢測圖片總數,請稍等.....')
    page_step = 60
    max_offset = 1000
    total = 0
    for offset in range(0, max_offset, page_step):
        try:
            response = requests.get(url + str(offset), timeout=7)
        except requests.RequestException:
            # A page that cannot be fetched is skipped; later pages are still tried.
            continue
        # First locate the picture URLs with a regular expression.
        pic_url = re.findall(r'"objURL":"(.*?)"', response.text, re.S)
        if not pic_url:
            # An empty page is treated as the end of the results.
            break
        # ``List`` is mutated in place, so no ``global`` statement is needed.
        List.append(pic_url)
        total += len(pic_url)
    return total
def recommend(url):
    """Collect the link texts inside the page's ``<div id="topRS">`` element.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with the text of every ``<a>`` inside the ``topRS`` div (empty
        when the div is absent), or ``None`` when the page cannot be fetched.
    """
    try:
        # Same timeout as the other requests in this file, so a stalled
        # connection cannot hang the script.
        html = requests.get(url, timeout=7)
    except requests.RequestException:
        # requests never raises urllib's HTTPError, so its own base exception
        # is caught here; failure is still reported to the caller as None.
        return None
    html.encoding = 'utf-8'
    bsObj = BeautifulSoup(html.text, 'html.parser')
    div = bsObj.find('div', id='topRS')
    if div is None:
        return []
    return [anchor.get_text() for anchor in div.find_all('a')]
def dowmloadPicture(html, keyword):
    """Download the pictures referenced in *html* into the ``file`` directory.

    Every ``objURL`` match in *html* is fetched and written to
    ``<file>/<keyword>_<num>.jpg``. The module-level counter ``num`` is
    incremented after each saved picture, and the function returns as soon as
    ``num`` reaches the module-level ``numPicture``.

    Args:
        html: Text of one search-result page.
        keyword: Search keyword, used in the progress message and file names.
    """
    global num
    # First locate the picture URLs with a regular expression.
    pic_url = re.findall(r'"objURL":"(.*?)"', html, re.S)
    print('找到關鍵詞:' + keyword + '的圖片,即將開始下載圖片...')
    for each in pic_url:
        # Check the quota before downloading so that an already-satisfied
        # (or zero) quota never fetches one extra picture.
        if num >= numPicture:
            return
        print('正在下載第' + str(num + 1) + '張圖片,圖片地址:' + str(each))
        try:
            pic = requests.get(each, timeout=7)
            # Treat HTTP error responses as failures instead of saving the
            # error page under a .jpg name.
            pic.raise_for_status()
        except requests.RequestException:
            print('錯誤,當前圖片無法下載')
            continue
        # os.path.join picks the right separator for the running platform.
        target = os.path.join(file, keyword + '_' + str(num) + '.jpg')
        with open(target, 'wb') as fp:
            fp.write(pic.content)
        num += 1
if?__name__?==?‘__main__‘:??#?主函數入口
????word?=?input(“請輸入搜索關鍵詞(可以是人名,地名等):?“)
????#?add?=?‘http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120‘
????url?=?‘http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=‘?+?word?+?‘&pn=‘
????tot?=?Find(url)
????Recommend?=?recommend(url)??#?記錄相關推薦
????print(‘經過檢測%s類圖片共有%d張‘?%?(word?tot))
????numPicture?=?int(input(‘請輸入想要下載的圖片數量?‘))
????file?=?input(‘請建立一個存儲圖片的文件夾,輸入文件夾名稱即可‘)
????y?=?os.path.exists(file)
????if?y?==?1:
????????print(‘該文件已存在,請重新輸入‘)
????????file?=?input(‘請建立一個存儲圖片的文件夾,)輸入文件夾名稱即可‘)
????????os.mkdir(file)
????else:
????????os.mkdir(file)
????t?=?0
????tmp?=?url
????while?t?????????try:
????????????url?=?tmp?+?str(t)
????????????result?=?requests.get(url?timeout=10)
????????????print(url)
????????except?error.HTTPError?as?e:
????????????print(‘網絡錯誤,請調整網絡后重試‘)
????????????t?=?t?+?60
????????else:
????????????dowmloadPictur
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       3574  2019-08-30 11:52  main.py
----------- ---------  ---------- -----  ----
                 3574                    1
評論
共有 條評論