資源簡介
Python爬蟲實現對圖蟲網相關圖片的在線爬取,只需要填寫相應的主題名即可自動下載資源至相應目錄
代碼片段和文件信息
import json
import os
import time
import urllib.parse
import urllib.request
def url_open(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop-browser ``User-Agent`` header in
    place of urllib's default one.

    Args:
        url: Address to request; handed unchanged to
            ``urllib.request.Request``.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the
            request cannot be completed.
    """
    headers = (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 "
        "Edge/14.14393",
    )
    req = urllib.request.Request(url)
    req.add_header(headers[0], headers[1])
    # The context manager closes the response even if read() raises, so
    # repeated calls do not leak open connections.
    with urllib.request.urlopen(req) as response:
        return response.read()
????
def get_pages(url, count):
    """Return the ``site_id/post_id`` path of every post listed at *url*.

    The body returned by ``url_open(url)`` is decoded as UTF-8 and parsed
    as JSON. The parsed object must have a ``postList`` entry whose items
    each provide ``site_id`` and ``post_id``.

    Args:
        url: Address of the JSON listing to fetch.
        count: Not used by this function; kept in the signature so that
            existing callers keep working.

    Returns:
        A list of ``"<site_id>/<post_id>"`` strings, in the order the
        posts appear in ``postList``.

    Raises:
        KeyError: if ``postList``, ``site_id`` or ``post_id`` is missing.
    """
    html = url_open(url).decode('utf-8')
    target = json.loads(html)
    # f-string formatting also copes with ids delivered as JSON numbers,
    # where plain ``+`` concatenation would raise TypeError.
    return [
        f"{post['site_id']}/{post['post_id']}"
        for post in target["postList"]
    ]
def?find_imgs(url):
????html?=?url_open(url).decode(‘utf-8‘)
????img_addrs?=?[]
????a?=?html.find(‘class=“multi-photo-image“?src=“‘)
????while?a?!=?-1:
????????b?=?html.find(‘.jpg‘?a?a+255)
????????if?b?!=?-1:
????????????img_addrs.append(html[a+31?:?b+4])
????????else:
????????????b?=?a?+?31
???
- 上一篇:網站圖片爬取代碼
- 下一篇:對任意關鍵字爬蟲對應圖片代碼
評論
共有 條評論