資源簡介
彼岸花網壁紙爬蟲
代碼片段和文件信息
import random
import re
import time
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup as bf
def get_page(url):
    """Fetch *url* and return its body parsed with the 'html.parser' backend.

    Args:
        url: Address of the page to download.

    Returns:
        The parsed document produced by ``bf`` (the file-level alias for
        BeautifulSoup).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    # Use the response as a context manager so the underlying connection is
    # closed even if reading or parsing raises.
    with urlopen(url) as response:
        return bf(response.read(), 'html.parser')
# Read the hyperlinks from the <a> tags and keep only the image-page links
# Site-relative links to image detail pages, e.g. "/tupian/12345.html".
_TUPIAN_PAGE_RE = re.compile(r"^/tupian/.*\.html$")


def GetImg_href(page):
    """Collect absolute URLs of the image detail pages linked from *page*.

    Args:
        page: Parsed document exposing ``find_all('a')``; each returned tag
            must support ``.get('href')``.

    Returns:
        A list of strings, one per anchor whose ``href`` matches
        ``/tupian/*.html``, each prefixed with ``http://pic.netbian.com``.
        Document order is preserved.
    """
    img_href = []
    for anchor in page.find_all('a'):
        href = anchor.get('href')
        # Anchors without an href attribute yield None, which re.search
        # rejects with TypeError; skip them instead of crashing.
        if not href:
            continue
        match = _TUPIAN_PAGE_RE.search(href)
        if match is None:
            continue
        img_href.append("http://pic.netbian.com" + match.group())
    return img_href
# Site-relative paths of uploaded JPEG files, e.g. "/uploads/a/b.jpg".
_UPLOAD_JPG_RE = re.compile(r"^/uploads/.*\.jpg$")


def New_GetImg_href(page):
    """Collect the image source paths found on *page*.

    Args:
        page: Parsed document exposing ``find_all('img')``; each returned
            tag must support ``.get('src')``.

    Returns:
        A list of the ``src`` values that match ``/uploads/*.jpg``, in
        document order. The paths are returned as found (site-relative);
        no host prefix is added.
    """
    img_src = []
    for img in page.find_all('img'):
        src = img.get('src')
        # <img> tags without a src attribute yield None, which re.search
        # rejects with TypeError; skip them instead of crashing.
        if not src:
            continue
        match = _UPLOAD_JPG_RE.search(src)
        if match is None:
            continue
        img_src.append(match.group())
    return img_src
def?img_down_load(abc):
????????‘‘‘
?????????@a:已經(jīng)下載的數(shù)據(jù)塊?
????????@b
評論
共有 條評(píng)論