資源簡介
Python爬取小說網站
代碼片段和文件信息
import os

import pymysql
import requests
from lxml import etree
# Design pattern -- object-oriented: inheritance, encapsulation
class Spider(object):
    """Crawl the qidian.com book listing and request each novel's page.

    NOTE(review): the published snippet is cut off inside ``file_data``;
    only the statements visible in the excerpt are reproduced here.
    """

    # Listing page scraped by start_request.
    START_URL = "https://www.qidian.com/all"
    # Seconds to wait for a response; without a timeout requests never
    # gives up on a stalled server.
    REQUEST_TIMEOUT = 10

    def start_request(self):
        """Fetch the listing page and pass every novel to ``file_data``.

        Step 1: request the site, extract the link text and ``href`` of
        each ``h4/a`` under ``div[@class="book-mid-info"]``, create a
        directory named after the link text (relative to the current
        working directory) when nothing exists at that path, then call
        ``file_data`` with the text and the href.
        """
        response = requests.get(self.START_URL, timeout=self.REQUEST_TIMEOUT)
        html = etree.HTML(response.text)  # parse the HTML with lxml's etree
        Bigtit_list = html.xpath('//div[@class="book-mid-info"]/h4/a/text()')
        Bigsrc_list = html.xpath('//div[@class="book-mid-info"]/h4/a/@href')
        for Bigtit, Bigsrc in zip(Bigtit_list, Bigsrc_list):
            if not os.path.exists(Bigtit):
                os.mkdir(Bigtit)
            self.file_data(Bigtit, Bigsrc)

    def file_data(self, Bigtit, Bigsrc):
        """Request a single novel's page.

        Step 2 (per the original comment): request the novel to get its
        HTML source, then extract chapter names and chapter links.

        Args:
            Bigtit: link text scraped from the listing page.
            Bigsrc: link href from the listing page; "http:" is prepended,
                so it is presumably protocol-relative ("//host/path") --
                TODO confirm.
        """
        response = requests.get("http:" + Bigsrc, timeout=self.REQUEST_TIMEOUT)
        # NOTE(review): the excerpt ends here; the chapter extraction that
        # would consume `response` is not part of the visible source.
評論
共有 條評論