資源簡介
運用了 requests、os、re、lxml、threading、queue、prettytable、bs4 等庫,實現了小說的智能選擇與爬取。
代碼片段和文件信息
import?requests
import?os
import?re
import?time
from?lxml?import?etree
from?threading?import?Thread
from?queue?import?Queue
from?prettytable?import?PrettyTable
import?prettytable?as?pt
from?bs4?import?BeautifulSoup
def request():
    """Prompt for a novel title and send it to the xsbiquge search page.

    The typed title is stored in the module-level global ``input_name``.
    The search response is neither returned nor inspected here.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            does not answer within the timeout.
    """
    global input_name
    # Example of a book URL: "https://www.booktxt.net/2_2219/"
    input_name = input("請輸入要查找的小說:\n")
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/73.0.3683.86 Safari/537.36 '
    }
    root = "https://www.xsbiquge.com/search.php"
    # Pass the keyword through ``params`` so characters such as '&' or '#'
    # typed by the user are escaped instead of altering the query string,
    # and bound the wait so a stalled server cannot hang the program.
    response = requests.get(
        root,
        params={"keyword": input_name},
        headers=headers,
        timeout=10,
    )
    # NOTE(review): the response is not used after this point in the visible
    # code; the encoding is still set as in the original.
    response.encoding = 'utf-8'
# Module-level placeholder; presumably the value handed to find_details()
# by code outside this excerpt -- TODO confirm against the caller.
information = 0
def find_details(information):
    """Search biquger.com for ``input_name`` and let the user confirm the hit.

    The search page is fetched and parsed. When the text of the table under
    ``#wrapper`` is at most one character long, the first ``novelslist2``
    entry is printed and the user is asked to answer 1 (accept) or
    2 (enter a new title via ``request()`` and search again). Answers that
    are not plain integers print a hint, wait three seconds and show the
    search page again.

    Args:
        information: Not read by this function; kept so existing callers
            keep working.

    Returns:
        0 once a link has been stored in the global ``information_first``.
        None when the ``#wrapper`` table text is longer than one character,
        when the answer is an integer other than 1 or 2, or when the page
        lists no novel link.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            does not answer within the timeout.
    """
    global information_first
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/73.0.3683.86 Safari/537.36 '
    }
    root = "http://www.biquger.com/modules/article/search.php"
    link_xpath = "//div[@class='novelslist2']/ul/li[position()>1]/span[@class='s2']/a"

    # Retries are handled by looping rather than by calling find_details()
    # again: repeated bad input can no longer exhaust the recursion limit,
    # and the success value 0 is returned no matter how many retries ran.
    while True:
        # ``input_name`` is read on every pass because request() may have
        # replaced it; ``params`` escapes whatever the user typed.
        response = requests.get(
            root,
            params={"searchkey": input_name},
            headers=headers,
            timeout=10,
        )
        # The raw bytes are parsed, so lxml detects the encoding itself.
        page_content = etree.HTML(response.content)
        find_first = page_content.xpath('string(//*[@id="wrapper"]/table)')
        print(find_first)
        print(len(find_first))
        find_first_show = page_content.xpath(
            "normalize-space(string(//div[@class='novelslist2']/ul/li[position()>1]))")

        if len(find_first) > 1:
            # The table has content; this function does nothing further.
            return None

        print(find_first_show)
        input_name_find = input("你所查找的小說僅此一個按1開始下載,按2返回查找頁面\n")

        # isdecimal() guarantees int() below cannot fail (isdigit() also
        # accepts characters such as superscripts that int() rejects).
        if not input_name_find.replace(".", '').isdecimal():
            print("請輸入1或2 不得輸入其他字符 3秒后將返回當前的小說頁面")
            for tick in ("3", "2", "1"):
                print(tick)
                time.sleep(1)
            continue

        if input_name_find.count(".") != 0:
            print("請不要輸入帶點的小數,只能輸入1或2 3秒后返回")
            time.sleep(3)
            continue

        choice = int(input_name_find)
        if choice == 2:
            request()
            continue
        if choice != 1:
            return None

        find_first_a = page_content.xpath(link_xpath + "/text()")
        print(find_first_a)
        find_first_href = page_content.xpath(link_xpath + "/@href")
        if not find_first_href:
            # Nothing to download: the result list holds no link at all.
            print("未找到相關小說")
            return None
        try:
            index_first = find_first_a.index(input_name)
        except ValueError:
            # The listed title is not an exact match for what was typed
            # (e.g. a partial title). Fall back to the first link, which
            # presumably belongs to the entry printed above -- TODO confirm
            # against the page layout.
            index_first = 0
        information_first = find_first_href[index_first]
        return 0
class?MyThread(Thread):
????def?__init__(self?q):
????????Thread.__init__(self)
????????self.q?=?q
????def?run(self):
????????global?index
- 上一篇:抖音視頻無水印核心源碼
- 下一篇:15個pyqt5項目
評論
共有 條評論