資源簡介
Python 抓取網頁下載鏈接
代碼片段和文件信息
########################################################
# Find gudaiyanqing xiaoshuo on http://www.bookben.com #
########################################################
# -*- coding: utf-8 -*-
import time
import urllib.request

from bs4 import BeautifulSoup

# Running count of the novel links processed by the loop further down.
num = 0
# Site root; prepended to the hrefs found on the listing page.
web = "http://m.bookben.com"
# Category listing page that is scanned.
url = "http://m.bookben.com/gudaiyanqing"
# Report text; begins with a header naming the scanned URL.
result = "######Get the update of novel website on " + url + "\n" + "\n"
# Local-time date and timestamp strings (their use is not visible in this excerpt).
date_mark = time.strftime('%Y-%m-%d', time.localtime(time.time()))
time_mark = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))

# Get the update of bookben.com website
# The response is used as a context manager so the connection is closed
# as soon as the body has been read.
with urllib.request.urlopen(url) as response:
    # Bytes that are not valid gb2312 are replaced instead of raising.
    main_page = response.read().decode('gb2312', errors='replace')
main_soup = BeautifulSoup(main_page, "lxml")
# Every <li class="li_bg"> element of the listing page.
main_classes = main_soup.find_all('li', class_='li_bg')
# Walk every link inside the listing items: print its running index, title
# and absolute URL, then download and parse the page it points to.
for main_links in main_classes:
    for main_link in main_links.find_all('a'):
        novel_name = main_link.get_text()
        # hrefs are joined onto the site root by plain concatenation.
        novel_url = web + main_link.get('href')
        num = num + 1
        print(str(num) + novel_name + novel_url)

        # Close the connection as soon as the body has been read.
        with urllib.request.urlopen(novel_url) as novel_response:
            # Bytes that are not valid gb2312 are replaced instead of raising.
            novel_page = novel_response.read().decode('gb2312', errors='replace')
        novel_soup = BeautifulSoup(novel_page, "lxml")
        # NOTE(review): the excerpt is cut off mid-statement at this point; the
        # original line reads "novel_date = novel_sou" and the remainder of the
        # script is not available, so it is left out rather than guessed at.
- 上一篇:libvsm_3.1
- 下一篇:k均值聚類python實現
評論
共有 條評論