資源簡介
Python爬取論文標題、作者、摘要等信息并存入MySQL。
代碼片段和文件信息
import?requests
from?bs4?import?BeautifulSoup
import?re
import?pymysql
global?count
def getUrls():
    """Yield the issue-index page URLs to crawl, in ascending id order.

    Each URL has the form
    ``http://crad.ict.ac.cn/CN/volumn/volumn_<id>.shtml`` where ``<id>``
    runs from 1300 through ``1300 + all_items`` inclusive (28 URLs).

    Yields:
        str: one page URL per iteration.
    """
    all_items = 12 * 2 + 3
    # "volumn" is the spelling used in the URL path; keep it verbatim.
    partstr = "http://crad.ict.ac.cn/CN/volumn/volumn_"
    # range(all_items + 1) keeps the upper id (1300 + all_items) inclusive.
    for i in range(all_items + 1):
        yield partstr + str(1300 + i) + ".shtml"
def getHTMLText(url):
    """Fetch *url* over HTTP and return the response body as text.

    Args:
        url: address of the page to download.

    Returns:
        The decoded page text, or the literal string ``"error"`` when the
        request fails (connection problem, timeout, or a non-2xx status
        raised by ``raise_for_status``). Callers must check for that
        sentinel value; no exception is propagated for request failures.
    """
    try:
        r = requests.get(url, timeout=50)
        r.raise_for_status()
        # Decode using the encoding detected from the body content rather
        # than the one declared in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: report failure through the sentinel value
        # instead of raising, but let unrelated errors (e.g.
        # KeyboardInterrupt) propagate.
        return "error"
def getConnection():
    """Open and return a new pymysql connection to the ``adnm`` database.

    The connection targets the MySQL server on ``localhost:3306`` and uses
    ``pymysql.cursors.DictCursor`` as its cursor class.

    Returns:
        The connection object returned by ``pymysql.connect``. This function
        does not close it; that is left to the caller.
    """
    connection = pymysql.connect(
        host="localhost",
        port=3306,
        user="root",
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration or the environment.
        password="123456",
        database="adnm",
        charset="utf8",
        cursorclass=pymysql.cursors.DictCursor,
    )
    return connection
def?parsePage(infoList?html):
????soup?=?BeautifulSoup(html“html.parser“)
????item?=?soup(name=‘a‘attrs={“class“:“biaoti“})
????biaoti?=?re.findall(r‘target=“_blank“>(.*?)????item?=?soup(name=‘dd‘attrs={“class“:“zuozhe“})
????zuozhe?=?re.findall(r‘class=“zuozhe“>(.*?)??
評論
共有 條評論