資源簡介
利用 Selenium 編寫的批量下載 Google 學術論文的腳本。使用前請先下載並配置 Firefox 對應的 WebDriver。
代碼片段和文件信息
import re
import sys
import time

import requests
from selenium import webdriver
# Python 2 only: switch the interpreter's default string encoding to UTF-8
# (presumably so non-ASCII page text can be handled as plain ``str`` -- confirm).
# ``reload(sys)`` is required there because ``sys.setdefaultencoding`` is
# removed from the module at interpreter start-up. Python 3 already defaults
# to UTF-8 and has no builtin ``reload``, so the hack is skipped.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding('utf-8')
def getpdfurl(tstr):
    """Extract the distinct PDF link targets from an HTML string.

    Scans *tstr* for ``href="..."`` attribute values that contain ``pdf``
    and returns each captured value once, in order of first appearance.
    A captured value ends at the last ``pdf`` inside the attribute, so
    anything after it (e.g. a query string) is not included.

    Args:
        tstr: HTML text to search, e.g. a page source.

    Returns:
        A list of unique link strings; empty when nothing matches.
    """
    # Excluding '"' from the character class keeps a match inside a single
    # attribute value; a bare \S+ could run across the closing quote into
    # the following markup whenever no whitespace intervenes.
    repdflink = r'href="([^"\s]+pdf)'
    seen = set()
    pdflink = []
    for link in re.findall(repdflink, tstr):
        # Order-preserving de-duplication (set() alone gives arbitrary order).
        if link not in seen:
            seen.add(link)
            pdflink.append(link)
    return pdflink
def writefile(filepath, text):
    """Write *text* to *filepath*, replacing any existing content.

    Args:
        filepath: Path of the file to create or overwrite.
        text: String to write.
    """
    # The context manager closes the handle even if write() raises.
    with open(filepath, 'w') as file_object:
        file_object.write(text)
def googlescholararticle(qword, recordnum):
    """Load one Google Scholar result page in Firefox and return its PDF links.

    Args:
        qword: Search query string, appended as-is to the ``q=`` URL
            parameter (no URL-encoding is applied here).
        recordnum: Value for the ``start=`` URL parameter; converted with
            ``str()`` so both strings and integers are accepted.

    Returns:
        The result of ``getpdfurl`` applied to the loaded page source.
    """
    googlescholarurl = 'https://scholar.google.com/scholar'
    url = googlescholarurl + '?start=' + str(recordnum) + '&q=' + qword
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        data = driver.page_source
    finally:
        # Always shut the browser down, even when the page load fails,
        # so no Firefox process is left behind.
        driver.quit()
    # NOTE(review): the original return statement was truncated after
    # ``getpd``; completed as getpdfurl(data) to match the commented-out
    # call that preceded it -- confirm against the full script.
    return getpdfurl(data)
評論
共有 條評論