資源簡介
使用前請務必閱讀使用說明。本程序批量下載 Excel 中的文件鏈接，並自動命名後寫入 Excel，解決了鏈接數量較多時用迅雷下載無法統計文件名的問題；本程序可以做到下載文件名與下載鏈接一一對應，並寫入 Excel。
代碼片段和文件信息
#encoding:utf8
import codecs
import logging
import re
import time
import urllib
import urlparse

import xlrd
# Show INFO-and-above messages; `logger` is this module's named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Spider(object):
    """Download the files listed in an Excel sheet and log name/URL pairs.

    The first sheet of the workbook is read: column 0 holds a fallback file
    name and column 1 holds the download URL.  For every downloaded file a
    ``<file name>,<url>`` line is appended to ``ausstats.csv`` in the current
    working directory.
    """

    name = "ausstats"
    excel_sync = None

    # Resolved by module name, so it is the same logger object the rest of
    # the module gets from logging.getLogger(__name__).
    _log = logging.getLogger(__name__)

    def __init__(self, excel_name, line_num):
        """Open the workbook and create the CSV report.

        Args:
            excel_name: Path of the workbook to read.
            line_num: Zero-based index of the first row to process; anything
                ``int()`` accepts (the caller passes the raw prompt string).

        Raises:
            ValueError: If ``line_num`` cannot be converted to an integer.
        """
        excel = xlrd.open_workbook(excel_name)
        self.sheet = excel.sheet_by_index(0)
        self.data_rows_len = self.sheet.nrows
        self.line_num = int(line_num)
        # utf_8_sig writes a BOM at the start of the file.
        self.excel_output = codecs.open("ausstats.csv", "w+", "utf_8_sig")
        self.excel_output.write("pdf-name,url\n")

    def close(self):
        """Close the CSV report; safe to call more than once."""
        if self.excel_output:
            self.excel_output.close()
            self.excel_output = None

    def req(self):
        """Download every row from ``line_num`` to the end of the sheet.

        Each row is printed as progress output.  A row whose download or
        report write fails is logged and skipped so that one bad link does
        not stop the remaining downloads.
        """
        # Valid row indices are 0 .. nrows - 1, so nrows is the exclusive end.
        for row_num in range(self.line_num, self.data_rows_len):
            name = self.sheet.cell_value(row_num, 0)
            url = self.sheet.cell_value(row_num, 1)
            print("%s %s" % (name, url))
            try:
                file_name = self.create_pdf(url, name)
                self.excel_output.write("%s,%s\r\n" % (file_name, url))
            except Exception:
                self._log.exception("skipping row %s (%s)", row_num, url)

    def create_pdf(self, url, filename):
        """Download ``url`` and return the local file name that was used.

        Args:
            url: Address of the file to fetch.
            filename: Name to save under when the URL does not carry one.

        Returns:
            The file name the download was written to.
        """
        target = self._target_filename(url, filename)
        # urllib.urlretrieve is the Python 2 API, matching the rest of the file.
        urllib.urlretrieve(url, target)
        return target

    @staticmethod
    def _target_filename(url, default):
        """Pick the file name embedded in ``url``, else return ``default``.

        The second ``&``-separated field of the URL is used when it contains
        a dot (i.e. looks like ``name.ext``).
        """
        fields = url.split("&")
        if len(fields) > 1 and "." in fields[1]:
            return fields[1]
        return default
def main():
    """Prompt for the workbook and start row, then run the downloader.

    Any error is logged with its traceback instead of being raised, and the
    CSV report is closed whenever a Spider was successfully created.
    """
    logger.info("begin crawler")
    spider = None
    try:
        excel_name = raw_input("please input excel name:")
        line_num = raw_input("please input line num:")
        spider = Spider(excel_name, line_num)
        spider.req()
        logger.info("end crawler")
    except Exception as e:
        logger.exception("error:%s", e)
    finally:
        # spider is still None when the prompts or the constructor failed.
        if spider is not None:
            spider.close()
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件         248  2018-05-25 08:56  程序使用說明.txt
     文件     1197370  2017-11-15 22:31  pip-9.0.1.tar.gz
     文件    20082688  2017-11-16 13:24  python-2.7.13.amd64.msi
     文件      541397  2018-03-09 14:30  xlrd-1.1.0.tar.gz
     文件        2622  2018-03-12 14:33  ausstats.py
評論
共有 條評論