資源簡介
demo百度文庫下載
python代碼
class DownloadImg():
    """Download images over HTTP, sending a randomly chosen User-Agent."""

    def __init__(self):
        # Generator of User-Agent strings; a fresh one is drawn per request.
        self.ua = UserAgent()

    def download_one_img(self, img_url, saved_path):
        """Fetch ``img_url`` and write the response body to ``saved_path``.

        The file is written only when the server answers with HTTP 200;
        any other status is printed and nothing is saved.

        Args:
            img_url: URL of the image to request.
            saved_path: Destination path, opened in binary write mode.

        Returns:
            ``saved_path`` unchanged, whether or not a file was written.
        """
        header = {
            "User-Agent": "{}".format(self.ua.random().strip()),
            'Connection': 'close'}
        # A streamed response keeps its connection until it is closed, so
        # the with-block releases it on every path, including non-200
        # answers whose body is never read.
        with requests.get(img_url, headers=header, stream=True) as r:
            print("請求圖片狀態碼 {}".format(r.status_code))  # status code
            if r.status_code == 200:
                with open(saved_path, mode="wb") as f:
                    # Write chunk by chunk instead of loading the whole
                    # body into memory first.
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
                print("download {} success!".format(saved_path))
        return saved_path
class StartChrome():
    """Start a mobile-emulating Chrome session and open the target page."""

    def __init__(self):
        """Launch Chrome, load the module-level ``url`` and create the
        image downloader used by this instance.
        """
        mobile_emulation = {"deviceName": "Galaxy S5"}
        # DesiredCapabilities.CHROME is a class-level dict shared by every
        # user of selenium in the process; copy it so adding loggingPrefs
        # does not alter that shared object.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        options = webdriver.ChromeOptions()
        options.add_experimental_option("mobileEmulation", mobile_emulation)
        # The attribute is spelled "brower" in the original; the name is
        # kept because other code may refer to it.
        self.brower = webdriver.Chrome(desired_capabilities=capabilities,
                                       chrome_options=options)
        # Start the browser and open the page to be downloaded.
        self.brower.get(url)
        self.download_img = DownloadImg()
代碼片段和文件信息
import os
import time

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from scrapy import Selector
import requests
from my_fake_useragent import UserAgent
import docx
from docx.shared import Inches
import cv2  # OpenCV (computer vision)
from pptx import Presentation
# NOTE: this rebinds ``Inches``; after this line the name refers to
# pptx.util.Inches rather than docx.shared.Inches.
from pptx.util import Inches
# chromedriver binary for Windows
chromedriver_path = "./chromedriver.exe"
# chromedriver binary for Ubuntu
# chromedriver_path = "./chromedriver"

# Output directory paths for doc and ppt results.
doc_dir_path = "./doc"
ppt_dir_path = "./ppt"

# Alternative sample documents; the trailing note on each line is the
# original author's label for that document.
# url = "https://wenku.baidu.com/view/4410199cb0717fd5370cdc2e.html?fr=search"  # doc_txt p
# url = "https://wenku.baidu.com/view/4d18916f7c21af45b307e87101f69e314332fa36.html"  # doc_txt span
# url = "https://wenku.baidu.com/view/dea519c7e53a580216fcfefa.html?fr=search"  # doc_txt span br
# url = 'https://wk.baidu.com/view/062edabeb6360b4c2e3f5727a5e9856a5712262d?pcf=2&bfetype=new'  # doc_img
# url = "https://wenku.baidu.com/view/2af6de34a7e9856a561252d380eb6294dd88228d"  # VIP-only doc
# url = "https://wenku.baidu.com/view/3de365cc6aec0975f46527d3240c844769eaa0aa.html?fr=search"  # ppt
# url = "https://wenku.baidu.com/view/18a8bc08094e767f5acfa1c7aa00b52acec79c55"  # pdf
# url = "https://wenku.baidu.com/view/bbe27bf21b5f312b3169a45177232f60dccce772"
# url = "https://wenku.baidu.com/view/5cb11d096e1aff00bed5b9f3f90f76c660374c24.html?fr=search"
# url = "https://wenku.baidu.com/view/71f9818fef06eff9aef8941ea76e58fafab045a6.html"
# url = "https://wenku.baidu.com/view/ffc6b32a68eae009581b6bd97f1922791788be69.html"
# url = "https://wenku.baidu.com/view/d4d2e1e3122de2bd960590c69ec3d5bbfd0adaa6.html"

# Document that the script currently targets.
url = 'https://wenku.baidu.com/view/a277ab04ce84b9d528ea81c758f5f61fb73628ef.html'
class DownloadImg():
    """Download images over HTTP, sending a randomly chosen User-Agent."""

    def __init__(self):
        # Generator of User-Agent strings; a fresh one is drawn per request.
        self.ua = UserAgent()

    def download_one_img(self, img_url, saved_path):
        """Fetch ``img_url`` and write the response body to ``saved_path``.

        The file is written only when the server answers with HTTP 200;
        any other status is printed and nothing is saved.

        Args:
            img_url: URL of the image to request.
            saved_path: Destination path, opened in binary write mode.

        Returns:
            ``saved_path`` unchanged, whether or not a file was written.
        """
        header = {
            "User-Agent": "{}".format(self.ua.random().strip()),
            'Connection': 'close'}
        # A streamed response keeps its connection until it is closed, so
        # the with-block releases it on every path, including non-200
        # answers whose body is never read.
        with requests.get(img_url, headers=header, stream=True) as r:
            print("請求圖片狀態碼 {}".format(r.status_code))  # status code
            if r.status_code == 200:
                with open(saved_path, mode="wb") as f:
                    # Write chunk by chunk instead of loading the whole
                    # body into memory first.
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
                print("download {} success!".format(saved_path))
        return saved_path
class?StartChrome():
????def?__init__(self):
????????mobile_emulation?=?{“deviceName“:?“Galaxy?S5“}
????????capabilities?=?DesiredCapabilities.CHROME
????????capabilities[‘loggingPrefs‘]?=?{‘browser‘:?‘ALL‘}
????????options?=?webdriver.ChromeOptions()
????????options.add_experimental_option(“mobileEmulation“?mobile_emulation)
????????self.brower?=?webdriver.Chrome(desired_capabilities=capabilities
???????????????????????????????????????chrome_options=options)
????????#?啟動瀏覽器,打開需要下載的網頁
????????self.brower.get(url)
????????self.download_img?=?DownloadImg()
????def?click_ele(self?click_xpath):
????????#?單擊指定控件
評論
共有 條評論