資源簡介
20200901版裁判文書爬蟲,需要設置登錄賬號,相關思路可以參閱我的博客:https://mp.csdn.net/console/article
代碼片段和文件信息
"""程序說明"""
# -*- coding: utf-8 -*-
# Author: cao wang
# Datetime : 2020
# software: PyCharm
# 收獲:
import logging
import math
import os
import random
import time
from datetime import datetime

import pyautogui
from retrying import retry
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from crawler_tools import user_agent as u
# Globally suppress log records of severity INFO and below.
logging.disable(logging.INFO)
def start_logger():
    """Configure the root logger to write to a timestamped file.

    Ensures a ``log`` directory exists next to this source file, then calls
    ``logging.basicConfig`` with a file named
    ``daily_report_<MMDDYYYY_HHMMSS>.log`` inside it, threshold WARNING,
    and a ``<time> <message>`` record layout.
    """
    # Resolve against the absolute module path: dirname(__file__) can be ""
    # when the script is started by a bare relative name, which would turn
    # the log directory into a path at the root of the current drive.
    log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "log")
    os.makedirs(log_dir, exist_ok=True)

    # The timestamp makes every run write to its own file.
    stamp = datetime.now().strftime('%m%d%Y_%H%M%S')
    logging.basicConfig(
        filename=os.path.join(log_dir, 'daily_report_%s.log' % stamp),
        level=logging.WARNING,
        format='%(asctime)s %(message)s',
        datefmt='%m-%d %H:%M:%S',
    )
# Set up file logging as soon as the module is loaded.
start_logger()
class?Selenium_firefox():
????def?__init__(self):
????????#?設置輸出內容目錄
????????#?下載無彈窗
????????path?=?“E:\Firefox\Download“
????????if?not?os.path.exists(path):
????????????os.makedirs(path)
????????profile?=?webdriver.FirefoxProfile()
????????#?profile.set_preference(‘browser.download.folderList‘?2)
????????#?logging.info(‘運行支持‘)
????????profile.set_preference(‘browser.download.dir‘?path.strip(‘\u202a‘))
????????profile.set_preference(‘browser.download.folderList‘?2)
????????profile.set_preference(‘browser.download.manager.showWhenStarting‘?False)
????????profile.set_preference(‘browser.helperApps.neverAsk.saveToDisk‘?‘application/zipapplication/octet-stream‘)
????????#?無圖
????????profile.set_preference(‘browser.migration.version‘?9001)
????????profile.set_preference(‘permissions.default.image‘?2)
????????profile.set_preference(‘user-agent‘?u()[‘User-Agent‘])
????????ops?=?Options()
????????ops.add_argument(‘--headless‘)
????????ops.add_argument(‘disable-infobars‘)
????????“““網頁獲取“““
????????self.browser?=?webdriver.Firefox(profileoptions=ops)
????????self.wait?=?WebDriverWait(self.browser?20)
????????self.browser.get(‘https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=d176b4c9586ed2bea95d1fbab98bdd9d&s8=02‘)
????@retry
????def?login(self):
????????“““登錄“““
????????#?切換框架
????????wait?=?self.wait
????????self.browser.refresh()
????????frame?=?wait.until(EC.presence_of_element_located((By.XPATH?‘//*[@id=“contentiframe“]‘)))
????????self.browser.switch_to.frame(frame)
????????click?=?wait.until(EC.presence_of_element_located(
????????????(By.XPATH?‘/html/body/app-root/div/app-login/div/div/form/div/div[1]/input‘)))
????????#?actions.move_to_element(click).click().perform()
????????#?click.click()
????????click.send_keys(“自己的手機號“)
????????time.sleep(1)
????????click1?=?wait.until(E
- 上一篇:生物信息平臺Galaxy文檔
- 下一篇:美萍2010v5綠色免安裝完美破解版
評論
共有 條評論