-
大小: 7.14KB  文件類型: .rar  金幣: 1  下載: 0 次  發布日期: 2024-05-05
- 語言: Python
- 標籤: 網絡爬蟲 爬蟲 python
資源簡介
:爬取網站數據,基于 selenium.webdriver
代碼片段和文件信息
# Module author metadata.
__author__ = 'fandechun'
import datetime
import re
import sys
import time
import uuid

import pymysql
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
def find_element(driver, locater, timeout=10, poll_frequency=0.5):
    """Wait until the element described by *locater* is present, then return it.

    Thin wrapper around ``WebDriverWait(...).until``.

    Args:
        driver: WebDriver instance handed to ``WebDriverWait``.
        locater: Locator passed to ``EC.presence_of_element_located``;
            the caller in this file passes a ``(By.ID, value)`` tuple.
        timeout: Maximum number of seconds to wait (defaults to the
            original hard-coded 10).
        poll_frequency: Seconds between polls (defaults to the original
            hard-coded 0.5).

    Returns:
        The value returned by ``until`` for the presence condition.

    Any exception raised by ``until`` (e.g. on timeout) propagates to the
    caller unchanged.
    """
    wait = WebDriverWait(driver, timeout, poll_frequency)
    return wait.until(EC.presence_of_element_located(locater))
def isElementExist(driver, xpath1):
    """Report whether an element matching *xpath1* can be found.

    Args:
        driver: Object exposing ``find_element_by_xpath``.
        xpath1: XPath expression to look up.

    Returns:
        ``True`` if the lookup succeeds, ``False`` if it raises.

    NOTE(review): the ``find_element_by_*`` helpers were removed in
    Selenium 4.3 — confirm the Selenium version this script targets.
    """
    try:
        driver.find_element_by_xpath(xpath1)
    except Exception:
        # Any lookup failure is treated as "not present"; unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit through.
        return False
    return True
def get_by_xpath_if_existed(driver, xpath1):
    """Return the element matching *xpath1*, or the placeholder ``'未定'``.

    Args:
        driver: Object exposing ``find_element_by_xpath``.
        xpath1: XPath expression to look up.

    Returns:
        The located element, or the string ``'未定'`` when the lookup
        raises.

    NOTE(review): the ``find_element_by_*`` helpers were removed in
    Selenium 4.3 — confirm the Selenium version this script targets.
    """
    try:
        # Look the element up once; the earlier version queried the
        # driver twice for the same XPath.
        return driver.find_element_by_xpath(xpath1)
    except Exception:
        return '未定'
def get_by_class_if_existed(driver, column):
    """Return the element with class name *column*, or the placeholder ``'未定'``.

    Args:
        driver: Object exposing ``find_element_by_class_name``.
        column: CSS class name to look up.

    Returns:
        The located element, or the string ``'未定'`` when the lookup
        raises.

    NOTE(review): the ``find_element_by_*`` helpers were removed in
    Selenium 4.3 — confirm the Selenium version this script targets.
    """
    try:
        # Single lookup; the earlier version queried the driver twice.
        return driver.find_element_by_class_name(column)
    except Exception:
        return '未定'
def get_by_id_if_existed(driver, column):
    """Return the element with id *column*, or the placeholder ``'未定'``.

    Args:
        driver: Object exposing ``find_element_by_id``.
        column: Element id to look up.

    Returns:
        The located element, or the string ``'未定'`` when the lookup
        raises.

    NOTE(review): the ``find_element_by_*`` helpers were removed in
    Selenium 4.3 — confirm the Selenium version this script targets.
    """
    try:
        # Single lookup; the earlier version queried the driver twice.
        return driver.find_element_by_id(column)
    except Exception:
        return '未定'
class MySqlSession:
    """Wrapper around one pymysql connection for storing scraped house rows.

    NOTE(review): the connection parameters, including the password, are
    hard-coded below — consider moving them to configuration.
    """

    def __init__(self):
        """Open the MySQL connection shared by the insert methods."""
        self.con = pymysql.connect(
            host='localhost',
            port=3306,
            user='root',
            passwd='123',
            db='python',
            charset='utf8',
        )

    def _insert(self, sql, params):
        """Execute one parameterized INSERT, committing or rolling back.

        A failure is printed and rolled back rather than re-raised, so a
        bad row does not abort the caller. The cursor is always closed.
        """
        cue = self.con.cursor()
        try:
            cue.execute(sql, params)
        except Exception as e:
            print(f"插入數據庫失敗:{e}")
            self.con.rollback()
        else:
            self.con.commit()
        finally:
            cue.close()

    def insert_house_summary_info(self, house_name, other_name, house_price,
                                  address, open_date, house_layouts):
        """Insert one row into ``d_house_summary``.

        A fresh 32-character hex id (``uuid1`` without dashes) is generated
        and stored as the first column, followed by the six arguments in
        the order given.
        """
        house_id = uuid.uuid1().hex
        sql = 'insert into d_house_summary values (%s,%s,%s,%s,%s,%s,%s)'
        self._insert(sql, [house_id, house_name, other_name, house_price,
                           address, open_date, house_layouts])

    def insert_house_details_info(self, house_name, building_area, house_layout):
        """Insert one row into ``d_house_details``.

        A fresh 32-character hex id (``uuid1`` without dashes) is generated
        and stored as the first column, followed by the three arguments in
        the order given.
        """
        layout_id = uuid.uuid1().hex
        sql = 'insert into d_house_details values (%s,%s,%s,%s)'
        self._insert(sql, [layout_id, house_name, building_area, house_layout])
# Start a Chrome session and load the listing page to be scraped.
browser = webdriver.Chrome()
browser.get("https://jn.fang.anjuke.com/loupan/")
######## Check whether the page has opened ########
# Block until the element with id 'search-btn' is present.
locater = (By.ID, 'search-btn')
find_element(browser, locater)
# Keep the handle of the initial window.
shouye = browser.current_window_handle
######## Query conditions ########
######## This drop-down list is special: it cannot be selected the normal way and must be clicked by simulating mouse actions. It also has to come first, with the search triggered by clicking the other query conditions ########
elemen
- 上一篇:python demo百度文庫.py
- 下一篇:NumPy Cookbook
評論
共有 條評論