資源簡介
利用selenium編寫的python網絡爬蟲-淘寶商品信息并保存到mysql數據庫。包括寶貝的詳細信息
代碼片段和文件信息
import MySQLdb
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from Tbmeishi.config import *
# Headless PhantomJS is used so that no browser window pops up; swapping in
# webdriver.Chrome() also works but opens a visible window.
# NOTE(review): PhantomJS support was deprecated in Selenium 3.8 and removed
# in Selenium 4 -- confirm the pinned Selenium version still provides it.
# The two flags are separate PhantomJS command-line arguments (skip image
# loading, enable the disk cache); the comma between them is required,
# otherwise Python would concatenate the adjacent literals into one argument.
browser = webdriver.PhantomJS(
    service_args=['--load-images=false', '--disk-cache=true'],
)
# Shared explicit wait: every wait.until(...) gives up after 10 seconds.
wait = WebDriverWait(browser, 10)
# Set the PhantomJS window size.
browser.set_window_size(1400, 900)
def search():
    """Open the Taobao home page, search for KEYWORD and scrape page one.

    Uses the module-level ``browser`` and ``wait`` objects, types the
    module-level ``KEYWORD`` into the search box, submits the form, waits
    for the result pager to appear and then calls ``get_products()``.

    Returns:
        The text of the pager's ``div.total`` element on the result page
        (presumably the "total pages" label -- verify against the caller).

    Raises:
        selenium.common.exceptions.TimeoutException: propagated from
            ``wait.until`` when an expected element does not show up within
            the wait's timeout; it is not handled here.
    """
    print('正在搜索')
    browser.get('https://www.taobao.com')
    # Wait for the page to load: the search input box must be present.
    search_box = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#q'))
    )
    # The search button must be clickable before we interact with the form.
    submit = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')
        )
    )
    # Type the search keyword and submit the search.
    search_box.send_keys(KEYWORD)
    submit.click()
    # Wait for the result page: the pager's total element must be present.
    total = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')
        )
    )
    get_products()
    return total.text
def next_page(page_number, max_retries=3):
    """Jump to result page ``page_number`` and scrape it.

    Fills the pager's page-number box, clicks its confirm button, waits
    until the highlighted pager item shows ``page_number`` and then calls
    ``get_products()``.

    Args:
        page_number: Page to jump to (formatted with ``%d`` in the log line).
        max_retries: How many more times to retry after a Selenium timeout
            before giving up. Defaults to 3.

    Raises:
        selenium.common.exceptions.TimeoutException: re-raised once
            ``max_retries`` is exhausted.
    """
    print('正在翻頁%d' % page_number)
    try:
        # Page-number input box of the pager.
        page_input = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR,
                 '#mainsrp-pager > div > div > div > div.form > input')
            )
        )
        # Confirm button next to the input box.
        submit = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
            )
        )
        # Replace whatever page number is in the box with the target page.
        page_input.clear()
        page_input.send_keys(page_number)
        submit.click()
        # The jump succeeded once the active pager item shows the target page.
        wait.until(
            EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR,
                 '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                str(page_number),
            )
        )
        get_products()
    except TimeoutException:
        # wait.until raises Selenium's TimeoutException, not the builtin
        # TimeoutError, and the retry has to be an actual call. The retry is
        # bounded so a page that never loads cannot recurse forever.
        if max_retries <= 0:
            raise
        next_page(page_number, max_retries - 1)
評論
共有 條評論