資源簡介
用于爬取人人貸網站信息,在之前的代碼基礎上進行了新的更新
代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 13 11:10:39 2018
@author: 95647

Module-level setup for the renrendai.com scraper: imports, request
headers, login credentials and the shared headless Firefox ``driver``.
"""
from selenium import webdriver
import time
import json
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.options import Options  # configures the headless browser
import requests
from pandas import DataFrame  # pandas exports ``DataFrame``; ``Dataframe`` does not exist
import threading

# Start-of-run timestamp.
# NOTE(review): time.clock() was removed in Python 3.8. Switch to
# time.perf_counter() here together with every place that reads
# ``time_start`` so both ends of the measurement use the same clock.
time_start = time.clock()

# Earlier PhantomJS-based driver, kept for reference:
# driver = webdriver.PhantomJS(executable_path=r'''C:\Users\95647\Desktop\小工具\phantomjs-2.1.1-windows\bin\phantomjs.exe''')

# Request headers; the User-Agent was copied from the browser's developer
# tools (F12).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

# Credentials of an already-registered account (placeholders here).
username = "******"  # username
password = u"*****"  # password

# driver = webdriver.Firefox()
# Use a headless Firefox instance for the login and all page loads.
options = Options()
options.add_argument('-headless')
# NOTE(review): newer Selenium releases spell this keyword ``options=``;
# confirm against the installed Selenium version before changing it.
driver = webdriver.Firefox(firefox_options=options)
def LoginRRD(username, password):
    """Log in to renrendai.com through the module-level ``driver``.

    Opens the login page, switches to the password-login tab, types the
    credentials into the form and clicks the submit button, sleeping
    briefly between the steps.

    Args:
        username: Text typed into the ``login_username`` field.
        password: Text typed into the ``J_pass_input`` field.

    Returns:
        None. Any ``Exception`` raised by a step is caught and printed
        instead of being propagated. Note that 'login successful!' only
        means no step raised; the login result itself is not checked.
    """
    try:
        print(u'ready loging renrendai website...')
        driver.get("https://www.renrendai.com/login")
        # Switch the form to password login.
        login_in_pswd = driver.find_element_by_class_name("tab-password")
        login_in_pswd.click()
        time.sleep(2)
        driver.find_element_by_id("login_username").send_keys(username)
        time.sleep(0.5)
        driver.find_element_by_id("J_pass_input").send_keys(password)
        time.sleep(0.5)
        # Submit button, located by absolute XPath.
        driver.find_element_by_xpath(r"""/html/body/div[2]/div/div/div[2]/div[2]/div/div[1]/button""").click()
        # Wait a few seconds for the user home page to load; crawling
        # straight away would be answered with "not logged in".
        time.sleep(2)
        print(u'login successful!')
    except Exception as e:
        print("Error:", e)
    finally:
        print(u'End Login!\n')
# Loan ids for which parse_userinfo failed to read the borrower-info
# section; parse_userinfo appends to this list.
loanid_e = []
def?parse_userinfo(loanididx):?#defin?def?to?analysis?borrower?informations
????#?global?login_status
????global?loanid_e
????login_status?=False
????urll=“https://www.renrendai.com/loan-%s.html“%str(loanid)
????driver.get(urll)
????html?=?BeautifulSoup(driver.page_source‘lxml‘)
????#?f=?open(“htm%s.txt“%idx“w“)
????#?f.write(html.decode(“utf-8“).replace(‘\xa9‘“@“))
????#?f.close
????info?=?html.findAll(‘div‘?class_=“l(fā)oan-user-info“)??#?這個(gè)地方的命名經(jīng)常修改
????try:
????????userinfo?=?{}
????????items?=?info[0].findAll(‘span‘{“class“:“pr20“})
????except:
????????loanid_e.append(loanid)
????else:????
????????for?item?in?items:
????????????var?=?item.get_text()
????????????value?=?item.parent.text.replace(var““)
????????????userinfo[var]=value
????data?=?pd.Dataframe(userinfoi
評論
共有 條評論