資源簡介
亞馬遜評論詳情頁是動態加載的,因此不多折騰,直接用selenium進行爬取;再用pandas寫入csv文件,解決亂碼與欄位無序的問題。
代碼片段和文件信息
#?coding=utf-8
import?time
from?selenium?import?webdriver
from?pandas?import?Dataframe
class?Comment(object):
????def?__init__(self):
????????self.url?=?‘https://www.amazon.com/PISEN-20000mAh-Portable-Capacity-External/product-reviews/B075D4SS7F/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews‘
????????self.driver?=?webdriver.Chrome()
????????#?self.file?=?open(‘demo_Amazon-comment.json‘?‘w‘?encoding=‘utf-8‘)
????????self.temp?=?{}
????def?__del__(self):
????????self.driver.close()
????????#?self.file.close()
????def?parse_data(self):
????????#?獲取所有節點列表
????????time.sleep(3)
????????node_list?=?self.driver.find_elements_by_xpath(‘//*[@id=“cm_cr-review_list“]/div/div‘)
????????#?print(len(node_list))
????????#?遍歷列表
????????for?node?in?node_list:
????????????self.temp[‘level‘].append(node.find_element_by_xpath(‘./div[1]/a[1]‘).get_attribute(‘title‘))
????????????self.temp[‘name‘].append(node.find_element_by_xpath(‘./div[2]/span[1]/a‘).text)
????????????self.temp[‘date‘].append(node.find_element_by_xpath(‘./div[2]/span[4]‘).text)
????????????self.temp[‘colour‘].append(node.find_element
評論
共有 條評論