資源簡介
裡面有兩個文件:一個是Python代碼文件,另一個是測試數據。程序不夠健壯;由於Amazon有防機器人機制,本程序採用延時方法,所以耗時比較長。但對於初學者來說,真的不失為一個好的實例:程序涉及到csv文件的讀寫、BeautifulSoup的使用,以及報頭的偽裝。

代碼片段和文件信息
import csv
from datetime import datetime
from time import sleep
from tkinter import *
from tkinter import filedialog
from urllib import request
from urllib.request import HTTPError
from urllib.request import URLError

from bs4 import BeautifulSoup
class Application(Frame):
    """Small Tk window: choose a CSV of ASINs, then look up and record ranks.

    The CSV must have an ``ASIN`` column. Pressing the confirm button fetches
    a rank for every ASIN via ``Check.check_rank`` and appends the results to
    the same CSV via ``write_in``.
    """

    def __init__(self, master=None):
        """Create the frame, attach it to ``master`` and build its widgets."""
        Frame.__init__(self, master)
        # Start with "nothing selected" so funcCheckWrite can detect that the
        # user has not picked a file yet instead of raising AttributeError.
        self.filename = ""
        self.asin_list = []
        self.pack()
        self.createWidgets()

    def createWidgets(self):
        """Build the status label and the select / confirm buttons."""
        self.helloLabel = Label(self, text="您好,請選擇文件!")
        self.helloLabel.pack()
        self.selectButton = Button(self, text="選擇", command=self.funcOpenRead)
        self.selectButton.pack()
        self.sureButton = Button(self, text="確定", command=self.funcCheckWrite)
        self.sureButton.pack()

    def funcOpenRead(self):
        """Ask the user for a CSV file and load its ``ASIN`` column.

        Returns:
            The list of ASIN strings now held in ``self.asin_list``. If the
            dialog is cancelled the previous selection is kept and returned.

        Raises:
            KeyError: if the chosen file has no ``ASIN`` column.
        """
        chosen = filedialog.askopenfilename(title="選擇文件")
        if not chosen:
            # Dialog cancelled: askopenfilename returns an empty value, which
            # open() would reject. Keep whatever was selected before.
            return self.asin_list
        # newline="" is what the csv module requires for correct handling of
        # quoted fields that contain line breaks.
        with open(chosen, "r", newline="") as csvfile:
            asins = [row["ASIN"] for row in csv.DictReader(csvfile)]
        # Commit both attributes only after the file parsed successfully, so a
        # bad file never leaves filename and asin_list out of sync.
        self.filename = chosen
        self.asin_list = asins
        return self.asin_list

    def funcCheckWrite(self):
        """Look up the rank of every loaded ASIN and append them to the CSV."""
        if not self.filename:
            # Nothing selected yet: show the prompt again rather than crash.
            self.helloLabel["text"] = "您好,請選擇文件!"
            return
        self.helloLabel["text"] = "進行中……"
        # The lookup below blocks the Tk event loop for a long time; flush
        # pending redraws now so the progress text is actually displayed.
        self.update_idletasks()
        check = Check()
        rank_list = check.check_rank(self.asin_list)
        write_in(self.filename, rank_list)
        self.helloLabel["text"] = "已完成"
class Check():
    """Fetch Amazon product pages and extract each product's category rank."""

    def check_rank(self, asin_list, delay=10, retry_wait=150):
        """Return one rank string per ASIN, in the same order as ``asin_list``.

        Args:
            asin_list: iterable of ASIN strings; each is appended to
                ``https://www.amazon.com/dp/`` to form the product URL.
            delay: seconds to sleep before every request (throttling, since
                the site blocks clients that request too quickly).
            retry_wait: seconds to sleep after a connection failure before
                moving on to the next ASIN.

        Returns:
            A list of strings: the text of the last ``zg_hrsr_rank`` span on
            the page, ``"null"`` when the page has no such text, or a fixed
            message when the request itself failed.
        """
        rank_list = []
        # Present a browser-like User-Agent on every request.
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; JuziBrowser) like Gecko"
        }
        for asin in asin_list:
            url = "https://www.amazon.com/dp/" + asin
            req = request.Request(url, headers=head)
            sleep(delay)
            try:
                # The context manager closes the HTTP response even if
                # reading it fails.
                with request.urlopen(req) as web:
                    page = web.read()
            except HTTPError as e:
                # HTTPError must be caught before URLError (it is a subclass).
                rank_want = "網頁找不到,下架?拼寫正確?服務器?UPC?"
                print(e.code)
            except URLError:
                rank_want = "網絡連接失敗"
                sleep(retry_wait)
            else:
                rank_want = self._extract_rank(page)
            # Appended only once a value for *this* ASIN exists; any other
            # exception propagates instead of recording a stale/unbound value.
            rank_list.append(rank_want)
        return rank_list

    @staticmethod
    def _extract_rank(page):
        """Return the text of the last ``zg_hrsr_rank`` span, or ``"null"``."""
        soup = BeautifulSoup(page, "html.parser")
        ranks = soup.find_all("span", class_="zg_hrsr_rank")
        if not ranks:
            return "null"
        # .string is None when the span holds more than a single text node.
        rank = ranks[-1].string
        return "null" if rank is None else rank

def write_in(filename, rank_list):
    """Append a new column (timestamp header + ranks) to a CSV file in place.

    The first non-blank row receives the current local time formatted as
    ``YYYY-MM-DD HH:MM``; each following non-blank row receives the next
    value from ``rank_list``. Pairing stops at the shorter of the two: rows
    beyond the available ranks are written back unchanged, and ranks beyond
    the available rows are dropped.

    Args:
        filename: path of the CSV file to read and then overwrite.
        rank_list: rank values, in the order of the file's data rows.
    """
    write_list = [datetime.now().strftime("%Y-%m-%d %H:%M"), *rank_list]

    # Read everything and close the handle before reopening the same path for
    # writing; newline="" is required by the csv module on both sides.
    with open(filename, "r", newline="") as csvfile:
        lines = list(csv.reader(csvfile))

    # csv.DictReader skips blank rows, so a rank list built from it has no
    # entry for them; skip them here too to keep ranks aligned with rows.
    data_rows = [row for row in lines if row]
    for item, element in zip(data_rows, write_list):
        item.append(element)

    with open(filename, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(lines)
if __name__ == "__main__":
    # Script entry point: build the window (Tk creates a default root because
    # no master is passed), set its title, and run the event loop.
    app = Application()
    app.master.title("排名查詢")
    app.mainloop()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        3086  2017-09-28 10:58  checkrank.py
     文件         213  2018-01-13 11:21  測試數據.csv
評論
共有 條評論