資源簡介
使用 Python3 爬取 500 彩票網站的足球比賽場次賠率,爬取後以 Excel 形式存放在 E:\2017-2018賠率\賠率下載\……
代碼片段和文件信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import?requests
# 讀入網頁並加以解析抓取,需要用到的軟件包是 requests_html。此處並不需要這個軟件包的全部功能,只讀入其中的 HTMLSession 就可以
#from?requests_html?import?HTMLSession
import?re?#正則表達
import?xlwt?#import?xlrt
import?json
import?os
#from?openpyxl?import?workbook??#?寫入Excel表所用
#from?openpyxl?import?load_workbook??#?讀取Excel表所用
def get_num(pattern=None, timeout=10):
    """Fetch the 500.com live football page and return all regex matches.

    The published listing of this function was damaged: the regular
    expression literal was stripped out, two statements were fused onto one
    line, and the comma in the ``re.findall`` call was lost.  The syntax is
    restored here, but the original expression cannot be recovered, so it
    must be supplied by the caller.

    Args:
        pattern: Regular expression (string or compiled pattern) to search
            for in the page text.  NOTE(review): presumably this should
            capture the match ids that ``get_data`` puts into its URLs --
            confirm against the live page markup.
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises, so an unresponsive server cannot block forever.

    Returns:
        The list returned by ``re.findall`` for ``pattern`` on the page text.

    Raises:
        ValueError: If ``pattern`` is not supplied.
    """
    if pattern is None:
        raise ValueError(
            "get_num() needs a regex pattern: the original pattern literal "
            "was lost from the published source and must be passed in"
        )

    url = "http://live.500.com/zqdc.php"
    page = requests.get(url, timeout=timeout)
    # Kept from the original: str() on the raw bytes gives the "b'...'" repr,
    # so the pattern is matched against that escaped form, not decoded text.
    result = str(page.content)
    return re.findall(pattern, result)
def?get_data(numindex):
?????urlmain?=?“http://odds.500.com/fenxi/ouzhi-“+str(num)+“.shtml?ctype=2“
?????pagemain=requests.get(urlmain)
?????resultmain=str(pagemain.content)
?????contentmain=re.findall(re.compile(‘‘)resultmain)?#主流公司個數(shù)
?????data_main=[]
?????for?data?in?contentmain:
?????????data_main.append(float(data))
?????#print(data_main)
?????url1?=?“http://odds.500.com/fenxi/ouzhi-“+str(num)+“.shtml?ctype=4“
?????url2=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=30&r=1&style=0&guojia=0&chupan=1“
?????url3=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=60&r=1&style=0&guojia=0&chupan=1“
?????url4=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=90&r=1&style=0&guojia=0&chupan=1“
?????url5=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=120&r=1&style=0&guojia=0&chupan=1“
?????url6=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=150&r=1&style=0&guojia=0&chupan=1“
?????url7=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=180&r=1&style=0&guojia=0&chupan=1“
?????url8=“http://odds.500.com/fenxi1/ouzhi.php?id=“+str(num)+“&ctype=4&start=210&r=1&style=0&guojia=0&chupan=1“
?????page1=requests.get(url1)
?????page2=requests.get(url2)
?????page3=requests.get(url3)
?????page4=requests.get(url4)
?????page5=requests.get(url5)
?????page6=requests.get(url6)
?????page7=requests.get(url7)
?????page8=requests.get(url8)
?????#html?=?html.decode(‘UTF-8‘)
?????result1=str(page1.content.decode(‘gbk‘))?#print(result)
?????result2=str(page2.content)?#print(result)
?????result3=str(page3.content)?#print(result)
?????result4=str(page4.content)?#print(result)
?????result5=str(page5.content)?#print(result)
?????result6=str(page6.content)?#print(result)
?????result7=str(page7.content)?#print(result)
?????result8=str(page8.content)?#print(result)
?????print(‘\n‘)
?????content1?=?re.findall(re.compile(r‘klfc.*?style=“cursor.*?([\d][\d]?[\d]?[\.]?[\d]?[\d]?)‘)result1)#賠率
?????content2?=?re.findall(re.compile(r‘‘)result1)#勝率
?????#content3?=?re.findall(re.compile(r‘class=“?[y|p].*?>([0|1][\.][\d]?[\d]?)‘)result)#賠寸率
?????#content4?=?re.findall(re.compile(r‘([0|1][\.][\d]?[\d]?) ‘)
評論
共有 條評論