資源簡介
NBA數據爬蟲

代碼片段和文件信息
import re
import sys
import urllib2
import xdrlib

import xlrd
import xlwt
from bs4 import BeautifulSoup
def transformCodec(re_data):
    """Decode a GBK-encoded byte string, discarding bytes that cannot be decoded.

    Each time decoding fails, the byte at the reported error offset is removed
    and decoding is retried, so the result contains every character that could
    be recovered from the input.

    Args:
        re_data: the GBK-encoded byte string to decode.

    Returns:
        The decoded text. If decoding fails for any reason other than a
        UnicodeDecodeError (for example the argument has no usable ``decode``
        method), the input is returned unchanged after the error is printed.
    """
    while True:
        try:
            return re_data.decode('gbk')
        except UnicodeDecodeError as error:
            print(error)
            print('delete illegal string,try again...')
            # Use the offset carried by the exception instead of parsing its
            # message: the message wording differs between single-byte,
            # multi-byte and "incomplete sequence" failures, and a failed
            # match would hand the caller back undecoded bytes.
            # Dropping one byte per attempt always makes progress, so the
            # loop terminates after at most len(re_data) retries.
            re_data = re_data[:error.start] + re_data[error.start + 1:]
        except Exception as error:
            # Best effort: report the problem and give the data back as-is.
            print(error)
            return re_data
# Scrape the NBA matchup preview pages from covers.com and store the values of
# the "sdi-datacell" cells in an Excel workbook, nine columns per row.

# Titles of the nine spreadsheet columns, written to row 0.
COLUMN_TITLES = ('team', 'W/L', 'Strk', 'Home', 'Away', 'Day', 'Night', 'Div', 'Conf')
OUTPUT_PATH = 'NBA.xls'

file = xlwt.Workbook()
table = file.add_sheet('shuju', cell_overwrite_ok=True)
for title_col, title in enumerate(COLUMN_TITLES):
    table.write(0, title_col, title)

# Position of the next cell to write; row 0 holds the titles.
row = 1
col = 0
# NOTE(review): the listing this script was recovered from had lost its commas,
# so the bound may originally have been range(1, 28) - confirm before running.
for page in range(128):
    print(page)
    url = "http://www.covers.com/pageLoader/pageLoader.aspx?page=/data/nba/matchups/g5_preview_" + str(page) + ".html"
    response = urllib2.urlopen(url)
    try:
        print(response.getcode())
        soup = BeautifulSoup(response, 'html.parser', from_encoding='utf-8')
    finally:
        # The document has been read by the parser at this point; release the
        # connection instead of leaving one open per page.
        response.close()

    links2 = soup.find_all('div', class_="sdi-so", limit=2)
    # Only the second matching block is used; pages with fewer than two
    # blocks contribute nothing.
    if len(links2) > 1:
        for q in links2[1].find_all('td', class_="sdi-datacell"):
            print(q.text)
            table.write(row, col, q.text)
            col = (col + 1) % len(COLUMN_TITLES)
            if col == 0:
                # Wrapped past the last column: continue on the next row.
                row = row + 1
        # Advance one more row before the next page's cells are written.
        row = row + 1
        # Save after every page so earlier results survive a later failure.
        file.save(OUTPUT_PATH)

file.save(OUTPUT_PATH)
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-04-10 06:23  PythonApplication1\
     目錄           0  2017-04-15 16:38  PythonApplication1\PythonApplication1\
     文件         859  2017-04-10 06:23  PythonApplication1\PythonApplication1.sln
     文件       18944  2017-05-03 03:01  PythonApplication1\PythonApplication1.v12.suo
     文件       13824  2017-04-24 00:04  PythonApplication1\PythonApplication1\NBA.xls
     文件        1859  2017-04-15 16:38  PythonApplication1\PythonApplication1\PythonApplication1.py
     文件        1953  2017-04-10 06:23  PythonApplication1\PythonApplication1\PythonApplication1.pyproj
評論
共有 條評(píng)論