資源簡介
簡單的爬蟲實例,爬取了大學排名的相關信息,適合爬蟲入門
代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Thu Aug  9 16:24:29 2018
@author: Administrator
"""
from bs4 import BeautifulSoup
import requests
def getHtmlUrl(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The response encoding is replaced by the encoding detected from the
    content (``apparent_encoding``) before the body is decoded.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or ``None`` when the request fails (connection
        error, timeout, invalid URL, or a 4xx/5xx status). The error is
        printed in that case.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses into exceptions so they are reported below.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as e:
        # The original caught the undefined name ``baseException``, which
        # raised NameError on any failure instead of handling it.
        print(e)
        return None
def getData(html, limit=10):
    """Collect cell texts from the ``<tr class="alt">`` rows of ``html``.

    For each selected row the ``<td>`` cells are read by position:
    cell 0 -> ``range_num``, the ``<div>`` inside cell 1 -> ``title``,
    cell 2 -> ``province``, cell 3 -> ``rating_num``, cell 4 -> ``source_num``.

    Args:
        html: Page markup as a string. ``None`` (e.g. a failed download) is
            treated as an empty document.
        limit: Maximum number of rows to read, counted from the top of the
            page. ``None`` reads every matching row.

    Returns:
        A dict of parallel lists keyed by ``'title'``, ``'rating_num'``,
        ``'range_num'``, ``'province'`` and ``'souce_num'``.

    Raises:
        IndexError: If a selected row has fewer than five ``<td>`` cells.
        AttributeError: If the second cell of a selected row has no ``<div>``.
    """
    title = []
    range_num = []
    rating_num = []
    province = []
    source_num = []

    soup = BeautifulSoup(html or '', 'html.parser')
    uni_all = soup.find_all('tr', class_='alt')

    # Slicing instead of indexing 0..9 avoids an IndexError when the page
    # contains fewer rows than requested.
    for row in uni_all[:limit]:
        cells = row.find_all('td')  # look the cells up once per row
        range_num.append(cells[0].text)
        title.append(cells[1].find('div').text)
        province.append(cells[2].text)
        rating_num.append(cells[3].text)
        source_num.append(cells[4].text)

    data = {}
    data['title'] = title
    data['rating_num'] = rating_num
    data['range_num'] = range_num
    data['province'] = province
    # NOTE: the key is spelled 'souce_num' in the original; it is kept as-is
    # because consumers of this dict may look it up under that exact name.
    data['souce_num'] = source_num
    return data
def?show(data):
????f=open(“D://uni.html“‘w‘)
????f.write(““)
????f.write(““)
????f.write(“