91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 3KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2021-06-04
  • 語言: Python
  • 標簽: python, spider, data

資源簡介

簡單的爬蟲實例,爬取了大學排名的相關信息,適合爬蟲入門

資源截圖

代碼片段和文件信息

# -*- coding: utf-8 -*-
"""
Created on Thu Aug  9 16:24:29 2018

@author: Administrator
"""


from?bs4?import?BeautifulSoup
import?requests

def getHtmlUrl(url):
    """Download *url* and return the response body as text.

    The response encoding is replaced with ``apparent_encoding`` before the
    text is read, so decoding follows the encoding detected from the content
    rather than the one declared in the HTTP headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``None`` when the request fails or the
        server answers with an error status (the error is printed).
    """
    try:
        r = requests.get(url)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as e:
        # The original caught the undefined name ``baseException``, which
        # turned every request failure into a NameError.
        print(e)
        return None

def getData(html, limit=10):
    """Extract per-row column values from the ``<tr class="alt">`` rows.

    For each of the first *limit* matching rows, the text of the first five
    ``<td>`` cells is collected into parallel lists. The second cell is read
    through its nested ``<div>``.

    Args:
        html: Markup of the page to parse.
        limit: Maximum number of rows to read. Defaults to 10, the count the
            original code hard-coded.

    Returns:
        A dict of equal-length lists keyed by ``'title'``, ``'rating_num'``,
        ``'range_num'``, ``'province'`` and ``'souce_num'``.
    """
    title = []
    range_num = []
    rating_num = []
    province = []
    source_num = []

    soup = BeautifulSoup(html, 'html.parser')
    uni_all = soup.find_all('tr', class_='alt')

    # Slice instead of indexing 0..9 so a page with fewer rows does not
    # raise IndexError.
    for row in uni_all[:limit]:
        cells = row.find_all('td')  # look the cells up once per row
        range_num.append(cells[0].text)
        title.append(cells[1].find('div').text)
        province.append(cells[2].text)
        rating_num.append(cells[3].text)
        source_num.append(cells[4].text)

    # NOTE(review): 'souce_num' looks like a typo for 'source_num'; the key is
    # kept as-is because consumers of this dict may depend on it.
    return {
        'title': title,
        'rating_num': rating_num,
        'range_num': range_num,
        'province': province,
        'souce_num': source_num,
    }

def?show(data):
????f=open(“D://uni.html“‘w‘)
????f.write(““)
????f.write(““)
????f.write(““)
????
????f.write(““)
????f.write(““)
????f.write(“
排名“)
????f.write(“
學校名

評論

共有 條評論

<tbody id="gxgis"></tbody>