-
大小: 22KB 文件類型: .zip 金幣: 2 下載: 1 次 發布日期: 2021-07-05
- 語言: Python
- 標簽: python Ajax ??
資源簡介
這個爬蟲是通過分析Ajax 接口寫成的Python爬蟲。爬取的是2018年QS全球大學排名,文件中包含完整的Python爬蟲代碼和爬取下來的2018年QS全球大學排名。

代碼片段和文件信息
# Ajax loading
import json

import requests
from pyquery import PyQuery as pq
# Data endpoint found by analysing the ranking page's Ajax interface.
# NOTE(review): the "_" query value is copied verbatim from the captured
# request; presumably a cache-busting timestamp - confirm before changing it.
url = 'https://www.topuniversities.com/sites/default/files/qs-rankings-data/357051.txt?_=1525068930958'
# Browser-like User-Agent header sent with the request.
headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 "
                         "Safari/537.36"}
def get_page(url):
    """Fetch *url* and return its decoded JSON body.

    The request is a GET that carries the module-level ``headers`` dict.

    Args:
        url: Address of the JSON resource to download.

    Returns:
        The decoded JSON payload when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or connection failure).
    """
    try:
        r = requests.get(url, headers=headers)
    except requests.ConnectionError as e:
        # Best-effort fetch: report the failure and hand the caller None.
        print(e)
        return None
    if r.status_code == 200:
        return r.json()
    return None
def parser_page(json):
    """Yield one trimmed ranking record for each entry of a decoded payload.

    Args:
        json: Decoded response mapping whose ``'data'`` key holds a list of
            entry dicts, or a falsy value (e.g. ``None``). The parameter
            name shadows the ``json`` module inside this function; it is
            kept so existing callers keep working.

    Yields:
        dict: A new dict with the keys ``'rank_display'``, ``'title'``,
        ``'region'``, ``'score'`` and ``'url'`` copied from the entry.
        ``'rank_display'`` is converted to ``str`` and, when it contains
        ``'='`` (e.g. ``'=12'``), reduced to the text after the last ``'='``.

    Raises:
        KeyError: If an entry lacks one of the five keys above.
    """
    if not json:
        return
    # A payload with a missing or null 'data' value yields nothing
    # instead of failing on len(None).
    for item in json.get('data') or ():
        yield {
            # split('=')[-1] returns the whole string when no '=' is present,
            # so one expression covers both the "=N" and the plain "N" form.
            'rank_display': str(item['rank_display']).split('=')[-1],
            'title': item['title'],
            'region': item['region'],
            'score': item['score'],
            'url': item['url'],
        }
def main():
    """Download the ranking data and append one text line per record.

    Calls ``get_page`` on the module-level ``url``, feeds the result to
    ``parser_page`` and appends each record to
    ``/home/zhiying/文檔/QSrank.txt`` as: rank, title, region, score and the
    absolute URL, separated by four spaces. Each record is also printed.
    """
    payload = get_page(url)
    # Open the file once for the whole run instead of once per record; the
    # with-block closes it, so no explicit close() is needed. The encoding is
    # pinned so non-ASCII text does not depend on the locale.
    with open('/home/zhiying/文檔/QSrank.txt', 'a', encoding='utf-8') as f:
        for result in parser_page(payload):
            line = '    '.join((
                str(result['rank_display']),
                result['title'],
                result['region'],
                # str() keeps the write working if the score arrives as a number.
                str(result['score']),
                'https://www.topuniversities.com' + result['url'],
            ))
            f.write(line + '\n')
            print(result)
if __name__ == '__main__':
    print('開始獲取!')
    # Append the column header row (rank, university name, country/region,
    # score, URL) before main() appends the data rows. The with-block closes
    # the file; the encoding is pinned because the header is non-ASCII.
    with open('/home/zhiying/文檔/QSrank.txt', 'a', encoding='utf-8') as f:
        f.write('排名' + '    ' + '大學名稱' + '    ' + '所屬國家或地區' + '    ' + '得分' + '    ' + '網址' + '\n')
    main()
    print('QS大學排名獲取成功!')
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-04-30 12:44  分析Ajax接口獲取QS排名的Python爬蟲及QS排名文檔\
     文件        1912  2018-04-30 12:40  分析Ajax接口獲取QS排名的Python爬蟲及QS排名文檔\qs_rank.py
     文件      125213  2018-04-30 12:40  分析Ajax接口獲取QS排名的Python爬蟲及QS排名文檔\QSrank.txt
- 上一篇:爬蟲(爬取加分析)
- 下一篇:Python爬蟲獲取電子書資源實戰(zhàn)
評論
共有 條評論