91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2024-02-04
  • 語言: Python
  • 標簽: python??

資源簡介

python網絡爬蟲獲取去哪兒網景點信息源碼,獲取的景點信息有'景點', '景點類別', '景點級別', '地點', '經度', '緯度', '開放時間', '景點介紹', '評論次數', '游客評分', '熱度', '關鍵詞', '圖片路徑'。內有詳細注釋。

資源截圖

代碼片段和文件信息

# -!- coding: utf-8 -!-

from?bs4?import?BeautifulSoup
from?urllib.parse?import?*
import?urllib
import?urllib.request
import?re
import?string
import?time
import?codecs

import?csv

import?jieba
import?jieba.analyse
from?optparse?import?OptionParser


# Matches a run of one or more digits; group 1 holds the last digit after the
# first one (or None when the run is a single digit).
hotnum = re.compile(r'\d(\d)*')

def getHotNum(cNum, grade):
    """Return a popularity score built from a comment count and a rating.

    The comment count contributes ``int(cNum) / 1000`` points, fixed at 50
    once the count reaches 50000; ``10 * float(grade)`` is then added.

    Args:
        cNum: Comment count; any value accepted by ``int()``.
        grade: Visitor rating; any value accepted by ``float()``.

    Returns:
        The score as a float.

    Raises:
        ValueError: If ``cNum`` or ``grade`` cannot be converted to a number.
    """
    comment_count = int(cNum)
    # Cap the comment-count contribution so it cannot dominate the rating.
    count_score = 50 if comment_count >= 50000 else comment_count / 1000
    return count_score + 10 * float(grade)

# Initialise the HTTP request settings.
url_base = 'http://piao.qunar.com'  # 'http://piao.qunar.com/ticket/list.htm?'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0)'
header = {'User-Agent': user_agent}

# NOTE(review): hard-coded API key; its consumer is not visible in this
# section. Consider loading it from configuration instead of source.
ak = 'whSDgmRhKopIDFMCGxj21FcY611b6R9h'

# Store the results in a CSV file. The file is opened in append mode, and
# newline='' is what the csv module requires for files it writes to.
csvfile = open('畢設測試數據.csv', 'a+', encoding='utf-8', newline='')
writer = csv.writer(csvfile)
# Header row, currently disabled:
# writer.writerow(['景點', '景點類別', '景點級別', '地點', '經度', '緯度', '開放時間', '景點介紹', '評論次數', '游客評分', '熱度', '關鍵詞', '圖片路徑'])

# Request the listing pages.
pageIndex = 38  # index of the listing page to request
while?True:??####
????if?pageIndex?==?1:????#首頁(yè)(默認(rèn))

????????#url?=?‘http://piao.qunar.com/ticket/list.htm?keyword=中國(guó)‘
????????url?=?‘http://piao.qunar.com/ticket/list.htm?keyword=%E7%83%AD%E9%97%A8%E6%99%AF%E7%82%B9®ion=&from=mpl_search_suggest&subject=文化古跡&page=1‘
????????url?=?quote(url?safe=string.printable)??#編碼?問(wèn)題
????elif?pageIndex>2:????#限制爬取頁(yè)數(shù)
????????break

????else:?#第pageIndex頁(yè)
????????print(pageIndex)


????????#url?=?‘http://piao.qunar.com/ticket/list.htm?keyword=中國(guó)&page={0}‘.format(pageIndex)
????????url?=?‘http://piao.qunar.com/ticket/list.htm?keyword=%E7%83%AD%E9%97%A8%E6%99%AF%E7%82%B9®ion=&from=mpl_search_suggest&subject=文化古跡&page={}‘.format(pageIndex)
????????url?=?quote(url?safe=string.printable)


????#使用urlib庫(kù)請(qǐng)求網(wǎng)頁(yè)pageCode
????request?=?urllib.request.Request(urlheaders?=?header)
????response?=?urllib.request.urlopen(request)
????html?=?response.read().decode(‘utf-8‘‘ignore‘)

????#構(gòu)造soup對(duì)象
????soup?=?BeautifulSoup(html?‘html.parser‘)

????#獲取該頁(yè)所有的新聞鏈接
????a?=?soup.find_all(‘div‘‘result_list‘)#search-list
????#print?(a)
????soup_news?=?BeautifulSoup(a.__str__()?‘html.parser‘)
????#print(a.__str__())
????links?=?soup_news.find_all(‘a(chǎn)‘‘sight_item_do‘)#(‘a(chǎn)‘)
????#print(links)
????#初始化結(jié)果數(shù)組和景點(diǎn)序號(hào)
????results?=?[]?#保存景點(diǎn)結(jié)果(9個(gè)字段??景點(diǎn)名稱(chēng),地點(diǎn),景點(diǎn)開(kāi)放時(shí)間,景點(diǎn)介紹??熱度等)
????i?=?0;??#第幾個(gè)景點(diǎn)

????#遍歷新聞鏈接列表
????for?item?in?links:
????????#構(gòu)造景點(diǎn)詳情頁(yè)面鏈接
????????href?=?links[0][‘href‘]
????????#print(href)
????????href?=?url_base?+?href[0:]
????????links=links[1:]
????????#print(links)
????????#print(href)

????????#?保存景點(diǎn)鏈接
????????#results.append(href)

????????#請(qǐng)求景點(diǎn)內(nèi)容頁(yè)面
????????request?=?urllib.request.Request(href?headers=header)
????????response?=?urllib.request.urlopen(request)
????????html?=?response.read().decode(‘utf-8‘)

????????soup_content?=?BeautifulSoup(html?‘html.parser‘)?#景點(diǎn)內(nèi)容



????????name=?soup_content.find(‘span‘‘mp-description-name‘).string
????????results.append(name)??#保存景點(diǎn)名稱(chēng)
????????results.append(‘文化古跡‘)????

評論

共有 條評論