資源簡介
完整的 Python 項目,可以自行運行。利用 Python 爬蟲爬取今日頭條後臺數據,然後使用 Flask 框架實現自己的後臺,通過爬蟲獲取今日頭條數據,並以 HTML 實現前端來顯示數據。網站 UI 的一級界面為自行實現,仿照今日頭條網站。
代碼片段和文件信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2017-8-29
@author: Administrator
'''
import json
import urllib
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup

from NewsModel import NewsInfo
from Type import NewsType
# Base endpoint of the Toutiao PC feed API. The trailing '?' is intentional:
# the url-encoded query string is appended directly to this value.
URL = 'http://www.toutiao.com/api/pc/feed/?'
# Module-level collection of the NewsInfo objects gathered so far.
newsSet = set()
def getNews(nextNewstime, Newstype):
    """Fetch one page of the Toutiao PC feed and store its single-mode items.

    Sends a GET request to ``URL`` with the paging cursor and category encoded
    in the query string, parses the JSON response, and adds one ``NewsInfo``
    to the module-level ``newsSet`` for every item flagged ``single_mode``.

    Args:
        nextNewstime: Paging cursor sent as ``max_behot_time``; pass the value
            returned by the previous call to continue paging.
        Newstype: Feed category sent as the ``category`` query parameter.

    Returns:
        The ``max_behot_time`` value from the response's ``next`` object, to be
        passed to the following call. If the response carries no such value,
        the incoming ``nextNewstime`` is returned unchanged.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer': 'http://www.toutiao.com/ch/news_hot/',
    }
    params = {
        'category': Newstype,
        'utm_source': 'toutiao',
        'widen': 1,
        # Timestamp of the next news item, as returned by the previous page.
        'max_behot_time': nextNewstime,
        'max_behot_time_tmp': nextNewstime,
        # The key previously carried a stray leading character inside the
        # quotes, which would have sent a differently named parameter.
        'tadrequire': 'true',
        'as': 'as:A145E9AAB5D2B44',  # purpose unknown
        'cp': '59A5C23B14C4EE1',  # purpose unknown
    }
    request = urllib2.Request(URL + urllib.urlencode(params), headers=headers)

    # urllib2 responses are not context managers, so closing() guarantees the
    # connection is released even if JSON decoding fails.
    with closing(urllib2.urlopen(request)) as response:
        payload = json.load(response)

    # 'data' may be absent or null; treat that as an empty page.
    for item in payload.get('data') or []:
        if not item.get('single_mode'):
            continue
        source_url = item.get('source_url')
        # NOTE(review): the headline is read from 'title' (previously the
        # misspelled key 'tittle'); confirm against a live response.
        # NOTE(review): source_url is passed twice, exactly as before; confirm
        # against the NewsInfo constructor signature.
        newsSet.add(NewsInfo(
            item.get('title'),
            item.get('abstract'),
            item.get('image_url'),
            item.get('group_id'),
            item.get('source'),
            source_url,
            source_url,
        ))

    # 'next' may be absent or null; fall back to the caller's cursor instead
    # of failing with an AttributeError on None.
    cursor = payload.get('next') or {}
    return cursor.get('max_behot_time', nextNewstime)
if __name__ == '__main__':
    # Fetch the first page of the NEWS_HOT category with cursor 0 and keep the
    # cursor that getNews returns.
    time = getNews(0, NewsType.NEWS_HOT)
    # Feed the returned cursor back in to fetch further pages:
    #     time = getNews(time, NewsType.NEWS_HOT)
    #     time = getNews(time, NewsType.NEWS_HOT)
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-09-07 17:08  ATodatNews\
     文件         381  2017-08-29 11:56  ATodatNews\.project
     文件         434  2017-08-29 11:56  ATodatNews\.pydevproject
     目錄           0  2017-08-29 19:13  ATodatNews\.settings\
     文件         192  2017-09-13 15:44  ATodatNews\.settings\org.eclipse.core.resources.prefs
     目錄           0  2017-09-07 17:05  ATodatNews\dist\
     文件         142  2017-09-07 17:05  ATodatNews\dist\cookie.txt
     目錄           0  2017-09-07 17:04  ATodatNews\dist\static\
     目錄           0  2017-09-07 17:04  ATodatNews\dist\static\css\
     文件         127  2017-09-06 14:36  ATodatNews\dist\static\css\global.css
     文件        3586  2017-09-07 15:47  ATodatNews\dist\static\css\home_css.css
     目錄           0  2017-09-07 17:04  ATodatNews\dist\static\images\
     文件      112556  2017-09-06 15:34  ATodatNews\dist\static\images\guanggao.png
     文件        3480  2017-08-30 11:37  ATodatNews\dist\static\images\logo.png
     文件        5069  2017-09-05 20:12  ATodatNews\dist\static\images\navi_bar.png
     目錄           0  2017-09-07 17:04  ATodatNews\dist\static\js\
     文件        6846  2017-09-05 12:01  ATodatNews\dist\static\js\md5.js
     目錄           0  2017-09-07 17:04  ATodatNews\dist\templates\
     文件       32856  2017-09-07 15:48  ATodatNews\dist\templates\home.html
     文件         398  2017-09-06 11:46  ATodatNews\dist\templates\newfile.html
     文件    14948438  2017-09-07 17:03  ATodatNews\dist\toutiao.exe
     目錄           0  2017-08-29 19:18  ATodatNews\src\
     目錄           0  2017-09-13 15:44  ATodatNews\src\mySpider\
     文件        1866  2017-08-29 19:16  ATodatNews\src\mySpider\Main.py
     文件       12315  2017-09-06 20:37  ATodatNews\src\mySpider\NewSpider.py
     文件        9909  2017-09-06 20:39  ATodatNews\src\mySpider\NewSpider.pyc
     文件        4642  2017-09-07 12:13  ATodatNews\src\mySpider\NewsModel.py
     文件        4863  2017-09-07 12:13  ATodatNews\src\mySpider\NewsModel.pyc
     文件         331  2017-08-29 18:03  ATodatNews\src\mySpider\Type.py
     文件         799  2017-08-29 19:14  ATodatNews\src\mySpider\Type.pyc
     文件           0  2017-08-29 19:13  ATodatNews\src\mySpider\__init__.py
............此處省略20個文件信息
評論
共有 條評論