資源簡介
實現功能:1、抓取知乎上關注人數超過2000人數的話題、相應連接、父子話題,并存入數據庫。 2、抓取某一話題的回答,可以設置贊的最低數才會被抓取,并存入數據庫。 3、將感興趣的內容獲取出來發送qq郵件。
代碼片段和文件信息
#?-*-?coding:?utf-8?-*-
import?datetimetime
import?codecs
import?smtplib
from?email.mime.text?import?MIMEText
from?email.mime.image?import?MIMEImage
from?email.mime.multipart?import?MIMEMultipart
from?email.mime.application?import?MIMEApplication??
import?MySQLdb
from?zhihu.spiders.zhihu_spider?import?*
def zan_cmp(a, b):
    """Old-style comparison function ordering records by item 1, descending.

    Args:
        a: An indexable record; only ``a[1]`` is compared.
        b: An indexable record; only ``b[1]`` is compared.

    Returns:
        -1 if ``a[1] > b[1]``, 1 if ``a[1] < b[1]``, and 0 when they are
        equal, so sorting with this comparator puts the largest item-1
        value first.

    NOTE(review): item 1 is presumably the upvote ("zan") count of an
    answer row -- confirm against the query that builds the rows.
    """
    # Equivalent to ``-cmp(a[1], b[1])`` but spelled without the ``cmp``
    # builtin, which exists only in Python 2.
    return (b[1] > a[1]) - (b[1] < a[1])
class?GetInteresting:
def?__init__(self):
self.conn?=?MySQLdb.connect(
host=‘localhost‘
user?=?‘root‘
passwd?=?‘‘
port?=?3306)
self.cur?=?self.conn.cursor()
self.conn.select_db(‘zhihu‘)
self.receiver?=?ZhihuSpider.my_parse.receiver
def?read_answer(self?lst):
order?=?1
l?=?len(lst)
file_name?=?‘%s.txt‘?%?ZhihuSpider.my_parse.topic
f?=?codecs.open(file_name?‘w‘)
msg?=?MIMEMultipart(“related“)
body?=?‘‘
for?answer?in?lst:
f.write(‘%s個贊\n‘?%?answer[1])
f.write(‘時間%s\n‘?%?time.strftime(‘%Y-%m-%d?%H:%M:%S‘?time.gmtime(answer[2])))
f.write(‘%s%s‘?%?(
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2015-07-07 21:21  zhihu\
     文件         252  2015-06-22 19:02  zhihu\scrapy.cfg
     目錄           0  2015-07-07 21:21  zhihu\zhihu\
     文件         123  2015-07-07 21:21  zhihu\zhihu\config.ini
     文件        3630  2015-07-07 21:18  zhihu\zhihu\getInteresting.py
     文件        4476  2015-07-07 21:03  zhihu\zhihu\getInteresting.pyc
     文件         486  2015-06-23 22:14  zhihu\zhihu\items.py
     文件         502  2015-06-23 22:39  zhihu\zhihu\items.pyc
     文件        2711  2015-07-07 21:02  zhihu\zhihu\pipelines.py
     文件        2655  2015-07-07 21:03  zhihu\zhihu\pipelines.pyc
     文件         817  2015-06-22 20:50  zhihu\zhihu\settings.py
     文件         598  2015-06-22 20:50  zhihu\zhihu\settings.pyc
     目錄           0  2015-07-07 21:21  zhihu\zhihu\spiders\
     文件        3307  2015-06-23 21:07  zhihu\zhihu\spiders\rotate_useragent.py
     文件        3255  2015-06-23 21:08  zhihu\zhihu\spiders\rotate_useragent.pyc
     文件        1756  2015-07-07 21:03  zhihu\zhihu\spiders\zhihu_spider.py
     文件        2114  2015-07-07 21:03  zhihu\zhihu\spiders\zhihu_spider.pyc
     文件         161  2015-06-17 16:03  zhihu\zhihu\spiders\__init__.py
     文件         143  2015-06-22 19:13  zhihu\zhihu\spiders\__init__.pyc
     文件        1598  2015-07-04 15:34  zhihu\zhihu\util.py
     文件        1856  2015-07-04 15:34  zhihu\zhihu\util.pyc
     文件           0  2015-06-17 16:03  zhihu\zhihu\__init__.py
     文件         135  2015-06-22 19:13  zhihu\zhihu\__init__.pyc
     文件         200  2015-07-07 21:10  zhihu\zhihu\互聯網行業招聘.txt
     目錄           0  2015-07-07 21:22  zhihu_topic\
     文件        1076  2015-07-07 21:22  zhihu_topic\getInteresting.py
     文件         264  2015-06-30 20:26  zhihu_topic\scrapy.cfg
     文件      530337  2015-07-04 15:27  zhihu_topic\topic.txt
     目錄           0  2015-07-07 21:21  zhihu_topic\zhihu_topic\
     文件         432  2015-06-30 20:46  zhihu_topic\zhihu_topic\items.py
     文件         544  2015-06-30 20:52  zhihu_topic\zhihu_topic\items.pyc
............此處省略13個文件信息
- 上一篇:解魔方算法
- 下一篇:Ansoft Maxwell 40W 無刷直流電機
評論
共有 條評論