資源簡介
5652華爾街見聞.py
代碼片段和文件信息
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import datetime
import multiprocessing as mp
import re
import sys
import urllib
import urllib2

import pymongo
import requests
# Maps a category number (as a string key) to its Chinese display name.
# Values are unicode literals; keys 6-8 are intentionally absent, exactly as
# in the original table.
Category_Map = {
    "1": u"外匯",
    "2": u"股市",
    "3": u"商品",
    "4": u"債市",
    "5": u"央行",
    "9": u"中國",
    "10": u"美國",
    "11": u"歐元區",
    "12": u"日本",
    "13": u"英國",
    "14": u"澳洲",
    "15": u"加拿大",
    "16": u"瑞士",
    "17": u"其他地區",
}
def num2name(category_num):
    """Return the display name for ``category_num``, or ``""`` if unknown.

    ``category_num`` is looked up as a key of ``Category_Map``. ``dict.get``
    is used instead of ``dict.has_key`` because ``has_key`` no longer exists
    in Python 3; the result is the same on Python 2.
    """
    return Category_Map.get(category_num, "")
class MongoDBIO:
    """Hold MongoDB connection settings and open the configured collection."""

    # Declare the connection-related attributes.
    def __init__(self, host, port, name, password, database, collection):
        """Store the settings; no connection is opened here.

        ``name`` / ``password`` are the credentials used by ``Connection``;
        authentication is skipped when both are empty.
        """
        self.host = host
        self.port = port
        self.name = name
        self.password = password
        self.database = database
        self.collection = collection

    # Connect to the database; db and posts are the database and collection
    # handles.
    def Connection(self):
        """Open a client connection and return the configured collection.

        A new client is created on every call and is not closed here.
        """
        # pymongo.Connection was removed in PyMongo 3.0; MongoClient is its
        # replacement and accepts the same host/port keywords.
        connection = pymongo.MongoClient(host=self.host, port=self.port)
        db = connection[self.database]
        if self.name or self.password:
            # Verify user name and password.
            # NOTE(review): Database.authenticate is gone in PyMongo 4.0; if
            # the driver is upgraded, pass the credentials to MongoClient.
            db.authenticate(name=self.name, password=self.password)
        posts = db[self.collection]
        return posts
# Save operation
# def ResultSave(save_host, save_port, save_name, save_password, save_database, save_collection, save_contents):
#     posts = MongoDBIO(save_host, save_port, save_name, save_password, save_database, save_collection).Connection()
#     for save_content in save_contents:
#         posts.save(save_content)
def ResultSave(save_host, save_port, save_name, save_password,
               save_database, save_collection, save_content):
    """Save a single document into the given MongoDB collection.

    The first six arguments are passed straight to ``MongoDBIO``;
    ``save_content`` is the document handed to the collection's ``save``.
    A fresh ``MongoDBIO`` connection is opened on every call.
    """
    posts = MongoDBIO(
        save_host,
        save_port,
        save_name,
        save_password,
        save_database,
        save_collection,
    ).Connection()
    # NOTE(review): Collection.save is deprecated in PyMongo 3.x and removed
    # in 4.0; kept here so behavior stays unchanged for the current driver.
    posts.save(save_content)
def Spider(url, data, timeout=30):
    """Send a GET request and return the raw response body.

    ``data`` is passed to ``requests.get`` as the query parameters.
    ``timeout`` (seconds) is forwarded to ``requests.get``; without one the
    call can block indefinitely on an unresponsive server. Pass
    ``timeout=None`` to restore the original wait-forever behavior.

    Returns ``Response.content`` (the undecoded body).
    Raises whatever ``requests.get`` raises, including
    ``requests.exceptions.Timeout`` when the timeout elapses.
    """
    # Method 1: GET via requests.
    content = requests.get(url=url, params=data, timeout=timeout).content
    # Method 2 (unused alternative): GET via urllib2.
    # data = urllib.urlencode(data)  # encode the dict into a query string
    # full_url = url + '?' + data
    # print full_url
    # content = urllib2.urlopen(full_url).read()  # send the GET request
    # # content = requests.get(full_url).content  # send the GET request
    # print type(content)  # str
    return content
def?ContentSave(item):
????#?保存配置
????save_host?=?“l(fā)ocalhost“
????save_port?=?27017
????save_name?=?““
????save_password?=?““
????save_database?=?“textclassify“
????save_collection?=?“WallstreetcnSave“
????source?=?“wallstreetcn“
????createdtime?=?datetime.datetime.now()
?
評論
共有 條評論