資源簡介
使用 Python 與 MySQL 爬取百度百科的代碼,並利用 Neo4j 展示
代碼片段和文件信息
# -*- coding: utf-8 -*-
# author: wxm
import csv

import pandas as pd
import pymysql
from py2neo import Graph, Node, Relationship
## 加上字符集參數,防止中文亂碼
def get_items_from_database():
    """Connect to MySQL and Neo4j, then wipe the Neo4j graph.

    Opens a PyMySQL connection to the ``baike_science`` database (with an
    explicit ``utf8`` charset so Chinese text is not garbled), opens a py2neo
    ``Graph`` on the local Neo4j HTTP endpoint, and calls ``delete_all()`` on
    that graph.

    NOTE(review): this function was recovered from a preview that stops right
    after the ``delete_all()`` call, so the statements that actually read the
    items and use ``dbconn`` are not shown here. Only the visible statements
    are reproduced; nothing is returned by this visible portion.

    NOTE(review): host names and credentials are hard-coded; consider moving
    them to configuration.
    """
    # Connect to the MySQL database.
    dbconn = pymysql.connect(
        host="192.168.1.24",
        database="baike_science",
        user="root",
        password="root",
        port=3306,
        charset='utf8',
        use_unicode=True,
    )

    # Disabled query, kept for reference. The recovered text lost the commas
    # in the column list and the comparison operator after ``time_stamp``;
    # the column list below is presumably ``id, title, url`` -- TODO confirm
    # and restore the operator before re-enabling.
    #
    # sqlcmd = "SELECT id, title, url FROM webpage WHERE time_stamp <op> '2017-12-21 00:00:00' "
    #
    # Load the MySQL rows through pandas (yields a pandas DataFrame).
    # data = pd.read_sql(sqlcmd, dbconn)

    # Connect to Neo4j.
    test_graph = Graph(
        "http://localhost:7474",
        username="neo4j",
        password="780961",
    )

    # Clear the graph database before loading anything new.
    test_graph.delete_all()
?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       1950  2017-12-21 20:41  BaiDuBaiKei\baike_science.sql
     文件       7485  2017-12-26 10:49  BaiDuBaiKei\DatatoNeo4j.py
     文件       3006  2017-12-20 21:19  BaiDuBaiKei\data_storager.py
     文件      16974  2017-12-21 20:37  BaiDuBaiKei\Spider.py
     文件       2315  2017-12-20 20:46  BaiDuBaiKei\wipe_off_html_tag.py
     文件          0  2017-12-21 20:37  BaiDuBaiKei\__init__.py
     目錄          0  2017-12-26 10:48  BaiDuBaiKei
----------- ---------  ---------- -----  ----
                31730                    7
評論
共有 條評論