資源簡介
利用網絡上公開的數據構建一個小型的證券知識圖譜/知識庫
代碼片段和文件信息
import csv
import hashlib
import os
def get_md5(string: str) -> str:
    """Return the hexadecimal MD5 digest of *string*.

    The text is encoded as UTF-8 before hashing, so equal strings always
    produce the same digest.

    Args:
        string: Text to hash.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.
    """
    return hashlib.md5(string.encode("utf-8")).hexdigest()
def build_executive(executive_prep: str, executive_import: str) -> None:
    """Create an 'executive' file in csv format that can be imported into Neo4j.

    format -> person_id:ID,name,gender,age:int,:LABEL
    label -> Person

    The first row of the input is treated as a header and skipped, as is any
    row with fewer than three columns. For every remaining row the first
    three columns are copied to the output, prefixed with an MD5-based id
    and suffixed with the ``Person`` label.

    Args:
        executive_prep: Path of the prepared input CSV file to read.
        executive_import: Path of the Neo4j import CSV file to write
            (overwritten if it already exists).
    """
    print('Writing to {} file...'.format(executive_import.split('/')[-1]))
    # newline='' lets the csv module control line endings itself; without it
    # the writer emits an extra '\r' per row on platforms that translate '\n'.
    with open(executive_prep, 'r', encoding='utf-8', newline='') as file_prep, \
            open(executive_import, 'w', encoding='utf-8', newline='') as file_import:
        # NOTE(review): the ',' delimiter and the '{},{},{}' id format were
        # reconstructed from the column list in the docstring -- confirm
        # against the upstream source before relying on the generated ids.
        file_prep_csv = csv.reader(file_prep, delimiter=',')
        file_import_csv = csv.writer(file_import, delimiter=',')
        headers = ['person_id:ID', 'name', 'gender', 'age:int', ':LABEL']
        file_import_csv.writerow(headers)
        for i, row in enumerate(file_prep_csv):
            # Skip the header row and rows too short to index below.
            if i == 0 or len(row) < 3:
                continue
            info = [row[0], row[1], row[2]]
            # generate md5 according to 'name', 'gender' and 'age'
            info_id = get_md5('{},{},{}'.format(row[0], row[1], row[2]))
            info.insert(0, info_id)
            info.append('Person')
            file_import_csv.writerow(info)
    print('- done.')
def?build_stock(stock_industry_prep?stock_concept_prep?stock_import):
????“““Create?an?‘stock‘?file?in?csv?format?that?can?be?imported?into?Neo4j.
????format?->?company_id:IDnamecode:LABEL
????label?->?CompanyST
????“““
????print(‘Writing?to?{}?file...‘.format(stock_import.split(‘/‘)[-1]))
????stock?=?set()??#?‘codename‘
????with?open(stock_industry_prep?‘r‘?encoding=‘utf-8‘)?as?file_prep:
????????file_prep_csv?=?csv.reader(file_prep?delimiter=‘‘)
????????for?i?row?in?enumerate(file_prep_csv):
????????????if?i?==?0:
????????????????continue
????????????code_name?=?‘{}{}‘.format(row[0]?row[1].replace(‘?‘?‘‘))
????????????stock.add(code_name)
????with?open(stock_concept_prep?‘r‘?encoding=‘utf-8‘)?as?file_prep:
????????file_prep_csv?=?csv.reader(file_prep?delimiter=‘‘)
????????for?i?row?in?enumerate(file_prep_csv):
????????????if?i?==?0:
????????????????continue
????????????code_name?=?‘{}{}‘.format(row[0]?row[1].replace(‘?‘?‘‘))
????????????stock.add(code_name)
????with?open(stock_import?‘w‘?encoding=‘utf-8‘)?as?file_import:
????????file_import_csv?=?csv.writer(file_import?delimiter=‘‘)
????????headers?=?[‘stock_id:ID‘?‘name‘?‘code‘?‘:LABEL‘]
????????file_import_csv.writerow(headers)
????????for?s?in?stock:
????????????split?=?s.split(‘‘)
????????????ST?=?False??#?ST?flag
????????????states?=?[‘*ST‘?‘ST‘?‘S*ST‘?‘SST‘]
????????????info?=?[]
????????????for?state?in?states:
????????????????if?split[1].startswith(state):
????????????????????ST?=?True
????????????????????split[1]?=?split[1].replace(state?‘‘)
???
評論
共有 條評論