-
大小: 11KB | 文件類型: .py | 金幣: 1 | 下載: 0 次 | 發布日期: 2021-05-19
- 語言: Python
- 標簽: Tensorflow, python, BiLSTM
資源簡介
該段 Tensorflow 代碼可用于文本分類和情感分類。其主要特點是:在同一份代碼中同時實現兩張張量圖,一張用于訓練,另一張用于測試,并做交叉驗證。
代碼片段和文件信息
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author:zhoukaiyin
@Time:2017/7/22
"""
import glob
import os
import pickle

import gensim
import numpy as np
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
def data2index():
    """Build the word <-> integer index mappings for the corpus vocabulary.

    Indices start at 1; ``w2embed`` in this file stores an extra vector at
    position 0 of the embedding list, so 0 is never assigned to a word here.

    Returns:
        A ``(w2label, label2w)`` tuple: ``w2label`` maps each word to its
        index and ``label2w`` is the inverse mapping.
    """
    # get_word() returns a set, whose iteration order for strings can differ
    # between interpreter runs; sorting makes the indices reproducible.
    words = sorted(get_word())
    w2label = {w: i for i, w in enumerate(words, 1)}
    label2w = {i: w for w, i in w2label.items()}
    return w2label, label2w
def get_word(pattern: str = "../BIO/*") -> set:
    """Collect the distinct tokens found in the first column of the BIO files.

    Each non-blank line is stripped and split on tabs; the first field is
    taken as the token.

    Args:
        pattern: Glob pattern selecting the files to read. Defaults to the
            location the original code used.

    Returns:
        The set of distinct tokens (empty if no file matches).
    """
    words = set()
    for path in glob.glob(pattern):
        with open(path, 'r') as rf:
            for line in rf:
                line = line.strip()
                if line:
                    words.add(line.split('\t')[0])
    return words
def get_phara(bio_pattern: str = "../BIO/*",
              feature_path: str = "h_features.pkl",
              label_path: str = "../label/result.txt"):
    """Load token sequences, labels and hand-made features for labelled files.

    Every file matched by ``bio_pattern`` is looked up by base name in the
    label table; files labelled ``'unknown'`` are skipped. For the others the
    first tab-separated field of each non-blank line is collected.

    Args:
        bio_pattern: Glob pattern selecting the token files.
        feature_path: Pickle file holding a mapping from file base name to
            its feature entry.
        label_path: Text file containing a Python literal mapping from file
            base name to label.

    Returns:
        A ``(pharas, labels, human_feature)`` tuple of three parallel lists:
        token lists, integer labels, and feature entries.

    Raises:
        KeyError: If a matched file has no entry in the label table, or a
            labelled file has no entry in the feature table.
    """
    pharas = []
    labels = []
    human_feature = []

    with open(feature_path, 'rb') as hf:
        # SECURITY NOTE(review): pickle.load and eval execute arbitrary code
        # if the file is attacker-controlled; only use with trusted data.
        features = pickle.load(hf, encoding="bytes")
        m_features = eval(str(features))
    with open(label_path, 'r') as pf:
        # SECURITY NOTE(review): eval on file contents; trusted input only.
        labe_dir = eval(pf.read())

    for file in glob.glob(bio_pattern):
        name = os.path.basename(file)
        label = labe_dir[name]
        if label == 'unknown':
            continue
        human_feature.append(m_features[name])
        phara = []
        with open(file, 'r') as rf:
            for line in rf:
                line = line.strip()
                if line:
                    phara.append(line.split('\t')[0])
        labels.append(int(label))
        pharas.append(phara)
    return pharas, labels, human_feature
def w2embed():
    """Build the embedding list for the vocabulary and pickle it to disk.

    Loads the text-format vectors from ``glove200.txt``, then for each
    vocabulary index (1..N) stores, in order of preference:

    1. the vector of the lower-cased word;
    2. the sum of the vectors of its ``-``-separated parts;
    3. the shared random vector kept at position 0.

    Writes ``embed.pkl`` (the list of vectors, position 0 being the random
    vector) and ``w2label.pkl`` (the word -> index mapping) to the current
    directory and prints a summary of the misses.
    """
    w2label, label2w = data2index()
    model = gensim.models.KeyedVectors.load_word2vec_format(
        "glove200.txt", binary=False)
    dim = model.vector_size

    # Position 0 holds a random vector that also serves as the fallback for
    # words that cannot be resolved at all.
    embeding = [np.random.randn(dim)]

    count = 0   # words with no direct vector
    ncount = 0  # of those, words still unresolved after splitting on '-'
    for idx in range(1, len(w2label) + 1):
        word = label2w[idx].lower()
        try:
            embed = model[word]
        except KeyError:
            count += 1
            embed = np.zeros((dim))
            try:
                # Distinct loop name: the original reused the outer index here.
                for part in word.split('-'):
                    embed += model[part]
            except KeyError:
                ncount += 1
                embed = embeding[0]
        embeding.append(embed)

    print("一共有{}個單詞沒有被找到有{}個重新被找到!".format(count, count - ncount))

    # Context managers close both handles even if pickling raises.
    with open("embed.pkl", 'wb') as wf, open("w2label.pkl", 'wb') as wwf:
        pickle.dump(w2label, wwf)
        pickle.dump(embeding, wf)
def?load_data():
????rrf?=?open(“w2label.pkl“‘rb‘)
????w2label?=?pickle.load(rrfencoding=“bytes“)
????index_pharas?=?[]
????labels?=?[]
????pharas?clsshuman_feature=?get_phara()
????for?iphara?in?enumerate(pharas):
????????ph
- 上一篇:感知機算法Python實現
- 下一篇:最好中國大學近幾年排名及python爬蟲代碼
評論
共有 條評論