-
大小: 37.02MB  文件類型: .zip  金幣: 1  下載: 0 次  發布日期: 2023-06-28
- 語言: Python
- 標簽:
資源簡(jiǎn)介
主要實現使用了基于字向量的四層雙向LSTM與CRF模型的網絡.該項目提供了原始訓練數據樣本(一般項目,出院情況,病史情況,病史特點,診療經過)與轉換版本,訓練腳本,預訓練模型,可用于序列標注研究.把玩和PK使用
代碼片段和文件信息
#!/usr/bin/env python3
# coding: utf-8
# File: lstm_predict.py
# Author: lhy
# Date: 18-5-23
import os

# Silence TensorFlow's native INFO/WARNING log output ('2' keeps errors only).
# The variable has to be in the environment before TensorFlow is first
# imported, so it is set ahead of the Keras imports below.
# NOTE(review): assumes Keras is running on the TensorFlow backend - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import matplotlib.pyplot as plt
import numpy as np
from keras import backend as K
from keras.layers import Embedding, Bidirectional, LSTM, Dense, TimeDistributed, Dropout
from keras.models import Sequential, load_model
from keras.preprocessing.sequence import pad_sequences
from keras_contrib.layers.crf import CRF
class?LSTMNER:
????def?__init__(self):
????????cur?=?‘/‘.join(os.path.abspath(__file__).split(‘/‘)[:-1])
????????self.train_path?=?os.path.join(cur?‘data/train.txt‘)
????????self.vocab_path?=?os.path.join(cur?‘model/vocab.txt‘)
????????self.embedding_file?=?os.path.join(cur?‘model/token_vec_300.bin‘)
????????self.model_path?=?os.path.join(cur?‘model/tokenvec_bilstm2_crf_model_20.h5‘)
????????self.word_dict?=?self.load_worddict()
????????self.class_dict?={
?????????????????????????‘O‘:0
?????????????????????????‘TREATMENT-I‘:?1
?????????????????????????‘TREATMENT-B‘:?2
?????????????????????????‘BODY-B‘:?3
?????????????????????????‘BODY-I‘:?4
?????????????????????????‘SIGNS-I‘:?5
?????????????????????????‘SIGNS-B‘:?6
?????????????????????????‘CHECK-B‘:?7
?????????????????????????‘CHECK-I‘:?8
?????????????????????????‘DISEASE-I‘:?9
?????????????????????????‘DISEASE-B‘:?10
????????????????????????}
????????self.label_dict?=?{j:i?for?ij?in?self.class_dict.items()}
????????self.embedDING_DIM?=?300
????????self.EPOCHS?=?10
????????self.BATCH_SIZE?=?128
????????self.NUM_CLASSES?=?len(self.class_dict)
????????self.VOCAB_SIZE?=?len(self.word_dict)
????????self.TIME_STAMPS?=?150
????????self.embedding_matrix?=?self.build_embedding_matrix()
????????self.model?=?self.tokenvec_bilstm2_crf_model()
????????self.model.load_weights(self.model_path)
    '''加載詞表'''
????def?load_worddict(self):
????????vocabs?=?[line.strip()?for?line?in?open(self.vocab_path)]
????????word_dict?=?{wd:?index?for?index?wd?in?enumerate(vocabs)}
????????return?word_dict
    '''構造輸入,轉換成所需形式'''
????def?build_input(self?text):
????????x?=?[]
????????for?char?in?text:
????????????if?char?not?in?self.word_dict:
????????????????char?=?‘UNK‘
????????????x.append(self.word_dict.get(char))
????????x?=?pad_sequences([x]?self.TIME_STAMPS)
????????return?x
????def?predict(self?text):
????????str?=?self.build_input(text)
????????raw?=?self.model.predict(str)[0][-self.TIME_STAMPS:]
????????result?=?[np.argmax(row)?for?row?in?raw]
????????chars?=?[i?for?i?in?text]
????????tags?=?[self.label_dict[i]?for?i?in?result][len(result)-len(text):]
????????res?=?list(zip(chars?tags))
????????print(res)
????????return?res
    '''加載預訓練詞向量'''
????def?load_pretrained_embedding(self):
????????embeddings_dict?=?{}
????????with?open(self.embedding_file?‘r‘)?as?f:
????????????for?line?in?f:
????????????????values?=?line.strip().split(‘?‘)
????????????????if?len(values)?300:
????????????????????cont
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-12-15 05:13  MedicalNamedEntityRecognition-master\
     目錄           0  2018-12-15 05:13  MedicalNamedEntityRecognition-master\.idea\
     文件         459  2018-12-15 05:13  MedicalNamedEntityRecognition-master\.idea\illness_entity_recognize.iml
     文件         212  2018-12-15 05:13  MedicalNamedEntityRecognition-master\.idea\misc.xml
     文件         300  2018-12-15 05:13  MedicalNamedEntityRecognition-master\.idea\modules.xml
     文件       17622  2018-12-15 05:13  MedicalNamedEntityRecognition-master\.idea\workspace.xml
     文件        7140  2018-12-15 05:13  MedicalNamedEntityRecognition-master\README.md
     目錄           0  2018-12-15 05:13  MedicalNamedEntityRecognition-master\data\
     文件     2022512  2018-12-15 05:13  MedicalNamedEntityRecognition-master\data\train.txt
     文件     1452349  2018-12-15 05:13  MedicalNamedEntityRecognition-master\data_origin.zip
     文件        1213  2018-12-15 05:13  MedicalNamedEntityRecognition-master\length_distribution.txt
     文件        4857  2018-12-15 05:13  MedicalNamedEntityRecognition-master\lstm_predict.py
     文件        6869  2018-12-15 05:13  MedicalNamedEntityRecognition-master\lstm_train.py
     目錄           0  2018-12-15 05:13  MedicalNamedEntityRecognition-master\model\
     文件    70849895  2018-12-15 05:13  MedicalNamedEntityRecognition-master\model\token_vec_300.bin
     文件     9438552  2018-12-15 05:13  MedicalNamedEntityRecognition-master\model\tokenvec_bilstm2_crf_model_20.h5
     文件        6881  2018-12-15 05:13  MedicalNamedEntityRecognition-master\model\vocab.txt
     文件        2604  2018-12-15 05:13  MedicalNamedEntityRecognition-master\transfer_data.py
評論
共有 條評論