91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 405KB
    文件類型: .zip
    金幣: 2
    下載: 1 次
    發布日期: 2021-06-08
  • 語言: Python
  • 標簽: CRF 醫療

資源簡介

基於 crfsuite 的醫療命名實體抽取的 Python 實現。醫療電子病歷命名實體識別評測任務的一個可執行 demo，採用的方法是條件隨機場（CRF），實現 CRF 的第三方庫為 [python-crfsuite]。目前該 demo 準確率為 68%，召回率為 62%，F1 值為 64.8%。

資源截圖

代碼片段和文件信息

#!/usr/bin/python
# -*- coding: utf-8 -*-
# **************************
# * Author      :  baiyyang
# * Email       :  baiyyang@163.com
# * Description :
# * create time :  2018/1/10 10:29 AM
# * file name   :  crf_unit.py


import sys
import codecs
import pycrfsuite
import string
import zhon.hanzi as zh
import reader
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelBinarizer

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    # reload() is needed because site.py removes sys.setdefaultencoding at
    # start-up. Neither name is available as a builtin/attribute on
    # Python 3, hence the version guard.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')


# Load the data
def readData(filename):
    """Read a tab-separated, UTF-8 encoded annotation file.

    Every line is stripped of surrounding whitespace and split on tab
    characters. Only lines that yield exactly three fields are kept; blank
    or malformed lines are skipped silently. The helpers in this file unpack
    each kept row as ``word, tag, label``.

    :param filename: path of the file to read.
    :return: list of three-element lists of unicode strings, in file order.
    :raises IOError: if the file cannot be opened.
    """
    # The context manager closes the handle even if decoding fails midway.
    with codecs.open(filename, 'r', 'utf-8') as fr:
        rows = (line.strip().split('\t') for line in fr)
        return [fields for fields in rows if len(fields) == 3]


# Load the training and test splits from the current working directory.
train = readData('train.txt')
test = readData('test.txt')


# Check whether a string is a punctuation mark
def ispunctuation(word):
    """Return True if *word* occurs in the combined punctuation string.

    The lookup is a substring test against ``string.punctuation`` followed by
    ``zhon.hanzi.punctuation``, so a single punctuation character matches,
    and so does any run of characters that appears contiguously in that
    combined string.

    :param word: the token to test.
    :return: True if *word* is found, False otherwise (including for the
        empty string).
    """
    if not word:
        # A substring search always "finds" the empty string at index 0,
        # which would wrongly classify an empty token as punctuation.
        return False
    punctuation = string.punctuation + zh.punctuation
    return word in punctuation


# Feature definition
def word2features(sent, i):
    """Return the list of feature strings for the token at position *i*.

    Each element of *sent* is indexed as ``[0]`` for the word and ``[1]`` for
    its tag. The features are the current word and tag plus, where they
    exist, the words and tags of up to two neighbours on each side and the
    concatenation of the two neighbouring words on that side.

    :param sent: sequence of rows whose first two items are strings.
    :param i: index of the token to describe; must be a valid index.
    :return: list of feature strings, always starting with ``'bias'``.
    """
    word = sent[i][0]
    postag = sent[i][1]
    features = [
        'bias',
        'word=' + word,
        'word_tag=' + postag,
    ]
    # Left context: only emitted when the neighbours actually exist.
    if i > 0:
        features.append('word[-1]=' + sent[i-1][0])
        features.append('word[-1]_tag=' + sent[i-1][1])
        if i > 1:
            features.append('word[-2]=' + sent[i-2][0])
            features.append('word[-2, -1]=' + sent[i-2][0] + sent[i-1][0])
            features.append('word[-2]_tag=' + sent[i-2][1])
    # Right context: the bounds checks keep sent[i+1] / sent[i+2] in range.
    if i < len(sent) - 1:
        features.append('word[1]=' + sent[i+1][0])
        features.append('word[1]_tag=' + sent[i+1][1])
        if i < len(sent) - 2:
            features.append('word[2]=' + sent[i+2][0])
            features.append('word[1, 2]=' + sent[i+1][0] + sent[i+2][0])
            features.append('word[2]_tag=' + sent[i+2][1])
    return features


def sent2feature(sent):
    """Return one feature list per token of *sent*, in order.

    :param sent: sequence of rows accepted by ``word2features``.
    :return: list whose i-th item is ``word2features(sent, i)``.
    """
    return [word2features(sent, i) for i in range(len(sent))]


def sent2label(sent):
    """Return the third field (the label) of every row in *sent*.

    :param sent: iterable of rows with exactly three items each.
    :return: list of labels, in order.
    :raises ValueError: if a row does not have exactly three items.
    """
    return [label for _word, _tag, label in sent]


def sent2word(sent):
    """Return the first field (the word) of every row in *sent*.

    :param sent: iterable of rows with exactly three items each.
    :return: list of words, in order.
    :raises ValueError: if a row does not have exactly three items.
    """
    return [word for word, _tag, _label in sent]


# Per-token feature lists and gold labels for the training split.
X_train = sent2feature(train)
y_train = sent2label(train)

# Same for the test split.
X_test = sent2feature(test)
y_test = sent2label(test)

# Train the model
model = pycrfsuite.Trainer(verbose=True)
# The whole training file is appended as a single sequence.
model.append(X_train, y_train)
model.set_params({
    'c1': 1.0,  # coefficient for L1 penalty
    'c2': 1e-3,  # coefficient for L2 penalty
    'max_iterations': 100,  # stop earlier
    # include transitions that are possible, but not observed
    'feature.possible_transitions': True,
    'feature.minfreq': 3,
})

# Writes the trained model to the given path.
model.train('./medical.crfsuite')


# Predict
tagger = pycrfsuite.Tagger()
tagger.open('./medical.crfsuite')

# A single test data set. Read it once and reuse the rows for the words,
# the features and the gold labels.
test1 = readData('test1.txt')
print(' '.join(sent2word(test1)))
predicted = tagger.tag(sent2feature(test1))
correct = sent2label(test1)

# Compare predictions with the gold labels. The explicit ' ' reproduces the
# separator that the Python 2 statement form `print a, b` inserted, while the
# single-argument call form behaves the same on Python 2 and Python 3.
print('Predicted: ' + ' ' + ' '.join(predicted))
print('Correct: ' + ' ' + ' '.join(correct))

#?預測準確率
num?=?0
for?i?tag?in?enumerate(predicted):
????if?tag?==?correct[i]:
????????num?

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-05-03 01:54  medical_ner_crfsuite-master\
     文件        1239  2018-05-03 01:54  medical_ner_crfsuite-master\README.md
     文件        4103  2018-05-03 01:54  medical_ner_crfsuite-master\crf_unit.py
     目錄           0  2018-05-03 01:54  medical_ner_crfsuite-master\data\
     文件        2350  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-1.txt
     文件        2505  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-10.txt
     文件        4555  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-100.txt
     文件        2898  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-11.txt
     文件        5652  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-12.txt
     文件        2127  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-13.txt
     文件        2832  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-14.txt
     文件        2314  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-15.txt
     文件        2533  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-16.txt
     文件        3044  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-17.txt
     文件        2310  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-18.txt
     文件        5179  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-19.txt
     文件        2368  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-2.txt
     文件        2010  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-20.txt
     文件         233  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-21.txt
     文件        5934  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-22.txt
     文件        2808  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-23.txt
     文件        5115  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-24.txt
     文件        5503  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-25.txt
     文件        3506  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-26.txt
     文件        2210  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-27.txt
     文件        2457  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-28.txt
     文件        3772  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-29.txt
     文件        3023  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-3.txt
     文件        3063  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-30.txt
     文件        2256  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-31.txt
     文件        1810  2018-05-03 01:54  medical_ner_crfsuite-master\data\病史特點-32.txt
............此處省略79個文件信息

評論

共有 條評論