91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 9.16KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2024-05-06
  • 語言: Python
  • 標簽: python 中文分詞

資源簡介

使用BiLSTM CRF分詞模型,在SIGHAN Microsoft Research數據集上進行中文分詞的訓練和測試。


運行方法可在readme看到,同時有詳細報告描述

【源碼目錄】

中山大學_中文分詞

├── readme.md
├── 代碼
│   ├── cws.py
│   ├── msr_test.utf8
│   ├── msr_test_gold.utf8
│   ├── msr_training.utf8
│   ├── result.txt
│   ├── test_score.py
│   ├── train.py
│   └── train_result.pkl
├── 中文報告.docx
└── 英文報告.docx

1 directory, 11 files


資源截圖

代碼片段和文件信息

import codecs
from time import sleep

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

torch.cuda.set_device(1)  # GPU index; set this to match your server.

def text(filename, data):
    """Write every item of ``data`` to ``filename``, one item per line.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of strings; a newline is appended to each item.
    """
    # The context manager guarantees the handle is closed even when a
    # write raises part-way through the iterable.
    with open(filename, 'w') as out:
        out.writelines(item + '\n' for item in data)

class BiLSTM_CRF(nn.Module):
    """Bidirectional LSTM encoder with a CRF transition matrix on top.

    The class reads the module-level names ``START_TAG``, ``STOP_TAG`` and
    ``device``; they are defined outside the part of the file shown here.
    """

    def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
        """Create the embedding, BiLSTM, projection and transition layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            tag_to_ix: Mapping from tag to integer index; must contain
                ``START_TAG`` and ``STOP_TAG``.
            embedding_dim: Size of each embedding vector.
            hidden_dim: Input width of the tag projection. Each LSTM
                direction gets ``hidden_dim // 2`` units, so this should be
                even for the LSTM output to match the projection.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, num_layers=1,
                            bidirectional=True, batch_first=True)
        self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)
        # transitions[i, j] is the score of moving from tag j to tag i
        # (this is how _forward_alg indexes it).
        self.transitions = nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size))
        # Forbid moving into START and moving out of STOP.
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a random ``(h, c)`` pair of shape ``(2, 1, hidden_dim // 2)``.

        Both tensors are drawn on the CPU and then moved to ``device``.
        """
        return (torch.randn(2, 1, self.hidden_dim // 2).to(device),
                torch.randn(2, 1, self.hidden_dim // 2).to(device))

    def _get_lstm_features_test(self, sentence):
        """Return per-position tag scores for a single, unbatched sentence.

        Args:
            sentence: Tensor of token indices for one sentence (presumably
                1-D, ``(seq_len,)`` -- confirm against the caller).

        Returns:
            Tensor of shape ``(seq_len, tagset_size)``.
        """
        # NOTE(review): this freshly drawn state is never passed to the LSTM
        # and is overwritten two lines below. The call is kept so the
        # random-number stream is consumed exactly as before.
        self.hidden = self.init_hidden()
        embeds = self.word_embeds(sentence).unsqueeze(dim=0)
        lstm_out, self.hidden = self.lstm(embeds)
        # Remove only the batch axis added above. A bare squeeze() would also
        # drop the sequence axis of a one-token sentence.
        lstm_out = lstm_out.squeeze(0)
        return self.hidden2tag(lstm_out)

    def _forward_alg(self, feats):
        """Compute the log partition function for each sentence in a batch.

        Args:
            feats: Emission scores indexed as ``(batch, position, tag)``.
                Every position along axis 1 is scored; no masking is
                applied here.

        Returns:
            Tensor of shape ``(batch,)``: log-sum-exp of the scores of all
            tag paths, including the final transition to ``STOP_TAG``.
        """
        # Work on the device that holds the parameters so the recursion needs
        # no per-step copies and also works for zero-length input.
        dev = self.transitions.device
        feats = feats.to(dev)
        start_ix = self.tag_to_ix[START_TAG]
        stop_ix = self.tag_to_ix[STOP_TAG]

        # Only START carries probability mass before the first position.
        forward_var = torch.full([feats.shape[0], self.tagset_size], -10000.,
                                 device=dev)
        forward_var[:, start_ix] = 0.

        pairwise = torch.unsqueeze(self.transitions, 0)  # (1, next, prev)
        for pos in range(feats.shape[1]):
            emit = feats[:, pos, :].unsqueeze(2)  # (batch, next, 1)
            # scores[b, i, j] = alpha[b, j] + emit[b, i] + transitions[i, j]
            scores = forward_var.unsqueeze(1) + emit + pairwise
            forward_var = torch.logsumexp(scores, dim=2)

        terminal_var = forward_var + self.transitions[stop_ix]
        return torch.logsumexp(terminal_var, dim=1)

    def _get_lstm_features(self, sentence):
        """Return tag scores for an already-batched index tensor.

        Args:
            sentence: Tensor of token indices. The LSTM is built with
                ``batch_first=True``, so after embedding it is read as
                ``(batch, seq_len, embedding_dim)``.

        Returns:
            Tensor of tag scores with ``tagset_size`` as the last axis.
        """
        # NOTE(review): as in _get_lstm_features_test, this state is drawn
        # but not fed to the LSTM; kept to preserve random-number use.
        self.hidden = self.init_hidden()
        embeds = self.word_embeds(sentence)
        lstm_out, self.hidden = self.lstm(embeds)
        return self.hidden2tag(lstm_out)

????def?_score_sentence(self?feats?tags):??
????????score?=?torch.zeros(tags.shape[0]).to(device)
????????tags?=?torch.cat([torch.full([tags.shape[0]?1]?self.tag_to_ix[START_TAG]).

評論

共有 條評論