91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 8KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2021-06-02
  • 語言: Python
  • 標簽: lstm, nlp

資源簡介

基於 LSTM 的語義相似度計算模型，使用百度 QA 的數據集進行實驗。

資源截圖

代碼片段和文件信息

# Standard-library, numeric, and project-local modules.
import os
import json
import h5py
import utils
import jieba
import pickle
import numpy as np
import keras.preprocessing.text

# Third-party model-building dependencies (gensim, scikit-learn, Keras).
from gensim.models import Word2Vec
from sklearn.metrics import roc_curve, auc
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# NOTE: the Keras layer class is spelled ``Embedding`` (capital E).
from keras.layers import Dense, Input, LSTM, Embedding, Dropout
from keras.layers.merge import concatenate
from keras.layers.wrappers import Bidirectional
from keras.models import Model, model_from_json, load_model
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint

def get_tokenizer(path='tok_expend.pkl', max_words=700000):
    """Load a pickled tokenizer and return its vocabulary size and word index.

    Args:
        path: Location of the pickled tokenizer object. Defaults to
            ``'tok_expend.pkl'`` (resolved against the current working
            directory), which is the path this function always used.
        max_words: Upper bound on the reported vocabulary size. Defaults to
            ``700000``, the cap this function always used.

    Returns:
        A ``(nb_words, word_index)`` tuple. ``word_index`` is the
        ``word_index`` attribute of the unpickled object, and ``nb_words`` is
        ``min(max_words, len(word_index) + 1)``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point this at tokenizer pickles from a trusted source.
    with open(path, 'rb') as f:
        tokenizer = pickle.load(f)

    word_index = tokenizer.word_index
    # The +1 presumably leaves room for index 0 (padding) — confirm against
    # how the tokenizer assigns indices.
    nb_words = min(max_words, len(word_index) + 1)
    return nb_words, word_index

def get_data(data_dir='E:/2018泰迪杯/LSTM/data'):
    """Load the six ``.npy`` arrays used by the model from ``data_dir``.

    Args:
        data_dir: Directory containing the ``.npy`` files. Defaults to the
            directory this function always read from.

    Returns:
        A 6-tuple ``(data_1, data_2, labels, data_t_1, data_t_2, labels_t)``
        of arrays loaded, in that order, from ``data_1_s.npy``,
        ``data_2_s.npy``, ``labels_s.npy``, ``data_t_1.npy``,
        ``data_t_2.npy`` and ``labels_t.npy``.

    Raises:
        OSError: If any of the files is missing or unreadable.
    """
    # NOTE(review): the "_s" / "_t" suffixes presumably distinguish the
    # training split from the test split — confirm with the data-prep script.
    file_names = (
        'data_1_s.npy',
        'data_2_s.npy',
        'labels_s.npy',
        'data_t_1.npy',
        'data_t_2.npy',
        'labels_t.npy',
    )
    # A tuple keeps ``a, b, c, d, e, f = get_data()`` unpacking working.
    return tuple(np.load(os.path.join(data_dir, name)) for name in file_names)

def?train():
????num_lstm?=?175
????num_dense?=?100
????rate_drop_lstm?=?0.15
????rate_drop_dense?=?0.15
????embedDING_DIM?=?100
????VALIDATION_SPLIT?=?0.1
????act?=?‘relu‘

????print(‘get_data‘)
????nb_wordsword_index=get_tokenizer()
????data_1data_2labelsdata_t_1data_t_2labels_t=get_data()

????print(‘get_model‘)
????word2vec?=?Word2Vec.load(‘E:/2018泰迪杯/數據/w2v_expend.mod‘)
????embedding_matrix?=?np.zeros((nb_words?embedDING_DIM))
????for?word?i?in?word_index.items():
????????if?word?in?word2vec.wv.vocab:
????????????try:
????????????????embedding_matrix[i]?=?word2vec.wv.word_vec(word)
????????????except:
????????????????pass

????embedding_layer_1?=?embedding(nb_words
????????????????????????????????embedDING_DIM
????????????????????????????????weights=[embedding_matrix]
????????????????????????????????input_length=30
????????????????????????????????trainable=False)
????embedding_layer_2?=?embedding(nb_words
????????????????????????????????embedDING_DIM
????????????????????????????????weights=[embedding_matrix]
????????????????????????????????input_length=300
????????????????????????????????trainable=False)
????lstm_layer?=?LSTM(num_lstm?dropout=rate_drop_lstm?recurrent_dropout=rate_drop_lstm)

????sequence_1_input?=?Input(shape=(30)?dtype=‘int32‘)
????embedded_sequences_1?=?embedding_layer_1(sequence_1_input)
????y1?=?lstm_layer(embedded_sequences_1)

????sequence_2_input?=?Input(shape=(300)?dtype=‘int32‘)
????embedded_sequences_2?=?embedding_layer_2(sequence_2_input)
????y2?=?lstm_layer(embedded_sequences_2)

????merged?=?concatenate([y1?y2])
????merged?=?Dropout(rate_drop_dense)(merged)
????merg

評論

共有 條評論