91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 0 次
    發布日期: 2023-08-07
  • 語言: Python
  • 標簽: textCNN??

資源簡介

使用 Keras 實現中文文本分類:先進行中文分詞并引入詞向量,再基于語義特征做卷積計算,最終完成文本分類。

資源截圖

代碼片段和文件信息

#-*-coding:utf-8-*-
import jieba
import keras
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn import preprocessing

def load_data(file_path, stop_words_path):
    """Load a tab-separated corpus and return segmented texts with one-hot labels.

    Every line of ``file_path`` is split on tab characters; the field at
    index 3 is taken as the text and the field at index 1 as the class label.
    Each text is segmented with ``jieba.cut``, tokens found in the stop-word
    list are dropped, and the remaining tokens are joined with single spaces.

    Args:
        file_path: Path to the UTF-8 encoded, tab-separated data file.
        stop_words_path: Path to a UTF-8 encoded file with one stop word per
            line, e.g.
            'E:/data/NLP/textteaser_new/textteaser/trainer/stopWords.txt'.

    Returns:
        A two-element list ``[content, label_ohe]``: ``content`` is a list of
        space-joined token strings (one per input line) and ``label_ohe`` is
        the dense one-hot encoding of the labels, one row per input line.

    Raises:
        IndexError: If a line has fewer than four tab-separated fields.
    """
    # Stop words, one per line with the newline removed. A set is used because
    # membership is tested once per token of every document.
    with open(stop_words_path, encoding='utf-8') as file:
        stop_words = {word.replace('\n', '') for word in file}

    # Read the corpus inside a context manager so the handle is closed.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [item.split('\t') for item in file]
    content = [item[3] for item in data]
    label = [item[1] for item in data]

    # String labels -> integer ids -> one-hot rows. OneHotEncoder expects a
    # 2-D input, hence each id is wrapped in its own single-element list.
    le = preprocessing.LabelEncoder()
    ohe = preprocessing.OneHotEncoder()
    label_le = [[item] for item in le.fit_transform(label)]
    label_ohe = ohe.fit_transform(label_le).toarray()

    # Segment with jieba and drop stop words; tokens are space-joined so a
    # whitespace-based tokenizer can split them again later.
    content = [
        ' '.join(word for word in jieba.cut(item) if word not in stop_words)
        for item in content
    ]
    return [content, label_ohe]

# NOTE(review): hard-coded local Windows paths - adjust to your environment.
file_path = 'E:/data/NLP/textteaser/test_data'
stop_words_path = 'E:/data/NLP/textteaser_new/textteaser/trainer/stopWords.txt'
contents, labels = load_data(file_path=file_path, stop_words_path=stop_words_path)

# Vocabulary and sequence configuration.
MAX_NB_WORDS = 10000
MAX_SEQUENCE_LENGTH = 200
# NOTE(review): the odd casing is kept as-is because the commented-out model
# code further down refers to this exact name; presumably it was meant to be
# EMBEDDING_DIM - confirm before renaming.
embedDING_DIM = 128

# Fit the tokenizer on the segmented texts and turn each text into a
# sequence of integer word ids.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts=contents)
sequences = tokenizer.texts_to_sequences(texts=contents)

word_index = tokenizer.word_index

# Bring every sequence to the same length, MAX_SEQUENCE_LENGTH.
data = pad_sequences(sequences=sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Shuffle samples and labels with the same fixed-seed permutation so the
# pairs stay aligned and the order is reproducible.
np.random.seed(101)
shuffled_index = np.random.permutation(np.arange(len(data)))
shuffled_data = data[shuffled_index]
shuffled_label = labels[shuffled_index]

print(shuffled_data.shape)
print(shuffled_label.shape)

# from keras.layers.core import Dense, Dropout, Flatten
# from keras.models import Model, Input
# from keras.layers import MaxPooling1D, Embedding, Conv1D
# # 構建訓練網絡
# inputs = Input(shape=(MAX_SEQUENCE_LENGTH,))
# embedding_layer = Embedding(len(word_index) + 1, embedDING_DIM, input_length=MAX_SEQUENCE_LENGTH, trainable=True)
# embedded_sequences = embedding_layer(input)
#
# # 卷積核類型1:strides = 3, filter_size = 128, 256, 512
# x_1 = Conv1D(128, 3, activation='relu')(embedded_sequences)
# x_1 = MaxPooling1D(3, strides=2)(x_1)
# x_1 = Conv1D(256, 3, activation='relu')(x_1)
# x_1 = MaxPooling1D(3, strides=2)(x_1)
# x_1 = Conv1D(512, 3, activation='relu')(x_1)
# x_1 = MaxPooling1D(3, strides=2)(x_1)
# x_1 = Flatten()(x_1)
# out_put = Dense(len(labels[0]), activation='softmax')(x_1)
#
# # # 卷積核類型2:strides = 4, filter_size = 128, 256, 512
# # x_2 = Conv1D(128, 4, activation='relu')(embedded_sequences)
# # x_2 = MaxPooling1D(4, strides=2)(x_2)
# # x_2 = Conv1D(256, 4, activation='relu')(x_2)
# # x_2 = MaxPooling1D(4, strides=2)(x_2)
# # x_2 = Conv1D(512, 4, activation='relu')(x_2)
# # x_2 = MaxPooling

評論

共有 條評論