91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

python實現CNN中文文本分類

資源截圖

代碼片段和文件信息

# encoding: UTF-8

import itertools
import os
import pickle
import re
import time
from collections import Counter

import numpy as np

import word2vec_helpers

def load_data_and_labels(input_text_file, input_label_file, num_labels):
    """Load cleaned text samples and, when available, their integer labels.

    Args:
        input_text_file: Path handed to ``read_and_clean_zh_file``.
        input_label_file: Path of a text file holding one integer per line.
        num_labels: Unused by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A tuple ``(x_text, y)``. ``x_text`` is whatever
        ``read_and_clean_zh_file`` returns. ``y`` is a list of ints parsed
        from ``input_label_file``, or ``None`` when that file does not exist.

    Raises:
        ValueError: If a line of the label file is not a valid integer.
    """
    x_text = read_and_clean_zh_file(input_text_file)
    if not os.path.exists(input_label_file):
        return (x_text, None)
    # Use a context manager so the handle is closed, and build a real list so
    # the labels can be indexed and iterated more than once on Python 3.
    with open(input_label_file, "r") as label_file:
        y = [int(line) for line in label_file]
    return (x_text, y)

def load_positive_negative_data_files(positive_data_file, negative_data_file):
    """Read positive and negative example files and build matching labels.

    Both files are read through ``read_and_clean_zh_file``. Positive examples
    come first in the combined output, followed by the negative ones.

    Args:
        positive_data_file: Path of the file holding positive examples.
        negative_data_file: Path of the file holding negative examples.

    Returns:
        A two-element list ``[x_text, y]``. ``x_text`` is the concatenation
        of the positive and negative examples. ``y`` is an integer array with
        one row per example: ``[0, 1]`` for positive, ``[1, 0]`` for negative.
    """
    # Load data from files
    positive_examples = read_and_clean_zh_file(positive_data_file)
    negative_examples = read_and_clean_zh_file(negative_data_file)
    # Combine data
    x_text = positive_examples + negative_examples
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    # Build one array from the joined label rows. np.concatenate on the two
    # lists fails when either class is empty, because an empty list becomes a
    # 1-D array that cannot be joined with the other class's 2-D array.
    y = np.array(positive_labels + negative_labels, dtype=int).reshape(-1, 2)
    return [x_text, y]

def padding_sentences(input_sentences, padding_token, padding_sentence_length = None):
    """Split sentences on single spaces and pad or truncate to one length.

    Args:
        input_sentences: Iterable of strings whose tokens are separated by a
            single space character.
        padding_token: Token appended to sentences shorter than the target
            length.
        padding_sentence_length: Target length. When ``None``, the length of
            the longest tokenized sentence is used (0 for empty input).

    Returns:
        A tuple ``(sentences, max_sentence_length)`` where ``sentences`` is a
        list of token lists, each exactly ``max_sentence_length`` long.
    """
    tokenized = [sentence.split(' ') for sentence in input_sentences]
    if padding_sentence_length is not None:
        max_sentence_length = padding_sentence_length
    else:
        # Guard the empty case: max() of an empty sequence raises ValueError.
        max_sentence_length = max([len(tokens) for tokens in tokenized]) if tokenized else 0

    sentences = []
    for tokens in tokenized:
        shortfall = max_sentence_length - len(tokens)
        if shortfall < 0:
            # Store the truncated copy; rebinding the loop variable alone
            # would leave the over-long sentence in the result.
            sentences.append(tokens[:max_sentence_length])
        else:
            sentences.append(tokens + [padding_token] * shortfall)
    return (sentences, max_sentence_length)

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Generate a batch iterator for a dataset.

    Args:
        data: Sequence of samples; it is converted with ``np.array``.
        batch_size: Maximum number of samples per batch.
        num_epochs: Number of passes over ``data``.
        shuffle: When true, samples are re-permuted with
            ``np.random.permutation`` at the start of every epoch.

    Yields:
        Slices of the (optionally shuffled) array, each holding at most
        ``batch_size`` samples. The last batch of an epoch may be shorter.
        Nothing is yielded when ``data`` is empty.
    """
    data = np.array(data)
    data_size = len(data)
    # Integer ceiling division. The float form int((n - 1) / b) + 1 rounds
    # toward zero, so empty data would produce one spurious empty batch.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for epoch in range(num_epochs):
        if shuffle:
            # Shuffle the data at each epoch
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_idx = batch_num * batch_size
            end_idx = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_idx:end_idx]

def test():
    """Manual smoke-test entry point; currently only prints a marker line.

    The commented-out statements are earlier manual checks kept for
    reference. They are not executed.
    """
    # Test clean_str
    print("Test")
    #print(clean_str("This's a huge dog! Who're going to the top."))
    # Test load_positive_negative_data_files
    #x_text, y = load_positive_negative_data_files("./tiny_data/rt-polarity.pos", "./tiny_data/rt-polarity.neg")
    #print(x_text)
    #print(y)
    # Test batch_iter
    #batches = batch_iter(x_text, 2, 4)
    #for batch in batches:
    #    print(batch)

def mkdir_if_not_exist(dirpath):
    """Create ``dirpath`` (and any missing parents) unless it already exists.

    Args:
        dirpath: Directory path to create.

    Raises:
        OSError: If creation fails and nothing exists at ``dirpath``
            afterwards (for example on a permission error).
    """
    try:
        # Attempt the creation directly rather than checking first; this
        # avoids the window in which another process creates the path
        # between the existence check and the mkdir call.
        os.makedirs(dirpath)
    except OSError:
        # An already existing path is not an error, matching the original
        # "only create when absent" contract. Anything else is re-raised.
        if not os.path.exists(dirpath):
            raise

def seperate_line(line):
    """Return ``line`` with a single space appended after every character.

    Args:
        line: String (or any iterable of strings) to split up.

    Returns:
        The elements of ``line`` joined together, each followed by one
        space. The result therefore ends with a trailing space unless
        ``line`` is empty.
    """
    return ''.join([word + ' ' for word in line])

def?read_and_clean_zh_file(input_file?output_cleaned_file?=?

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\
     文件          14  2017-06-15 02:39  zh_cnn_text_classify-master\.gitignore
     文件        1917  2017-06-15 02:39  zh_cnn_text_classify-master\README.md
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\data\
     文件       59489  2017-06-15 02:39  zh_cnn_text_classify-master\data\ham_100.utf8
     文件       44997  2017-06-15 02:39  zh_cnn_text_classify-master\data\spam_100.utf8
     文件        4504  2017-06-15 02:39  zh_cnn_text_classify-master\data_helpers.py
     文件        4870  2017-06-15 02:39  zh_cnn_text_classify-master\eval.py
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\runs\
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\
     文件         697  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\checkpoint
     文件     2373156  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-200.data-00000-of-00001
     文件        1009  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-200.index
     文件      102143  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-200.meta
     文件     2373156  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-300.data-00000-of-00001
     文件        1009  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-300.index
     文件      102143  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-300.meta
     文件     2373156  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-400.data-00000-of-00001
     文件        1009  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-400.index
     文件      102143  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-400.meta
     文件     2373156  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-500.data-00000-of-00001
     文件        1009  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-500.index
     文件      102143  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-500.meta
     文件     2373156  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-600.data-00000-of-00001
     文件        1009  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-600.index
     文件      102143  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\checkpoints\model-600.meta
     文件       46336  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\prediction.csv
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\summaries\
     目錄           0  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\summaries\dev\
     文件      159244  2017-06-15 02:39  zh_cnn_text_classify-master\runs\1492954581\summaries\dev\events.out.tfevents.1492954586.escenter11PC
............此處省略7個文件信息

評論

共有 條評論