-
大小: 4.79MB 文件類型: .zip 金幣: 2 下載: 0 次 發布日期: 2023-11-18
- 語言: Python
- 標簽:
資源簡介
基于 tensorflow 實現的用 textcnn 方法做情感分析的項目,有數據,可以直接跑

代碼片段和文件信息
import?numpy?as?np
import?re
import?word2vec
#?import?itertools
#?from?collections?import?Counter
#?import?codecs
class w2v_wrapper:
    """Load a word2vec model and make sure it has an 'unknown' entry.

    The loaded model is exposed as ``self.model``.
    """

    def __init__(self, file_path):
        """Load the model stored at *file_path*.

        If the vocabulary hash has no 'unknown' key, a random vector is
        appended to the embedding matrix and 'unknown' is mapped to the
        index of that new row.
        """
        self.model = word2vec.load(file_path)
        if 'unknown' not in self.model.vocab_hash:
            # Take the width from the loaded matrix instead of assuming 128,
            # otherwise stacking fails for models of any other dimension.
            vector_size = np.shape(self.model.vectors)[1]
            unknown_vec = np.random.uniform(-0.1, 0.1, size=vector_size)
            # The new row is appended last, so its index equals the current
            # vocabulary size.
            self.model.vocab_hash['unknown'] = len(self.model.vocab)
            self.model.vectors = np.vstack((self.model.vectors, unknown_vec))
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    Characters outside ``A-Za-z0-9(),!?'`` and the backtick are replaced by
    spaces, English contractions and punctuation are split off as separate
    tokens, runs of whitespace are collapsed, and the result is stripped
    and lower-cased.
    """
    string = re.sub(r"[^A-Za-z0-9(),!?'`]", " ", string)
    # Split common contractions off the preceding word.
    string = re.sub(r"'s", " 's", string)
    string = re.sub(r"'ve", " 've", string)
    string = re.sub(r"n't", " n't", string)
    string = re.sub(r"'re", " 're", string)
    string = re.sub(r"'d", " 'd", string)
    string = re.sub(r"'ll", " 'll", string)
    # Surround punctuation with spaces so each mark is its own token.
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    # NOTE: these replacement templates emit a literal backslash before the
    # character (e.g. the token "\("). That matches the non-raw originals;
    # raw strings only avoid invalid-escape warnings.
    string = re.sub(r"\(", r" \( ", string)
    string = re.sub(r"\)", r" \) ", string)
    string = re.sub(r"\?", r" \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()
def removezero(x, y):
    """Drop the entries of *x* and *y* at positions where *y* is zero.

    Both inputs are converted to NumPy arrays. The non-zero count and the
    total length of *y* are printed before filtering.

    Returns:
        A tuple ``(x, y)`` of arrays restricted to the non-zero positions
        of *y* (or the unfiltered arrays when *y* has no zeros).
    """
    x_arr = np.array(x)
    y_arr = np.array(y)
    nozero = np.nonzero(y_arr)
    # np.nonzero returns a tuple of index arrays; the last axis of its shape
    # is the number of non-zero entries.
    kept = np.shape(nozero)[-1]
    print('removezero', kept, len(y))
    if kept == len(y):
        # Nothing to remove.
        return x_arr, y_arr
    return x_arr[nozero], y_arr[nozero]
def read_file_lines(filename, from_size, line_num):
    """Return up to *line_num* stripped lines of *filename*.

    Reading starts at the zero-based line index *from_size* and stops as
    soon as the window ``[from_size, from_size + line_num)`` is exhausted,
    so the rest of the file is not read. Fewer lines are returned when the
    file ends early; an empty window yields an empty list.
    """
    text = []
    end_num = from_size + line_num
    # The context manager closes the handle even on the early exit.
    with open(filename) as handle:
        for index, line in enumerate(handle):
            if index >= end_num:
                break
            if index >= from_size:
                text.append(line.strip())
    return text
def load_data_and_labels(filepath, max_size=-1):
    """
    Loads labelled sentences from a tab-separated file and generates labels.

    Each line is expected to look like ``<label>\\t<text>``. Lines without a
    tab, or whose text part is blank, are skipped. A label starting with
    '0' becomes ``[0, 1]``; any other label becomes ``[1, 0]``.

    Args:
        filepath: Path of the UTF-8 text file (undecodable bytes are ignored).
        max_size: Currently unused; kept for interface compatibility.

    Returns:
        ``[x_datas, labels]`` where ``x_datas`` is the list of text parts
        (line endings are kept) and ``labels`` is a NumPy array of one-hot
        pairs.
    """
    # Load data from files
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        train_datas = f.readlines()

    one_hot_labels = []
    x_datas = []
    for line in train_datas:
        parts = line.split('\t', 1)
        # A line with no tab (e.g. a trailing blank line) has no text part;
        # indexing parts[1] would raise IndexError, so skip it.
        if len(parts) < 2 or not parts[1].strip():
            continue
        x_datas.append(parts[1])
        if parts[0].startswith('0'):
            one_hot_labels.append([0, 1])
        else:
            one_hot_labels.append([1, 0])
    # Reports the number of raw lines read, not the number kept.
    print(' data size = ', len(train_datas))
    return [x_datas, np.array(one_hot_labels)]
def?batch_iter(data?batch_size?num_epochs?shuffle=True):
????“““
????Generates?a?batch?iterator?for?a?dataset.
????““
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-08-25 01:45  sentiment_analysis_textcnn-master\
     文件         472  2017-08-25 01:45  sentiment_analysis_textcnn-master\README.md
     文件       33806  2017-08-25 01:45  sentiment_analysis_textcnn-master\accuracy.png
     目錄           0  2017-08-25 01:45  sentiment_analysis_textcnn-master\data\
     文件     3654737  2017-08-25 01:45  sentiment_analysis_textcnn-master\data\cutclean_label_corpus10000.txt
     文件     4131771  2017-08-25 01:45  sentiment_analysis_textcnn-master\data\vectors.bin
     目錄           0  2017-08-25 01:45  sentiment_analysis_textcnn-master\textcnn\
     文件        4330  2017-08-25 01:45  sentiment_analysis_textcnn-master\textcnn\data_input_helper.py
     文件        3982  2017-08-25 01:45  sentiment_analysis_textcnn-master\textcnn\eval.py
     文件        4131  2017-08-25 01:45  sentiment_analysis_textcnn-master\textcnn\text_cnn.py
     文件       10312  2017-08-25 01:45  sentiment_analysis_textcnn-master\textcnn\train.py
評論
共有 條評論