資源簡介
本文是依賴jieba分詞構成中文字典,寫成的中文聊天機器人。

代碼片段和文件信息
# -*- coding:utf-8 -*-
# -*- author:zzZ_CMing  CSDN address:https://blog.csdn.net/zzZ_CMing
# -*- 2018/07/31;14:23
# -*- python3.5
import sys
import numpy as np
import tensorflow as tf
from tensorflow.contrib.legacy_seq2seq.python.ops import seq2seq
import word_token
import jieba
import random

# Model / training hyper-parameters.
size = 8                    # LSTM cell size
GO_ID = 1                   # marker placed at the start of an output sequence
EOS_ID = 2                  # end-of-sequence marker
PAD_ID = 0                  # padding value
min_freq = 1                # a word is stored in the vocabulary only if its sample frequency exceeds this value
epochs = 2000               # number of training iterations
batch_num = 1000            # number of question/answer pairs that take part in training
input_seq_len = 25          # input sequence length
output_seq_len = 50         # output sequence length
init_learning_rate = 0.5    # initial learning rate

wordToken = word_token.WordToken()
# Kept at module level so that num_encoder_symbols and num_decoder_symbols
# can be computed dynamically from the loaded vocabulary.
# The two sample paths must be separate list elements: without the comma
# Python would concatenate the adjacent string literals into a single path.
max_token_id = wordToken.load_file_list(['./samples/question', './samples/answer'], min_freq)
# NOTE(review): the +5 margin above max_token_id is not explained in this
# file — presumably headroom for the special ids; confirm before changing.
num_encoder_symbols = max_token_id + 5
num_decoder_symbols = max_token_id + 5
def get_id_list_from(sentence):
    """
    Segment a sentence with jieba and return the ids of its words.

    Each segment produced by ``jieba.cut`` is looked up through
    ``wordToken.word2id``. Segments whose lookup result is falsy (for
    example ``None`` or ``0``) are left out — presumably these are words
    missing from the vocabulary; verify against ``word_token``.

    :param sentence: the text to segment.
    :return: list of the truthy ids, in segmentation order.
    """
    sentence_id_list = []
    for word in jieba.cut(sentence):
        # Look the word up once and reuse the result; local names avoid
        # shadowing the builtins ``str`` and ``id``.
        word_id = wordToken.word2id(word)
        if word_id:
            sentence_id_list.append(word_id)
    return sentence_id_list
def get_train_set():
    """
    Build the question/answer training set from the sample files.

    Reads ``./samples/question`` and ``./samples/answer`` in lockstep, one
    line from each per pair, and stops as soon as either file runs out of
    lines. Each line is stripped and converted with ``get_id_list_from``.
    A pair is kept only when both id lists are non-empty; ``EOS_ID`` is
    appended to the answer ids of every kept pair.

    :return: list of ``[question_id_list, answer_id_list]`` pairs.
    """
    train_set = []
    with open('./samples/question', 'r', encoding='utf-8') as question_file, \
            open('./samples/answer', 'r', encoding='utf-8') as answer_file:
        # zip() ends at the shorter file, matching the original
        # "stop when either readline() returns an empty string" loop.
        for question, answer in zip(question_file, answer_file):
            # strip() removes leading/trailing whitespace, including the newline
            question_id_list = get_id_list_from(question.strip())
            answer_id_list = get_id_list_from(answer.strip())
            if question_id_list and answer_id_list:
                answer_id_list.append(EOS_ID)
                train_set.append([question_id_list, answer_id_list])
    return train_set
def?get_samples(train_set?batch_num):
????“““
????構(gòu)造樣本數(shù)據(jù):傳入的train_set是處理好的問答集
????batch_num:讓train_set訓(xùn)練集里多少問答對參與訓(xùn)練
????“““
????raw_encoder_input?=?[]
????raw_decoder_input?=?[]
????if?batch_num?>=?len(train_set):
????????batch_train_set?=?train_set
????else:
????????random_start?=?random.randint(0?len(train_set)-batch_num)
????????batch_train_set?=?train_set[random_start:random_start+batch_num]
????#?添加起始標(biāo)記、結(jié)束填充
????for?sample?in?batch_train_set:
????????raw_encoder_input.append([PAD_ID]?*?(input_seq_len?-?len(sample[0]))?+?sample[0])
????????raw_decoder_input.append([GO_ID]?+?sample[1]?+?[PAD_ID]?*?(output_seq_len?-?len(sample[1])?-?1))
????encoder_inputs?=?[]
????decoder_inputs?=?[]
????target_weights?=?[]
????for?length_idx?in?range(input_seq_len):
????????encoder_i
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-07-31 18:09  chatbot_Chinese\.idea\
     文件         398  2018-07-31 16:54  chatbot_Chinese\.idea\chatbot_Chinese.iml
     文件         185  2018-07-31 16:54  chatbot_Chinese\.idea\misc.xml
     文件         282  2018-07-31 16:54  chatbot_Chinese\.idea\modules.xml
     文件       11108  2018-07-31 18:09  chatbot_Chinese\.idea\workspace.xml
     目錄           0  2018-07-31 16:51  chatbot_Chinese\__pycache__\
     文件        2138  2018-07-31 15:33  chatbot_Chinese\__pycache__\word_token.cpython-35.pyc
     文件       10227  2018-07-31 17:44  chatbot_Chinese\demo_test.py
     目錄           0  2018-07-31 18:04  chatbot_Chinese\model\
     目錄           0  2018-07-31 18:04  chatbot_Chinese\model\2000\
     文件          67  2018-07-31 18:04  chatbot_Chinese\model\2000\checkpoint
     文件       13412  2018-07-31 18:04  chatbot_Chinese\model\2000\demo_.data-00000-of-00001
     文件         851  2018-07-31 18:04  chatbot_Chinese\model\2000\demo_.index
     文件     8500795  2018-07-31 18:04  chatbot_Chinese\model\2000\demo_.meta
     目錄           0  2018-07-31 18:02  chatbot_Chinese\samples\
     文件          83  2018-07-31 18:02  chatbot_Chinese\samples\answer
     文件          73  2018-07-31 18:02  chatbot_Chinese\samples\question
     文件        1949  2018-07-31 14:51  chatbot_Chinese\word_token.py
評論
共有 條評論