資源簡介
用LSTM實現機器翻譯,有教程,有任務,非常適合學習。
代碼片段和文件信息
import os
import pickle
import copy
import numpy as np

# Reserved special-token ids, copied into every vocabulary built by
# create_lookup_tables() so that real words start at id len(CODES).
# '<PAD>' is the filler id used by pad_sentence_batch().
# NOTE(review): the other three names are presumably end-of-sentence,
# unknown-word and decoder-start markers consumed by code outside this
# file -- confirm against the callers.
CODES = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, '<GO>': 3}
def load_data(path):
    """
    Load a dataset from a text file.

    Args:
        path: Path of the UTF-8 encoded text file to read.

    Returns:
        The whole file content as a single string.
    """
    # The file is decoded explicitly as UTF-8 so the result does not depend
    # on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
def preprocess_and_save_data(source_path, target_path, text_to_ids):
    """
    Preprocess the text data and save the result to 'preprocess.p'.

    Both files are read, lower-cased and used to build one vocabulary each;
    the caller-supplied ``text_to_ids`` then converts the two texts to ids.

    Args:
        source_path: Path of the source-language text file.
        target_path: Path of the target-language text file.
        text_to_ids: Callable invoked as
            ``text_to_ids(source_text, target_text,
            source_vocab_to_int, target_vocab_to_int)``; it must return a
            ``(source_ids, target_ids)`` pair.

    Returns:
        None. The pickled payload written to 'preprocess.p' is the tuple
        ``((source_ids, target_ids),
        (source_vocab_to_int, target_vocab_to_int),
        (source_int_to_vocab, target_int_to_vocab))``.
    """
    # Preprocess
    source_text = load_data(source_path).lower()
    target_text = load_data(target_path).lower()

    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(source_text)
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(target_text)

    source_ids, target_ids = text_to_ids(
        source_text, target_text, source_vocab_to_int, target_vocab_to_int)

    # Save Data
    with open('preprocess.p', 'wb') as out_file:
        pickle.dump((
            (source_ids, target_ids),
            (source_vocab_to_int, target_vocab_to_int),
            (source_int_to_vocab, target_int_to_vocab)), out_file)
def load_preprocess():
    """
    Load the preprocessed training data from 'preprocess.p'.

    Returns:
        Whatever object was pickled into 'preprocess.p' in the current
        working directory; no batching is performed here.

    Raises:
        FileNotFoundError: If 'preprocess.p' does not exist.
    """
    # NOTE: pickle.load can execute arbitrary code; only load a file that
    # was produced locally by a trusted run.
    with open('preprocess.p', mode='rb') as in_file:
        return pickle.load(in_file)
def create_lookup_tables(text):
    """
    Create lookup tables for the vocabulary of ``text``.

    Args:
        text: String whose whitespace-separated tokens form the vocabulary.

    Returns:
        A ``(vocab_to_int, int_to_vocab)`` pair of dicts. Both contain the
        special tokens from CODES with their reserved ids; every distinct
        word of ``text`` gets an id starting at ``len(CODES)``.
    """
    # Sort the words so ids are reproducible: iterating a set of strings
    # varies between interpreter runs because of hash randomization.
    vocab = sorted(set(text.split()))

    # Copy so the module-level CODES table is never mutated.
    vocab_to_int = copy.copy(CODES)
    for v_i, v in enumerate(vocab, len(CODES)):
        vocab_to_int[v] = v_i

    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}

    return vocab_to_int, int_to_vocab
def save_params(params):
    """
    Save parameters to the file 'params.p'.

    Args:
        params: Any picklable object; it is written to 'params.p' in the
            current working directory, replacing an existing file.
    """
    with open('params.p', 'wb') as out_file:
        pickle.dump(params, out_file)
def load_params():
    """
    Load parameters from the file 'params.p'.

    Returns:
        The object unpickled from 'params.p' in the current working
        directory.

    Raises:
        FileNotFoundError: If 'params.p' does not exist.
    """
    # NOTE: pickle.load can execute arbitrary code; only load a file that
    # was produced locally by a trusted run.
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)
def batch_data(source, target, batch_size):
    """
    Batch source and target together.

    Args:
        source: Sequence of source sentences, each a list of ids.
        target: Sequence of target sentences, sliced with the same indices
            as ``source``.
        batch_size: Number of sentences per batch.

    Yields:
        ``(source_batch, target_batch)`` pairs of numpy arrays. Each side is
        padded independently to the longest sentence in its own batch.
        Only ``len(source) // batch_size`` full batches are produced, so a
        trailing partial batch is dropped.
    """
    for batch_i in range(len(source) // batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield (np.array(pad_sentence_batch(source_batch)),
               np.array(pad_sentence_batch(target_batch)))
def pad_sentence_batch(sentence_batch):
    """
    Pad every sentence with the <PAD> id to the length of the longest one.

    Args:
        sentence_batch: List of sentences, each a list of ids.

    Returns:
        A new list of lists, all of equal length; the input sentences are
        not modified. An empty batch yields an empty list.
    """
    # default=0 keeps an empty batch from raising ValueError in max().
    max_sentence = max((len(sentence) for sentence in sentence_batch), default=0)
    return [sentence + [CODES['<PAD>']] * (max_sentence - len(sentence))
            for sentence in sentence_batch]
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        279  2018-05-19 14:21  dlnd_language_translation\.git\config
     文件         73  2018-05-19 14:20  dlnd_language_translation\.git\desc
     文件         23  2018-05-19 14:21  dlnd_language_translation\.git\HEAD
     文件        478  2018-05-19 14:20  dlnd_language_translation\.git\hooks\applypatch-msg.sample
     文件        896  2018-05-19 14:20  dlnd_language_translation\.git\hooks\commit-msg.sample
     文件        189  2018-05-19 14:20  dlnd_language_translation\.git\hooks\post-update.sample
     文件        424  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-applypatch.sample
     文件       1642  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-commit.sample
     文件       1348  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-push.sample
     文件       4898  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-reba
     文件        544  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-receive.sample
     文件       1239  2018-05-19 14:20  dlnd_language_translation\.git\hooks\prepare-commit-msg.sample
     文件       3610  2018-05-19 14:20  dlnd_language_translation\.git\hooks\update.sample
     文件        963  2018-05-19 14:21  dlnd_language_translation\.git\index
     文件        240  2018-05-19 14:20  dlnd_language_translation\.git\info\exclude
     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\HEAD
     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\refs\heads\master
     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\refs\remotes\origin\HEAD
     文件        118  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        648  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        277  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        183  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        185  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件    1935397  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        802  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        986  2018-05-19 14:21  dlnd_language_translation\.git\ob
     文件        246  2018-05-19 14:21  dlnd_language_translation\.git\ob
     文件      19417  2018-05-19 14:21  dlnd_language_translation\.git\ob
     文件         54  2018-05-19 14:20  dlnd_language_translation\.git\ob
     文件        105  2018-05-19 14:21  dlnd_language_translation\.git\ob
............此處省略73個文件信息
評論
共有 條評論