I'm Octopus; the name comes from my Chinese name, 章鱼 (octopus). I love programming, algorithms, and open source. All of my source code is on my personal GitHub. This blog records my learning bit by bit; if you are interested in Python, Java, AI, or algorithms, feel free to follow along so we can learn and improve together.
This project uses TensorFlow 2.4.
1) Import packages
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import activations
from tensorflow.keras.layers import Layer, Input, Embedding, LSTM, Dense, Attention
from tensorflow.keras.models import Model

2) Encoder
class Encoder(keras.Model):
    def __init__(self, vocab_size, embedding_dim, hidden_units):
        super(Encoder, self).__init__()
        # Embedding Layer
        self.embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
        # Encode LSTM Layer
        self.encoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True, name="encode_lstm")

    def call(self, inputs):
        encoder_embed = self.embedding(inputs)
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_embed)
        return encoder_outputs, state_h, state_c
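As a quick sanity check, the encoder can be called on a dummy batch to confirm the shapes it returns. The sizes below are made-up placeholders, not the values used later in this post:

import numpy as np

enc = Encoder(vocab_size=1000, embedding_dim=50, hidden_units=128)
dummy_batch = np.random.randint(1, 1000, size=(2, 10))   # 2 sequences of 10 token ids
enc_outputs, state_h, state_c = enc(dummy_batch)
print(enc_outputs.shape)   # (2, 10, 128): one hidden vector per time step
print(state_h.shape)       # (2, 128): final hidden state, later used to initialise the decoder
print(state_c.shape)       # (2, 128): final cell state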
3) Decoder

class Decoder(keras.Model):
    def __init__(self, vocab_size, embedding_dim, hidden_units):
        super(Decoder, self).__init__()
        # Embedding Layer
        self.embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
        # Decode LSTM Layer
        self.decoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True, name="decode_lstm")
        # Attention Layer
        self.attention = Attention()

    def call(self, enc_outputs, dec_inputs, states_inputs):
        decoder_embed = self.embedding(dec_inputs)
        dec_outputs, dec_state_h, dec_state_c = self.decoder_lstm(decoder_embed, initial_state=states_inputs)
        attention_output = self.attention([dec_outputs, enc_outputs])
        return attention_output, dec_state_h, dec_state_c
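The Attention layer used here is Keras's built-in dot-product (Luong-style) attention: the decoder outputs act as queries, and the encoder outputs serve as both keys and values. Conceptually it does roughly the following; this is a simplified sketch with made-up shapes that ignores masking:

import tensorflow as tf

# Made-up shapes: batch 2, decoder length 4, encoder length 10, hidden size 128
dec_outputs = tf.random.normal((2, 4, 128))   # queries
enc_outputs = tf.random.normal((2, 10, 128))  # keys and values

scores = tf.matmul(dec_outputs, enc_outputs, transpose_b=True)  # (2, 4, 10) similarity scores
weights = tf.nn.softmax(scores, axis=-1)                        # attention weights over encoder steps
context = tf.matmul(weights, enc_outputs)                       # (2, 4, 128), analogous to attention_output above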
4) Define the Seq2Seq model

def Seq2Seq(maxlen, embedding_dim, hidden_units, vocab_size):
    """
    seq2seq model
    """
    # Input Layer
    encoder_inputs = Input(shape=(maxlen,), name="encode_input")
    decoder_inputs = Input(shape=(None,), name="decode_input")
    # Encoder Layer
    encoder = Encoder(vocab_size, embedding_dim, hidden_units)
    enc_outputs, enc_state_h, enc_state_c = encoder(encoder_inputs)
    dec_states_inputs = [enc_state_h, enc_state_c]
    # Decoder Layer
    decoder = Decoder(vocab_size, embedding_dim, hidden_units)
    attention_output, dec_state_h, dec_state_c = decoder(enc_outputs, decoder_inputs, dec_states_inputs)
    # Dense Layer
    dense_outputs = Dense(vocab_size, activation='softmax', name="dense")(attention_output)
    # seq2seq model
    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=dense_outputs)
    return model
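Note that this is the training graph: the decoder is fed the ground-truth target sequence (teacher forcing), which is prepared in the data-preparation section below. A quick smoke test with made-up sizes shows that the model maps an (encoder input, decoder input) pair to one probability distribution over the vocabulary per decoder step:

import numpy as np

# Made-up sizes, for a smoke test only
toy_model = Seq2Seq(maxlen=10, embedding_dim=8, hidden_units=16, vocab_size=100)
src = np.random.randint(1, 100, size=(2, 10))   # encoder input ids
tgt = np.random.randint(1, 100, size=(2, 10))   # decoder input ids (teacher forcing)
print(toy_model([src, tgt]).shape)              # (2, 10, 100)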
5) Read the vocabulary and data

def read_vocab(vocab_path):
    vocab_words = []
    with open(vocab_path, "r", encoding="utf8") as f:
        for line in f:
            vocab_words.append(line.strip())
    return vocab_words
def read_data(data_path):
    datas = []
    with open(data_path, "r", encoding="utf8") as f:
        for line in f:
            words = line.strip().split()
            datas.append(words)
    return datas
def process_data_index(datas, vocab2id):
    data_indexs = []
    for words in datas:
        line_index = [vocab2id[w] if w in vocab2id else vocab2id["<UNK>"] for w in words]
        data_indexs.append(line_index)
    return data_indexs

vocab_words = read_vocab("./data/ch_word_vocab.txt")
special_words = ["<PAD>", "<UNK>", "<GO>", "<EOS>"]
vocab_words = special_words + vocab_words
vocab2id = {word: i for i, word in enumerate(vocab_words)}
id2vocab = {i: word for i, word in enumerate(vocab_words)}
num_sample = 1000
source_data = read_data("./data/ch_source_data_seg.txt")[:num_sample]
source_data_ids = process_data_index(source_data, vocab2id)
target_data = read_data("./data/ch_target_data_seg.txt")[:num_sample]
target_data_ids = process_data_index(target_data, vocab2id)
print("vocab test: ", [id2vocab[i] for i in range(10)])
print("source test: ", source_data[0])
print("source index: ", source_data_ids[0])
print("target test: ", target_data[0])
print("target index: ", target_data_ids[0])

vocab test:  ['<PAD>', '<UNK>', '<GO>', '<EOS>', '呵呵', '不是', '怎么', '了', '开心', '点']
source test:  ['呵呵']
source index:  [4]
target test:  ['是', '王若', '猫', '的', '。']
target index:  [27, 37846, 756, 45, 180]
6) Prepare the training data
def process_input_data(source_data_ids, target_indexs, vocab2id):
    source_inputs = []
    decoder_inputs, decoder_outputs = [], []
    for source, target in zip(source_data_ids, target_indexs):
        source_inputs.append([vocab2id["<GO>"]] + source + [vocab2id["<EOS>"]])
        decoder_inputs.append([vocab2id["<GO>"]] + target)
        decoder_outputs.append(target + [vocab2id["<EOS>"]])
    return source_inputs, decoder_inputs, decoder_outputs
source_input_ids, target_input_ids, target_output_ids = process_input_data(source_data_ids, target_data_ids, vocab2id)
print("encoder inputs: ", source_input_ids[:2])
print("decoder inputs: ", target_input_ids[:2])
print("decoder outputs: ", target_output_ids[:2])

encoder inputs:  [[2, 4, 3], [2, 5, 3]]
decoder inputs:  [[2, 27, 37846, 756, 45, 180], [2, 38, 27, 84, 49272]]
decoder outputs:  [[27, 37846, 756, 45, 180, 3], [38, 27, 84, 49272, 3]]
maxlen = 10
source_input_ids = keras.preprocessing.sequence.pad_sequences(source_input_ids, padding='post', maxlen=maxlen)
target_input_ids = keras.preprocessing.sequence.pad_sequences(target_input_ids, padding='post', maxlen=maxlen)
target_output_ids = keras.preprocessing.sequence.pad_sequences(target_output_ids, padding='post', maxlen=maxlen)
print(source_data_ids[:5])
print(target_input_ids[:5])
print(target_output_ids[:5])

[[4], [5], [6, 7], [8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 11, 20]]
[[    2    27 37846   756    45   180     0     0     0     0]
 [    2    38    27    84 49272     0     0     0     0     0]
 [    2    16  6692    82 49273   320    16   518     0     0]
 [    2   526     0     0     0     0     0     0     0     0]
 [   16   438    22   328    19 49272 15817   254  1764 49272]]
[[   27 37846   756    45   180     3     0     0     0     0]
 [   38    27    84 49272     3     0     0     0     0     0]
 [   16  6692    82 49273   320    16   518     3     0     0]
 [  526     3     0     0     0     0     0     0     0     0]
 [  438    22   328    19 49272 15817   254  1764 49272     3]]
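Two details of pad_sequences are worth noting: padding='post' appends the 0 index (<PAD>) at the end, while sequences longer than maxlen are truncated at the front by default (truncating='pre'), which is presumably why the fifth decoder-input row above no longer starts with the <GO> index 2. A small illustration with made-up ids:

# Made-up ids illustrating post-padding and the default front truncation
demo = keras.preprocessing.sequence.pad_sequences(
    [[2, 27, 3], [2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 3]], padding='post', maxlen=5)
print(demo)
# [[ 2 27  3  0  0]
#  [ 8  8  8  8  3]]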
K.clear_session()
maxlen = 10
embedding_dim = 50
hidden_units = 128
vocab_size = len(vocab2id)
model = Seq2Seq(maxlen, embedding_dim, hidden_units, vocab_size)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to
==================================================================================================
encode_input (InputLayer)       [(None, 10)]         0
__________________________________________________________________________________________________
encoder (Encoder)               ((None, 10, 128), (N 3598348     encode_input[0][0]
__________________________________________________________________________________________________
decode_input (InputLayer)       [(None, None)]       0
__________________________________________________________________________________________________
decoder (Decoder)               ((None, None, 128),  3598348     encoder[0][0]
                                                                 decode_input[0][0]
                                                                 encoder[0][1]
                                                                 encoder[0][2]
__________________________________________________________________________________________________
dense (Dense)                   (None, None, 70134)  9047286     decoder[0][0]
==================================================================================================
Total params: 16,243,982
Trainable params: 16,243,982
Non-trainable params: 0

7) Train the model
epochs = 20
batch_size = 32
val_rate = 0.2
loss_fn = keras.losses.SparseCategoricalCrossentropy()
model.compile(loss=loss_fn, optimizer='adam')
model.fit([source_input_ids, target_input_ids], target_output_ids,
          batch_size=batch_size, epochs=epochs, validation_split=val_rate)

Epoch 1/20  25/25 [==============================] - 29s 807ms/step - loss: 6.4676 - val_loss: 6.7728
Epoch 2/20  25/25 [==============================] - 14s 566ms/step - loss: 5.5570 - val_loss: 4.3079
Epoch 3/20  25/25 [==============================] - 14s 567ms/step - loss: 3.3806 - val_loss: 4.3989
Epoch 4/20  25/25 [==============================] - 15s 610ms/step - loss: 3.1398 - val_loss: 4.4503
Epoch 5/20  25/25 [==============================] - 15s 604ms/step - loss: 3.1003 - val_loss: 4.4969
Epoch 6/20  25/25 [==============================] - 16s 647ms/step - loss: 3.0329 - val_loss: 4.5317
Epoch 7/20  25/25 [==============================] - 16s 658ms/step - loss: 3.0294 - val_loss: 4.5666
Epoch 8/20  25/25 [==============================] - 16s 647ms/step - loss: 3.0492 - val_loss: 4.5931
Epoch 9/20  25/25 [==============================] - 15s 623ms/step - loss: 3.0186 - val_loss: 4.6177
Epoch 10/20 25/25 [==============================] - 16s 621ms/step - loss: 3.0837 - val_loss: 4.6449
Epoch 11/20 25/25 [==============================] - 14s 554ms/step - loss: 3.0654 - val_loss: 4.6655
Epoch 12/20 25/25 [==============================] - 17s 691ms/step - loss: 3.0654 - val_loss: 4.6791
Epoch 13/20 25/25 [==============================] - 16s 615ms/step - loss: 2.9685 - val_loss: 4.6993
Epoch 14/20 25/25 [==============================] - 15s 594ms/step - loss: 3.0559 - val_loss: 4.7213
Epoch 15/20 25/25 [==============================] - 15s 600ms/step - loss: 2.9749 - val_loss: 4.7328
Epoch 16/20 25/25 [==============================] - 15s 612ms/step - loss: 2.9694 - val_loss: 4.7469
Epoch 17/20 25/25 [==============================] - 16s 633ms/step - loss: 2.9646 - val_loss: 4.7588
Epoch 18/20 25/25 [==============================] - 15s 622ms/step - loss: 3.0883 - val_loss: 4.7736
Epoch 19/20 25/25 [==============================] - 14s 577ms/step - loss: 2.9960 - val_loss: 4.7863
Epoch 20/20 25/25 [==============================] - 14s 585ms/step - loss: 3.0140 - val_loss: 4.7999
8) Save and load the model

The weights are saved to disk, the session is cleared, and the same architecture is rebuilt with Seq2Seq before the weights are loaded back in.
model.save_weights("./data/seq2seq_attention_weights.h5")
del model
K.clear_session()
model = Seq2Seq(maxlen, embedding_dim, hidden_units, vocab_size)
model.load_weights("./data/seq2seq_attention_weights.h5")
print(model.summary())

9) Encoder inference model
def encoder_infer(model):
    encoder_model = Model(inputs=model.get_layer('encoder').input,
                          outputs=model.get_layer('encoder').output)
    return encoder_model
encoder_model = encoder_infer(model)
print(encoder_model.summary())

10) Decoder inference model
def decoder_infer(model, encoder_model):
    encoder_output = encoder_model.get_layer('encoder').output[0]
    maxlen, hidden_units = encoder_output.shape[1:]

    dec_input = model.get_layer('decode_input').input
    enc_output = Input(shape=(maxlen, hidden_units), name='enc_output')
    dec_input_state_h = Input(shape=(hidden_units,), name='input_state_h')
    dec_input_state_c = Input(shape=(hidden_units,), name='input_state_c')
    dec_input_states = [dec_input_state_h, dec_input_state_c]

    decoder = model.get_layer('decoder')
    dec_outputs, out_state_h, out_state_c = decoder(enc_output, dec_input, dec_input_states)
    dec_output_states = [out_state_h, out_state_c]

    decoder_dense = model.get_layer('dense')
    dense_output = decoder_dense(dec_outputs)

    decoder_model = Model(inputs=[enc_output, dec_input, dec_input_states],
                          outputs=[dense_output] + dec_output_states)
    return decoder_model
decoder_model = decoder_infer(model, encoder_model)
decoder_model.summary()
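At inference time the two models are chained: the encoder runs once over the whole input, and the decoder model is then called one token at a time, feeding back its own states and its previous prediction. A single step looks roughly like this (made-up probe input; the full greedy loop follows in the next section):

import numpy as np

probe = np.random.randint(1, vocab_size, size=(1, 10))          # made-up encoder input ids
enc_outs, h, c = encoder_model.predict(probe)                   # run the encoder once
step_probs, h, c = decoder_model.predict(
    [enc_outs, np.array([[vocab2id["<GO>"]]]), h, c])           # one decoding step starting from <GO>
print(step_probs.shape)                                         # (1, 1, vocab_size)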
11) Prediction

import numpy as np
maxlen = 10
def infer_predict(input_text, encoder_model, decoder_model):
    # keep room for the <GO> and <EOS> tokens added below
    text_words = input_text.split()[:maxlen - 2]
    input_id = [vocab2id[w] if w in vocab2id else vocab2id["<UNK>"] for w in text_words]
    input_id = [vocab2id["<GO>"]] + input_id + [vocab2id["<EOS>"]]
    if len(input_id) < maxlen:
        input_id = input_id + [vocab2id["<PAD>"]] * (maxlen - len(input_id))
    input_source = np.array([input_id])
    input_target = np.array([vocab2id["<GO>"]])

    # Encoder prediction
    enc_outputs, enc_state_h, enc_state_c = encoder_model.predict([input_source])
    dec_inputs = input_target
    dec_states_inputs = [enc_state_h, enc_state_c]

    result_id = []
    result_text = []
    for i in range(maxlen):
        # Decoder prediction, one step at a time
        dense_outputs, dec_state_h, dec_state_c = decoder_model.predict([enc_outputs, dec_inputs] + dec_states_inputs)
        pred_id = np.argmax(dense_outputs[0][0])
        result_id.append(pred_id)
        result_text.append(id2vocab[pred_id])
        if id2vocab[pred_id] == "<EOS>":
            break
        dec_inputs = np.array([[pred_id]])
        dec_states_inputs = [dec_state_h, dec_state_c]
    return result_id, result_text

input_text = "你是"
result_id, result_text = infer_predict(input_text, encoder_model, decoder_model)
print("Input: ", input_text)
print("Output: ", result_text, result_id)

Input:  你是
Output:  ['<EOS>'] [3]
Copyright notice: this is an original article by zy345293721, released under the CC 4.0 BY-SA license. Please include a link to the original source and this notice when reposting.