Assignment 4 is mainly about the NMT (neural machine translation) task.
(1) Pad sentences of different lengths so that they all have the same length (file: utils.py).
def pad_sents(sents, pad_token):
    sents_padded = []
    ### YOUR CODE HERE (~6 Lines)
    # first pass: find the length of the longest sentence
    max_len = len(sents[0])
    for sentence in sents[1:]:
        if len(sentence) > max_len:
            max_len = len(sentence)
    # second pass: pad every shorter sentence up to max_len
    for sentence in sents:
        if len(sentence) < max_len:
            for i in range(len(sentence), max_len):
                sentence.append(pad_token)
        sents_padded.append(sentence)
    ### END YOUR CODE
    return sents_padded
Since no test is provided for this function, I made up a few sentences to test it myself.
l = [['i','want','hate','you'], ['i','think','you','are','bad'], ['i','like','you']]
print(pad_sents(l, '0'))  # args: sents, pad_token
output:
[['i', 'want', 'hate', 'you', '0'], ['i', 'think', 'you', 'are', 'bad'], ['i', 'like', 'you', '0', '0']]
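For comparison, here is a more compact variant that I believe behaves the same (pad_sents_compact is just a hypothetical name of my own; unlike the version above, it does not mutate the input lists):

def pad_sents_compact(sents, pad_token):
    # find the longest sentence, then extend a copy of each sentence with pad_token
    max_len = max(len(s) for s in sents)
    return [s + [pad_token] * (max_len - len(s)) for s in sents]

l = [['i','want','hate','you'], ['i','think','you','are','bad'], ['i','like','you']]
print(pad_sents_compact(l, '0'))
# [['i', 'want', 'hate', 'you', '0'], ['i', 'think', 'you', 'are', 'bad'], ['i', 'like', 'you', '0', '0']]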
(2) Initialize the source and target embeddings with nn.Embedding (file: model_embeddings.py).
class ModelEmbeddings(nn.Module):
    def __init__(self, embed_size, vocab):
        super(ModelEmbeddings, self).__init__()
        self.embed_size = embed_size
        # default values
        self.source = None
        self.target = None
        src_pad_token_idx = vocab.src['<pad>']
        tgt_pad_token_idx = vocab.tgt['<pad>']
        ### YOUR CODE HERE (~2 Lines)
        self.source = nn.Embedding(len(vocab.src), self.embed_size, padding_idx=src_pad_token_idx)
        self.target = nn.Embedding(len(vocab.tgt), self.embed_size, padding_idx=tgt_pad_token_idx)
        ### END YOUR CODE
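As a quick standalone check of what padding_idx does, a toy example of my own (sizes and indices are made up, not the assignment's real vocab):

import torch
import torch.nn as nn

# toy vocab of 5 tokens, embedding size 3, index 0 plays the role of <pad>
emb = nn.Embedding(num_embeddings=5, embedding_dim=3, padding_idx=0)
print(emb.weight[0])                        # the <pad> row is initialized to all zeros
ids = torch.tensor([[1, 2, 0], [3, 0, 0]])  # two "sentences" padded with index 0
out = emb(ids)                              # shape (2, 3, 3): batch, seq_len, embed_dim
print(out.shape)
# The gradient for the padding_idx row is never updated during training,
# so padded positions keep the zero vector.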
(3) Build the NMT network structure: the encoder is a Bi-LSTM, the decoder is a unidirectional LSTM (an nn.LSTMCell), and a multiplicative (global) attention mechanism connects them.
class NMT(nn.Module):
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        ## instantiate the ModelEmbeddings class defined above
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None
        ### YOUR CODE HERE (~8 Lines)
        self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True)
        self.decoder = nn.LSTMCell(hidden_size + embed_size, hidden_size, bias=True)
        self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
        self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
        self.att_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
        self.combined_output_projection = nn.Linear(hidden_size * 3, hidden_size, bias=False)
        self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False)
        self.dropout = nn.Dropout(dropout_rate)
        ### END YOUR CODE
This part is fairly straightforward, since the dimensions of each layer are already spelled out in the assignment PDF.
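As a sanity check on those dimensions, here is a small sketch of my own with toy sizes (h = 6, batch of 2; not part of the assignment code):

import torch
import torch.nn as nn

h, b = 6, 2                                                    # toy hidden size and batch size
h_projection = nn.Linear(2 * h, h, bias=False)                 # Bi-LSTM concatenates both directions -> 2h in
combined_output_projection = nn.Linear(3 * h, h, bias=False)   # [attention output (2h); decoder hidden (h)] -> 3h in

last_hidden = torch.randn(2, b, h)   # (num_directions, b, h) as returned by a Bi-LSTM
init_decoder_hidden = h_projection(torch.cat((last_hidden[0], last_hidden[1]), dim=1))
print(init_decoder_hidden.shape)     # torch.Size([2, 6]) -> matches the decoder hidden size

a_t = torch.randn(b, 2 * h)          # attention output lives in the 2h-dim encoder space
dec_hidden = torch.randn(b, h)
U_t = torch.cat((a_t, dec_hidden), dim=1)        # (b, 3h)
print(combined_output_projection(U_t).shape)     # torch.Size([2, 6])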
(d) Build the encode step.
Input: the padded source sentences.
Output: all encoder hidden states (used later for attention), plus the final hidden and cell states (used as the decoder's initial input).
def encode(self, source_padded: torch.Tensor, source_lengths: List[int]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    enc_hiddens, dec_init_state = None, None
    ### YOUR CODE HERE
    source_embeddings = self.model_embeddings.source(source_padded)
    # -> (src_len, b, e)
    # pack_padded_sequence / pad_packed_sequence are imported from torch.nn.utils.rnn
    X = pack_padded_sequence(source_embeddings, source_lengths, batch_first=False, enforce_sorted=False)
    ### pack the padded batch column by column: each column is one sentence
    ### batch_first=False (the default): the input is laid out as (src_len, b, e)
    ### enforce_sorted=True (the default) would require the batch to be sorted by source_lengths, so pass False
    ### https://www.cnblogs.com/sbj123456789/p/9834018.html
    enc_hiddens, (last_hidden, last_cell) = self.encoder(X)  ### Encoder
    ### enc_hiddens: (src_len, b, h*2)
    enc_hiddens = pad_packed_sequence(enc_hiddens, batch_first=True)
    enc_hiddens = enc_hiddens[0]  ### -> (b, src_len, h*2)
    init_decoder_hidden = self.h_projection(torch.cat((last_hidden[0], last_hidden[1]), 1))
    init_decoder_cell = self.c_projection(torch.cat((last_cell[0], last_cell[1]), 1))
    dec_init_state = (init_decoder_hidden, init_decoder_cell)
    ### END YOUR CODE
    return enc_hiddens, dec_init_state
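To see concretely what pack_padded_sequence and pad_packed_sequence do, a toy example of my own (made-up lengths, random values):

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# toy batch of 3 "sentences" with lengths 4, 2, 3, laid out as (src_len, b, e), i.e. batch_first=False
src_len, b, e = 4, 3, 2
x = torch.randn(src_len, b, e)
lengths = [4, 2, 3]

packed = pack_padded_sequence(x, lengths, batch_first=False, enforce_sorted=False)
# packed.data keeps only the real (non-padding) time steps: 4 + 2 + 3 = 9 of them
print(packed.data.shape)      # torch.Size([9, 2])

unpacked, out_lengths = pad_packed_sequence(packed, batch_first=True)
print(unpacked.shape)         # torch.Size([3, 4, 2]) -> (b, src_len, e)
print(out_lengths)            # tensor([4, 2, 3]), restored in the original order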
(e) Build the decode step.
(1) The attention mechanism computes e_t,i = (h_t^dec)^T · W_attProj · h_i^enc, so the W_attProj · h_i^enc part can be computed once up front and then reused as each time step's h_dec arrives; that is exactly what the self.att_projection layer is for (see the small sketch right after this list).
(2) At each time step, the target embedding y_t is concatenated with the previous combined output o_{t-1} to form Ybar_t, which is fed to step() together with the decoder state and the (projected) encoder hidden states; the combined outputs are collected and stacked at the end.
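A minimal sketch of that precomputation with toy shapes (the nn.Linear below just stands in for self.att_projection, and masking of pad positions is left out; this mirrors the multiplicative attention that step() implements):

import torch
import torch.nn as nn
import torch.nn.functional as F

b, src_len, h = 2, 5, 6
enc_hiddens = torch.randn(b, src_len, 2 * h)      # (b, src_len, 2h) from the Bi-LSTM
att_projection = nn.Linear(2 * h, h, bias=False)  # stand-in for self.att_projection

# precompute W_attProj * h_i^enc once for the whole source sentence...
enc_hiddens_proj = att_projection(enc_hiddens)    # (b, src_len, h)

# ...then at each decoder time step only a batched dot product is needed
dec_hidden = torch.randn(b, h)                                          # h_t^dec
e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)   # (b, src_len) attention scores
alpha_t = F.softmax(e_t, dim=1)                                         # attention weights
a_t = torch.bmm(alpha_t.unsqueeze(1), enc_hiddens).squeeze(1)           # (b, 2h) attention output
print(e_t.shape, alpha_t.shape, a_t.shape)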
def decode(self, enc_hiddens: torch.Tensor, enc_masks: torch.Tensor,
           dec_init_state: Tuple[torch.Tensor, torch.Tensor], target_padded: torch.Tensor) -> torch.Tensor:
    # Chop off the <END> token for max length sentences.
    target_padded = target_padded[:-1]
    # Initialize the decoder state (hidden and cell)
    dec_state = dec_init_state
    # Initialize previous combined output vector o_{t-1} as zero
    batch_size = enc_hiddens.size(0)  ### number of sentences in the batch
    o_prev = torch.zeros(batch_size, self.hidden_size, device=self.device)
    combined_outputs = []
    ### YOUR CODE HERE
    enc_hiddens_proj = self.att_projection(enc_hiddens)
    ### precompute W_attProj * h_enc once for all time steps -> (b, src_len, h)
    Y = self.model_embeddings.target(target_padded)  # -> (tgt_len, b, e)
    for y_t in torch.split(Y, 1, dim=0):             # each chunk: (1, b, e)
        y_t = torch.squeeze(y_t, dim=0)              # -> (b, e)
        Ybar_t = torch.cat((y_t, o_prev), 1)         # -> (b, e) + (b, h) = (b, e+h)
        # step() returns the new decoder state, the combined output o_t, and the attention scores e_t
        dec_state, o_t, e_t = self.step(Ybar_t, dec_state, enc_hiddens, enc_hiddens_proj, enc_masks)
        combined_outputs.append(o_t)
        o_prev = o_t
    combined_outputs = torch.stack(combined_outputs, dim=0)  ## -> (tgt_len, b, h)
    ### END YOUR CODE
    return combined_outputs
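To make the torch.split / squeeze / cat pattern in the loop concrete, a toy run of my own showing only the shapes (all sizes and values are made up):

import torch

tgt_len, b, e, h = 3, 2, 4, 6
Y = torch.randn(tgt_len, b, e)            # target embeddings, (tgt_len, b, e)
o_prev = torch.zeros(b, h)                # previous combined output o_{t-1}

for y_t in torch.split(Y, 1, dim=0):      # each chunk: (1, b, e)
    y_t = torch.squeeze(y_t, dim=0)       # -> (b, e)
    Ybar_t = torch.cat((y_t, o_prev), dim=1)   # -> (b, e + h)
    print(Ybar_t.shape)                   # torch.Size([2, 10]) at every time step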