62 Sequence-to-Sequence (seq2seq)
Author: Internet
import collections
import math
import torch
from torch import nn
from d2l import torch as d2l
# Implement the RNN encoder
#@save
class Seq2SeqEncoder(d2l.Encoder):
    """The RNN encoder for sequence-to-sequence learning"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # An LSTM would also work here
        # The encoder needs no output layer
        self.rnn = nn.GRU(embed_size, num_hiddens, num_layers,
                          dropout=dropout)

    def forward(self, X, *args):
        # After embedding, 'X' has shape (batch_size, num_steps, embed_size)
        X = self.embedding(X)
        # In RNN models, the first axis corresponds to the time step,
        # so permute to (num_steps, batch_size, embed_size)
        X = X.permute(1, 0, 2)
        # output: the top layer's hidden state at every time step
        # state: the final hidden state of every layer
        output, state = self.rnn(X)
        # output shape: (num_steps, batch_size, num_hiddens)
        # state shape: (num_layers, batch_size, num_hiddens)
        # state[0] shape: (batch_size, num_hiddens)
        return output, state
# Testing the encoder
print('Testing the encoder')
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=16,
                         num_layers=2)
# In eval mode, dropout is not applied
encoder.eval()
# (4, 7) --> (batch_size, num_steps)
X = torch.zeros((4, 7), dtype=torch.long)
output, state = encoder(X)
print('output.shape', output.shape)
# output.shape torch.Size([7, 4, 16])
print('state.shape', state.shape)
# state.shape torch.Size([2, 4, 16])
print('state[0].shape', state[0].shape)
# state[0].shape torch.Size([4, 16])
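# As the comment in the encoder notes, the GRU can be swapped for an LSTM.
# A minimal sketch of that variant (an illustration added here, not part of
# the original post): the only wrinkle is that nn.LSTM returns its state as
# a tuple (h, c), so any code that indexes into the state must be adjusted.
class Seq2SeqLSTMEncoder(d2l.Encoder):
    """Hypothetical LSTM variant of the encoder above"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super().__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, num_hiddens, num_layers,
                           dropout=dropout)

    def forward(self, X, *args):
        X = self.embedding(X).permute(1, 0, 2)
        # Unlike the GRU, state is a tuple (h, c); each element has shape
        # (num_layers, batch_size, num_hiddens)
        output, state = self.rnn(X)
        return output, state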
# The decoder
class Seq2SeqDecoder(d2l.Decoder):
    """The RNN decoder for sequence-to-sequence learning"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Assume the encoder and decoder have the same hidden size
        self.rnn = nn.GRU(embed_size + num_hiddens, num_hiddens, num_layers,
                          dropout=dropout)
        # Output layer
        self.dense = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, enc_outputs, *args):
        # The encoder returns (output, state); enc_outputs[1] is the state
        return enc_outputs[1]
    def forward(self, X, state):
        # After embedding, 'X' has shape (batch_size, num_steps, embed_size);
        # the first axis must be the time step, so permute to
        # (num_steps, batch_size, embed_size)
        X = self.embedding(X).permute(1, 0, 2)
        # Broadcast the context so it has the same num_steps as X;
        # state[-1] is the top layer's hidden state at the final time step
        print('X.shape : ', X.shape)
        print('state[-1].shape', state[-1].shape)
        context = state[-1].repeat(X.shape[0], 1, 1)
        X_and_context = torch.cat((X, context), 2)
        print('context.shape : ', context.shape)
        print('X_and_context.shape : ', X_and_context.shape)
        """
        X.shape :  torch.Size([7, 4, 8])
        state[-1].shape torch.Size([4, 16])
        context.shape :  torch.Size([7, 4, 16])
        X_and_context.shape :  torch.Size([7, 4, 24])
        """
        output, state = self.rnn(X_and_context, state)
        output = self.dense(output).permute(1, 0, 2)
        # output shape: (batch_size, num_steps, vocab_size)
        # state shape: (num_layers, batch_size, num_hiddens)
        return output, state
Source: https://www.cnblogs.com/g932150283/p/16469806.html