62 Sequence-to-Sequence (seq2seq)
Author: Internet
import collections
import math
import torch
from torch import nn
from d2l import torch as d2l
# Implement the RNN encoder
#@save
class Seq2SeqEncoder(d2l.Encoder):
    """The RNN encoder for sequence-to-sequence learning"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # An LSTM would also work here
        # The encoder needs no output layer
        self.rnn = nn.GRU(embed_size, num_hiddens, num_layers,
                          dropout=dropout)

    def forward(self, X, *args):
        # After embedding, 'X' has shape (batch_size, num_steps, embed_size)
        X = self.embedding(X)
        # In RNN models, the first axis corresponds to the time step,
        # so permute to (num_steps, batch_size, embed_size)
        X = X.permute(1, 0, 2)
        # output: the top layer's hidden state at every time step
        # state: the final hidden state of every layer
        output, state = self.rnn(X)
        # output shape: (num_steps, batch_size, num_hiddens)
        # state shape: (num_layers, batch_size, num_hiddens)
        # state[0] shape: (batch_size, num_hiddens)
        return output, state
# Testing the encoder
print('Testing the encoder')
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=16,
                         num_layers=2)
# In eval mode, dropout is not applied
encoder.eval()
# (4, 7) --> (batch_size, num_steps)
X = torch.zeros((4, 7), dtype=torch.long)
output, state = encoder(X)
print('output.shape', output.shape)
# output.shape torch.Size([7, 4, 16])
print('state.shape', state.shape)
# state.shape torch.Size([2, 4, 16])
print('state[0].shape', state[0].shape)
# state[0].shape torch.Size([4, 16])
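# As the comment in the encoder notes, the GRU can be swapped for an LSTM.
# A minimal sketch of that variant (an illustration added here, not part of
# the original post): the only wrinkle is that nn.LSTM returns its state as
# a tuple (h, c), so any code that indexes into the state must be adjusted.
class Seq2SeqLSTMEncoder(d2l.Encoder):
    """Hypothetical LSTM variant of the encoder above"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super().__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, num_hiddens, num_layers,
                           dropout=dropout)

    def forward(self, X, *args):
        X = self.embedding(X).permute(1, 0, 2)
        # Unlike the GRU, state is a tuple (h, c); each element has shape
        # (num_layers, batch_size, num_hiddens)
        output, state = self.rnn(X)
        return output, state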
# The decoder
class Seq2SeqDecoder(d2l.Decoder):
    """The RNN decoder for sequence-to-sequence learning"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Assume the encoder and decoder have the same hidden size
        self.rnn = nn.GRU(embed_size + num_hiddens, num_hiddens, num_layers,
                          dropout=dropout)
        # Output layer
        self.dense = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, enc_outputs, *args):
        # The encoder returns (output, state); enc_outputs[1] is the state
        return enc_outputs[1]
    def forward(self, X, state):
        # After embedding, 'X' has shape (batch_size, num_steps, embed_size);
        # the first axis must be the time step, so permute to
        # (num_steps, batch_size, embed_size)
        X = self.embedding(X).permute(1, 0, 2)
        # Broadcast the context so it has the same num_steps as X;
        # state[-1] is the top layer's hidden state at the final time step
        print('X.shape : ', X.shape)
        print('state[-1].shape', state[-1].shape)
        context = state[-1].repeat(X.shape[0], 1, 1)
        X_and_context = torch.cat((X, context), 2)
        print('context.shape : ', context.shape)
        print('X_and_context.shape : ', X_and_context.shape)
        """
        X.shape :  torch.Size([7, 4, 8])
        state[-1].shape torch.Size([4, 16])
        context.shape :  torch.Size([7, 4, 16])
        X_and_context.shape :  torch.Size([7, 4, 24])
        """
        output, state = self.rnn(X_and_context, state)
        output = self.dense(output).permute(1, 0, 2)
        # output shape: (batch_size, num_steps, vocab_size)
        # state shape: (num_layers, batch_size, num_hiddens)
        return output, state
Source: https://www.cnblogs.com/g932150283/p/16469806.html