TAG list: hiddens
67 Self-Attention and Positional Encoding
import math
import torch
from torch import nn
from d2l import torch as d2l

# Self-attention
num_hiddens, num_heads = 100, 5
attention = d2l.MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,
                                   num_hiddens, num...
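The preview cuts off inside the constructor call. A minimal sketch of how this example typically continues in the d2l book: the finished layer is applied with the same tensor as queries, keys and values, and a positional encoding supplies order information (the dropout of 0.5, the dummy batch shape and valid_lens below are illustrative assumptions, not taken from the post).

import torch
from d2l import torch as d2l

num_hiddens, num_heads = 100, 5
attention = d2l.MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,
                                   num_hiddens, num_heads, 0.5)
attention.eval()

# Self-attention: queries, keys and values are all the same tensor X,
# so the output has the same shape as the input.
batch_size, num_queries, valid_lens = 2, 4, torch.tensor([3, 2])
X = torch.ones((batch_size, num_queries, num_hiddens))
print(attention(X, X, X, valid_lens).shape)  # torch.Size([2, 4, 100])

# Positional encoding adds (sinusoidal) order information, since
# self-attention itself is permutation-invariant.
encoding_dim, num_steps = 32, 60
pos_encoding = d2l.PositionalEncoding(encoding_dim, 0)
pos_encoding.eval()
X = pos_encoding(torch.zeros((1, num_steps, encoding_dim)))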
68 Multi-Head Attention

import math
import torch
from torch import nn
from d2l import torch as d2l

# Scaled dot-product attention is used as each attention head
# ...
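The preview stops right after that comment. A sketch of a multi-head attention module in the d2l style, with one scaled dot-product attention shared across all heads; the helpers d2l.DotProductAttention, d2l.transpose_qkv and d2l.transpose_output come from the d2l package the post imports, and the exact signature is an assumption here.

import torch
from torch import nn
from d2l import torch as d2l

class MultiHeadAttention(nn.Module):
    """Multi-head attention built from scaled dot-product attention heads."""
    def __init__(self, key_size, query_size, value_size, num_hiddens,
                 num_heads, dropout, bias=False, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        # Scaled dot-product attention, shared by every head
        self.attention = d2l.DotProductAttention(dropout)
        self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)
        self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)
        self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)
        self.W_o = nn.Linear(num_hiddens, num_hiddens, bias=bias)

    def forward(self, queries, keys, values, valid_lens):
        # Split (batch, seq, num_hiddens) into (batch*heads, seq, num_hiddens/heads)
        queries = d2l.transpose_qkv(self.W_q(queries), self.num_heads)
        keys = d2l.transpose_qkv(self.W_k(keys), self.num_heads)
        values = d2l.transpose_qkv(self.W_v(values), self.num_heads)
        if valid_lens is not None:
            # Repeat the valid lengths once per head
            valid_lens = torch.repeat_interleave(
                valid_lens, repeats=self.num_heads, dim=0)
        output = self.attention(queries, keys, values, valid_lens)
        # Undo the head split and mix the heads with W_o
        output_concat = d2l.transpose_output(output, self.num_heads)
        return self.W_o(output_concat)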
68 Transformer

import math
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

#@save
class PositionWiseFFN(nn.Module):
    """Position-wise feed-forward network"""
    # Fully connected layers
    # num_steps (the sequence length) can vary,
    # so every element of the sequence goes through the same fully conn...
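The class body is cut off in the preview; a sketch of a position-wise FFN along those lines (the layer sizes in the usage example are placeholders).

import torch
from torch import nn

class PositionWiseFFN(nn.Module):
    """Position-wise FFN: the same two-layer MLP applied at every position."""
    def __init__(self, ffn_num_input, ffn_num_hiddens, ffn_num_outputs, **kwargs):
        super().__init__(**kwargs)
        self.dense1 = nn.Linear(ffn_num_input, ffn_num_hiddens)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(ffn_num_hiddens, ffn_num_outputs)

    def forward(self, X):
        # X: (batch_size, num_steps, ffn_num_input); nn.Linear acts on the last
        # axis, so the sequence length num_steps is free to change.
        return self.dense2(self.relu(self.dense1(X)))

ffn = PositionWiseFFN(4, 4, 8)
ffn.eval()
print(ffn(torch.ones((2, 3, 4))).shape)  # torch.Size([2, 3, 8])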
69 BERT

import math
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

#@save
class PositionWiseFFN(nn.Module):
    """Position-wise feed-forward network"""
    # Fully connected layers
    # num_steps (the sequence length) can vary,
    # so every element of the sequence goes through the same fully conn...
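The preview shows only the same position-wise FFN opening as the Transformer post. One BERT-specific piece worth sketching is how a sentence pair is packed into a single input with segment ids, in the spirit of d2l's get_tokens_and_segments helper (the function name and special-token strings follow d2l and are assumptions with respect to this post).

def get_tokens_and_segments(tokens_a, tokens_b=None):
    """Pack one or two sentences into a BERT input sequence with segment ids."""
    # <cls> sentence A <sep> [sentence B <sep>]
    tokens = ['<cls>'] + tokens_a + ['<sep>']
    # Segment 0 marks sentence A, including <cls> and the first <sep>
    segments = [0] * (len(tokens_a) + 2)
    if tokens_b is not None:
        tokens += tokens_b + ['<sep>']
        # Segment 1 marks sentence B
        segments += [1] * (len(tokens_b) + 1)
    return tokens, segments

tokens, segments = get_tokens_and_segments(['a', 'man'], ['went', 'home'])
print(tokens)    # ['<cls>', 'a', 'man', '<sep>', 'went', 'home', '<sep>']
print(segments)  # [0, 0, 0, 0, 1, 1, 1]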
57 Long Short-Term Memory (LSTM)

import torch
from torch import nn
from d2l import torch as d2l

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Parameter initialization
def get_lstm_params(vocab_size, num_hiddens, device):
    num_in...
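The preview stops inside get_lstm_params. A sketch of the from-scratch LSTM along the lines of the d2l chapter: each gate gets an input weight, a hidden weight and a bias, and the memory cell is updated through the input, forget and output gates (the 0.01 weight scale and the layout of params follow d2l and are assumptions with respect to the post).

import torch

def get_lstm_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        # Every gate needs an input weight, a hidden weight and a bias
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xi, W_hi, b_i = three()  # Input gate
    W_xf, W_hf, b_f = three()  # Forget gate
    W_xo, W_ho, b_o = three()  # Output gate
    W_xc, W_hc, b_c = three()  # Candidate memory cell
    W_hq = normal((num_hiddens, num_outputs))      # Output layer weight
    b_q = torch.zeros(num_outputs, device=device)  # Output layer bias
    params = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
              W_xc, W_hc, b_c, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

def lstm(inputs, state, params):
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
     W_xc, W_hc, b_c, W_hq, b_q] = params
    (H, C) = state
    outputs = []
    for X in inputs:  # inputs: (num_steps, batch_size, vocab_size)
        I = torch.sigmoid((X @ W_xi) + (H @ W_hi) + b_i)
        F = torch.sigmoid((X @ W_xf) + (H @ W_hf) + b_f)
        O = torch.sigmoid((X @ W_xo) + (H @ W_ho) + b_o)
        C_tilda = torch.tanh((X @ W_xc) + (H @ W_hc) + b_c)
        C = F * C + I * C_tilda   # Cell state mixes old memory and candidate
        H = O * torch.tanh(C)     # Hidden state is a gated view of the cell
        Y = (H @ W_hq) + b_q
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H, C)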
56 Gated Recurrent Unit (GRU)

import torch
from torch import nn
from d2l import torch as d2l

"""
Extra control (gating) units
"""
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Initialize model parameters
def get_params(vocab_size...
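The preview stops at the get_params signature. A sketch of the from-scratch GRU in the same style: a reset gate and an update gate control how much of the old hidden state is kept (again, the 0.01 weight scale and parameter layout follow d2l and are assumptions here).

import torch

def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xz, W_hz, b_z = three()  # Update gate
    W_xr, W_hr, b_r = three()  # Reset gate
    W_xh, W_hh, b_h = three()  # Candidate hidden state
    W_hq = normal((num_hiddens, num_outputs))      # Output layer weight
    b_q = torch.zeros(num_outputs, device=device)  # Output layer bias
    params = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:  # inputs: (num_steps, batch_size, vocab_size)
        Z = torch.sigmoid((X @ W_xz) + (H @ W_hz) + b_z)  # Update gate
        R = torch.sigmoid((X @ W_xr) + (H @ W_hr) + b_r)  # Reset gate
        H_tilda = torch.tanh((X @ W_xh) + ((R * H) @ W_hh) + b_h)
        H = Z * H + (1 - Z) * H_tilda  # Blend old state with the candidate
        Y = H @ W_hq + b_q
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)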