
67 Self-Attention and Positional Encoding

import math
import torch
from torch import nn
from d2l import torch as d2l

# Self-attention: queries, keys and values all come from the same sequence
num_hiddens, num_heads = 100, 5
attention = d2l.MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,
                                   num_hiddens, num_heads, 0.5)  # last argument: dropout
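The post title also covers positional encoding, which the preview does not reach. Below is a minimal sketch of the sinusoidal positional encoding that gets added to the self-attention input (standard formulation; the class name, max_len, and the assumption of an even num_hiddens are illustrative, not taken from the preview).

import torch
from torch import nn

class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding added to the token representations."""
    def __init__(self, num_hiddens, dropout, max_len=1000):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        # P[0, i, 2j]   = sin(i / 10000^(2j / num_hiddens))
        # P[0, i, 2j+1] = cos(i / 10000^(2j / num_hiddens))
        self.P = torch.zeros((1, max_len, num_hiddens))
        pos = torch.arange(max_len, dtype=torch.float32).reshape(-1, 1)
        div = torch.pow(10000, torch.arange(0, num_hiddens, 2,
                                            dtype=torch.float32) / num_hiddens)
        self.P[:, :, 0::2] = torch.sin(pos / div)
        self.P[:, :, 1::2] = torch.cos(pos / div)

    def forward(self, X):
        # X: (batch_size, num_steps, num_hiddens); add the first num_steps rows of P
        X = X + self.P[:, :X.shape[1], :].to(X.device)
        return self.dropout(X)

Because the encoding is added element-wise, its dimension has to match the num_hiddens of the attention layer above.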

68 Multi-Head Attention

import math
import torch
from torch import nn
from d2l import torch as d2l

# Use scaled dot-product attention as the attention function inside every head
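The preview is cut off right after that comment. Here is a minimal sketch of a multi-head attention block in the same style, using d2l.DotProductAttention (scaled dot-product attention) for every head; the head split/merge helpers and the bias=False default are illustrative assumptions:

import torch
from torch import nn
from d2l import torch as d2l

class MultiHeadAttention(nn.Module):
    """Project Q/K/V, split into heads, attend per head in parallel, merge, project."""
    def __init__(self, key_size, query_size, value_size, num_hiddens,
                 num_heads, dropout, bias=False):
        super().__init__()
        self.num_heads = num_heads
        self.attention = d2l.DotProductAttention(dropout)  # scaled dot-product attention
        self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)
        self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)
        self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)
        self.W_o = nn.Linear(num_hiddens, num_hiddens, bias=bias)

    def _split_heads(self, X):
        # (batch, steps, num_hiddens) -> (batch * heads, steps, num_hiddens / heads)
        X = X.reshape(X.shape[0], X.shape[1], self.num_heads, -1)
        return X.permute(0, 2, 1, 3).reshape(-1, X.shape[1], X.shape[3])

    def _merge_heads(self, X):
        # inverse of _split_heads
        X = X.reshape(-1, self.num_heads, X.shape[1], X.shape[2])
        return X.permute(0, 2, 1, 3).reshape(X.shape[0], X.shape[2], -1)

    def forward(self, queries, keys, values, valid_lens=None):
        queries = self._split_heads(self.W_q(queries))
        keys = self._split_heads(self.W_k(keys))
        values = self._split_heads(self.W_v(values))
        if valid_lens is not None:
            # every head shares the same valid lengths
            valid_lens = torch.repeat_interleave(valid_lens, self.num_heads, dim=0)
        output = self.attention(queries, keys, values, valid_lens)
        return self.W_o(self._merge_heads(output))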

68 Transformer

import math
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

#@save
class PositionWiseFFN(nn.Module):
    """Position-wise feed-forward network."""
    # A plain fully connected MLP. num_steps (the sequence length) varies,
    # so the same fully connected layers are applied to every element of
    # the sequence independently.
    def __init__(self, ffn_num_input, ffn_num_hiddens, ffn_num_outputs,
                 **kwargs):
        super(PositionWiseFFN, self).__init__(**kwargs)
        self.dense1 = nn.Linear(ffn_num_input, ffn_num_hiddens)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(ffn_num_hiddens, ffn_num_outputs)

    def forward(self, X):
        return self.dense2(self.relu(self.dense1(X)))
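As the comments say, the FFN transforms each position independently; a quick shape check (illustrative sizes) makes that concrete:

ffn = PositionWiseFFN(4, 8, 4)
ffn.eval()
X = torch.ones((2, 3, 4))    # (batch_size, num_steps, ffn_num_input)
print(ffn(X).shape)          # torch.Size([2, 3, 4]): num_steps is left untouched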

69 BERT

import math
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

# The preview repeats the PositionWiseFFN definition shown under the
# Transformer entry above: BERT's encoder is built from the same
# Transformer encoder components.
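For the BERT-specific part, a sketch of how BERT assembles its input: the token sequences are concatenated with <cls>/<sep> markers and paired with segment ids that tell the two sentences apart (this mirrors d2l's get_tokens_and_segments; the example values are illustrative):

def get_tokens_and_segments(tokens_a, tokens_b=None):
    """Concatenate token lists with <cls>/<sep> and build segment ids."""
    tokens = ['<cls>'] + tokens_a + ['<sep>']
    segments = [0] * (len(tokens_a) + 2)        # <cls> + tokens_a + <sep>
    if tokens_b is not None:
        tokens += tokens_b + ['<sep>']
        segments += [1] * (len(tokens_b) + 1)   # tokens_b + <sep>
    return tokens, segments

# get_tokens_and_segments(['a', 'b'], ['c'])
# -> (['<cls>', 'a', 'b', '<sep>', 'c', '<sep>'], [0, 0, 0, 0, 1, 1])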

57 Long Short-Term Memory (LSTM)

import torch
from torch import nn
from d2l import torch as d2l

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Parameter initialization
def get_lstm_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size
    # ... (weights for the three gates, the candidate memory cell and the
    #      output layer follow in the full post)
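To show what those parameters drive, here is a sketch of the per-step LSTM update in the same from-scratch style (the parameter ordering and names are assumptions: input, forget and output gates, candidate memory cell, then the output layer):

import torch

def lstm(inputs, state, params):
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
     W_xc, W_hc, b_c, W_hq, b_q] = params
    (H, C) = state
    outputs = []
    for X in inputs:                                      # one time step at a time
        I = torch.sigmoid(X @ W_xi + H @ W_hi + b_i)      # input gate
        F = torch.sigmoid(X @ W_xf + H @ W_hf + b_f)      # forget gate
        O = torch.sigmoid(X @ W_xo + H @ W_ho + b_o)      # output gate
        C_tilda = torch.tanh(X @ W_xc + H @ W_hc + b_c)   # candidate memory cell
        C = F * C + I * C_tilda                           # update memory cell
        H = O * torch.tanh(C)                             # update hidden state
        outputs.append(H @ W_hq + b_q)                    # per-step output
    return torch.cat(outputs, dim=0), (H, C)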

56 Gated Recurrent Unit (GRU)

import torch
from torch import nn
from d2l import torch as d2l

# GRU: a vanilla RNN plus extra gating (control) units
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Initialize model parameters
def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size
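The "extra gating units" are the update gate Z and the reset gate R; a sketch of the per-step GRU update in the same from-scratch style (parameter ordering and names are assumptions):

import torch

def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        Z = torch.sigmoid(X @ W_xz + H @ W_hz + b_z)            # update gate
        R = torch.sigmoid(X @ W_xr + H @ W_hr + b_r)            # reset gate
        H_tilda = torch.tanh(X @ W_xh + (R * H) @ W_hh + b_h)   # candidate hidden state
        H = Z * H + (1 - Z) * H_tilda                           # blend old and new state
        outputs.append(H @ W_hq + b_q)                          # per-step output
    return torch.cat(outputs, dim=0), (H,)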