samout游跨越一次

🕗 发布于 2024-10-04 16:53 windows golang python 人工智能 pytorch

在这里插入图片描述

import torch
import numpy as np


class MaxState(torch.nn.Module):
    """Sequence-mixing layer that aggregates history with a running maximum.

    The input is projected by ``head0`` and ``head2``, split into heads, and a
    cumulative maximum (``torch.cummax``) is taken along the sequence axis, so
    position ``t`` only sees positions ``<= t``.  The result is clamped from
    above by a bound derived from ``head2``.

    Args:
        hidden_dim: Size of the last dimension of the input; must be divisible
            by ``heads``.
        heads: Number of heads the hidden dimension is split into.
        win: Accepted for interface compatibility; not used by this layer.
    """

    def __init__(self, hidden_dim, heads, win):
        super(MaxState, self).__init__()

        assert hidden_dim % heads == 0, "Hidden size must be divisible by the number of heads."

        self.head_size = hidden_dim // heads
        self.head0 = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)
        # head1 was only used by earlier experimental variants of forward();
        # it is kept so the parameter set (and state_dict keys) is unchanged.
        self.head1 = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.head2 = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)

        self.head_num = heads

        self.hidden = hidden_dim

    def forward(self, input_data, state=None):
        """Apply the cumulative-max mixing.

        Args:
            input_data: Tensor whose first two dimensions are (batch, sequence)
                and whose last dimension is ``hidden_dim``.
            state: Passed through untouched; this layer keeps no state.

        Returns:
            A ``(output, state)`` tuple; ``output`` has shape
            ``(batch, sequence, hidden_dim)``.
        """
        b, s = input_data.shape[0], input_data.shape[1]
        k, h = self.head_num, self.head_size
        scale = h ** 0.5

        out = self.head0(input_data)

        # The head2-based upper bound is needed both for the gate and for the
        # final clamp, so it is computed once and reused.
        bound = scale - torch.exp(self.head2(input_data))

        # Gate ("version 2" of the original experiments; the author's note
        # reads "more than 12 layers"): element-wise minimum of the head2
        # bound and a bound derived from the head0 projection.
        gate = torch.stack([bound, scale - torch.exp(scale - out)], -1).min(-1)[0]

        # (batch, seq, hidden) -> (batch, heads, seq, head_size)
        out = out.reshape([b, s, k, h]).permute([0, 2, 1, 3])
        gate = gate.reshape([b, s, k, h]).permute([0, 2, 1, 3])

        # Running maximum along the sequence axis (dim 2).
        out = torch.cummax(out * (torch.exp(gate) + scale), 2)[0]

        # Back to (batch, seq, hidden).
        out = out.permute([0, 2, 1, 3])
        out = out.reshape([b, s, -1])

        # Final clamp: never exceed the head2 bound.
        out = torch.stack([bound, torch.exp(scale - out)], -1).min(-1)[0]

        return out, state


class KAttention(torch.nn.Module):
    """Causal multi-head scaled dot-product self-attention.

    Args:
        hidden_dim: Size of the last dimension of the input; must be divisible
            by ``heads``.
        heads: Number of attention heads.
    """

    def __init__(self, hidden_dim, heads):
        super(KAttention, self).__init__()

        assert hidden_dim % heads == 0, "Hidden size must be divisible by the number of heads."

        self.head_size = hidden_dim // heads
        self.q = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.k = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.v = torch.nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.head_num = heads

    def forward(self, x, state=None):
        """Attend each position to itself and to earlier positions only.

        Args:
            x: Tensor whose first two dimensions are (batch, sequence) and
                whose last dimension is ``hidden_dim``.
            state: Passed through untouched; this layer keeps no state.

        Returns:
            A ``(output, state)`` tuple; ``output`` has shape
            ``(batch, sequence, hidden_dim)``.
        """
        b, s, h, d = x.shape[0], x.shape[1], self.head_num, self.head_size

        # (batch, seq, hidden) -> (batch, heads, seq, head_size)
        q = self.q(x).reshape([b, s, h, d]).permute([0, 2, 1, 3])
        k = self.k(x).reshape([b, s, h, d]).permute([0, 2, 1, 3])
        v = self.v(x).reshape([b, s, h, d]).permute([0, 2, 1, 3])

        scores = (q @ k.permute([0, 1, 3, 2])) / d ** 0.5

        # Lower-triangular mask built on the input's own device, so the layer
        # works wherever the input lives instead of relying on a module-level
        # global.  masked_fill keeps the dtype of the scores.
        causal = torch.tril(torch.ones(s, s, device=x.device)).bool()
        scores = scores.masked_fill(~causal, float('-inf'))

        qkv = torch.nn.functional.softmax(scores, -1) @ v

        # (batch, heads, seq, head_size) -> (batch, seq, hidden)
        qkv = qkv.permute([0, 2, 1, 3]).reshape([b, s, -1])
        return qkv, state


class FeedForward(torch.nn.Module):
    """Gated feed-forward block.

    Two parallel projections widen the input to ``2 * hidden_size``; one of
    them passes through a ReLU and acts as a multiplicative gate on the other.
    A final projection maps the product back to ``hidden_size``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        inner_size = hidden_size * 2

        self.ffn1 = torch.nn.Linear(hidden_size, inner_size)
        self.ffn2 = torch.nn.Linear(inner_size, hidden_size)
        self.gate = torch.nn.Linear(hidden_size, inner_size)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return ``ffn2(ffn1(x) * relu(gate(x)))``."""
        value = self.ffn1(x)
        gate = self.relu(self.gate(x))
        return self.ffn2(value * gate)





class DecoderLayer(torch.nn.Module):
    """One decoder block: sequence mixing, feed-forward, residual, layer norm.

    The mixing layer is ``MaxState`` (constructed with ``win=8``).  The
    residual connection adds the block *input* to the feed-forward output, and
    layer normalisation is applied after the sum.
    """

    def __init__(self, hidden_size, num_heads):
        super().__init__()
        # The attribute keeps the name "self_attention" although the layer
        # used here is MaxState rather than an attention layer.
        self.self_attention = MaxState(hidden_size, num_heads, 8)
        self.ffn = FeedForward(hidden_size)
        self.layer_norm = torch.nn.LayerNorm(hidden_size)

    def forward(self, x, state=None, seq_len=None):
        """Run the block on ``x``.

        ``state`` is handed to the mixing layer and whatever it returns is
        passed back to the caller.  ``seq_len`` is accepted but not used.
        """
        mixed, state = self.self_attention(x, state)
        summed = self.ffn(mixed) + x
        return self.layer_norm(summed), state


class SamOut(torch.nn.Module):
    """Decoder-only sequence model producing per-position vocabulary logits.

    Token ids are embedded, then passed through ``num_layers`` decoder layers.
    Before every layer the positional embedding is concatenated to the hidden
    state and projected back to ``hidden_size`` by that layer's ``down``
    projection.

    Args:
        voc_size: Vocabulary size (token id 3 is used as ``padding_idx``).
        hidden_size: Width of the hidden state.
        num_heads: Number of heads in each decoder layer.
        num_layers: Number of decoder layers.
    """

    def __init__(self, voc_size, hidden_size, num_heads, num_layers):
        super(SamOut, self).__init__()
        self.em = torch.nn.Embedding(voc_size, hidden_size, padding_idx=3)
        self.pos = torch.nn.Embedding(1024, hidden_size)

        self.decoder_layers = torch.nn.ModuleList([DecoderLayer(hidden_size, num_heads) for _ in range(num_layers)])
        self.head = torch.nn.Linear(hidden_size, voc_size, False)

        # One projection per layer: [positional ; hidden] -> hidden.
        self.down = torch.nn.ModuleList(
            [torch.nn.Linear(2 * hidden_size, hidden_size, False) for _ in range(num_layers)])

    def state_forward(self, state, pos, x):
        """Run all decoder layers.

        Args:
            state: ``None`` or a list with one entry per decoder layer.  A
                list passed in is updated in place and also returned.
            pos: Positional embedding with a leading dimension of 1.
            x: Embedded tokens; first dimension is the batch.

        Returns:
            ``(hidden, state)`` after the last layer.
        """
        if state is None:
            state = [None] * len(self.decoder_layers)

        # Broadcast the positional embedding over the batch.  expand() follows
        # the device and dtype of ``pos`` itself, so no global device is needed.
        pos_batched = pos.expand(x.shape[0], -1, -1)

        for idx, (down, decoder_layer) in enumerate(zip(self.down, self.decoder_layers)):
            x = down(torch.concat([pos_batched, x], -1))

            layer_out, state[idx] = decoder_layer(x, state[idx])
            # Extra residual around the whole decoder layer.
            x = layer_out + x
        return x, state

    def pos_forward(self, x):
        """Build the positional embedding for the sequence length of ``x``.

        For sequences shorter than the table size (1024) the position index is
        looked up directly.  For longer sequences the embedding is the sum of
        the lookups for ``index // 1024`` and ``index % 1024``.

        Returns:
            Tensor with a leading dimension of 1 followed by
            (sequence, hidden_size).
        """
        table_size = self.pos.num_embeddings
        # Create the indices on the input's device instead of a global one.
        positions = torch.arange(0, x.shape[1], device=x.device).long()

        if x.shape[1] >= table_size:
            coarse = self.pos(positions // table_size).unsqueeze(0)
            fine = self.pos(positions % table_size).unsqueeze(0)
            return fine + coarse

        return self.pos(positions).unsqueeze(0)

    def forward(self, x0):
        """Return ``(logits, state)`` for a batch of token ids, starting from no state."""
        logits, state = self.one_forward(x0, state=None)

        return logits, state

    def one_forward(self, x, state=None, seq_len=None):
        """Embed token ids, run the decoder stack and project to the vocabulary.

        ``seq_len`` is accepted but not used.
        """
        x = self.em(x)

        pos = self.pos_forward(x)

        x, state = self.state_forward(state, pos, x)

        return self.head(x), state


# Prefer the GPU, but fall back to the CPU so the module also runs on machines
# without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
if __name__ == '__main__':
    # Smoke test: build a small model and push one random batch of token ids
    # (batch of 2, sequence length 8 * 13) through it.
    net = SamOut(235, 256, 16, 4)
    net.to(device)
    net(torch.randint(0, 200, [2, 8 * 13]).to(device))
    #

这段代码定义了一个基于PyTorch的仅解码器（decoder-only）序列模型：输入为词元（token）编号序列，输出为每个位置在词表上的logits。以下是代码的主要组成部分和功能概述：

MaxState类：这是一个自定义的序列混合层，在DecoderLayer中代替注意力机制来处理序列数据。它通过线性层得到投影值和门控值，并沿序列维度用累积最大值（torch.cummax）聚合当前位置及之前位置的信息；传入的state参数会原样返回，目前并未被更新。
KAttention类：这是另一个自定义层，实现了带因果掩码的多头缩放点积自注意力（query/key/value）。在当前代码中，DecoderLayer并未使用它（对应的那一行被注释掉了）。
FeedForward类：这是一个门控前馈神经网络层，包含三个线性层（ffn1、gate、ffn2）和一个ReLU激活函数：ffn1的输出与经过ReLU的gate输出逐元素相乘，再由ffn2投影回隐藏维度，用于在序列混合层之后处理数据。
DecoderLayer类：这是一个解码器层，包含一个注意力层和一个前馈神经网络层，并使用层归一化。
SamOut类：这是整个模型的主体，包含嵌入层、位置编码、多个解码器层和一个输出层。它还负责处理状态前向传播和位置编码前向传播。
设备配置：代码最后部分将模型移动到CUDA设备上，以便使用GPU进行加速计算。
主函数：在主函数中，创建了一个SamOut实例，并将其应用于一个随机整数矩阵，模拟输入数据。
整体而言，这个模型适用于处理序列数据，如自然语言处理任务中的机器翻译、文本摘要等。通过使用注意力机制和前馈神经网络，模型能够学习输入序列和输出序列之间的复杂关系。

原文地址：https://blog.csdn.net/weixin_32759777/article/details/142702818

免责声明：本站文章内容转载自网络资源，如本站内容侵犯了原著者的合法权益，可联系本站删除。更多内容请关注自学内容网（zxcms.com）！

上一篇：为什么MySQL不建议使用delete删除数据
下一篇：Python FFmpeg 安装使用教程

《机器学习》周志华-CH9（聚类）
聚类试图将数据集中的样本划分为若干个通常是不相交的子集，每个子集称为"簇"。时，“闵可夫斯基距离”是欧氏距离（Euclidean distance）,最常用的是“闵可夫斯基距离”（
阅读更多2024-10-04
深入掌握 Protobuf 与 RPC 的高效结合：实现C++工程中的高效通信
定义IDL（接口定义语言）文件：通过Protobuf的.proto文件，定义服务接口和消息格式，这是所有客户端与服务器通信的基础。编译IDL文件：使用Protobuf编译器（protoc），生成客户端
阅读更多2024-10-04
【计算机视觉】ch1-Introduction
透视投影中的四个坐标系逐级转换：首先，物体的位置由世界坐标系表示，然后转换到相机坐标系，再投影到图像平面坐标系，最后数字化为数字图像坐标系。这一过程是计算机视觉、摄影测量和图像分析等领域中理解和处理图
阅读更多2024-10-04
springboot+vue+elementui大文件分片上传
工具类方法： /** * 大文件分片上传 * @param fileName 文件名 * @param file 文件 * @param fileKey
阅读更多2024-10-04
`git restore` 和 `git checkout` 用于丢弃工作区的改动， `git switch` 和 `git checkout` 用来切换分支
git checkout可以切换分支或丢弃文件的改动，但功能较为复杂。git restore专门用于恢复文件的内容，操作更直观，推荐在恢复文件时使用。
阅读更多2024-10-04
Arduino使用网页连接修改esp8266等物联网并修改网络连接信息的基本思路
简单的记录修改wifi的整体思路
阅读更多2024-10-04
Python库pandas之四
如果“infer”和“path_or_buf”是类似路径，则检测以下扩展名的压缩：“.gz”、“.bz2”、“.zip”、“.xz”、“.zst”、“.tar” 、“.tar.gz”、“.tar.xz
阅读更多2024-10-04
PostgreSQL技术内幕13：PostgreSQL通讯协议
之前文章对于PG的内部模块做了一些介绍，接下来对PG和外部交互的部分进行介绍，本文主要介绍通讯协议的概念和PG中的消息格式和交互流程。
阅读更多2024-10-04
ISA-95制造业中企业和控制系统的集成的国际标准-(6)
通过比对MESA协会对MES的功能定义和ISA-95的普渡(purdue)模型，可以清晰的看到，MESA功能范围内并没有包含质量作业模块、库存作业模块，而这正好是需要ISA-95对MES进一步优化和完
阅读更多2024-10-04
创建django项目，编译类型选择Custom environment后，却没有manage.py文件，无法启动项目？
1、首先查看项目中是否安装了django，没有则安装。2 、创建项目（这里的myproject则表示项目名）创建后，启动项目却发现没有manage.py文件。
阅读更多2024-10-04

samout游跨越一次

相关文章