Framework Model Layer
Performance testing of NVIDIA GPUs on the CUDA platform, using a collection of classic deep-learning models built on PyTorch.
Repository: AI-Benchmark-SDU
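Every benchmarked model subclasses a common BaseModel, which the harness drives through four hooks. The class itself is not reproduced here; what follows is a minimal sketch of the interface as implied by the two subclasses below (an assumption: the real BaseModel in the repository likely adds timing and bookkeeping on top of this):

# Hypothetical sketch of model/model_set/model_base.py, reconstructed
# from the subclasses shown below; the real class likely does more.
class BaseModel:
    def __init__(self, model_name: str):
        self.model_name = model_name  # e.g. 'language/generative/llama3'

    def get_input(self):
        # Prepare self.input (prompt, tensors, ...) for one inference run
        raise NotImplementedError

    def load_model(self):
        # Load the weights onto the target device
        raise NotImplementedError

    def get_params_flops(self) -> list:
        # [GFLOPs, parameter count in millions], as in the CLIP model below
        raise NotImplementedError

    def inference(self):
        # Run one forward pass and return its output
        raise NotImplementedError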
Selected model code:
Llama 3:
'''
Copyright (c) 2024, 山东大学智能创新研究院(Academy of Intelligent Innovation)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
'''
# Copyright (c) Academy of Intelligent Innovation.
# License-Identifier: BSD 2-Clause License
# AI Benchmark SDU Team
from model.model_set.model_base import BaseModel
from llama_cpp import Llama

class llama3_nvidia_amd(BaseModel):
    def __init__(self):
        super().__init__('language/generative/llama3')

    def get_input(self):
        self.input = "Q: Name the planets in the solar system? A: "

    def load_model(self):
        self.llm = Llama(
            model_path="model/model_set/pytorch/language/generative/llama3/ggml-meta-llama-3-8b-Q4_K_M.gguf",
            n_gpu_layers=99,        # Offload all layers to the GPU
            chat_format="llama-3",
            seed=1337,              # Fixed seed for reproducible sampling
            n_ctx=2048,             # Context window size
            verbose=False
        )

    def get_params_flops(self) -> list:
        # Fixed values; the quantized GGUF model is not profiled at runtime
        return [803, float('nan')]

    def inference(self):
        output = self.llm(
            prompt=self.input,
            max_tokens=512,         # Generate up to 512 tokens
            stop=["Q:", "\n"],      # Stop before the model starts a new question
            echo=True               # Echo the prompt back in the output
        )
        completion_tokens = output['usage']['completion_tokens']
        return completion_tokens
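For reference, a driver loop might exercise these hooks as follows. This is a hypothetical sketch of how a harness could call the model; the actual benchmark runner in the repository may measure differently:

import time

model = llama3_nvidia_amd()
model.load_model()                  # load the GGUF weights once
model.get_input()                   # set the prompt
start = time.perf_counter()
tokens = model.inference()          # number of completion tokens generated
elapsed = time.perf_counter() - start
print(f"{tokens} tokens in {elapsed:.2f} s ({tokens / elapsed:.1f} tokens/s)")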
CLIP:
import torch
from thop import profile

from model.model_set.model_base import BaseModel
from model.model_set.models.multimodality.classification.clip.utils.model import build_model
from model.model_set.models.multimodality.classification.clip.utils.simpletokenizer import SimpleTokenizer as _Tokenizer

class clip_nvidia_amd(BaseModel):
    def __init__(self):
        super().__init__('multimodality/classification/clip')
        self.text = ["a diagram", "a dog", "a cat"]
        self.input_shape = (1, 3, 224, 224)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model_path = "model/model_set/pytorch/multimodality/classification/clip/ViT-B-32.pt"

    def get_input(self):
        # Random image stand-in with the ViT-B/32 input shape
        self.img = torch.randn(self.input_shape).to(torch.float32).to(self.device)
        # Tokenize the prompts into fixed-length sequences bracketed by
        # the start/end-of-text tokens
        _tokenizer = _Tokenizer()
        sot_token = _tokenizer.encoder["<|startoftext|>"]
        eot_token = _tokenizer.encoder["<|endoftext|>"]
        all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in self.text]
        context_length: int = 77
        truncate = False
        result = torch.zeros(len(all_tokens), context_length, dtype=torch.int)
        for i, tokens in enumerate(all_tokens):
            if len(tokens) > context_length:
                if truncate:
                    tokens = tokens[:context_length]
                    tokens[-1] = eot_token
                else:
                    raise RuntimeError(f"Input {self.text[i]} is too long for context length {context_length}")
            result[i, :len(tokens)] = torch.tensor(tokens)
        self.texts = result.to(self.device)

    def load_model(self):
        # Load the JIT archive only to read its state_dict, then rebuild
        # the model as a plain nn.Module on the target device
        jit = False
        model = torch.jit.load(self.model_path, map_location=self.device if jit else "cpu").eval()
        state_dict = None
        self.model = build_model(state_dict or model.state_dict()).to(self.device)

    def get_params_flops(self) -> list:
        # thop reports MACs, so multiply by 2 for FLOPs; return GFLOPs and
        # trainable parameters in millions
        flops, _ = profile(self.model, (self.img, self.texts), verbose=False)
        params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        return [flops / 1e9 * 2, params / 1e6]

    def inference(self):
        image_features = self.model.encode_image(self.img)
        text_features = self.model.encode_text(self.texts)
        return image_features, text_features
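The two feature tensors can be turned into zero-shot classification probabilities in the usual CLIP fashion. This post-processing is not part of the benchmark code above; the snippet below is only a usage sketch under that assumption:

model = clip_nvidia_amd()
model.load_model()
model.get_input()
with torch.no_grad():
    image_features, text_features = model.inference()
    # L2-normalize, then softmax over the scaled cosine similarities
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
print(probs)  # probabilities over ["a diagram", "a dog", "a cat"]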
Test results on an NVIDIA GeForce RTX 4080 SUPER: