运行时环境层

以下示例代码使用 ROCm 提供的 HIP Runtime API 演示运行时环境层的常用功能:枚举设备、查询设备属性、查询显存使用情况,以及通过 hipSetDevice 切换当前设备(在 HIP 中,上下文由运行时随当前设备隐式管理,一般无需显式创建)。

示例代码如下:

#include <hip/hip_runtime.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * HIP_CHECK(cmd): evaluate a HIP runtime call exactly once and abort on failure.
 *
 * If the result differs from hipSuccess, the text returned by
 * hipGetErrorString() together with the file and line of the call site is
 * written to stderr, and the process exits with EXIT_FAILURE.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and remains valid inside an unbraced if/else. The local
 * carries a macro-specific name so it cannot collide with a caller's `err`.
 * Requires <stdio.h> (fprintf) and <stdlib.h> (exit, EXIT_FAILURE).
 */
#define HIP_CHECK(cmd) \
    do { \
        hipError_t hip_check_err_ = (cmd); \
        if (hip_check_err_ != hipSuccess) { \
            fprintf(stderr, "Error: '%s' at %s:%d\n", \
                    hipGetErrorString(hip_check_err_), __FILE__, __LINE__); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)

/*
 * Print a summary of one HIP device's properties to stdout.
 *
 * deviceId is passed straight to hipGetDeviceProperties(); if that call
 * does not return hipSuccess, HIP_CHECK reports the error and terminates
 * the process, so this function only returns after a successful query.
 */
void printDeviceProperties(int deviceId) {
    hipDeviceProp_t props;
    HIP_CHECK(hipGetDeviceProperties(&props, deviceId));

    // Identity.
    printf("Device ID: %d\n", deviceId);
    printf("Device name: %s\n", props.name);

    // Memory capacities.
    printf("Total global memory: %zu bytes\n", props.totalGlobalMem);
    printf("Shared memory per block: %zu bytes\n", props.sharedMemPerBlock);

    // Launch limits.
    const int *grid = props.maxGridSize;
    printf("Warp size: %d\n", props.warpSize);
    printf("Max threads per block: %d\n", props.maxThreadsPerBlock);
    printf("Max grid size: %d x %d x %d\n", grid[0], grid[1], grid[2]);

    // Memory subsystem and compute-unit count.
    printf("Memory clock rate: %d kHz\n", props.memoryClockRate);
    printf("Memory bus width: %d bits\n", props.memoryBusWidth);
    printf("Number of multiprocessors: %d\n", props.multiProcessorCount);
}

/*
 * Print the total and free memory figures reported by hipMemGetInfo().
 *
 * Takes no device argument; main() calls hipSetDevice() before invoking
 * this function. A failing hipMemGetInfo() call terminates the process
 * via HIP_CHECK.
 */
void printMemoryUsage() {
    size_t bytesFree = 0;
    size_t bytesTotal = 0;

    // hipMemGetInfo takes the free count first, then the total.
    HIP_CHECK(hipMemGetInfo(&bytesFree, &bytesTotal));

    printf("Total memory: %zu bytes\n", bytesTotal);
    printf("Free memory: %zu bytes\n", bytesFree);
}

/*
 * Entry point: report how many HIP devices are visible, then for each one
 * select it and print its properties and memory usage, followed by a
 * blank line. Any failing HIP call terminates the process via HIP_CHECK.
 */
int main() {
    int numDevices = 0;

    // Ask the runtime how many devices are available.
    HIP_CHECK(hipGetDeviceCount(&numDevices));
    printf("Number of devices: %d\n", numDevices);

    int dev = 0;
    while (dev < numDevices) {
        // Make this device current before querying it.
        HIP_CHECK(hipSetDevice(dev));

        // Device properties first, then memory usage.
        printDeviceProperties(dev);
        printMemoryUsage();

        // Blank line separates the reports of consecutive devices.
        printf("\n");
        ++dev;
    }

    return 0;
}

运行结果(以 Radeon RX 7900 XTX 为例,实际输出取决于硬件):

Number of devices: 1
Device ID: 0
Device name: Radeon RX 7900 XTX
Total global memory: 25753026560 bytes
Shared memory per block: 65536 bytes
Warp size: 32
Max threads per block: 1024
Max grid size: 2147483647 x 65536 x 65536
Memory clock rate: 1249000 kHz
Memory bus width: 384 bits
Number of multiprocessors: 48
Total memory: 25753026560 bytes
Free memory: 25715277824 bytes