#include "stdafx.h"
#define NUM_INTS 32
// includes, CUDA
#include <cuda_runtime.h>
#include <cuda.h>
// includes, Project
// Bundle of CUDA driver-API handles that InitCUDAContext fills in and
// ThreadProc later uses to launch the kernel.
typedef struct _CUDAContext_st {
// Driver context created by cuCtxCreate; pushed/popped around the launch.
CUcontext hcuContext;
// Module loaded from "iradon_kernel.cubin".
CUmodule hcuModule;
// Handle of the "CUDA_iradon" function looked up in hcuModule.
CUfunction hcuFunction;
// Device allocation of NUM_INTS ints made with cuMemAlloc.
CUdeviceptr dptr;
// Device ordinal that was passed to InitCUDAContext.
int deviceID;
} CUDAContext;// CUDA context structure
// Forward declaration (C linkage) of the per-device initialiser defined later
// in this file; InitCUDA calls it once for every device it enumerates.
extern "C" void InitCUDAContext( CUDAContext *pContext, CUdevice hcuDevice, int deviceID );
// Initialise the CUDA driver API and set up a context for the available
// device(s).
//
// Parameters:
//   cu_context - caller-owned structure that receives the handles produced by
//                InitCUDAContext. Must not be NULL.
//
// Returns:
//   0 on success, 1 on any failure (NULL argument, driver initialisation
//   failure, no device present, device query failure, or a context that could
//   not be fully initialised).
//
// NOTE(review): every enumerated device is initialised into the same
// *cu_context, so on a multi-GPU machine the caller ends up with the handles of
// the last device and the earlier contexts are not released here. The original
// author assumed a single GPU; confirm before running on multi-GPU hosts.
extern "C" int InitCUDA(CUDAContext * cu_context)
{
    if ( !cu_context )
        return 1;

    // The driver API must be initialised before any other cu* call.
    if ( CUDA_SUCCESS != cuInit(0) )
        return 1;

    // Number of CUDA-capable devices.
    int cDevices = 0;
    if ( CUDA_SUCCESS != cuDeviceGetCount( &cDevices ) )
        return 1;
    if ( cDevices <= 0 )
        return 1; // no hardware device

    for ( int iDevice = 0; iDevice < cDevices; iDevice++ ) {
        CUdevice hcuDevice = 0;
        char szName[256];

        // Device handle for this ordinal.
        if ( CUDA_SUCCESS != cuDeviceGet( &hcuDevice, iDevice ) )
            return 1;

        // Device name (at most sizeof(szName) bytes); a failure here is
        // treated as an unusable device.
        if ( CUDA_SUCCESS != cuDeviceGetName( szName, (int)sizeof(szName), hcuDevice ) )
            return 1;

        // Create the context, load the module and look up the kernel.
        InitCUDAContext( cu_context, hcuDevice, iDevice );

        // InitCUDAContext returns void, so success is judged from the handles
        // it stored: both are left at 0 when context creation or the kernel
        // lookup did not succeed.
        if ( !cu_context->hcuContext || !cu_context->hcuFunction )
            return 1;
    }

    // Previously control fell off the end of this non-void function here.
    return 0;
}
//////////////////////////////////////////////////////
// //
// pContext: user-defined structure that holds //
// the CUDA context handles //
// //
// Initialise the CUDA context //
// //
// //
// //
// //
// //
//////////////////////////////////////////////////////
// Create a CUDA context on hcuDevice, load "iradon_kernel.cubin", look up the
// "CUDA_iradon" function, allocate NUM_INTS ints of device memory, and store
// the resulting handles in *pContext.
//
// Parameters:
//   pContext  - structure to fill in; ignored when NULL.
//   hcuDevice - device handle obtained from cuDeviceGet.
//   deviceID  - device ordinal, copied into pContext->deviceID.
//
// The function returns void, so failure is reported through the structure:
// if any step fails, the context created here is destroyed and all handle
// fields are left at 0 (deviceID is still recorded). On success the new
// context has been popped from the calling thread, so the caller must push it
// again (cuCtxPushCurrent) before using it.
void InitCUDAContext( CUDAContext *pContext, CUdevice hcuDevice, int deviceID )
{
    if ( pContext == NULL )
        return;

    // Start from a well-defined "not initialised" state so that callers never
    // see stale or indeterminate handles after a failure.
    pContext->hcuContext = 0;
    pContext->hcuModule = 0;
    pContext->hcuFunction = 0;
    pContext->dptr = 0;
    pContext->deviceID = deviceID;

    CUcontext hcuContext = 0;
    CUmodule hcuModule = 0;
    CUfunction hcuFunction = 0;
    CUdeviceptr dptr = 0;

    // cuCtxCreate also makes the new context current to this thread.
    CUresult status = cuCtxCreate( &hcuContext, 0, hcuDevice );
    if ( CUDA_SUCCESS != status )
        return; // nothing was created, nothing to clean up

    // Load the module into the current context.
    status = cuModuleLoad( &hcuModule, "iradon_kernel.cubin" );

    // Look up the kernel entry point inside the module.
    if ( CUDA_SUCCESS == status )
        status = cuModuleGetFunction( &hcuFunction, hcuModule, "CUDA_iradon" );

    // Device buffer that is handed back through pContext->dptr.
    if ( CUDA_SUCCESS == status )
        status = cuMemAlloc( &dptr, NUM_INTS*sizeof(int) );

    // Here we must release the CUDA context from the thread context.
    if ( CUDA_SUCCESS == status )
        status = cuCtxPopCurrent( NULL );

    if ( CUDA_SUCCESS != status ) {
        // Destroying the context releases the resources associated with it
        // (the loaded module and the allocation), so no handle is leaked.
        cuCtxDestroy( hcuContext );
        return;
    }

    pContext->hcuContext = hcuContext;
    pContext->hcuModule = hcuModule;
    pContext->hcuFunction = hcuFunction;
    pContext->dptr = dptr;
}
// Launch the kernel stored in pParams->hcuFunction inside pParams->hcuContext.
//
// Parameters:
//   pParams - handles prepared by InitCUDAContext; ignored when NULL.
//
// The context is pushed onto the calling thread, the kernel is configured with
// a 32x1x1 block and two int arguments (both 10), launched with cuLaunch, and
// the context is popped again. If any configuration step fails the launch is
// skipped. The function returns void, so failures are not reported to the
// caller.
//
// NOTE(review): nothing here waits for the kernel to finish or reads results
// back; the device-to-host copy below is commented out in the original.
void ThreadProc(CUDAContext *pParams)
{
    if ( pParams == NULL )
        return;

    // cuCtxPushCurrent: attach the stored CUDA context to the calling thread.
    CUresult status = cuCtxPushCurrent( pParams->hcuContext );
    if ( CUDA_SUCCESS != status ) {
        // Nothing was pushed, so popping here could detach an unrelated
        // context that the thread already had current.
        return;
    }

    // Thread-block dimensions (x, y, z) for the kernel.
    status = cuFuncSetBlockShape( pParams->hcuFunction, 32, 1, 1 );

    // Integer kernel arguments: cuParamSeti(func, byte offset, value).
    if ( CUDA_SUCCESS == status )
        status = cuParamSeti( pParams->hcuFunction, 0, 10 );
    if ( CUDA_SUCCESS == status )
        status = cuParamSeti( pParams->hcuFunction, sizeof(int), 10 );

    // Total size in bytes of the argument block set above.
    if ( CUDA_SUCCESS == status )
        status = cuParamSetSize( pParams->hcuFunction, sizeof(int)*2 );

    // cuLaunch: kick off the kernel, but only with a fully configured function.
    if ( CUDA_SUCCESS == status )
        status = cuLaunch( pParams->hcuFunction );
    (void)status; // no channel to report it; see header comment

    //if ( CUDA_SUCCESS == cuMemcpyDtoH( pInt, pParams->dptr, NUM_INTS*sizeof(int) ) );
    //cuMemFree( pParams->dptr );

    // cuCtxPopCurrent: detach the context pushed above from the calling thread.
    cuCtxPopCurrent( NULL );
}
// Convenience entry point: initialise CUDA and launch the kernel once.
//
// Takes no arguments and returns nothing. When initialisation does not yield
// a usable context and kernel handle, the launch is skipped.
void InitCUDA_Radon()
{
    // Zero-initialise so that every handle is 0 if InitCUDA bails out before
    // the structure has been filled in.
    CUDAContext cuda_context = {};

    (void)InitCUDA(&cuda_context);

    // Success is judged from the handles that were actually stored: without a
    // context and a kernel function there is nothing to launch.
    if ( !cuda_context.hcuContext || !cuda_context.hcuFunction )
        return;

    ThreadProc(&cuda_context);
}