在Ubuntu或其近親(Lubuntu、Kubuntu、Mint等)上編寫OpenCL程序也不會太難。由於本例用的是AMD APP SDK,因此需要AMD的GPU以及相關驅動。首先,去AMD官網下載GPU驅動——AMD Catalyst。如果你用的是APU並且還有一塊獨立顯卡,可以通過AMD Catalyst Control Center選擇使用哪個GPU。以我現在用的聯想Z475筆記本為例,它搭載了AMD APU A6-3420M以及一塊AMD Radeon HD 7400M獨立顯卡;但相比之下,還是APU自帶的Radeon HD 6620G性能更強一些,因此我這邊設置的是採用AMD Radeon HD 6620G。
在Linux下,AMD官方的GPU驅動是.run文件,只需使用sudo sh xxx.run即可安裝。安裝時采用默認安裝即可。
然後去developer.amd.com開發者網站下載AMD APP SDK。下載完成之後,將lib目錄裡的動態庫文件(xxx.so)和include目錄裡的頭文件取出來,並在你的OpenCL工程中把頭文件路徑(-I)、庫搜索路徑(-L)以及要鏈接的動態庫(-l)都設置好。使用-l時,如果動態庫文件的後綴名為.so.1,那麼得把文件名末尾的.1去掉(或者建立一個不帶.1的符號鏈接,例如讓libOpenCL.so指向libOpenCL.so.1),因為-l只能鏈接.a、.lib、.so等這些後綴的庫文件。
下面我們將舉一個最簡單的例子,首先看主機端代碼:
/*
 ============================================================================
 Name        : OpenCLTest.c
 Author      : Zenny Chen
 Version     :
 Copyright   : Your copyright notice
 Description : Hello World in C, Ansi-style
 ============================================================================
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <CL/cl.h>
static int GetCurrentLocationFilePath(
char pDst[
512 ],
const char *
filename)
{ if (pDst == NULL || filename ==
NULL) return 0 ; int size = readlink(
" /proc/self/exe " , pDst,
512 ); while (pDst[size -
1 ] !=
' / ' )size --
;strcpy( &
pDst[size], filename); int retSize = strlen(filename) +
size;pDst[retSize] =
' \0 ' ; return retSize;
} int main(
void )
{ /* Step1: Getting platforms and choose an available one. */ cl_uint numPlatforms; // the NO. of platforms cl_int status = clGetPlatformIDs(
0 , NULL, &
numPlatforms); if (status !=
CL_SUCCESS){puts( " Error: Getting platforms! " ); return 0 ;}cl_platform_id platforms[ 16 ]; /* For clarity, choose the first available platform. */ if (numPlatforms >
0 ){status =
clGetPlatformIDs(numPlatforms, platforms, NULL); if (status !=
CL_SUCCESS){puts( " Failed to get platform IDs " ); return 0 ;}} /* Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device. */ cl_uint numDevices =
0 ;cl_device_id devices[ 16 ];clGetDeviceIDs(platforms[ 0 ], CL_DEVICE_TYPE_GPU,
0 , NULL, &
numDevices); if (numDevices ==
0 )
// no GPU available.
{puts( " No devices available! " ); return 0 ;} else {printf( " The number of available devices is: %u\n " , numDevices);clGetDeviceIDs(platforms[ 0 ], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);} /* Step 3: Create context. */ cl_context context = clCreateContext(NULL,
1 , devices,NULL,NULL,NULL); /* Step 4: Creating command queue associate with the context. */ cl_command_queue commandQueue = clCreateCommandQueue(context, devices[
0 ],
0 , NULL); /* Step 5: Create program object */ char filePath[
512 ];GetCurrentLocationFilePath(filePath, " test.cl " );FILE *fp = fopen(filePath,
" r " ); if (fp ==
NULL){puts( " OpenCL kernel source file open failed! " ); return 0 ;}fseek(fp, 0 , SEEK_END); long fileLength =
ftell(fp);fseek(fp, 0 , SEEK_SET); char *source = (
char *)malloc(fileLength +
1 );fread(source, 1 , fileLength, fp);fclose(fp);size_t sourceSize[] =
{fileLength};cl_program program = clCreateProgramWithSource(context,
1 , (
const char **)&
source, sourceSize, NULL);free(source); if (program ==
NULL){puts( " Failed to create the program! " ); return 0 ;} /* Step 6: Build program. */ status = clBuildProgram(program,
1 ,devices,NULL,NULL,NULL); if (status !=
CL_SUCCESS){puts( " Failed to build the program! " ); return 0 ;} /* Step 7: Initial input,output for the host and create memory objects for the kernel */ int input[
128 ]; for (
int i =
0 ; i <
128 ; i++
)input[i] = i +
1 ;cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
sizeof (input), input, NULL);cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY ,
sizeof (input), NULL, NULL); /* Step 8: Create kernel object */ cl_kernel kernel = clCreateKernel(program,
" test " , NULL); /* Step 9: Sets Kernel arguments. */ status = clSetKernelArg(kernel,
0 ,
sizeof (cl_mem), &
inputBuffer);status = clSetKernelArg(kernel,
1 ,
sizeof (cl_mem), &
outputBuffer); /* Step 10: Running the kernel. */ size_t global_work_size[ 1 ] = {
128 };status = clEnqueueNDRangeKernel(commandQueue, kernel,
1 , NULL, global_work_size, NULL,
0 , NULL, NULL); /* Step 11: Read the cout put back to host memory. */ int output[
128 ];status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE,
0 ,
sizeof (input), output,
0 , NULL, NULL); /* Step 12: Clean the resources. */ status = clReleaseKernel(kernel);
// Release kernel. status = clReleaseProgram(program);
// Release the program object. status = clReleaseMemObject(inputBuffer);
// Release mem object. status =
clReleaseMemObject(outputBuffer);status = clReleaseCommandQueue(commandQueue);
// Release Command queue. status = clReleaseContext(context);
// Release context. for (
int i =
0 ; i <
128 ; i++
){ if (output[i] != i +
2 ){printf( " Error occurred @%d! " , i); return 0 ;}}puts( " Pass! " ); return 1 ;
} 在編譯選項中,使用-std=gnu99或-std=gnu11。上述代碼為純C語言,因此即便你沒有安裝g++也完全沒關系。
下面看看內核源代碼:
/*
 ============================================================================
 Name        : test.cl
 Author      : Zenny Chen
 Description : Simple OpenCL kernel source. Each work-item reads one element
               of the input buffer and stores that value plus one into the
               same position of the output buffer.
 ============================================================================
 */

__kernel void test(__global int *in, __global int *out)
{
    /* One work-item per element: the global ID selects the element. */
    const int gid = (int)get_global_id(0);

    const int value = in[gid];
    out[gid] = value + 1;
}
將此文件放在可執行文件相同路徑下,然后我們就能正常運行了。
總結
以上是生活随笔為你收集整理的《Ubuntu下使用AMD APP编写OpenCL程序》的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。