An OpenCL example that runs on the Qualcomm MSM8953 platform

Keywords: Android

I found several examples on the Internet, but none of them ran when I tested them. Here is an OpenCL example that I have tested and confirmed to run on the MSM8953. This example is itself a modified version of other people's code. The specific code is as follows:
1.Android.mk
Code path
: vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/Android.mk

# Builds the "opencl" executable from main.cpp.
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE_TAGS := optional
LOCAL_MODULE    := opencl
LOCAL_SRC_FILES := main.cpp
LOCAL_C_INCLUDES := $(LOCAL_PATH)
# Extra include root so that main.cpp's #include "CL/cl.h" resolves.
LOCAL_C_INCLUDES += $(TARGET_OUT_INTERMEDIATES)/include/adreno/
# Author's notes: where the OpenCL header and library were found in the tree.
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/obj/include/adreno/CL/cl.h
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/system/vendor/lib64/libOpenCL.so
# Link the executable against libOpenCL.
LOCAL_LDFLAGS += -lOpenCL
include $(BUILD_EXECUTABLE)

2.cl_kernel2.cl
Code path
: vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/cl_kernel2.cl

/*
 * Element-wise integer addition.
 * Each work-item handles the element selected by its global ID in
 * dimension 0: dst[gid] = src1[gid] + src2[gid].
 */
__kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
    int gid = get_global_id(0);
    int lhs = src1[gid];
    int rhs = src2[gid];

    dst[gid] = lhs + rhs;
}

3.main.cpp
Code path
: vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/main.cpp

#include <stdio.h>
#include <stdlib.h>
#include "readyuv.h"
#include "CL/cl.h"
//#include "CL/cl_platform.h"
#include <iostream>
using namespace std;
int main(void)
{
    const int  array_size = 1024;
    cl_uint numPlatforms = 0;
    cl_platform_id platform = nullptr;
    cl_context context = nullptr;
    cl_command_queue commandQueue = nullptr;
    cl_program program = nullptr;
    cl_mem input1MemObj = nullptr;
    cl_mem input2MemObj = nullptr;
    cl_mem outputMemObj = nullptr;
    cl_kernel kernel = nullptr;
    //Step 1. Query OpenCL platform collection
    /*
     * This function is usually called twice:
     *   The first call to this function is to get the number of available platforms and then allocate memory space for platform objects
     *   The second call is used to get the platform object
     */
    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (status != CL_SUCCESS)
    {
        cout << "Error: Getting platforms!" << endl;
        return -1;
    }
    if (numPlatforms > 0)
    {
        cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms* sizeof(cl_platform_id));
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        platform = platforms[0];
    }
    else
    {
        puts("Your system does not have any OpenCL platform!");
        return -1;
    }

    cl_uint        numDevices = 0;
    cl_device_id   *devices;
    cl_int errcode_ret ;

    //setp2. Get a list of available devices on a platform
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
    if (numDevices == 0) //no GPU available.        
    {
        cout << "No GPU device available." << endl;
        cout << "Choose CPU as default device." << endl;
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
    }
    else
    {
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
        cout << "The number of devices: " << numDevices << endl;
    }
    //Step 3. Create an OpenCL context
    /*
    *Context is used by OpenCL runtime to manage things like command queues, memory queues, program objects, and kernel objects, and to execute kernel functions on one or more devices specified in the context.
    */
    context = clCreateContext(NULL, 1, devices, NULL, NULL, &errcode_ret);
    if(errcode_ret == CL_SUCCESS)
        cout << "craet OpenCL runtime sucess." << endl;
    else
        return -1;
    //Step 4. Create command queue
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &errcode_ret);
    if(errcode_ret == CL_SUCCESS)
        cout << "craet CommandQueue sucess." << endl;
    else
        return -1;

    char *kernelCodeBuffer = nullptr;
    char *Buffer = nullptr;
    const char *aSource = nullptr;
    size_t kernelLength = 0;
    size_t global_work_size[1] = { array_size };

    // Read the kernel code to the buffer
    FILE *fp = fopen("cl_kernel2.cl", "rb");
    if (fp == nullptr)
    {
        puts("The kernel file not found!");
        goto RELEASE_RESOURCES;
    }
    fseek(fp, 0, SEEK_END);
    kernelLength = ftell(fp);
    cout << "kernelLength : " << kernelLength << endl;
    fseek(fp, 0, SEEK_SET);
    kernelCodeBuffer = (char*)malloc(kernelLength + 1);
    Buffer = (char*)malloc(kernelLength + 1);
    fread(kernelCodeBuffer, 1, kernelLength, fp);
    kernelCodeBuffer[kernelLength] = '\0';
    fclose(fp);
    aSource = kernelCodeBuffer;
    strcpy(Buffer,aSource);
    cout << "kernelCodeBuffer : " << Buffer  << endl;

    //Step 5. Create program object
    program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL);

    //step6. Compiler object
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);


    // Do initialization
    int i;
    int input1Buffer[array_size];
    int input2Buffer[array_size];
    int outputBuffer[array_size];
    for (i = 0; i < array_size; i++)
        input1Buffer[i] = input2Buffer[i] = i + 1;
    memset(outputBuffer, 0, sizeof(outputBuffer));

    // Create mmory object
    input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input1Buffer, nullptr);
    input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input2Buffer, nullptr);
    outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, array_size * sizeof(int), NULL, NULL);

    //Step 7. Create kernel object
    kernel = clCreateKernel(program, "cl_add", NULL);

    //Step 8. Set kernel parameters. If you want to execute the kernel, you must set kernel parameters
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputMemObj);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&input1MemObj);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&input2MemObj);

    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*
 * clEnqueueNDRangeKernel(
 *
 * cl_command_queue queue,
 *
 * cl_kernel kernel,
 *
 * cl_uint work_dims,
 *  //if you deal with image object, you should probably set work_dims equal 2 or 3. But for buffer objects, you can set whatever dimensionality you think best.
 *  //For a buffer object containing a two-dimensional matrix, you might set work-dims equal 2.
 *
 * const size_t *global_work_offset,
 *  //the global ID offset in each dimension
 *
 * const size_t *global_work_size, 
 *  //the number of work items in each dimension
 * const size_t *local_work_size,
 *  //the number of work_items in a work_group,in each dimension
 * cl_uint num_events,
 *
 * const cl_event *wait_list,
 *
 * cl_event *event)
 */
/*
 * __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
 * {
 *     int index = get_global_id(0);//get_global_id Gets the thread index from 0 to global work size - 1
 *     dst[index] = src1[index] + src2[index];
 * }        
 */
    clFinish(commandQueue);

    //Copy result to memory is required after execution in GPU
    status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[0] * sizeof(int), outputBuffer, 0, NULL, NULL);

    printf("Veryfy the rsults... ");
    for (i = 0; i < array_size; i++)
    {
        if (outputBuffer[i] != (i + 1) * 2)
        {
            puts("Results not correct!");
            break;
        }
    }
    if (i == array_size)
        puts("Correct!");

RELEASE_RESOURCES:

    status = clReleaseKernel(kernel);//*Release kernel.
    status = clReleaseProgram(program);    //Release the program object.
    status = clReleaseMemObject(input1MemObj);//Release mem object.
    status = clReleaseMemObject(input2MemObj);
    status = clReleaseMemObject(outputMemObj);
    status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
    status = clReleaseContext(context);//Release context.
    free(devices);
    getchar();
    return 0;
}
/*
 * clGetPlatformIDs---------------------------Get platform ID
 *
 * clGetDeviceIDs-----------------------------Get device ID
 *
 * clCreateContext----------------------------Create context
 *
 * clCreateCommandQueue-----------------------Create command queue
 *
 * clCreateBuffer-----------------------------Create device memory
 *
 * clCreateProgramWithSource------------------Create program
 *
 * clBuildProgram-----------------------------Build (compile) program
 *
 * clGetProgramBuildInfo----------------------Get compilation information
 *
 * clCreateKernel-----------------------------Create kernel
 *
 * clSetKernelArg-----------------------------Set kernel arguments
 *
 * clEnqueueNDRangeKernel---------------------Execute kernel
 *
 * clEnqueueReadBuffer------------------------Read device memory
 *
 * clReleaseMemObject-------------------------Freeing memory objects
 *
 * clReleaseKernel----------------------------Release kernel
 *
 * clReleaseCommandQueue----------------------Release command queue
 *
 * clReleaseContext---------------------------Release context
 */

After executing the compile command, verify as follows
1, Push files to platform
1.adb root
2.adb remount
3.adb push out\target\product\msm8953_64\system\bin\opencl data
4.adb push vendor\qcom\proprietary\mm-camera\mm-camera2\opencl\cl_kernel2.cl data
2, Execute command
1 ) chmod 777 data/opencl
2) cd data/ (the program opens cl_kernel2.cl by relative path, so if this step is skipped it reports: The kernel file not found!)
3 ) ./opencl

After execution, the success log is as follows

The number of devices: 1
craet OpenCL runtime sucess.
craet CommandQueue sucess.
kernelLength : 162
kernelCodeBuffer : __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
    int index = get_global_id(0);
    dst[index] = src1[index] + src2[index];
}

Veryfy the rsults... Correct!

Posted by ckk on Sat, 04 Jan 2020 18:04:59 -0800