Last updated July 08, 2010 09:58, by Michael Bien

This project has moved to http://jocl.jogamp.org/


Java Binding for the OpenCL API

This project provides an easy-to-use Java binding for the OpenCL API. GlueGen is used to generate a low-level binding directly from the official Khronos C header files. A hand-written high-level binding on top of the generated code provides a convenient interface and reduces verbosity to a minimum.

Hello World - Java OpenCL

HelloJOCL can be found in the jocl-demos Git repository.

package com.mbien.opencl.demos.hellojocl;

import com.mbien.opencl.*;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.Random;

import static java.lang.System.*;
import static com.mbien.opencl.CLBuffer.Mem.*;

/**
 * Hello Java OpenCL example. Adds each element of buffer A to the
 * corresponding element of buffer B and stores the result in buffer C.<br/>
 * Sample was inspired by the Nvidia VectorAdd example written in C/C++
 * which is bundled in the Nvidia OpenCL SDK.
 * @author Michael Bien
 */
public class HelloJOCL {

    /**
     * Builds the {@code VectorAdd} program, runs it once over two buffers of
     * random floats and prints a snapshot of the result and the elapsed time.
     *
     * @param args ignored
     * @throws IOException declared because loading the kernel source from the
     *         classpath resource may fail
     */
    public static void main(String[] args) throws IOException {

        int elementCount = 11444777;      // Length of arrays to process
        int localWorkSize = 256;          // Local work size dimensions
        // rounded up to the nearest multiple of the localWorkSize
        int globalWorkSize = roundUp(localWorkSize, elementCount);

        // set up
        CLContext context = CLContext.create();
        try {

            CLProgram program = context.createProgram(
                HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();

            // The buffers are sized to globalWorkSize (not elementCount); the
            // kernel's bound check skips the padding elements at the end.
            CLBuffer<FloatBuffer> clBufferA =
                               context.createFloatBuffer(globalWorkSize, READ_ONLY);
            CLBuffer<FloatBuffer> clBufferB =
                               context.createFloatBuffer(globalWorkSize, READ_ONLY);
            CLBuffer<FloatBuffer> clBufferC =
                               context.createFloatBuffer(globalWorkSize, WRITE_ONLY);

            // Sum in long so the byte count cannot overflow int for large
            // element counts; 4 is the size of one float in bytes.
            long usedBytes = ((long) clBufferA.buffer.capacity()
                                   + clBufferB.buffer.capacity()
                                   + clBufferC.buffer.capacity()) * 4;
            out.println("used device memory: " + usedBytes/1000000 +"MB");

            // fill read buffers with random numbers
            fillBuffer(clBufferA.buffer, 12345);
            fillBuffer(clBufferB.buffer, 67890);

            // get a reference to the kernel function with the name 'VectorAdd'
            // and map the buffers to its input parameters.
            CLKernel kernel = program.getCLKernels().get("VectorAdd");
            kernel.setArg(0, clBufferA)
                   .setArg(1, clBufferB)
                   .setArg(2, clBufferC)
                   .setArg(3, elementCount);

            // create command queue on fastest device.
            CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

            // asynchronous write of data to GPU device, blocking read later
            // to get the computed results back.
            long time = nanoTime();
            queue.putWriteBuffer(clBufferA, false)
                  .putWriteBuffer(clBufferB, false)
                  .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
                  .putReadBuffer(clBufferC, true);
            time = nanoTime() - time;

            // print first few elements of the resulting buffer to the console.
            // Never read more elements than the buffer actually holds.
            out.println("a+b=c results snapshot: ");
            int previewCount = Math.min(10, clBufferC.buffer.remaining());
            for (int i = 0; i < previewCount; i++) {
                out.print(clBufferC.buffer.get() + ", ");
            }
            out.println("...; " + clBufferC.buffer.remaining() + " more");

            out.println("computation took: "+(time/1000000)+"ms");

        } finally {
            // cleanup all resources associated with this context, even when
            // building the program or running the kernel failed.
            context.release();
        }

    }

    /**
     * Fills the remaining space of {@code buffer} with pseudo-random floats
     * in the range [0, 100) and rewinds it so it can be read from the start.
     *
     * @param buffer the buffer to fill; filled from its current position
     * @param seed   seed for the random generator, making the content reproducible
     */
    private static final void fillBuffer(FloatBuffer buffer, int seed) {
        Random rnd = new Random(seed);
        while (buffer.hasRemaining()) {
            buffer.put(rnd.nextFloat()*100);
        }
        buffer.rewind();
    }

    /**
     * Rounds {@code globalSize} up to the nearest multiple of {@code groupSize}.
     *
     * @param groupSize  the work group size the result must be a multiple of
     * @param globalSize the value to round up
     * @return {@code globalSize} if it is already a multiple of
     *         {@code groupSize}, otherwise the next larger multiple
     */
    private static final int roundUp(int groupSize, int globalSize) {
        int r = globalSize % groupSize;
        if (r == 0) {
            return globalSize;
        } else {
            return globalSize + groupSize - r;
        }
    }

}
OpenCL source file

    // OpenCL kernel: element-by-element vector addition, c[i] = a[i] + b[i]
    __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) {

        // position of this work-item in the global data array
        int gid = get_global_id(0);

        // work-items whose index is not below numElements do nothing
        if (gid < numElements) {
            // add the vector elements
            c[gid] = a[gid] + b[gid];
        }
    }

  • Mysql
  • Glassfish
  • Jruby
  • Rails
  • Nblogo
Terms of Use; Privacy Policy;
© 2010, Oracle Corporation and/or its affiliates
(revision 20100521.d19488a)
 
 
loading
Please Confirm