<?xml version="1.0" encoding="UTF-8"?>
<page>
  <created-at type="datetime">2009-10-22T22:09:05Z</created-at>
  <description></description>
  <id type="integer">6891</id>
  <name>Home</name>
  <number type="integer">7</number>
  <person-id type="integer">3009</person-id>
  <text>= Java Binding for the OpenCL API =
This project provides an easy-to-use Java binding for the OpenCL API and is released under the [http://www.opensource.org/licenses/bsd-license.php BSD] license. [http://kenai.com/projects/gluegen/pages/Home GlueGen] is used to generate a low-level binding directly from the official Khronos C [http://www.khronos.org/registry/cl/ header] files. A hand-written high-level binding on top of the generated code provides a convenient interface and reduces verbosity to a minimum.

==Hello World - Java OpenCL==
HelloJOCL can be found in the [http://kenai.com/projects/jocl/sources/jocl-demos-git/show jocl-demos] GIT repository.
&lt;pre name=&quot;brush: java; wrap-lines: false; gutter: false&quot;&gt;
package com.mbien.opencl.demos.hellojocl;

import com.mbien.opencl.*;
import java.io.IOException;
import java.nio.FloatBuffer;
import java.util.Random;

import static java.lang.System.*;
import static com.mbien.opencl.CLBuffer.Mem.*;

/**
 * Hello Java OpenCL example. Adds all elements of buffer A to buffer B
 * and stores the result in buffer C.&lt;br/&gt;
 * Sample was inspired by the Nvidia VectorAdd example written in C/C++
 * which is bundled in the Nvidia OpenCL SDK.
 * @author Michael Bien
 */
public class HelloJOCL {

    /**
     * Builds the VectorAdd program, fills two input buffers with random
     * values, runs the kernel and prints a snapshot of the result together
     * with the elapsed time.
     * @param args command line arguments (not used)
     * @throws IOException declared for loading and building the
     *         VectorAdd.cl resource
     */
    public static void main(String[] args) throws IOException {

        int elementCount = 11444777;      // Length of arrays to process
        int localWorkSize = 256;          // Local work size dimensions
        // rounded up to the nearest multiple of the localWorkSize
        int globalWorkSize = roundUp(localWorkSize, elementCount);

        // set up
        CLContext context = CLContext.create();

        CLProgram program = context.createProgram(
            HelloJOCL.class.getResourceAsStream(&quot;VectorAdd.cl&quot;)).build();

        // buffers are sized to the padded globalWorkSize, not elementCount
        CLBuffer&lt;FloatBuffer&gt; clBufferA =
                           context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer&lt;FloatBuffer&gt; clBufferB =
                           context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer&lt;FloatBuffer&gt; clBufferC =
                           context.createFloatBuffer(globalWorkSize, WRITE_ONLY);

        // capacity() counts floats; multiply by 4 bytes per float
        out.println(&quot;used device memory: &quot;
            + (clBufferA.buffer.capacity()+clBufferB.buffer.capacity()
              +clBufferC.buffer.capacity())*4/1000000 +&quot;MB&quot;);

        // fill read buffers with random numbers
        fillBuffer(clBufferA.buffer, 12345);
        fillBuffer(clBufferB.buffer, 67890);

        // get a reference to the kernel function with the name 'VectorAdd'
        // and map the buffers to its input parameters.
        CLKernel kernel = program.getCLKernels().get(&quot;VectorAdd&quot;);
        kernel.setArg(0, clBufferA)
               .setArg(1, clBufferB)
               .setArg(2, clBufferC)
               .setArg(3, elementCount);

        // create command queue on fastest device.
        CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

        // asynchronous write of data to GPU device, blocking read later
        // to get the computed results back.
        long time = nanoTime();
        queue.putWriteBuffer(clBufferA, false)
              .putWriteBuffer(clBufferB, false)
              .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
              .putReadBuffer(clBufferC, true);
        time = nanoTime() - time;

        // cleanup all resources associated with this context.
        context.release();

        // print first few elements of the resulting buffer to the console.
        out.println(&quot;a+b=c results snapshot: &quot;);
        for(int i = 0; i &lt; 10; i++)
            out.print(clBufferC.buffer.get() + &quot;, &quot;);
        out.println(&quot;...; &quot; + clBufferC.buffer.remaining() + &quot; more&quot;);

        // nanoseconds to milliseconds
        out.println(&quot;computation took: &quot;+(time/1000000)+&quot;ms&quot;);

    }

    /**
     * Fills the remaining space of the buffer with pseudo-random values
     * in the range [0, 100) and rewinds it afterwards.
     * @param buffer the buffer to fill
     * @param seed seed for the random number generator, so that every run
     *        produces the same values
     */
    private static final void fillBuffer(FloatBuffer buffer, int seed) {
        Random rnd = new Random(seed);
        while(buffer.remaining() != 0)
            buffer.put(rnd.nextFloat()*100);
        buffer.rewind();
    }

    /**
     * Rounds globalSize up to the nearest multiple of groupSize.
     * @param groupSize the work group size
     * @param globalSize the value to round up
     * @return globalSize if it is already a multiple of groupSize,
     *         otherwise the next larger multiple
     */
    private static final int roundUp(int groupSize, int globalSize) {
        int r = globalSize % groupSize;
        if (r == 0) {
            return globalSize;
        } else {
            return globalSize + groupSize - r;
        }
    }

}
&lt;/pre&gt;
'''OpenCL source file'''
&lt;pre name=&quot;brush: java; wrap-lines: false; gutter: false&quot;&gt;

    // Element by element vector addition: c[i] = a[i] + b[i] for every i below numElements
    __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) {

        // position of this work-item in the global data array
        int gid = get_global_id(0);

        // bound check: only indices below numElements produce a result
        if (gid &lt; numElements) {
            c[gid] = a[gid] + b[gid];
        }
    }
&lt;/pre&gt;

</text>
  <text-as-html>&lt;h1&gt;&lt;a name='Java_Binding_for_the_OpenCL_API'&gt;&lt;/a&gt; Java Binding for the OpenCL API &lt;/h1&gt;
&lt;p&gt;This project provides an easy-to-use Java binding for the OpenCL API and is released under the &lt;a class='external' href=&quot;http://www.opensource.org/licenses/bsd-license.php&quot;&gt;BSD&lt;/a&gt; license. &lt;a class='external' href=&quot;http://kenai.com/projects/gluegen/pages/Home&quot;&gt;GlueGen&lt;/a&gt; is used to generate a low-level binding directly from the official Khronos C &lt;a class='external' href=&quot;http://www.khronos.org/registry/cl/&quot;&gt;header&lt;/a&gt; files. A hand-written high-level binding on top of the generated code provides a convenient interface and reduces verbosity to a minimum.

&lt;/p&gt;&lt;h2&gt;&lt;a name='Hello_World_-_Java_OpenCL'&gt;&lt;/a&gt;Hello World - Java OpenCL&lt;/h2&gt;
&lt;p&gt;HelloJOCL can be found in the &lt;a class='external' href=&quot;http://kenai.com/projects/jocl/sources/jocl-demos-git/show&quot;&gt;jocl-demos&lt;/a&gt; GIT repository.
&lt;pre name=&quot;code&quot; class=&quot;brush: java; wrap-lines: false; gutter: false&quot;&gt;
package com.mbien.opencl.demos.hellojocl;

import com.mbien.opencl.*;
import java.io.IOException;
import java.nio.FloatBuffer;
import java.util.Random;

import static java.lang.System.*;
import static com.mbien.opencl.CLBuffer.Mem.*;

/**
 * Hello Java OpenCL example. Adds all elements of buffer A to buffer B
 * and stores the result in buffer C.&amp;lt;br/&amp;gt;
 * Sample was inspired by the Nvidia VectorAdd example written in C/C++
 * which is bundled in the Nvidia OpenCL SDK.
 * @author Michael Bien
 */
public class HelloJOCL {

    /**
     * Builds the VectorAdd program, fills two input buffers with random
     * values, runs the kernel and prints a snapshot of the result together
     * with the elapsed time.
     * @param args command line arguments (not used)
     * @throws IOException declared for loading and building the
     *         VectorAdd.cl resource
     */
    public static void main(String[] args) throws IOException {

        int elementCount = 11444777;      // Length of arrays to process
        int localWorkSize = 256;          // Local work size dimensions
        // rounded up to the nearest multiple of the localWorkSize
        int globalWorkSize = roundUp(localWorkSize, elementCount);

        // set up
        CLContext context = CLContext.create();

        CLProgram program = context.createProgram(
            HelloJOCL.class.getResourceAsStream(&amp;quot;VectorAdd.cl&amp;quot;)).build();

        // buffers are sized to the padded globalWorkSize, not elementCount
        CLBuffer&amp;lt;FloatBuffer&amp;gt; clBufferA =
                           context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer&amp;lt;FloatBuffer&amp;gt; clBufferB =
                           context.createFloatBuffer(globalWorkSize, READ_ONLY);
        CLBuffer&amp;lt;FloatBuffer&amp;gt; clBufferC =
                           context.createFloatBuffer(globalWorkSize, WRITE_ONLY);

        // capacity() counts floats; multiply by 4 bytes per float
        out.println(&amp;quot;used device memory: &amp;quot;
            + (clBufferA.buffer.capacity()+clBufferB.buffer.capacity()
              +clBufferC.buffer.capacity())*4/1000000 +&amp;quot;MB&amp;quot;);

        // fill read buffers with random numbers
        fillBuffer(clBufferA.buffer, 12345);
        fillBuffer(clBufferB.buffer, 67890);

        // get a reference to the kernel function with the name 'VectorAdd'
        // and map the buffers to its input parameters.
        CLKernel kernel = program.getCLKernels().get(&amp;quot;VectorAdd&amp;quot;);
        kernel.setArg(0, clBufferA)
               .setArg(1, clBufferB)
               .setArg(2, clBufferC)
               .setArg(3, elementCount);

        // create command queue on fastest device.
        CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();

        // asynchronous write of data to GPU device, blocking read later
        // to get the computed results back.
        long time = nanoTime();
        queue.putWriteBuffer(clBufferA, false)
              .putWriteBuffer(clBufferB, false)
              .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
              .putReadBuffer(clBufferC, true);
        time = nanoTime() - time;

        // cleanup all resources associated with this context.
        context.release();

        // print first few elements of the resulting buffer to the console.
        out.println(&amp;quot;a+b=c results snapshot: &amp;quot;);
        for(int i = 0; i &amp;lt; 10; i++)
            out.print(clBufferC.buffer.get() + &amp;quot;, &amp;quot;);
        out.println(&amp;quot;...; &amp;quot; + clBufferC.buffer.remaining() + &amp;quot; more&amp;quot;);

        // nanoseconds to milliseconds
        out.println(&amp;quot;computation took: &amp;quot;+(time/1000000)+&amp;quot;ms&amp;quot;);

    }

    /**
     * Fills the remaining space of the buffer with pseudo-random values
     * in the range [0, 100) and rewinds it afterwards.
     * @param buffer the buffer to fill
     * @param seed seed for the random number generator, so that every run
     *        produces the same values
     */
    private static final void fillBuffer(FloatBuffer buffer, int seed) {
        Random rnd = new Random(seed);
        while(buffer.remaining() != 0)
            buffer.put(rnd.nextFloat()*100);
        buffer.rewind();
    }

    /**
     * Rounds globalSize up to the nearest multiple of groupSize.
     * @param groupSize the work group size
     * @param globalSize the value to round up
     * @return globalSize if it is already a multiple of groupSize,
     *         otherwise the next larger multiple
     */
    private static final int roundUp(int groupSize, int globalSize) {
        int r = globalSize % groupSize;
        if (r == 0) {
            return globalSize;
        } else {
            return globalSize + groupSize - r;
        }
    }

}
&lt;/pre&gt;&lt;b&gt;OpenCL source file&lt;/b&gt;&lt;pre name=&quot;code&quot; class=&quot;brush: java; wrap-lines: false; gutter: false&quot;&gt;

    // Element by element vector addition: c[i] = a[i] + b[i] for every i below numElements
    __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) {

        // position of this work-item in the global data array
        int gid = get_global_id(0);

        // bound check: only indices below numElements produce a result
        if (gid &amp;lt; numElements) {
            c[gid] = a[gid] + b[gid];
        }
    }
&lt;/pre&gt;

&lt;/p&gt;</text-as-html>
  <updated-at type="datetime">2009-11-08T23:53:10Z</updated-at>
  <wiki-id type="integer">44087</wiki-id>
</page>
