aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace
diff options
context:
space:
mode:
authorGustav Simonsson <gustav.simonsson@gmail.com>2015-06-12 13:45:23 +0800
committerGustav Simonsson <gustav.simonsson@gmail.com>2015-10-07 19:19:30 +0800
commitec6a548ee3555813d83f86f82bd25694bfd9c303 (patch)
tree9d0ec5022dc952f7b1053b85382df07347bc48f0 /Godeps/_workspace
parent8b865fa9bf75e728d5d76f5a1460e0c37d8b5f9e (diff)
downloadgo-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar.gz
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar.bz2
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar.lz
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar.xz
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.tar.zst
go-tangerine-ec6a548ee3555813d83f86f82bd25694bfd9c303.zip
all: Add GPU mining, disabled by default
Diffstat (limited to 'Godeps/_workspace')
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go26
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go254
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go161
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go510
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go51
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h1210
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h315
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h158
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h65
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h1278
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h43
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go83
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go127
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go7
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go20
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go83
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go105
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go193
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go487
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go71
-rw-r--r--Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go45
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go24
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go629
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go600
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go6
25 files changed, 6538 insertions, 13 deletions
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
new file mode 100644
index 000000000..3d577b2b6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
@@ -0,0 +1,26 @@
+/*
+Package cl provides a binding to the OpenCL api. It's mostly a low-level
+wrapper that avoids adding functionality while still making the interface
+a little more friendly and easy to use.
+
+Resource life-cycle management:
+
+For any CL object that gets created (buffer, queue, kernel, etc..) you should
+call object.Release() when finished with it to free the CL resources. This
+explicitely calls the related clXXXRelease method for the type. However,
+as a fallback there is a finalizer set for every resource item that takes
+care of it (eventually) if Release isn't called. In this way you can have
+better control over the life cycle of resources while having a fall back
+to avoid leaks. This is similar to how file handles and such are handled
+in the Go standard packages.
+*/
+package cl
+
+// #include "headers/1.2/opencl.h"
+// #cgo CFLAGS: -Iheaders/1.2
+// #cgo darwin LDFLAGS: -framework OpenCL
+// #cgo linux LDFLAGS: -lOpenCL
+import "C"
+import "errors"
+
+var ErrUnsupported = errors.New("cl: unsupported")
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
new file mode 100644
index 000000000..7659ce14f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
@@ -0,0 +1,254 @@
+package cl
+
+import (
+ "math/rand"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+var kernelSource = `
+__kernel void square(
+ __global float* input,
+ __global float* output,
+ const unsigned int count)
+{
+ int i = get_global_id(0);
+ if(i < count)
+ output[i] = input[i] * input[i];
+}
+`
+
+func getObjectStrings(object interface{}) map[string]string {
+ v := reflect.ValueOf(object)
+ t := reflect.TypeOf(object)
+
+ strs := make(map[string]string)
+
+ numMethods := t.NumMethod()
+ for i := 0; i < numMethods; i++ {
+ method := t.Method(i)
+ if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String {
+ // this is a string-returning method with (presumably) only a pointer receiver parameter
+ // call it
+ outs := v.Method(i).Call([]reflect.Value{})
+ // put the result in our map
+ strs[method.Name] = (outs[0].Interface()).(string)
+ }
+ }
+
+ return strs
+}
+
+func TestPlatformStringsContainNoNULs(t *testing.T) {
+ platforms, err := GetPlatforms()
+ if err != nil {
+ t.Fatalf("Failed to get platforms: %+v", err)
+ }
+
+ for _, p := range platforms {
+ for key, value := range getObjectStrings(p) {
+ if strings.Contains(value, "\x00") {
+ t.Fatalf("platform string %q = %+q contains NUL", key, value)
+ }
+ }
+ }
+}
+
+func TestDeviceStringsContainNoNULs(t *testing.T) {
+ platforms, err := GetPlatforms()
+ if err != nil {
+ t.Fatalf("Failed to get platforms: %+v", err)
+ }
+
+ for _, p := range platforms {
+ devs, err := p.GetDevices(DeviceTypeAll)
+ if err != nil {
+ t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err)
+ }
+
+ for _, d := range devs {
+ for key, value := range getObjectStrings(d) {
+ if strings.Contains(value, "\x00") {
+ t.Fatalf("device string %q = %+q contains NUL", key, value)
+ }
+ }
+ }
+ }
+}
+
+func TestHello(t *testing.T) {
+ var data [1024]float32
+ for i := 0; i < len(data); i++ {
+ data[i] = rand.Float32()
+ }
+
+ platforms, err := GetPlatforms()
+ if err != nil {
+ t.Fatalf("Failed to get platforms: %+v", err)
+ }
+ for i, p := range platforms {
+ t.Logf("Platform %d:", i)
+ t.Logf(" Name: %s", p.Name())
+ t.Logf(" Vendor: %s", p.Vendor())
+ t.Logf(" Profile: %s", p.Profile())
+ t.Logf(" Version: %s", p.Version())
+ t.Logf(" Extensions: %s", p.Extensions())
+ }
+ platform := platforms[0]
+
+ devices, err := platform.GetDevices(DeviceTypeAll)
+ if err != nil {
+ t.Fatalf("Failed to get devices: %+v", err)
+ }
+ if len(devices) == 0 {
+ t.Fatalf("GetDevices returned no devices")
+ }
+ deviceIndex := -1
+ for i, d := range devices {
+ if deviceIndex < 0 && d.Type() == DeviceTypeGPU {
+ deviceIndex = i
+ }
+ t.Logf("Device %d (%s): %s", i, d.Type(), d.Name())
+ t.Logf(" Address Bits: %d", d.AddressBits())
+ t.Logf(" Available: %+v", d.Available())
+ // t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
+ t.Logf(" Compiler Available: %+v", d.CompilerAvailable())
+ t.Logf(" Double FP Config: %s", d.DoubleFPConfig())
+ t.Logf(" Driver Version: %s", d.DriverVersion())
+ t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport())
+ t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities())
+ t.Logf(" Extensions: %s", d.Extensions())
+ t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType())
+ t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024)
+ t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024))
+ t.Logf(" Half FP Config: %s", d.HalfFPConfig())
+ t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory())
+ t.Logf(" Image Support: %+v", d.ImageSupport())
+ t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight())
+ t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth())
+ // t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
+ // t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
+ // t.Logf(" Linker Available: %+v", d.LinkerAvailable())
+ t.Logf(" Little Endian: %+v", d.EndianLittle())
+ t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024)
+ t.Logf(" Local Mem Type: %s", d.LocalMemType())
+ t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency())
+ t.Logf(" Max Compute Units: %d", d.MaxComputeUnits())
+ t.Logf(" Max Constant Args: %d", d.MaxConstantArgs())
+ t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024)
+ t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024)
+ t.Logf(" Max Parameter Size: %d", d.MaxParameterSize())
+ t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs())
+ t.Logf(" Max Samplers: %d", d.MaxSamplers())
+ t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize())
+ t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions())
+ t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes())
+ t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs())
+ t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign())
+ t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar())
+ t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort())
+ t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt())
+ t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong())
+ t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat())
+ t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble())
+ t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf())
+ t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion())
+ // t.Logf(" Parent Device: %+v", d.ParentDevice())
+ t.Logf(" Profile: %s", d.Profile())
+ t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution())
+ t.Logf(" Vendor: %s", d.Vendor())
+ t.Logf(" Version: %s", d.Version())
+ }
+ if deviceIndex < 0 {
+ deviceIndex = 0
+ }
+ device := devices[deviceIndex]
+ t.Logf("Using device %d", deviceIndex)
+ context, err := CreateContext([]*Device{device})
+ if err != nil {
+ t.Fatalf("CreateContext failed: %+v", err)
+ }
+ // imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
+ // if err != nil {
+ // t.Fatalf("GetSupportedImageFormats failed: %+v", err)
+ // }
+ // t.Logf("Supported image formats: %+v", imageFormats)
+ queue, err := context.CreateCommandQueue(device, 0)
+ if err != nil {
+ t.Fatalf("CreateCommandQueue failed: %+v", err)
+ }
+ program, err := context.CreateProgramWithSource([]string{kernelSource})
+ if err != nil {
+ t.Fatalf("CreateProgramWithSource failed: %+v", err)
+ }
+ if err := program.BuildProgram(nil, ""); err != nil {
+ t.Fatalf("BuildProgram failed: %+v", err)
+ }
+ kernel, err := program.CreateKernel("square")
+ if err != nil {
+ t.Fatalf("CreateKernel failed: %+v", err)
+ }
+ for i := 0; i < 3; i++ {
+ name, err := kernel.ArgName(i)
+ if err == ErrUnsupported {
+ break
+ } else if err != nil {
+ t.Errorf("GetKernelArgInfo for name failed: %+v", err)
+ break
+ } else {
+ t.Logf("Kernel arg %d: %s", i, name)
+ }
+ }
+ input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
+ if err != nil {
+ t.Fatalf("CreateBuffer failed for input: %+v", err)
+ }
+ output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
+ if err != nil {
+ t.Fatalf("CreateBuffer failed for output: %+v", err)
+ }
+ if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil {
+ t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err)
+ }
+ if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil {
+ t.Fatalf("SetKernelArgs failed: %+v", err)
+ }
+
+ local, err := kernel.WorkGroupSize(device)
+ if err != nil {
+ t.Fatalf("WorkGroupSize failed: %+v", err)
+ }
+ t.Logf("Work group size: %d", local)
+ size, _ := kernel.PreferredWorkGroupSizeMultiple(nil)
+ t.Logf("Preferred Work Group Size Multiple: %d", size)
+
+ global := len(data)
+ d := len(data) % local
+ if d != 0 {
+ global += local - d
+ }
+ if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil {
+ t.Fatalf("EnqueueNDRangeKernel failed: %+v", err)
+ }
+
+ if err := queue.Finish(); err != nil {
+ t.Fatalf("Finish failed: %+v", err)
+ }
+
+ results := make([]float32, len(data))
+ if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil {
+ t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err)
+ }
+
+ correct := 0
+ for i, v := range data {
+ if results[i] == v*v {
+ correct++
+ }
+ }
+
+ if correct != len(data) {
+ t.Fatalf("%d/%d correct values", correct, len(data))
+ }
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
new file mode 100644
index 000000000..67441bb5f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
@@ -0,0 +1,161 @@
+package cl
+
+// #include <stdlib.h>
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import (
+ "runtime"
+ "unsafe"
+)
+
+const maxImageFormats = 256
+
+type Context struct {
+ clContext C.cl_context
+ devices []*Device
+}
+
+type MemObject struct {
+ clMem C.cl_mem
+ size int
+}
+
+func releaseContext(c *Context) {
+ if c.clContext != nil {
+ C.clReleaseContext(c.clContext)
+ c.clContext = nil
+ }
+}
+
+func releaseMemObject(b *MemObject) {
+ if b.clMem != nil {
+ C.clReleaseMemObject(b.clMem)
+ b.clMem = nil
+ }
+}
+
+func newMemObject(mo C.cl_mem, size int) *MemObject {
+ memObject := &MemObject{clMem: mo, size: size}
+ runtime.SetFinalizer(memObject, releaseMemObject)
+ return memObject
+}
+
+func (b *MemObject) Release() {
+ releaseMemObject(b)
+}
+
+// TODO: properties
+func CreateContext(devices []*Device) (*Context, error) {
+ deviceIds := buildDeviceIdList(devices)
+ var err C.cl_int
+ clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if clContext == nil {
+ return nil, ErrUnknown
+ }
+ context := &Context{clContext: clContext, devices: devices}
+ runtime.SetFinalizer(context, releaseContext)
+ return context, nil
+}
+
+func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) {
+ var formats [maxImageFormats]C.cl_image_format
+ var nFormats C.cl_uint
+ if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ fmts := make([]ImageFormat, nFormats)
+ for i, f := range formats[:nFormats] {
+ fmts[i] = ImageFormat{
+ ChannelOrder: ChannelOrder(f.image_channel_order),
+ ChannelDataType: ChannelDataType(f.image_channel_data_type),
+ }
+ }
+ return fmts, nil
+}
+
+func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) {
+ var err C.cl_int
+ clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if clQueue == nil {
+ return nil, ErrUnknown
+ }
+ commandQueue := &CommandQueue{clQueue: clQueue, device: device}
+ runtime.SetFinalizer(commandQueue, releaseCommandQueue)
+ return commandQueue, nil
+}
+
+func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) {
+ cSources := make([]*C.char, len(sources))
+ for i, s := range sources {
+ cs := C.CString(s)
+ cSources[i] = cs
+ defer C.free(unsafe.Pointer(cs))
+ }
+ var err C.cl_int
+ clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if clProgram == nil {
+ return nil, ErrUnknown
+ }
+ program := &Program{clProgram: clProgram, devices: ctx.devices}
+ runtime.SetFinalizer(program, releaseProgram)
+ return program, nil
+}
+
+func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) {
+ var err C.cl_int
+ clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if clBuffer == nil {
+ return nil, ErrUnknown
+ }
+ return newMemObject(clBuffer, size), nil
+}
+
+func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) {
+ return ctx.CreateBufferUnsafe(flags, size, nil)
+}
+
+func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) {
+ return ctx.CreateBufferUnsafe(flags, 4*size, nil)
+}
+
+func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) {
+ return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0]))
+}
+
+//float64
+func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) {
+ return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0]))
+}
+
+func (ctx *Context) CreateUserEvent() (*Event, error) {
+ var err C.cl_int
+ clEvent := C.clCreateUserEvent(ctx.clContext, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ return newEvent(clEvent), nil
+}
+
+func (ctx *Context) Release() {
+ releaseContext(ctx)
+}
+
+// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
+// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
new file mode 100644
index 000000000..d62a6fb71
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
@@ -0,0 +1,510 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #include "cl_ext.h"
+// #endif
+import "C"
+
+import (
+ "strings"
+ "unsafe"
+)
+
+const maxDeviceCount = 64
+
+type DeviceType uint
+
+const (
+ DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU
+ DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU
+ DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
+ DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT
+ DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL
+)
+
+type FPConfig int
+
+const (
+ FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
+ FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
+ FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
+ FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
+ FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
+ FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
+ FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
+)
+
+var fpConfigNameMap = map[FPConfig]string{
+ FPConfigDenorm: "Denorm",
+ FPConfigInfNaN: "InfNaN",
+ FPConfigRoundToNearest: "RoundToNearest",
+ FPConfigRoundToZero: "RoundToZero",
+ FPConfigRoundToInf: "RoundToInf",
+ FPConfigFMA: "FMA",
+ FPConfigSoftFloat: "SoftFloat",
+}
+
+func (c FPConfig) String() string {
+ var parts []string
+ for bit, name := range fpConfigNameMap {
+ if c&bit != 0 {
+ parts = append(parts, name)
+ }
+ }
+ if parts == nil {
+ return ""
+ }
+ return strings.Join(parts, "|")
+}
+
+func (dt DeviceType) String() string {
+ var parts []string
+ if dt&DeviceTypeCPU != 0 {
+ parts = append(parts, "CPU")
+ }
+ if dt&DeviceTypeGPU != 0 {
+ parts = append(parts, "GPU")
+ }
+ if dt&DeviceTypeAccelerator != 0 {
+ parts = append(parts, "Accelerator")
+ }
+ if dt&DeviceTypeDefault != 0 {
+ parts = append(parts, "Default")
+ }
+ if parts == nil {
+ parts = append(parts, "None")
+ }
+ return strings.Join(parts, "|")
+}
+
+type Device struct {
+ id C.cl_device_id
+}
+
+func buildDeviceIdList(devices []*Device) []C.cl_device_id {
+ deviceIds := make([]C.cl_device_id, len(devices))
+ for i, d := range devices {
+ deviceIds[i] = d.id
+ }
+ return deviceIds
+}
+
+// Obtain the list of devices available on a platform. 'platform' refers
+// to the platform returned by GetPlatforms or can be nil. If platform
+// is nil, the behavior is implementation-defined.
+func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
+ var deviceIds [maxDeviceCount]C.cl_device_id
+ var numDevices C.cl_uint
+ var platformId C.cl_platform_id
+ if platform != nil {
+ platformId = platform.id
+ }
+ if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if numDevices > maxDeviceCount {
+ numDevices = maxDeviceCount
+ }
+ devices := make([]*Device, numDevices)
+ for i := 0; i < int(numDevices); i++ {
+ devices[i] = &Device{id: deviceIds[i]}
+ }
+ return devices, nil
+}
+
+func (d *Device) nullableId() C.cl_device_id {
+ if d == nil {
+ return nil
+ }
+ return d.id
+}
+
+func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
+ var strC [1024]C.char
+ var strN C.size_t
+ if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
+ if panicOnError {
+ panic("Should never fail")
+ }
+ return "", toError(err)
+ }
+
+ // OpenCL strings are NUL-terminated, and the terminator is included in strN
+ // Go strings aren't NUL-terminated, so subtract 1 from the length
+ return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
+}
+
+func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
+ var val C.cl_uint
+ if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
+ if panicOnError {
+ panic("Should never fail")
+ }
+ return 0, toError(err)
+ }
+ return uint(val), nil
+}
+
+func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
+ var val C.size_t
+ if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
+ if panicOnError {
+ panic("Should never fail")
+ }
+ return 0, toError(err)
+ }
+ return int(val), nil
+}
+
+func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
+ var val C.cl_ulong
+ if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
+ if panicOnError {
+ panic("Should never fail")
+ }
+ return 0, toError(err)
+ }
+ return int64(val), nil
+}
+
+func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
+ var val C.cl_bool
+ if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
+ if panicOnError {
+ panic("Should never fail")
+ }
+ return false, toError(err)
+ }
+ return val == C.CL_TRUE, nil
+}
+
+func (d *Device) Name() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
+ return str
+}
+
+func (d *Device) Vendor() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
+ return str
+}
+
+func (d *Device) Extensions() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
+ return str
+}
+
+func (d *Device) OpenCLCVersion() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
+ return str
+}
+
+func (d *Device) Profile() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
+ return str
+}
+
+func (d *Device) Version() string {
+ str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
+ return str
+}
+
+func (d *Device) DriverVersion() string {
+ str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
+ return str
+}
+
+// The default compute device address space size specified as an
+// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
+func (d *Device) AddressBits() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
+ return int(val)
+}
+
+// Size of global memory cache line in bytes.
+func (d *Device) GlobalMemCachelineSize() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
+ return int(val)
+}
+
+// Maximum configured clock frequency of the device in MHz.
+func (d *Device) MaxClockFrequency() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
+ return int(val)
+}
+
+// The number of parallel compute units on the OpenCL device.
+// A work-group executes on a single compute unit. The minimum value is 1.
+func (d *Device) MaxComputeUnits() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
+ return int(val)
+}
+
+// Max number of arguments declared with the __constant qualifier in a kernel.
+// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MaxConstantArgs() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
+ return int(val)
+}
+
+// Max number of simultaneous image objects that can be read by a kernel.
+// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) MaxReadImageArgs() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
+ return int(val)
+}
+
+// Maximum number of samplers that can be used in a kernel. The minimum
+// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
+func (d *Device) MaxSamplers() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
+ return int(val)
+}
+
+// Maximum dimensions that specify the global and local work-item IDs used
+// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
+// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MaxWorkItemDimensions() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
+ return int(val)
+}
+
+// Max number of simultaneous image objects that can be written to by a
+// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) MaxWriteImageArgs() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
+ return int(val)
+}
+
+// The minimum value is the size (in bits) of the largest OpenCL built-in
+// data type supported by the device (long16 in FULL profile, long16 or
+// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MemBaseAddrAlign() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthChar() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthShort() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthInt() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthLong() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthFloat() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthDouble() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
+ return int(val)
+}
+
+func (d *Device) NativeVectorWidthHalf() int {
+ val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
+ return int(val)
+}
+
+// Max height of 2D image in pixels. The minimum value is 8192
+// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) Image2DMaxHeight() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
+ return int(val)
+}
+
+// Max width of 2D image or 1D image not created from a buffer object in
+// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) Image2DMaxWidth() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
+ return int(val)
+}
+
+// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) Image3DMaxDepth() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
+ return int(val)
+}
+
+// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) Image3DMaxHeight() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
+ return int(val)
+}
+
+// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) Image3DMaxWidth() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
+ return int(val)
+}
+
+// Max size in bytes of the arguments that can be passed to a kernel. The
+// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
+func (d *Device) MaxParameterSize() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
+ return int(val)
+}
+
+// Maximum number of work-items in a work-group executing a kernel on a
+// single compute unit, using the data parallel execution model. (Refer
+// to clEnqueueNDRangeKernel). The minimum value is 1.
+func (d *Device) MaxWorkGroupSize() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
+ return int(val)
+}
+
+// Describes the resolution of device timer. This is measured in nanoseconds.
+func (d *Device) ProfilingTimerResolution() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
+ return int(val)
+}
+
+// Size of local memory arena in bytes. The minimum value is 32 KB for
+// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) LocalMemSize() int64 {
+ val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
+ return val
+}
+
+// Max size in bytes of a constant buffer allocation. The minimum value is
+// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MaxConstantBufferSize() int64 {
+ val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
+ return val
+}
+
+// Max size of memory object allocation in bytes. The minimum value is max
+// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
+// not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MaxMemAllocSize() int64 {
+ val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
+ return val
+}
+
+// Size of global device memory in bytes.
+func (d *Device) GlobalMemSize() int64 {
+ val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
+ return val
+}
+
+func (d *Device) Available() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
+ return val
+}
+
+func (d *Device) CompilerAvailable() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
+ return val
+}
+
+func (d *Device) EndianLittle() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
+ return val
+}
+
+// Is CL_TRUE if the device implements error correction for all
+// accesses to compute device memory (global and constant). Is
+// CL_FALSE if the device does not implement such error correction.
+func (d *Device) ErrorCorrectionSupport() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
+ return val
+}
+
+func (d *Device) HostUnifiedMemory() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
+ return val
+}
+
+func (d *Device) ImageSupport() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
+ return val
+}
+
+func (d *Device) Type() DeviceType {
+ var deviceType C.cl_device_type
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
+ panic("Failed to get device type")
+ }
+ return DeviceType(deviceType)
+}
+
+// Describes double precision floating-point capability of the OpenCL device
+func (d *Device) DoubleFPConfig() FPConfig {
+ var fpConfig C.cl_device_fp_config
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
+ panic("Failed to get double FP config")
+ }
+ return FPConfig(fpConfig)
+}
+
+// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
+func (d *Device) HalfFPConfig() FPConfig {
+ var fpConfig C.cl_device_fp_config
+ err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
+ if err != C.CL_SUCCESS {
+ return FPConfig(0)
+ }
+ return FPConfig(fpConfig)
+}
+
+// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
+// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
+// can also be returned indicating no local memory support.
+func (d *Device) LocalMemType() LocalMemType {
+ var memType C.cl_device_local_mem_type
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
+ return LocalMemType(C.CL_NONE)
+ }
+ return LocalMemType(memType)
+}
+
+// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
+func (d *Device) ExecutionCapabilities() ExecCapability {
+ var execCap C.cl_device_exec_capabilities
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
+ panic("Failed to get execution capabilities")
+ }
+ return ExecCapability(execCap)
+}
+
+func (d *Device) GlobalMemCacheType() MemCacheType {
+ var memType C.cl_device_mem_cache_type
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
+ return MemCacheType(C.CL_NONE)
+ }
+ return MemCacheType(memType)
+}
+
+// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
+//
+// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
+//
+// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
+func (d *Device) MaxWorkItemSizes() []int {
+ dims := d.MaxWorkItemDimensions()
+ sizes := make([]C.size_t, dims)
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
+ panic("Failed to get max work item sizes")
+ }
+ intSizes := make([]int, dims)
+ for i, s := range sizes {
+ intSizes[i] = int(s)
+ }
+ return intSizes
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
new file mode 100644
index 000000000..b4b559a6a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
@@ -0,0 +1,51 @@
+// +build cl12
+
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+import "unsafe"
+
+const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
+
+func init() {
+ fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt"
+}
+
+func (d *Device) BuiltInKernels() string {
+ str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true)
+ return str
+}
+
+// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
+func (d *Device) LinkerAvailable() bool {
+ val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true)
+ return val
+}
+
+func (d *Device) ParentDevice() *Device {
+ var deviceId C.cl_device_id
+ if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS {
+ panic("ParentDevice failed")
+ }
+ if deviceId == nil {
+ return nil
+ }
+ return &Device{id: deviceId}
+}
+
+// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
+func (d *Device) ImageMaxBufferSize() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true)
+ return int(val)
+}
+
+// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
+func (d *Device) ImageMaxArraySize() int {
+ val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true)
+ return int(val)
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
new file mode 100644
index 000000000..c38aa1eda
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
@@ -0,0 +1,1210 @@
+/*******************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#include <cl_platform.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+typedef struct _cl_platform_id * cl_platform_id;
+typedef struct _cl_device_id * cl_device_id;
+typedef struct _cl_context * cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem * cl_mem;
+typedef struct _cl_program * cl_program;
+typedef struct _cl_kernel * cl_kernel;
+typedef struct _cl_event * cl_event;
+typedef struct _cl_sampler * cl_sampler;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong cl_bitfield;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+typedef cl_bitfield cl_command_queue_properties;
+typedef intptr_t cl_device_partition_property;
+typedef cl_bitfield cl_device_affinity_domain;
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+typedef cl_uint cl_mem_object_type;
+typedef cl_uint cl_mem_info;
+typedef cl_bitfield cl_mem_migration_flags;
+typedef cl_uint cl_image_info;
+typedef cl_uint cl_buffer_create_type;
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+typedef cl_uint cl_program_binary_type;
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+typedef cl_uint cl_kernel_arg_info;
+typedef cl_uint cl_kernel_arg_address_qualifier;
+typedef cl_uint cl_kernel_arg_access_qualifier;
+typedef cl_bitfield cl_kernel_arg_type_qualifier;
+typedef cl_uint cl_kernel_work_group_info;
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+
+
+typedef struct _cl_image_format {
+ cl_channel_order image_channel_order;
+ cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+ cl_mem_object_type image_type;
+ size_t image_width;
+ size_t image_height;
+ size_t image_depth;
+ size_t image_array_size;
+ size_t image_row_pitch;
+ size_t image_slice_pitch;
+ cl_uint num_mip_levels;
+ cl_uint num_samples;
+ cl_mem buffer;
+} cl_image_desc;
+
+typedef struct _cl_buffer_region {
+ size_t origin;
+ size_t size;
+} cl_buffer_region;
+
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS 0
+#define CL_DEVICE_NOT_FOUND -1
+#define CL_DEVICE_NOT_AVAILABLE -2
+#define CL_COMPILER_NOT_AVAILABLE -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
+#define CL_OUT_OF_RESOURCES -5
+#define CL_OUT_OF_HOST_MEMORY -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE -7
+#define CL_MEM_COPY_OVERLAP -8
+#define CL_IMAGE_FORMAT_MISMATCH -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
+#define CL_BUILD_PROGRAM_FAILURE -11
+#define CL_MAP_FAILURE -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+#define CL_COMPILE_PROGRAM_FAILURE -15
+#define CL_LINKER_NOT_AVAILABLE -16
+#define CL_LINK_PROGRAM_FAILURE -17
+#define CL_DEVICE_PARTITION_FAILED -18
+#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
+
+#define CL_INVALID_VALUE -30
+#define CL_INVALID_DEVICE_TYPE -31
+#define CL_INVALID_PLATFORM -32
+#define CL_INVALID_DEVICE -33
+#define CL_INVALID_CONTEXT -34
+#define CL_INVALID_QUEUE_PROPERTIES -35
+#define CL_INVALID_COMMAND_QUEUE -36
+#define CL_INVALID_HOST_PTR -37
+#define CL_INVALID_MEM_OBJECT -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
+#define CL_INVALID_IMAGE_SIZE -40
+#define CL_INVALID_SAMPLER -41
+#define CL_INVALID_BINARY -42
+#define CL_INVALID_BUILD_OPTIONS -43
+#define CL_INVALID_PROGRAM -44
+#define CL_INVALID_PROGRAM_EXECUTABLE -45
+#define CL_INVALID_KERNEL_NAME -46
+#define CL_INVALID_KERNEL_DEFINITION -47
+#define CL_INVALID_KERNEL -48
+#define CL_INVALID_ARG_INDEX -49
+#define CL_INVALID_ARG_VALUE -50
+#define CL_INVALID_ARG_SIZE -51
+#define CL_INVALID_KERNEL_ARGS -52
+#define CL_INVALID_WORK_DIMENSION -53
+#define CL_INVALID_WORK_GROUP_SIZE -54
+#define CL_INVALID_WORK_ITEM_SIZE -55
+#define CL_INVALID_GLOBAL_OFFSET -56
+#define CL_INVALID_EVENT_WAIT_LIST -57
+#define CL_INVALID_EVENT -58
+#define CL_INVALID_OPERATION -59
+#define CL_INVALID_GL_OBJECT -60
+#define CL_INVALID_BUFFER_SIZE -61
+#define CL_INVALID_MIP_LEVEL -62
+#define CL_INVALID_GLOBAL_WORK_SIZE -63
+#define CL_INVALID_PROPERTY -64
+#define CL_INVALID_IMAGE_DESCRIPTOR -65
+#define CL_INVALID_COMPILER_OPTIONS -66
+#define CL_INVALID_LINKER_OPTIONS -67
+#define CL_INVALID_DEVICE_PARTITION_COUNT -68
+
+/* OpenCL Version */
+#define CL_VERSION_1_0 1
+#define CL_VERSION_1_1 1
+#define CL_VERSION_1_2 1
+
+/* cl_bool */
+#define CL_FALSE 0
+#define CL_TRUE 1
+#define CL_BLOCKING CL_TRUE
+#define CL_NON_BLOCKING CL_FALSE
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE 0x0900
+#define CL_PLATFORM_VERSION 0x0901
+#define CL_PLATFORM_NAME 0x0902
+#define CL_PLATFORM_VENDOR 0x0903
+#define CL_PLATFORM_EXTENSIONS 0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
+#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
+#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE 0x1000
+#define CL_DEVICE_VENDOR_ID 0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
+#define CL_DEVICE_ADDRESS_BITS 0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT 0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
+#define CL_DEVICE_MAX_SAMPLERS 0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
+#define CL_DEVICE_ENDIAN_LITTLE 0x1026
+#define CL_DEVICE_AVAILABLE 0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
+#define CL_DEVICE_NAME 0x102B
+#define CL_DEVICE_VENDOR 0x102C
+#define CL_DRIVER_VERSION 0x102D
+#define CL_DEVICE_PROFILE 0x102E
+#define CL_DEVICE_VERSION 0x102F
+#define CL_DEVICE_EXTENSIONS 0x1030
+#define CL_DEVICE_PLATFORM 0x1031
+#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
+#define CL_DEVICE_OPENCL_C_VERSION 0x103D
+#define CL_DEVICE_LINKER_AVAILABLE 0x103E
+#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
+#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
+#define CL_DEVICE_PARENT_DEVICE 0x1042
+#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
+#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
+#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
+#define CL_DEVICE_PARTITION_TYPE 0x1046
+#define CL_DEVICE_REFERENCE_COUNT 0x1047
+#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
+#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM (1 << 0)
+#define CL_FP_INF_NAN (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST (1 << 2)
+#define CL_FP_ROUND_TO_ZERO (1 << 3)
+#define CL_FP_ROUND_TO_INF (1 << 4)
+#define CL_FP_FMA (1 << 5)
+#define CL_FP_SOFT_FLOAT (1 << 6)
+#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE 0x0
+#define CL_READ_ONLY_CACHE 0x1
+#define CL_READ_WRITE_CACHE 0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL 0x1
+#define CL_GLOBAL 0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
+
+/* cl_context_info */
+#define CL_CONTEXT_REFERENCE_COUNT 0x1080
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_CONTEXT_PROPERTIES 0x1082
+#define CL_CONTEXT_NUM_DEVICES 0x1083
+
+/* cl_context_properties */
+#define CL_CONTEXT_PLATFORM 0x1084
+#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
+
+/* cl_device_partition_property */
+#define CL_DEVICE_PARTITION_EQUALLY 0x1086
+#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
+#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
+
+/* cl_device_affinity_domain */
+#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
+#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
+#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
+#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
+#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
+#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT 0x1090
+#define CL_QUEUE_DEVICE 0x1091
+#define CL_QUEUE_REFERENCE_COUNT 0x1092
+#define CL_QUEUE_PROPERTIES 0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+/* reserved (1 << 6) */
+#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
+#define CL_MEM_HOST_READ_ONLY (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS (1 << 9)
+
+/* cl_mem_migration_flags - bitfield */
+#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
+#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
+
+/* cl_channel_order */
+#define CL_R 0x10B0
+#define CL_A 0x10B1
+#define CL_RG 0x10B2
+#define CL_RA 0x10B3
+#define CL_RGB 0x10B4
+#define CL_RGBA 0x10B5
+#define CL_BGRA 0x10B6
+#define CL_ARGB 0x10B7
+#define CL_INTENSITY 0x10B8
+#define CL_LUMINANCE 0x10B9
+#define CL_Rx 0x10BA
+#define CL_RGx 0x10BB
+#define CL_RGBx 0x10BC
+#define CL_DEPTH 0x10BD
+#define CL_DEPTH_STENCIL 0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8 0x10D0
+#define CL_SNORM_INT16 0x10D1
+#define CL_UNORM_INT8 0x10D2
+#define CL_UNORM_INT16 0x10D3
+#define CL_UNORM_SHORT_565 0x10D4
+#define CL_UNORM_SHORT_555 0x10D5
+#define CL_UNORM_INT_101010 0x10D6
+#define CL_SIGNED_INT8 0x10D7
+#define CL_SIGNED_INT16 0x10D8
+#define CL_SIGNED_INT32 0x10D9
+#define CL_UNSIGNED_INT8 0x10DA
+#define CL_UNSIGNED_INT16 0x10DB
+#define CL_UNSIGNED_INT32 0x10DC
+#define CL_HALF_FLOAT 0x10DD
+#define CL_FLOAT 0x10DE
+#define CL_UNORM_INT24 0x10DF
+
+/* cl_mem_object_type */
+#define CL_MEM_OBJECT_BUFFER 0x10F0
+#define CL_MEM_OBJECT_IMAGE2D 0x10F1
+#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+#define CL_MEM_OBJECT_IMAGE1D 0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+
+/* cl_mem_info */
+#define CL_MEM_TYPE 0x1100
+#define CL_MEM_FLAGS 0x1101
+#define CL_MEM_SIZE 0x1102
+#define CL_MEM_HOST_PTR 0x1103
+#define CL_MEM_MAP_COUNT 0x1104
+#define CL_MEM_REFERENCE_COUNT 0x1105
+#define CL_MEM_CONTEXT 0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
+#define CL_MEM_OFFSET 0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT 0x1110
+#define CL_IMAGE_ELEMENT_SIZE 0x1111
+#define CL_IMAGE_ROW_PITCH 0x1112
+#define CL_IMAGE_SLICE_PITCH 0x1113
+#define CL_IMAGE_WIDTH 0x1114
+#define CL_IMAGE_HEIGHT 0x1115
+#define CL_IMAGE_DEPTH 0x1116
+#define CL_IMAGE_ARRAY_SIZE 0x1117
+#define CL_IMAGE_BUFFER 0x1118
+#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
+#define CL_IMAGE_NUM_SAMPLES 0x111A
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE 0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
+#define CL_ADDRESS_CLAMP 0x1132
+#define CL_ADDRESS_REPEAT 0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST 0x1140
+#define CL_FILTER_LINEAR 0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT 0x1150
+#define CL_SAMPLER_CONTEXT 0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
+#define CL_SAMPLER_ADDRESSING_MODE 0x1153
+#define CL_SAMPLER_FILTER_MODE 0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ (1 << 0)
+#define CL_MAP_WRITE (1 << 1)
+#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT 0x1160
+#define CL_PROGRAM_CONTEXT 0x1161
+#define CL_PROGRAM_NUM_DEVICES 0x1162
+#define CL_PROGRAM_DEVICES 0x1163
+#define CL_PROGRAM_SOURCE 0x1164
+#define CL_PROGRAM_BINARY_SIZES 0x1165
+#define CL_PROGRAM_BINARIES 0x1166
+#define CL_PROGRAM_NUM_KERNELS 0x1167
+#define CL_PROGRAM_KERNEL_NAMES 0x1168
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS 0x1181
+#define CL_PROGRAM_BUILD_OPTIONS 0x1182
+#define CL_PROGRAM_BUILD_LOG 0x1183
+#define CL_PROGRAM_BINARY_TYPE 0x1184
+
+/* cl_program_binary_type */
+#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
+#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
+#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
+#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS 0
+#define CL_BUILD_NONE -1
+#define CL_BUILD_ERROR -2
+#define CL_BUILD_IN_PROGRESS -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME 0x1190
+#define CL_KERNEL_NUM_ARGS 0x1191
+#define CL_KERNEL_REFERENCE_COUNT 0x1192
+#define CL_KERNEL_CONTEXT 0x1193
+#define CL_KERNEL_PROGRAM 0x1194
+#define CL_KERNEL_ATTRIBUTES 0x1195
+
+/* cl_kernel_arg_info */
+#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
+#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
+#define CL_KERNEL_ARG_TYPE_NAME 0x1198
+#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
+#define CL_KERNEL_ARG_NAME 0x119A
+
+/* cl_kernel_arg_address_qualifier */
+#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
+#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
+#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
+#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
+
+/* cl_kernel_arg_access_qualifier */
+#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
+#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
+#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
+#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
+
+/* cl_kernel_arg_type_qualifer */
+#define CL_KERNEL_ARG_TYPE_NONE 0
+#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
+#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
+#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
+#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
+
+/* cl_event_info */
+#define CL_EVENT_COMMAND_QUEUE 0x11D0
+#define CL_EVENT_COMMAND_TYPE 0x11D1
+#define CL_EVENT_REFERENCE_COUNT 0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
+#define CL_EVENT_CONTEXT 0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
+#define CL_COMMAND_TASK 0x11F1
+#define CL_COMMAND_NATIVE_KERNEL 0x11F2
+#define CL_COMMAND_READ_BUFFER 0x11F3
+#define CL_COMMAND_WRITE_BUFFER 0x11F4
+#define CL_COMMAND_COPY_BUFFER 0x11F5
+#define CL_COMMAND_READ_IMAGE 0x11F6
+#define CL_COMMAND_WRITE_IMAGE 0x11F7
+#define CL_COMMAND_COPY_IMAGE 0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
+#define CL_COMMAND_MAP_BUFFER 0x11FB
+#define CL_COMMAND_MAP_IMAGE 0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
+#define CL_COMMAND_MARKER 0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
+#define CL_COMMAND_READ_BUFFER_RECT 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
+#define CL_COMMAND_USER 0x1204
+#define CL_COMMAND_BARRIER 0x1205
+#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
+#define CL_COMMAND_FILL_BUFFER 0x1207
+#define CL_COMMAND_FILL_IMAGE 0x1208
+
+/* command execution status */
+#define CL_COMPLETE 0x0
+#define CL_RUNNING 0x1
+#define CL_SUBMITTED 0x2
+#define CL_QUEUED 0x3
+
+/* cl_buffer_create_type */
+#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
+
+/* cl_profiling_info */
+#define CL_PROFILING_COMMAND_QUEUED 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT 0x1281
+#define CL_PROFILING_COMMAND_START 0x1282
+#define CL_PROFILING_COMMAND_END 0x1283
+
+/********************************************************************************************************/
+
+/* Platform API */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformInfo(cl_platform_id /* platform */,
+ cl_platform_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id /* platform */,
+ cl_device_type /* device_type */,
+ cl_uint /* num_entries */,
+ cl_device_id * /* devices */,
+ cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id /* device */,
+ cl_device_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevices(cl_device_id /* in_device */,
+ const cl_device_partition_property * /* properties */,
+ cl_uint /* num_devices */,
+ cl_device_id * /* out_devices */,
+ cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+/* Context APIs */
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* devices */,
+ void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+ cl_device_type /* device_type */,
+ void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context /* context */,
+ cl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context /* context */,
+ cl_device_id /* device */,
+ cl_command_queue_properties /* properties */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue /* command_queue */,
+ cl_command_queue_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Memory Object APIs */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ size_t /* size */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem /* buffer */,
+ cl_mem_flags /* flags */,
+ cl_buffer_create_type /* buffer_create_type */,
+ const void * /* buffer_create_info */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ const cl_image_desc * /* image_desc */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_mem_object_type /* image_type */,
+ cl_uint /* num_entries */,
+ cl_image_format * /* image_formats */,
+ cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem /* memobj */,
+ cl_mem_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem /* image */,
+ cl_image_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback( cl_mem /* memobj */,
+ void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1;
+
+/* Sampler APIs */
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context /* context */,
+ cl_bool /* normalized_coords */,
+ cl_addressing_mode /* addressing_mode */,
+ cl_filter_mode /* filter_mode */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler /* sampler */,
+ cl_sampler_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Program Object APIs */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context /* context */,
+ cl_uint /* count */,
+ const char ** /* strings */,
+ const size_t * /* lengths */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const size_t * /* lengths */,
+ const unsigned char ** /* binaries */,
+ cl_int * /* binary_status */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBuiltInKernels(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* kernel_names */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCompileProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_headers */,
+ const cl_program * /* input_headers */,
+ const char ** /* header_include_names */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clLinkProgram(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_programs */,
+ const cl_program * /* input_programs */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
+
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program /* program */,
+ cl_program_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program /* program */,
+ cl_device_id /* device */,
+ cl_program_build_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Kernel Object APIs */
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program /* program */,
+ const char * /* kernel_name */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program /* program */,
+ cl_uint /* num_kernels */,
+ cl_kernel * /* kernels */,
+ cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel /* kernel */,
+ cl_uint /* arg_index */,
+ size_t /* arg_size */,
+ const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel /* kernel */,
+ cl_kernel_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelArgInfo(cl_kernel /* kernel */,
+ cl_uint /* arg_indx */,
+ cl_kernel_arg_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
+ cl_device_id /* device */,
+ cl_kernel_work_group_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Event Object APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event /* event */,
+ cl_event_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context /* context */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event /* event */,
+ cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback( cl_event /* event */,
+ cl_int /* command_exec_callback_type */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event /* event */,
+ cl_profiling_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Flush and Finish APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ size_t /* offset */,
+ size_t /* size */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ size_t /* offset */,
+ size_t /* size */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ const void * /* pattern */,
+ size_t /* pattern_size */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ size_t /* src_offset */,
+ size_t /* dst_offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin */,
+ const size_t * /* dst_origin */,
+ const size_t * /* region */,
+ size_t /* src_row_pitch */,
+ size_t /* src_slice_pitch */,
+ size_t /* dst_row_pitch */,
+ size_t /* dst_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_read */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* row_pitch */,
+ size_t /* slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_write */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* input_row_pitch */,
+ size_t /* input_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ const void * /* fill_color */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_image */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* dst_offset */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_image */,
+ size_t /* src_offset */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t * /* image_row_pitch */,
+ size_t * /* image_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+ cl_mem /* memobj */,
+ void * /* mapped_ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_mem_migration_flags /* flags */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* work_dim */,
+ const size_t * /* global_work_offset */,
+ const size_t * /* global_work_size */,
+ const size_t * /* local_work_size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue /* command_queue */,
+ void (CL_CALLBACK * /*user_func*/)(void *),
+ void * /* args */,
+ size_t /* cb_args */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_list */,
+ const void ** /* args_mem_loc */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function can not be found. The client must
+ * check to make sure the address is not NULL, before using or
+ * calling the returned function address.
+ */
+extern CL_API_ENTRY void * CL_API_CALL
+clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
+ const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2;
+
+
+/* Deprecated OpenCL 1.1 APIs */
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage2D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_row_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage3D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_depth */,
+ size_t /* image_row_pitch */,
+ size_t /* image_slice_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue /* command_queue */,
+ cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+ cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
+clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_H */
+
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
new file mode 100644
index 000000000..a998c9c51
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
@@ -0,0 +1,315 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2013 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
+
+/* cl_ext.h contains OpenCL extensions which don't have external */
+/* (OpenGL, D3D) dependencies. */
+
+#ifndef __CL_EXT_H
+#define __CL_EXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
+#include <cl.h>
+
+/* cl_khr_fp16 extension - no extension #define since it has no functions */
+#define CL_DEVICE_HALF_FP_CONFIG 0x1033
+
+/* Memory object destruction
+ *
+ * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
+ *
+ * Registers a user callback function that will be called when the memory object is deleted and its resources
+ * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in
+ * which they were registered. The user callback functions are called and then the memory object is deleted
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
+ * the storage bits for the memory object, can be reused or freed.
+ *
+ * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
+ *
+ * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ */
+#define cl_APPLE_SetMemObjectDestructor 1
+cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
+ void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+
+/* Context Logging Functions
+ *
+ * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ *
+ * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
+ */
+#define cl_APPLE_ContextLoggingFunctions 1
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
+ const void * /* private_info */,
+ size_t /* cb */,
+ void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
+
+
+/************************
+* cl_khr_icd extension *
+************************/
+#define cl_khr_icd 1
+
+/* cl_platform_info */
+#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
+
+/* Additional Error Codes */
+#define CL_PLATFORM_NOT_FOUND_KHR -1001
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
+ cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */);
+
+
+/* Extension: cl_khr_image2D_buffer
+ *
+ * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
+ * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
+ * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
+ * and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
+ * for 2D images created from a buffer.
+ *
+ * When the 2D image from buffer is created, the client must specify the width,
+ * height, image format (i.e. channel order and channel data type) and optionally the row pitch
+ *
+ * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
+ * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
+ */
+
+/*************************************
+ * cl_khr_initalize_memory extension *
+ *************************************/
+
+#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
+
+
+/**************************************
+ * cl_khr_terminate_context extension *
+ **************************************/
+
+#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
+#define CL_CONTEXT_TERMINATE_KHR 0x2010
+
+#define cl_khr_terminate_context 1
+extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
+
+
+/*
+ * Extension: cl_khr_spir
+ *
+ * This extension adds support to create an OpenCL program object from a
+ * Standard Portable Intermediate Representation (SPIR) instance
+ */
+
+#define CL_DEVICE_SPIR_VERSIONS 0x40E0
+#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
+
+
+/******************************************
+* cl_nv_device_attribute_query extension *
+******************************************/
+/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
+#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
+#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
+#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
+#define CL_DEVICE_WARP_SIZE_NV 0x4003
+#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
+#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
+#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
+
+/*********************************
+* cl_amd_device_attribute_query *
+*********************************/
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
+
+/*********************************
+* cl_arm_printf extension
+*********************************/
+#define CL_PRINTF_CALLBACK_ARM 0x40B0
+#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
+
+#ifdef CL_VERSION_1_1
+ /***********************************
+ * cl_ext_device_fission extension *
+ ***********************************/
+ #define cl_ext_device_fission 1
+
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef cl_ulong cl_device_partition_property_ext;
+ extern CL_API_ENTRY cl_int CL_API_CALL
+ clCreateSubDevicesEXT( cl_device_id /*in_device*/,
+ const cl_device_partition_property_ext * /* properties */,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*out_devices*/,
+ cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ typedef CL_API_ENTRY cl_int
+ ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
+ const cl_device_partition_property_ext * /* properties */,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*out_devices*/,
+ cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ /* cl_device_partition_property_ext */
+ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
+ #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
+ #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
+ #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
+
+ /* clDeviceGetInfo selectors */
+ #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
+ #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
+ #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
+ #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
+ #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
+
+ /* error codes */
+ #define CL_DEVICE_PARTITION_FAILED_EXT -1057
+ #define CL_INVALID_PARTITION_COUNT_EXT -1058
+ #define CL_INVALID_PARTITION_NAME_EXT -1059
+
+ /* CL_AFFINITY_DOMAINs */
+ #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
+ #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
+ #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
+ #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
+ #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
+ #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
+
+ /* cl_device_partition_property_ext list terminators */
+ #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
+ #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
+ #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
+
+/*********************************
+* cl_qcom_ext_host_ptr extension
+*********************************/
+
+#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
+
+#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
+#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
+#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
+#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
+#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
+#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
+#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
+#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
+
+typedef cl_uint cl_image_pitch_info_qcom;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceImageInfoQCOM(cl_device_id device,
+ size_t image_width,
+ size_t image_height,
+ const cl_image_format *image_format,
+ cl_image_pitch_info_qcom param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret);
+
+typedef struct _cl_mem_ext_host_ptr
+{
+ /* Type of external memory allocation. */
+ /* Legal values will be defined in layered extensions. */
+ cl_uint allocation_type;
+
+ /* Host cache policy for this external memory allocation. */
+ cl_uint host_cache_policy;
+
+} cl_mem_ext_host_ptr;
+
+/*********************************
+* cl_qcom_ion_host_ptr extension
+*********************************/
+
+#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
+
+typedef struct _cl_mem_ion_host_ptr
+{
+ /* Type of external memory allocation. */
+ /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
+ cl_mem_ext_host_ptr ext_host_ptr;
+
+ /* ION file descriptor */
+ int ion_filedesc;
+
+ /* Host pointer to the ION allocated memory */
+ void* ion_hostptr;
+
+} cl_mem_ion_host_ptr;
+
+#endif /* CL_VERSION_1_1 */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __CL_EXT_H */
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
new file mode 100644
index 000000000..ad914cbd4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
@@ -0,0 +1,158 @@
+/**********************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+#ifndef __OPENCL_CL_GL_H
+#define __OPENCL_CL_GL_H
+
+#include <cl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef cl_uint cl_gl_object_type;
+typedef cl_uint cl_gl_texture_info;
+typedef cl_uint cl_gl_platform_info;
+typedef struct __GLsync *cl_GLsync;
+
+/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
+#define CL_GL_OBJECT_BUFFER 0x2000
+#define CL_GL_OBJECT_TEXTURE2D 0x2001
+#define CL_GL_OBJECT_TEXTURE3D 0x2002
+#define CL_GL_OBJECT_RENDERBUFFER 0x2003
+#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
+#define CL_GL_OBJECT_TEXTURE1D 0x200F
+#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
+#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
+
+/* cl_gl_texture_info */
+#define CL_GL_TEXTURE_TARGET 0x2004
+#define CL_GL_MIPMAP_LEVEL 0x2005
+#define CL_GL_NUM_SAMPLES 0x2012
+
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLBuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLuint /* bufobj */,
+ int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLenum /* target */,
+ cl_GLint /* miplevel */,
+ cl_GLuint /* texture */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLRenderbuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLuint /* renderbuffer */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLObjectInfo(cl_mem /* memobj */,
+ cl_gl_object_type * /* gl_object_type */,
+ cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLTextureInfo(cl_mem /* memobj */,
+ cl_gl_texture_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+
+/* Deprecated OpenCL 1.1 APIs */
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateFromGLTexture2D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLenum /* target */,
+ cl_GLint /* miplevel */,
+ cl_GLuint /* texture */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_GLenum /* target */,
+ cl_GLint /* miplevel */,
+ cl_GLuint /* texture */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+/* cl_khr_gl_sharing extension */
+
+#define cl_khr_gl_sharing 1
+
+typedef cl_uint cl_gl_context_info;
+
+/* Additional Error Codes */
+#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
+
+/* cl_gl_context_info */
+#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
+#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
+
+/* Additional cl_context_properties */
+#define CL_GL_CONTEXT_KHR 0x2008
+#define CL_EGL_DISPLAY_KHR 0x2009
+#define CL_GLX_DISPLAY_KHR 0x200A
+#define CL_WGL_HDC_KHR 0x200B
+#define CL_CGL_SHAREGROUP_KHR 0x200C
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
+ cl_gl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+ const cl_context_properties * properties,
+ cl_gl_context_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_GL_H */
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
new file mode 100644
index 000000000..0c10fedfa
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
@@ -0,0 +1,65 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
+/* OpenGL dependencies. */
+
+#ifndef __OPENCL_CL_GL_EXT_H
+#define __OPENCL_CL_GL_EXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <cl_gl.h>
+
+/*
+ * For each extension, follow this template
+ * cl_VEN_extname extension */
+/* #define cl_VEN_extname 1
+ * ... define new types, if any
+ * ... define new tokens, if any
+ * ... define new APIs, if any
+ *
+ * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
+ * This allows us to avoid having to decide whether to include GL headers or GLES here.
+ */
+
+/*
+ * cl_khr_gl_event extension
+ * See section 9.9 in the OpenCL 1.1 spec for more information
+ */
+#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromGLsyncKHR(cl_context /* context */,
+ cl_GLsync /* cl_GLsync */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_GL_EXT_H */
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
new file mode 100644
index 000000000..7f6f5e8a7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
@@ -0,0 +1,1278 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#ifdef __APPLE__
+ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+ #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32)
+ #define CL_API_ENTRY
+ #define CL_API_CALL __stdcall
+ #define CL_CALLBACK __stdcall
+#else
+ #define CL_API_ENTRY
+ #define CL_API_CALL
+ #define CL_CALLBACK
+#endif
+
+#ifdef __APPLE__
+ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
+ #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+
+ #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ #else
+ #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here!
+ #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #endif
+#else
+ #define CL_EXTENSION_WEAK_LINK
+ #define CL_API_SUFFIX__VERSION_1_0
+ #define CL_EXT_SUFFIX__VERSION_1_0
+ #define CL_API_SUFFIX__VERSION_1_1
+ #define CL_EXT_SUFFIX__VERSION_1_1
+ #define CL_API_SUFFIX__VERSION_1_2
+ #define CL_EXT_SUFFIX__VERSION_1_2
+
+ #ifdef __GNUC__
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #endif
+
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #endif
+ #elif _WIN32
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
+ #endif
+
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
+ #endif
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #endif
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types */
+typedef signed __int8 cl_char;
+typedef unsigned __int8 cl_uchar;
+typedef signed __int16 cl_short;
+typedef unsigned __int16 cl_ushort;
+typedef signed __int32 cl_int;
+typedef unsigned __int32 cl_uint;
+typedef signed __int64 cl_long;
+typedef unsigned __int64 cl_ulong;
+
+typedef unsigned __int16 cl_half;
+typedef float cl_float;
+typedef double cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN 1.175494350822287507969e-38f
+#define CL_FLT_EPSILON 0x1.0p-23f
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN 2.225073858507201383090e-308
+#define CL_DBL_EPSILON 2.220446049250313080847e-16
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#define CL_NAN (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF ((cl_float) 1e50)
+#define CL_HUGE_VAL ((cl_double) 1e500)
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types */
+typedef int8_t cl_char;
+typedef uint8_t cl_uchar;
+typedef int16_t cl_short __attribute__((aligned(2)));
+typedef uint16_t cl_ushort __attribute__((aligned(2)));
+typedef int32_t cl_int __attribute__((aligned(4)));
+typedef uint32_t cl_uint __attribute__((aligned(4)));
+typedef int64_t cl_long __attribute__((aligned(8)));
+typedef uint64_t cl_ulong __attribute__((aligned(8)));
+
+typedef uint16_t cl_half __attribute__((aligned(2)));
+typedef float cl_float __attribute__((aligned(4)));
+typedef double cl_double __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 0x1.fffffep127f
+#define CL_FLT_MIN 0x1.0p-126f
+#define CL_FLT_EPSILON 0x1.0p-23f
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 0x1.fffffffffffffp1023
+#define CL_DBL_MIN 0x1.0p-1022
+#define CL_DBL_EPSILON 0x1.0p-52
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#if defined( __GNUC__ )
+ #define CL_HUGE_VALF __builtin_huge_valf()
+ #define CL_HUGE_VAL __builtin_huge_val()
+ #define CL_NAN __builtin_nanf( "" )
+#else
+ #define CL_HUGE_VALF ((cl_float) 1e50)
+ #define CL_HUGE_VAL ((cl_double) 1e500)
+ float nanf( const char * );
+ #define CL_NAN nanf( "" )
+#endif
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types
+ *
+ * Note: OpenCL requires that all types be naturally aligned.
+ * This means that vector types must be naturally aligned.
+ * For example, a vector of four floats must be aligned to
+ * a 16 byte boundary (calculated as 4 * the natural 4-byte
+ * alignment of the float). The alignment qualifiers here
+ * will only function properly if your compiler supports them
+ * and if you don't actively work to defeat them. For example,
+ * in order for a cl_float4 to be 16 byte aligned in a struct,
+ * the start of the struct must itself be 16-byte aligned.
+ *
+ * Maintaining proper alignment is the user's responsibility.
+ */
+
+/* Define basic vector types */
+#if defined( __VEC__ )
+ #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+ typedef vector unsigned char __cl_uchar16;
+ typedef vector signed char __cl_char16;
+ typedef vector unsigned short __cl_ushort8;
+ typedef vector signed short __cl_short8;
+ typedef vector unsigned int __cl_uint4;
+ typedef vector signed int __cl_int4;
+ typedef vector float __cl_float4;
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_INT4__ 1
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <xmmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef float __cl_float4 __attribute__((vector_size(16)));
+ #else
+ typedef __m128 __cl_float4;
+ #endif
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE2__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <emmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16)));
+ typedef cl_char __cl_char16 __attribute__((vector_size(16)));
+ typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16)));
+ typedef cl_short __cl_short8 __attribute__((vector_size(16)));
+ typedef cl_uint __cl_uint4 __attribute__((vector_size(16)));
+ typedef cl_int __cl_int4 __attribute__((vector_size(16)));
+ typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16)));
+ typedef cl_long __cl_long2 __attribute__((vector_size(16)));
+ typedef cl_double __cl_double2 __attribute__((vector_size(16)));
+ #else
+ typedef __m128i __cl_uchar16;
+ typedef __m128i __cl_char16;
+ typedef __m128i __cl_ushort8;
+ typedef __m128i __cl_short8;
+ typedef __m128i __cl_uint4;
+ typedef __m128i __cl_int4;
+ typedef __m128i __cl_ulong2;
+ typedef __m128i __cl_long2;
+ typedef __m128d __cl_double2;
+ #endif
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_INT4__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_ULONG2__ 1
+ #define __CL_LONG2__ 1
+ #define __CL_DOUBLE2__ 1
+#endif
+
+#if defined( __MMX__ )
+ #include <mmintrin.h>
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8)));
+ typedef cl_char __cl_char8 __attribute__((vector_size(8)));
+ typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8)));
+ typedef cl_short __cl_short4 __attribute__((vector_size(8)));
+ typedef cl_uint __cl_uint2 __attribute__((vector_size(8)));
+ typedef cl_int __cl_int2 __attribute__((vector_size(8)));
+ typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8)));
+ typedef cl_long __cl_long1 __attribute__((vector_size(8)));
+ typedef cl_float __cl_float2 __attribute__((vector_size(8)));
+ #else
+ typedef __m64 __cl_uchar8;
+ typedef __m64 __cl_char8;
+ typedef __m64 __cl_ushort4;
+ typedef __m64 __cl_short4;
+ typedef __m64 __cl_uint2;
+ typedef __m64 __cl_int2;
+ typedef __m64 __cl_ulong1;
+ typedef __m64 __cl_long1;
+ typedef __m64 __cl_float2;
+ #endif
+ #define __CL_UCHAR8__ 1
+ #define __CL_CHAR8__ 1
+ #define __CL_USHORT4__ 1
+ #define __CL_SHORT4__ 1
+ #define __CL_INT2__ 1
+ #define __CL_UINT2__ 1
+ #define __CL_ULONG1__ 1
+ #define __CL_LONG1__ 1
+ #define __CL_FLOAT2__ 1
+#endif
+
+#if defined( __AVX__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <immintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_float __cl_float8 __attribute__((vector_size(32)));
+ typedef cl_double __cl_double4 __attribute__((vector_size(32)));
+ #else
+ typedef __m256 __cl_float8;
+ typedef __m256d __cl_double4;
+ #endif
+ #define __CL_FLOAT8__ 1
+ #define __CL_DOUBLE4__ 1
+#endif
+
+/* Define capabilities for anonymous struct members. */
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+#define __CL_HAS_ANON_STRUCT__ 1
+#define __CL_ANON_STRUCT__ __extension__
+#elif defined( _WIN32) && (_MSC_VER >= 1500)
+ /* Microsoft Developer Studio 2008 supports anonymous structs, but
+ * complains by default. */
+#define __CL_HAS_ANON_STRUCT__ 1
+#define __CL_ANON_STRUCT__
+ /* Disable warning C4201: nonstandard extension used : nameless
+ * struct/union */
+#pragma warning( push )
+#pragma warning( disable : 4201 )
+#else
+#define __CL_HAS_ANON_STRUCT__ 0
+#define __CL_ANON_STRUCT__
+#endif
+
+/* Define alignment keys */
+#if defined( __GNUC__ )
+ #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && (_MSC_VER)
+ /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
+ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
+ /* #include <crtdefs.h> */
+ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
+ #define CL_ALIGNED(_x)
+#else
+ #warning Need to implement some method to align data here
+ #define CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if __CL_HAS_ANON_STRUCT__
+ /* .xyzw and .s0123...{f|F} are supported */
+ #define CL_HAS_NAMED_VECTOR_FIELDS 1
+ /* .hi and .lo are supported */
+ #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+#endif
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+typedef union
+{
+ cl_char CL_ALIGNED(2) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_char x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_char s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_char lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2;
+#endif
+}cl_char2;
+
+typedef union
+{
+ cl_char CL_ALIGNED(4) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[2];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef cl_char4 cl_char3;
+
+typedef union
+{
+ cl_char CL_ALIGNED(8) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[4];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8;
+#endif
+}cl_char8;
+
+typedef union
+{
+ cl_char CL_ALIGNED(16) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[8];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+ __cl_char16 v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+typedef union
+{
+ cl_uchar CL_ALIGNED(2) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uchar x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; };
+#endif
+#if defined( __cl_uchar2__)
+ __cl_uchar2 v2;
+#endif
+}cl_uchar2;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(4) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[2];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef cl_uchar4 cl_uchar3;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(8) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[4];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8;
+#endif
+}cl_uchar8;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(16) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[8];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+ __cl_uchar16 v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+typedef union
+{
+ cl_short CL_ALIGNED(4) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_short x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_short s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_short lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2;
+#endif
+}cl_short2;
+
+typedef union
+{
+ cl_short CL_ALIGNED(8) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[2];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef cl_short4 cl_short3;
+
+typedef union
+{
+ cl_short CL_ALIGNED(16) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[4];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+ cl_short CL_ALIGNED(32) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[8];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+ __cl_short16 v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+ cl_ushort CL_ALIGNED(4) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ushort x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(8) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[2];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef cl_ushort4 cl_ushort3;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(16) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[4];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(32) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[8];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+ __cl_ushort16 v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+ cl_int CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_int x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_int s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_int lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+ cl_int CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[2];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef cl_int4 cl_int3;
+
+typedef union
+{
+ cl_int CL_ALIGNED(32) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[4];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[2];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+ cl_int CL_ALIGNED(64) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[8];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[4];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8[2];
+#endif
+#if defined( __CL_INT16__ )
+ __cl_int16 v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+ cl_uint CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uint x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[2];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef cl_uint4 cl_uint3;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(32) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[4];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(64) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[8];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+ __cl_uint16 v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+ cl_long CL_ALIGNED(16) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_long x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_long s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_long lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+ cl_long CL_ALIGNED(32) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[2];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef cl_long4 cl_long3;
+
+typedef union
+{
+ cl_long CL_ALIGNED(64) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[4];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+ cl_long CL_ALIGNED(128) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[8];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+ __cl_long16 v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+ cl_ulong CL_ALIGNED(16) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ulong x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(32) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[2];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef cl_ulong4 cl_ulong3;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(64) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[4];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(128) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[8];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+ __cl_ulong16 v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+ cl_float CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_float x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_float s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_float lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+ cl_float CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[2];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef cl_float4 cl_float3;
+
+typedef union
+{
+ cl_float CL_ALIGNED(32) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[4];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+ cl_float CL_ALIGNED(64) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[8];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+ __cl_float16 v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+ cl_double CL_ALIGNED(16) s[2];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_double x, y; };
+ __CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
+ __CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+ cl_double CL_ALIGNED(32) s[4];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; };
+ __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[2];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef cl_double4 cl_double3;
+
+typedef union
+{
+ cl_double CL_ALIGNED(64) s[8];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
+ __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; };
+ __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[4];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+ cl_double CL_ALIGNED(128) s[16];
+#if __CL_HAS_ANON_STRUCT__
+ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[8];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+ __cl_double16 v16;
+#endif
+}cl_double16;
+
+/* Macro to facilitate debugging
+ * Usage:
+ * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
+ * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
+ * Each line thereafter of OpenCL C source must end with: \n\
+ * The last line ends in ";
+ *
+ * Example:
+ *
+ * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ * kernel void foo( int a, float * b ) \n\
+ * { \n\
+ * // my comment \n\
+ * *b[ get_global_id(0)] = a; \n\
+ * } \n\
+ * ";
+ *
+ * This should correctly set up the line, (column) and file information for your source
+ * string so you can do source level debugging.
+ */
+#define __CL_STRINGIFY( _x ) # _x
+#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x )
+#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
+
+#ifdef __cplusplus
+}
+#endif
+
+#undef __CL_HAS_ANON_STRUCT__
+#undef __CL_ANON_STRUCT__
+#if defined( _WIN32) && (_MSC_VER >= 1500)
+#pragma warning( pop )
+#endif
+
+#endif /* __CL_PLATFORM_H */
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
new file mode 100644
index 000000000..fccacd168
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
@@ -0,0 +1,43 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+#ifndef __OPENCL_H
+#define __OPENCL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <cl.h>
+#include <cl_gl.h>
+#include <cl_gl_ext.h>
+#include <cl_ext.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_H */
+
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
new file mode 100644
index 000000000..d6a996377
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
@@ -0,0 +1,83 @@
+// +build cl12
+
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+import (
+ "image"
+ "unsafe"
+)
+
+func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) {
+ format := imageFormat.toCl()
+ desc := imageDesc.toCl()
+ var dataPtr unsafe.Pointer
+ if data != nil {
+ dataPtr = unsafe.Pointer(&data[0])
+ }
+ var err C.cl_int
+ clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ if clBuffer == nil {
+ return nil, ErrUnknown
+ }
+ return newMemObject(clBuffer, len(data)), nil
+}
+
+func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) {
+ format := ImageFormat{channelOrder, channelDataType}
+ desc := ImageDescription{
+ Type: MemObjectTypeImage2D,
+ Width: width,
+ Height: height,
+ }
+ return ctx.CreateImage(flags, format, desc, data)
+}
+
+func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) {
+ switch m := img.(type) {
+ case *image.Gray:
+ format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8}
+ desc := ImageDescription{
+ Type: MemObjectTypeImage2D,
+ Width: m.Bounds().Dx(),
+ Height: m.Bounds().Dy(),
+ RowPitch: m.Stride,
+ }
+ return ctx.CreateImage(flags, format, desc, m.Pix)
+ case *image.RGBA:
+ format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8}
+ desc := ImageDescription{
+ Type: MemObjectTypeImage2D,
+ Width: m.Bounds().Dx(),
+ Height: m.Bounds().Dy(),
+ RowPitch: m.Stride,
+ }
+ return ctx.CreateImage(flags, format, desc, m.Pix)
+ }
+
+ b := img.Bounds()
+ w := b.Dx()
+ h := b.Dy()
+ data := make([]byte, w*h*4)
+ dataOffset := 0
+ for y := 0; y < h; y++ {
+ for x := 0; x < w; x++ {
+ c := img.At(x+b.Min.X, y+b.Min.Y)
+ r, g, b, a := c.RGBA()
+ data[dataOffset] = uint8(r >> 8)
+ data[dataOffset+1] = uint8(g >> 8)
+ data[dataOffset+2] = uint8(b >> 8)
+ data[dataOffset+3] = uint8(a >> 8)
+ dataOffset += 4
+ }
+ }
+ return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data)
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
new file mode 100644
index 000000000..894a775e8
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
@@ -0,0 +1,127 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import (
+ "fmt"
+ "unsafe"
+)
+
+type ErrUnsupportedArgumentType struct {
+ Index int
+ Value interface{}
+}
+
+func (e ErrUnsupportedArgumentType) Error() string {
+ return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value)
+}
+
+type Kernel struct {
+ clKernel C.cl_kernel
+ name string
+}
+
+type LocalBuffer int
+
+func releaseKernel(k *Kernel) {
+ if k.clKernel != nil {
+ C.clReleaseKernel(k.clKernel)
+ k.clKernel = nil
+ }
+}
+
+func (k *Kernel) Release() {
+ releaseKernel(k)
+}
+
+func (k *Kernel) SetArgs(args ...interface{}) error {
+ for index, arg := range args {
+ if err := k.SetArg(index, arg); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (k *Kernel) SetArg(index int, arg interface{}) error {
+ switch val := arg.(type) {
+ case uint8:
+ return k.SetArgUint8(index, val)
+ case int8:
+ return k.SetArgInt8(index, val)
+ case uint32:
+ return k.SetArgUint32(index, val)
+ case uint64:
+ return k.SetArgUint64(index, val)
+ case int32:
+ return k.SetArgInt32(index, val)
+ case float32:
+ return k.SetArgFloat32(index, val)
+ case *MemObject:
+ return k.SetArgBuffer(index, val)
+ case LocalBuffer:
+ return k.SetArgLocal(index, int(val))
+ default:
+ return ErrUnsupportedArgumentType{Index: index, Value: arg}
+ }
+}
+
+func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem))
+}
+
+func (k *Kernel) SetArgFloat32(index int, val float32) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgInt8(index int, val int8) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgUint8(index int, val uint8) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgInt32(index int, val int32) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgUint32(index int, val uint32) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgUint64(index int, val uint64) error {
+ return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
+}
+
+func (k *Kernel) SetArgLocal(index int, size int) error {
+ return k.SetArgUnsafe(index, size, nil)
+}
+
+func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error {
+ //fmt.Println("FUNKY: ", index, argSize)
+ return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg))
+}
+
+func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) {
+ var size C.size_t
+ err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
+ return int(size), toError(err)
+}
+
+func (k *Kernel) WorkGroupSize(device *Device) (int, error) {
+ var size C.size_t
+ err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
+ return int(size), toError(err)
+}
+
+func (k *Kernel) NumArgs() (int, error) {
+ var num C.cl_uint
+ err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil)
+ return int(num), toError(err)
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
new file mode 100644
index 000000000..579946068
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
@@ -0,0 +1,7 @@
+// +build !cl12
+
+package cl
+
+func (k *Kernel) ArgName(index int) (string, error) {
+ return "", ErrUnsupported
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
new file mode 100644
index 000000000..d9e6f3546
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
@@ -0,0 +1,20 @@
+// +build cl12
+
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+import "unsafe"
+
+func (k *Kernel) ArgName(index int) (string, error) {
+ var strC [1024]byte
+ var strN C.size_t
+ if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
+ return "", toError(err)
+ }
+ return string(strC[:strN]), nil
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
new file mode 100644
index 000000000..fd1d162cf
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
@@ -0,0 +1,83 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import "unsafe"
+
+const maxPlatforms = 32
+
+type Platform struct {
+ id C.cl_platform_id
+}
+
+// Obtain the list of platforms available.
+func GetPlatforms() ([]*Platform, error) {
+ var platformIds [maxPlatforms]C.cl_platform_id
+ var nPlatforms C.cl_uint
+ if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ platforms := make([]*Platform, nPlatforms)
+ for i := 0; i < int(nPlatforms); i++ {
+ platforms[i] = &Platform{id: platformIds[i]}
+ }
+ return platforms, nil
+}
+
+func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) {
+ return GetDevices(p, deviceType)
+}
+
+func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) {
+ var strC [2048]byte
+ var strN C.size_t
+ if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
+ return "", toError(err)
+ }
+ return string(strC[:(strN - 1)]), nil
+}
+
+func (p *Platform) Name() string {
+ if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil {
+ panic("Platform.Name() should never fail")
+ } else {
+ return str
+ }
+}
+
+func (p *Platform) Vendor() string {
+ if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil {
+ panic("Platform.Vendor() should never fail")
+ } else {
+ return str
+ }
+}
+
+func (p *Platform) Profile() string {
+ if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil {
+ panic("Platform.Profile() should never fail")
+ } else {
+ return str
+ }
+}
+
+func (p *Platform) Version() string {
+ if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil {
+ panic("Platform.Version() should never fail")
+ } else {
+ return str
+ }
+}
+
+func (p *Platform) Extensions() string {
+ if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil {
+ panic("Platform.Extensions() should never fail")
+ } else {
+ return str
+ }
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
new file mode 100644
index 000000000..e75f7ee0e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
@@ -0,0 +1,105 @@
+package cl
+
+// #include <stdlib.h>
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import (
+ "fmt"
+ "runtime"
+ "unsafe"
+)
+
+type BuildError struct {
+ Message string
+ Device *Device
+}
+
+func (e BuildError) Error() string {
+ if e.Device != nil {
+ return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message)
+ } else {
+ return fmt.Sprintf("cl: build error: %s", e.Message)
+ }
+}
+
+type Program struct {
+ clProgram C.cl_program
+ devices []*Device
+}
+
+func releaseProgram(p *Program) {
+ if p.clProgram != nil {
+ C.clReleaseProgram(p.clProgram)
+ p.clProgram = nil
+ }
+}
+
+func (p *Program) Release() {
+ releaseProgram(p)
+}
+
+func (p *Program) BuildProgram(devices []*Device, options string) error {
+ var cOptions *C.char
+ if options != "" {
+ cOptions = C.CString(options)
+ defer C.free(unsafe.Pointer(cOptions))
+ }
+ var deviceList []C.cl_device_id
+ var deviceListPtr *C.cl_device_id
+ numDevices := C.cl_uint(len(devices))
+ if devices != nil && len(devices) > 0 {
+ deviceList = buildDeviceIdList(devices)
+ deviceListPtr = &deviceList[0]
+ }
+ if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS {
+ buffer := make([]byte, 4096)
+ var bLen C.size_t
+ var err C.cl_int
+
+ for _, dev := range p.devices {
+ for i := 2; i >= 0; i-- {
+ err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen)
+ if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 {
+ // INVALID_VALUE probably means our buffer isn't large enough
+ buffer = make([]byte, bLen)
+ } else {
+ break
+ }
+ }
+ if err != C.CL_SUCCESS {
+ return toError(err)
+ }
+
+ if bLen > 1 {
+ return BuildError{
+ Device: dev,
+ Message: string(buffer[:bLen-1]),
+ }
+ }
+ }
+
+ return BuildError{
+ Device: nil,
+ Message: "build failed and produced no log entries",
+ }
+ }
+ return nil
+}
+
+func (p *Program) CreateKernel(name string) (*Kernel, error) {
+ cName := C.CString(name)
+ defer C.free(unsafe.Pointer(cName))
+ var err C.cl_int
+ clKernel := C.clCreateKernel(p.clProgram, cName, &err)
+ if err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ kernel := &Kernel{clKernel: clKernel, name: name}
+ runtime.SetFinalizer(kernel, releaseKernel)
+ return kernel, nil
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
new file mode 100644
index 000000000..7762746da
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
@@ -0,0 +1,193 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import "unsafe"
+
+type CommandQueueProperty int
+
+const (
+ CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE
+)
+
+type CommandQueue struct {
+ clQueue C.cl_command_queue
+ device *Device
+}
+
+func releaseCommandQueue(q *CommandQueue) {
+ if q.clQueue != nil {
+ C.clReleaseCommandQueue(q.clQueue)
+ q.clQueue = nil
+ }
+}
+
+// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
+func (q *CommandQueue) Release() {
+ releaseCommandQueue(q)
+}
+
+// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
+func (q *CommandQueue) Finish() error {
+ return toError(C.clFinish(q.clQueue))
+}
+
+// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
+func (q *CommandQueue) Flush() error {
+ return toError(C.clFlush(q.clQueue))
+}
+
+// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
+func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
+ var event C.cl_event
+ var err C.cl_int
+ ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
+ if err != C.CL_SUCCESS {
+ return nil, nil, toError(err)
+ }
+ ev := newEvent(event)
+ if ptr == nil {
+ return nil, ev, ErrUnknown
+ }
+ return &MappedMemObject{ptr: ptr, size: size}, ev, nil
+}
+
+// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
+func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
+ cOrigin := sizeT3(origin)
+ cRegion := sizeT3(region)
+ var event C.cl_event
+ var err C.cl_int
+ var rowPitch, slicePitch C.size_t
+ ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
+ if err != C.CL_SUCCESS {
+ return nil, nil, toError(err)
+ }
+ ev := newEvent(event)
+ if ptr == nil {
+ return nil, ev, ErrUnknown
+ }
+ size := 0 // TODO: could calculate this
+ return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil
+}
+
+// Enqueues a command to unmap a previously mapped region of a memory object.
+func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS {
+ return nil, toError(err)
+ }
+ return newEvent(event), nil
+}
+
+// Enqueues a command to copy a buffer object to another buffer object.
+func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+// Enqueue commands to write to a buffer object from host memory.
+func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
+ dataPtr := unsafe.Pointer(&data[0])
+ dataSize := int(unsafe.Sizeof(data[0])) * len(data)
+ return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
+}
+
+// Enqueue commands to read from a buffer object to host memory.
+func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
+ dataPtr := unsafe.Pointer(&data[0])
+ dataSize := int(unsafe.Sizeof(data[0])) * len(data)
+ return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
+}
+
+// Enqueues a command to execute a kernel on a device.
+func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) {
+ workDim := len(globalWorkSize)
+ var globalWorkOffsetList []C.size_t
+ var globalWorkOffsetPtr *C.size_t
+ if globalWorkOffset != nil {
+ globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset))
+ for i, off := range globalWorkOffset {
+ globalWorkOffsetList[i] = C.size_t(off)
+ }
+ globalWorkOffsetPtr = &globalWorkOffsetList[0]
+ }
+ var globalWorkSizeList []C.size_t
+ var globalWorkSizePtr *C.size_t
+ if globalWorkSize != nil {
+ globalWorkSizeList = make([]C.size_t, len(globalWorkSize))
+ for i, off := range globalWorkSize {
+ globalWorkSizeList[i] = C.size_t(off)
+ }
+ globalWorkSizePtr = &globalWorkSizeList[0]
+ }
+ var localWorkSizeList []C.size_t
+ var localWorkSizePtr *C.size_t
+ if localWorkSize != nil {
+ localWorkSizeList = make([]C.size_t, len(localWorkSize))
+ for i, off := range localWorkSize {
+ localWorkSizeList[i] = C.size_t(off)
+ }
+ localWorkSizePtr = &localWorkSizeList[0]
+ }
+ var event C.cl_event
+ err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+// Enqueues a command to read from a 2D or 3D image object to host memory.
+func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
+ cOrigin := sizeT3(origin)
+ cRegion := sizeT3(region)
+ var event C.cl_event
+ err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+// Enqueues a command to write from a 2D or 3D image object to host memory.
+func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
+ cOrigin := sizeT3(origin)
+ cRegion := sizeT3(region)
+ var event C.cl_event
+ err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+// A synchronization point that enqueues a barrier operation.
+func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
+
+// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
+func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) {
+ var event C.cl_event
+ err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
+ return newEvent(event), err
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
new file mode 100644
index 000000000..00c846ce7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
@@ -0,0 +1,487 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+import (
+ "errors"
+ "fmt"
+ "reflect"
+ "runtime"
+ "strings"
+ "unsafe"
+)
+
+var (
+ ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
+)
+
+type ErrOther int
+
+func (e ErrOther) Error() string {
+ return fmt.Sprintf("cl: error %d", int(e))
+}
+
+var (
+ ErrDeviceNotFound = errors.New("cl: Device Not Found")
+ ErrDeviceNotAvailable = errors.New("cl: Device Not Available")
+ ErrCompilerNotAvailable = errors.New("cl: Compiler Not Available")
+ ErrMemObjectAllocationFailure = errors.New("cl: Mem Object Allocation Failure")
+ ErrOutOfResources = errors.New("cl: Out Of Resources")
+ ErrOutOfHostMemory = errors.New("cl: Out Of Host Memory")
+ ErrProfilingInfoNotAvailable = errors.New("cl: Profiling Info Not Available")
+ ErrMemCopyOverlap = errors.New("cl: Mem Copy Overlap")
+ ErrImageFormatMismatch = errors.New("cl: Image Format Mismatch")
+ ErrImageFormatNotSupported = errors.New("cl: Image Format Not Supported")
+ ErrBuildProgramFailure = errors.New("cl: Build Program Failure")
+ ErrMapFailure = errors.New("cl: Map Failure")
+ ErrMisalignedSubBufferOffset = errors.New("cl: Misaligned Sub Buffer Offset")
+ ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List")
+ ErrCompileProgramFailure = errors.New("cl: Compile Program Failure")
+ ErrLinkerNotAvailable = errors.New("cl: Linker Not Available")
+ ErrLinkProgramFailure = errors.New("cl: Link Program Failure")
+ ErrDevicePartitionFailed = errors.New("cl: Device Partition Failed")
+ ErrKernelArgInfoNotAvailable = errors.New("cl: Kernel Arg Info Not Available")
+ ErrInvalidValue = errors.New("cl: Invalid Value")
+ ErrInvalidDeviceType = errors.New("cl: Invalid Device Type")
+ ErrInvalidPlatform = errors.New("cl: Invalid Platform")
+ ErrInvalidDevice = errors.New("cl: Invalid Device")
+ ErrInvalidContext = errors.New("cl: Invalid Context")
+ ErrInvalidQueueProperties = errors.New("cl: Invalid Queue Properties")
+ ErrInvalidCommandQueue = errors.New("cl: Invalid Command Queue")
+ ErrInvalidHostPtr = errors.New("cl: Invalid Host Ptr")
+ ErrInvalidMemObject = errors.New("cl: Invalid Mem Object")
+ ErrInvalidImageFormatDescriptor = errors.New("cl: Invalid Image Format Descriptor")
+ ErrInvalidImageSize = errors.New("cl: Invalid Image Size")
+ ErrInvalidSampler = errors.New("cl: Invalid Sampler")
+ ErrInvalidBinary = errors.New("cl: Invalid Binary")
+ ErrInvalidBuildOptions = errors.New("cl: Invalid Build Options")
+ ErrInvalidProgram = errors.New("cl: Invalid Program")
+ ErrInvalidProgramExecutable = errors.New("cl: Invalid Program Executable")
+ ErrInvalidKernelName = errors.New("cl: Invalid Kernel Name")
+ ErrInvalidKernelDefinition = errors.New("cl: Invalid Kernel Definition")
+ ErrInvalidKernel = errors.New("cl: Invalid Kernel")
+ ErrInvalidArgIndex = errors.New("cl: Invalid Arg Index")
+ ErrInvalidArgValue = errors.New("cl: Invalid Arg Value")
+ ErrInvalidArgSize = errors.New("cl: Invalid Arg Size")
+ ErrInvalidKernelArgs = errors.New("cl: Invalid Kernel Args")
+ ErrInvalidWorkDimension = errors.New("cl: Invalid Work Dimension")
+ ErrInvalidWorkGroupSize = errors.New("cl: Invalid Work Group Size")
+ ErrInvalidWorkItemSize = errors.New("cl: Invalid Work Item Size")
+ ErrInvalidGlobalOffset = errors.New("cl: Invalid Global Offset")
+ ErrInvalidEventWaitList = errors.New("cl: Invalid Event Wait List")
+ ErrInvalidEvent = errors.New("cl: Invalid Event")
+ ErrInvalidOperation = errors.New("cl: Invalid Operation")
+ ErrInvalidGlObject = errors.New("cl: Invalid Gl Object")
+ ErrInvalidBufferSize = errors.New("cl: Invalid Buffer Size")
+ ErrInvalidMipLevel = errors.New("cl: Invalid Mip Level")
+ ErrInvalidGlobalWorkSize = errors.New("cl: Invalid Global Work Size")
+ ErrInvalidProperty = errors.New("cl: Invalid Property")
+ ErrInvalidImageDescriptor = errors.New("cl: Invalid Image Descriptor")
+ ErrInvalidCompilerOptions = errors.New("cl: Invalid Compiler Options")
+ ErrInvalidLinkerOptions = errors.New("cl: Invalid Linker Options")
+ ErrInvalidDevicePartitionCount = errors.New("cl: Invalid Device Partition Count")
+)
+var errorMap = map[C.cl_int]error{
+ C.CL_SUCCESS: nil,
+ C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound,
+ C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable,
+ C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable,
+ C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure,
+ C.CL_OUT_OF_RESOURCES: ErrOutOfResources,
+ C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory,
+ C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable,
+ C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap,
+ C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch,
+ C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported,
+ C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure,
+ C.CL_MAP_FAILURE: ErrMapFailure,
+ C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset,
+ C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList,
+ C.CL_INVALID_VALUE: ErrInvalidValue,
+ C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType,
+ C.CL_INVALID_PLATFORM: ErrInvalidPlatform,
+ C.CL_INVALID_DEVICE: ErrInvalidDevice,
+ C.CL_INVALID_CONTEXT: ErrInvalidContext,
+ C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties,
+ C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue,
+ C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr,
+ C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject,
+ C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor,
+ C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize,
+ C.CL_INVALID_SAMPLER: ErrInvalidSampler,
+ C.CL_INVALID_BINARY: ErrInvalidBinary,
+ C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions,
+ C.CL_INVALID_PROGRAM: ErrInvalidProgram,
+ C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable,
+ C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName,
+ C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition,
+ C.CL_INVALID_KERNEL: ErrInvalidKernel,
+ C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex,
+ C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue,
+ C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize,
+ C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs,
+ C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension,
+ C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize,
+ C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize,
+ C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset,
+ C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList,
+ C.CL_INVALID_EVENT: ErrInvalidEvent,
+ C.CL_INVALID_OPERATION: ErrInvalidOperation,
+ C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject,
+ C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize,
+ C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel,
+ C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize,
+ C.CL_INVALID_PROPERTY: ErrInvalidProperty,
+}
+
+func toError(code C.cl_int) error {
+ if err, ok := errorMap[code]; ok {
+ return err
+ }
+ return ErrOther(code)
+}
+
+type LocalMemType int
+
+const (
+ LocalMemTypeNone LocalMemType = C.CL_NONE
+ LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL
+ LocalMemTypeLocal LocalMemType = C.CL_LOCAL
+)
+
+var localMemTypeMap = map[LocalMemType]string{
+ LocalMemTypeNone: "None",
+ LocalMemTypeGlobal: "Global",
+ LocalMemTypeLocal: "Local",
+}
+
+func (t LocalMemType) String() string {
+ name := localMemTypeMap[t]
+ if name == "" {
+ name = "Unknown"
+ }
+ return name
+}
+
+type ExecCapability int
+
+const (
+ ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
+ ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
+)
+
+func (ec ExecCapability) String() string {
+ var parts []string
+ if ec&ExecCapabilityKernel != 0 {
+ parts = append(parts, "Kernel")
+ }
+ if ec&ExecCapabilityNativeKernel != 0 {
+ parts = append(parts, "NativeKernel")
+ }
+ if parts == nil {
+ return ""
+ }
+ return strings.Join(parts, "|")
+}
+
+type MemCacheType int
+
+const (
+ MemCacheTypeNone MemCacheType = C.CL_NONE
+ MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE
+ MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE
+)
+
+func (ct MemCacheType) String() string {
+ switch ct {
+ case MemCacheTypeNone:
+ return "None"
+ case MemCacheTypeReadOnlyCache:
+ return "ReadOnly"
+ case MemCacheTypeReadWriteCache:
+ return "ReadWrite"
+ }
+ return fmt.Sprintf("Unknown(%x)", int(ct))
+}
+
+type MemFlag int
+
+const (
+ MemReadWrite MemFlag = C.CL_MEM_READ_WRITE
+ MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY
+ MemReadOnly MemFlag = C.CL_MEM_READ_ONLY
+ MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR
+ MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR
+ MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR
+
+ MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY
+ MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY
+ MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS
+)
+
+type MemObjectType int
+
+const (
+ MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER
+ MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D
+ MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D
+)
+
+type MapFlag int
+
+const (
+ // This flag specifies that the region being mapped in the memory object is being mapped for reading.
+ MapFlagRead MapFlag = C.CL_MAP_READ
+ MapFlagWrite MapFlag = C.CL_MAP_WRITE
+ MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
+)
+
+func (mf MapFlag) toCl() C.cl_map_flags {
+ return C.cl_map_flags(mf)
+}
+
+type ChannelOrder int
+
+const (
+ ChannelOrderR ChannelOrder = C.CL_R
+ ChannelOrderA ChannelOrder = C.CL_A
+ ChannelOrderRG ChannelOrder = C.CL_RG
+ ChannelOrderRA ChannelOrder = C.CL_RA
+ ChannelOrderRGB ChannelOrder = C.CL_RGB
+ ChannelOrderRGBA ChannelOrder = C.CL_RGBA
+ ChannelOrderBGRA ChannelOrder = C.CL_BGRA
+ ChannelOrderARGB ChannelOrder = C.CL_ARGB
+ ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY
+ ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE
+ ChannelOrderRx ChannelOrder = C.CL_Rx
+ ChannelOrderRGx ChannelOrder = C.CL_RGx
+ ChannelOrderRGBx ChannelOrder = C.CL_RGBx
+)
+
+var channelOrderNameMap = map[ChannelOrder]string{
+ ChannelOrderR: "R",
+ ChannelOrderA: "A",
+ ChannelOrderRG: "RG",
+ ChannelOrderRA: "RA",
+ ChannelOrderRGB: "RGB",
+ ChannelOrderRGBA: "RGBA",
+ ChannelOrderBGRA: "BGRA",
+ ChannelOrderARGB: "ARGB",
+ ChannelOrderIntensity: "Intensity",
+ ChannelOrderLuminance: "Luminance",
+ ChannelOrderRx: "Rx",
+ ChannelOrderRGx: "RGx",
+ ChannelOrderRGBx: "RGBx",
+}
+
+func (co ChannelOrder) String() string {
+ name := channelOrderNameMap[co]
+ if name == "" {
+ name = fmt.Sprintf("Unknown(%x)", int(co))
+ }
+ return name
+}
+
+type ChannelDataType int
+
+const (
+ ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8
+ ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16
+ ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8
+ ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16
+ ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565
+ ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555
+ ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010
+ ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8
+ ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16
+ ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32
+ ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8
+ ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16
+ ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32
+ ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT
+ ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT
+)
+
+var channelDataTypeNameMap = map[ChannelDataType]string{
+ ChannelDataTypeSNormInt8: "SNormInt8",
+ ChannelDataTypeSNormInt16: "SNormInt16",
+ ChannelDataTypeUNormInt8: "UNormInt8",
+ ChannelDataTypeUNormInt16: "UNormInt16",
+ ChannelDataTypeUNormShort565: "UNormShort565",
+ ChannelDataTypeUNormShort555: "UNormShort555",
+ ChannelDataTypeUNormInt101010: "UNormInt101010",
+ ChannelDataTypeSignedInt8: "SignedInt8",
+ ChannelDataTypeSignedInt16: "SignedInt16",
+ ChannelDataTypeSignedInt32: "SignedInt32",
+ ChannelDataTypeUnsignedInt8: "UnsignedInt8",
+ ChannelDataTypeUnsignedInt16: "UnsignedInt16",
+ ChannelDataTypeUnsignedInt32: "UnsignedInt32",
+ ChannelDataTypeHalfFloat: "HalfFloat",
+ ChannelDataTypeFloat: "Float",
+}
+
+func (ct ChannelDataType) String() string {
+ name := channelDataTypeNameMap[ct]
+ if name == "" {
+ name = fmt.Sprintf("Unknown(%x)", int(ct))
+ }
+ return name
+}
+
+type ImageFormat struct {
+ ChannelOrder ChannelOrder
+ ChannelDataType ChannelDataType
+}
+
+func (f ImageFormat) toCl() C.cl_image_format {
+ var format C.cl_image_format
+ format.image_channel_order = C.cl_channel_order(f.ChannelOrder)
+ format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType)
+ return format
+}
+
+type ProfilingInfo int
+
+const (
+ // A 64-bit value that describes the current device time counter in
+ // nanoseconds when the command identified by event is enqueued in
+ // a command-queue by the host.
+ ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED
+ // A 64-bit value that describes the current device time counter in
+ // nanoseconds when the command identified by event that has been
+ // enqueued is submitted by the host to the device associated with the command-queue.
+ ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT
+ // A 64-bit value that describes the current device time counter in
+ // nanoseconds when the command identified by event starts execution on the device.
+ ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START
+ // A 64-bit value that describes the current device time counter in
+ // nanoseconds when the command identified by event has finished
+ // execution on the device.
+ ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END
+)
+
+type CommmandExecStatus int
+
+const (
+ CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE
+ CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING
+ CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED
+ CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED
+)
+
+type Event struct {
+ clEvent C.cl_event
+}
+
+func releaseEvent(ev *Event) {
+ if ev.clEvent != nil {
+ C.clReleaseEvent(ev.clEvent)
+ ev.clEvent = nil
+ }
+}
+
+func (e *Event) Release() {
+ releaseEvent(e)
+}
+
+func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) {
+ var paramValue C.cl_ulong
+ if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(&paramValue), nil); err != C.CL_SUCCESS {
+ return 0, toError(err)
+ }
+ return int64(paramValue), nil
+}
+
+// Sets the execution status of a user event object.
+//
+// `status` specifies the new execution status to be set and
+// can be CL_COMPLETE or a negative integer value to indicate
+// an error. A negative integer value causes all enqueued commands
+// that wait on this user event to be terminated. clSetUserEventStatus
+// can only be called once to change the execution status of event.
+func (e *Event) SetUserEventStatus(status int) error {
+ return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status)))
+}
+
+// Waits on the host thread for commands identified by event objects in
+// events to complete. A command is considered complete if its execution
+// status is CL_COMPLETE or a negative value. The events specified in
+// event_list act as synchronization points.
+//
+// If the cl_khr_gl_event extension is enabled, event objects can also be
+// used to reflect the status of an OpenGL sync object. The sync object
+// in turn refers to a fence command executing in an OpenGL command
+// stream. This provides another method of coordinating sharing of buffers
+// and images between OpenGL and OpenCL.
+func WaitForEvents(events []*Event) error {
+ return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events)))
+}
+
+func newEvent(clEvent C.cl_event) *Event {
+ ev := &Event{clEvent: clEvent}
+ runtime.SetFinalizer(ev, releaseEvent)
+ return ev
+}
+
+func eventListPtr(el []*Event) *C.cl_event {
+ if el == nil {
+ return nil
+ }
+ elist := make([]C.cl_event, len(el))
+ for i, e := range el {
+ elist[i] = e.clEvent
+ }
+ return (*C.cl_event)(&elist[0])
+}
+
+func clBool(b bool) C.cl_bool {
+ if b {
+ return C.CL_TRUE
+ }
+ return C.CL_FALSE
+}
+
+func sizeT3(i3 [3]int) [3]C.size_t {
+ var val [3]C.size_t
+ val[0] = C.size_t(i3[0])
+ val[1] = C.size_t(i3[1])
+ val[2] = C.size_t(i3[2])
+ return val
+}
+
+type MappedMemObject struct {
+ ptr unsafe.Pointer
+ size int
+ rowPitch int
+ slicePitch int
+}
+
+func (mb *MappedMemObject) ByteSlice() []byte {
+ var byteSlice []byte
+ sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
+ sliceHeader.Cap = mb.size
+ sliceHeader.Len = mb.size
+ sliceHeader.Data = uintptr(mb.ptr)
+ return byteSlice
+}
+
+func (mb *MappedMemObject) Ptr() unsafe.Pointer {
+ return mb.ptr
+}
+
+func (mb *MappedMemObject) Size() int {
+ return mb.size
+}
+
+func (mb *MappedMemObject) RowPitch() int {
+ return mb.rowPitch
+}
+
+func (mb *MappedMemObject) SlicePitch() int {
+ return mb.slicePitch
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
new file mode 100644
index 000000000..58023cb60
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
@@ -0,0 +1,71 @@
+// +build cl12
+
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+const (
+ ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24
+ ChannelOrderDepth ChannelOrder = C.CL_DEPTH
+ ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL
+ MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
+ MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
+ MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
+ MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D
+ MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY
+ MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER
+ MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY
+ // This flag specifies that the region being mapped in the memory object is being mapped for writing.
+ //
+ // The contents of the region being mapped are to be discarded. This is typically the case when the
+ // region being mapped is overwritten by the host. This flag allows the implementation to no longer
+ // guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
+ // latest bits in the region being mapped which can be a significant performance enhancement.
+ MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
+)
+
+func init() {
+ errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure
+ errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed
+ errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions
+ errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount
+ errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor
+ errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions
+ errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable
+ errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure
+ errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable
+ channelOrderNameMap[ChannelOrderDepth] = "Depth"
+ channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil"
+ channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24"
+}
+
+type ImageDescription struct {
+ Type MemObjectType
+ Width, Height, Depth int
+ ArraySize, RowPitch, SlicePitch int
+ NumMipLevels, NumSamples int
+ Buffer *MemObject
+}
+
+func (d ImageDescription) toCl() C.cl_image_desc {
+ var desc C.cl_image_desc
+ desc.image_type = C.cl_mem_object_type(d.Type)
+ desc.image_width = C.size_t(d.Width)
+ desc.image_height = C.size_t(d.Height)
+ desc.image_depth = C.size_t(d.Depth)
+ desc.image_array_size = C.size_t(d.ArraySize)
+ desc.image_row_pitch = C.size_t(d.RowPitch)
+ desc.image_slice_pitch = C.size_t(d.SlicePitch)
+ desc.num_mip_levels = C.cl_uint(d.NumMipLevels)
+ desc.num_samples = C.cl_uint(d.NumSamples)
+ desc.buffer = nil
+ if d.Buffer != nil {
+ desc.buffer = d.Buffer.clMem
+ }
+ return desc
+}
diff --git a/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
new file mode 100644
index 000000000..ddcf74906
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
@@ -0,0 +1,45 @@
+package cl
+
+// #ifdef __APPLE__
+// #include "OpenCL/opencl.h"
+// #else
+// #include "cl.h"
+// #endif
+import "C"
+
+// Extension: cl_APPLE_fixed_alpha_channel_orders
+//
+// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
+// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
+// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
+// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
+// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
+// image formats.
+const (
+ ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
+ ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
+)
+
+// Extension: cl_APPLE_biased_fixed_point_image_formats
+//
+// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
+// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
+// float to this fixed point format is defined as follows:
+//
+// ushort float_to_sfixed14( float x ){
+// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
+// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
+// return convert_ushort_sat(i); // clamp to destination size
+// }
+//
+// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
+// the CL_RGBA channel layout.
+const (
+ ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
+)
+
+func init() {
+ channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple"
+ channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple"
+ channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple"
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
index d0864da7f..ddb8ba583 100644
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
@@ -30,8 +30,8 @@ import (
)
var (
- minDifficulty = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
- sharedLight = new(Light)
+ maxUint256 = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
+ sharedLight = new(Light)
)
const (
@@ -140,7 +140,7 @@ func (l *Light) Verify(block pow.Block) bool {
// the finalizer before the call completes.
_ = cache
// The actual check.
- target := new(big.Int).Div(minDifficulty, difficulty)
+ target := new(big.Int).Div(maxUint256, difficulty)
return h256ToHash(ret.result).Big().Cmp(target) <= 0
}
@@ -199,7 +199,7 @@ func (d *dag) generate() {
if d.dir == "" {
d.dir = DefaultDir
}
- glog.V(logger.Info).Infof("Generating DAG for epoch %d (%x)", d.epoch, seedHash)
+ glog.V(logger.Info).Infof("Generating DAG for epoch %d (size %d) (%x)", d.epoch, dagSize, seedHash)
// Generate a temporary cache.
// TODO: this could share the cache with Light
cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0])))
@@ -220,14 +220,18 @@ func (d *dag) generate() {
})
}
-func freeDAG(h *dag) {
- C.ethash_full_delete(h.ptr)
- h.ptr = nil
+func freeDAG(d *dag) {
+ C.ethash_full_delete(d.ptr)
+ d.ptr = nil
+}
+
+func (d *dag) Ptr() unsafe.Pointer {
+ return unsafe.Pointer(d.ptr.data)
}
//export ethashGoCallback
func ethashGoCallback(percent C.unsigned) C.int {
- glog.V(logger.Info).Infof("Still generating DAG: %d%%", percent)
+ glog.V(logger.Info).Infof("Generating DAG: %d%%", percent)
return 0
}
@@ -273,7 +277,7 @@ func (pow *Full) getDAG(blockNum uint64) (d *dag) {
return d
}
-func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mixDigest []byte) {
+func (pow *Full) Search(block pow.Block, stop <-chan struct{}, index int) (nonce uint64, mixDigest []byte) {
dag := pow.getDAG(block.NumberU64())
r := rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -286,7 +290,7 @@ func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mi
nonce = uint64(r.Int63())
hash := hashToH256(block.HashNoNonce())
- target := new(big.Int).Div(minDifficulty, diff)
+ target := new(big.Int).Div(maxUint256, diff)
for {
select {
case <-stop:
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
new file mode 100644
index 000000000..332b7f524
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
@@ -0,0 +1,629 @@
+// Copyright 2014 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// +build opencl
+
+package ethash
+
+//#cgo LDFLAGS: -w
+//#include <stdint.h>
+//#include <string.h>
+//#include "src/libethash/internal.h"
+import "C"
+
+import (
+ crand "crypto/rand"
+ "encoding/binary"
+ "fmt"
+ "math"
+ "math/big"
+ mrand "math/rand"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+ "unsafe"
+
+ "github.com/Gustav-Simonsson/go-opencl/cl"
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/pow"
+)
+
+/*
+
+ This code have two main entry points:
+
+ 1. The initCL(...) function configures one or more OpenCL device
+ (for now only GPU) and loads the Ethash DAG onto device memory
+
+ 2. The Search(...) function loads a Ethash nonce into device(s) memory and
+ executes the Ethash OpenCL kernel.
+
+ Throughout the code, we refer to "host memory" and "device memory".
+ For most systems (e.g. regular PC GPU miner) the host memory is RAM and
+ device memory is the GPU global memory (e.g. GDDR5).
+
+ References mentioned in code comments:
+
+ 1. https://github.com/ethereum/wiki/wiki/Ethash
+ 2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
+ 3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
+ 4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
+
+*/
+
+type OpenCLDevice struct {
+ deviceId int
+ device *cl.Device
+ openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
+ openCL12 bool
+
+ dagBuf *cl.MemObject // Ethash full DAG in device mem
+ headerBuf *cl.MemObject // Hash of block-to-mine in device mem
+ searchBuffers []*cl.MemObject
+
+ searchKernel *cl.Kernel
+ hashKernel *cl.Kernel
+
+ queue *cl.CommandQueue
+ ctx *cl.Context
+ workGroupSize int
+
+ nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
+ result common.Hash
+}
+
+type OpenCLMiner struct {
+ mu sync.Mutex
+
+ ethash *Ethash // Ethash full DAG & cache in host mem
+
+ deviceIds []int
+ devices []*OpenCLDevice
+
+ dagSize uint64
+
+ hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
+}
+
+type pendingSearch struct {
+ bufIndex uint32
+ startNonce uint64
+}
+
+const (
+ SIZEOF_UINT32 = 4
+
+ // See [1]
+ ethashMixBytesLen = 128
+ ethashAccesses = 64
+
+ // See [4]
+ workGroupSize = 32 // must be multiple of 8
+ maxSearchResults = 63
+ searchBufSize = 2
+ globalWorkSize = 1024 * 256
+)
+
+func NewCL(deviceIds []int) *OpenCLMiner {
+ ids := make([]int, len(deviceIds))
+ copy(ids, deviceIds)
+ return &OpenCLMiner{
+ ethash: New(),
+ dagSize: 0, // to see if we need to update DAG.
+ deviceIds: ids,
+ }
+}
+
+func PrintDevices() {
+ fmt.Println("=============================================")
+ fmt.Println("============ OpenCL Device Info =============")
+ fmt.Println("=============================================")
+
+ var found []*cl.Device
+
+ platforms, err := cl.GetPlatforms()
+ if err != nil {
+ fmt.Println("Plaform error (check your OpenCL installation): %v", err)
+ return
+ }
+
+ for i, p := range platforms {
+ fmt.Println("Platform id ", i)
+ fmt.Println("Platform Name ", p.Name())
+ fmt.Println("Platform Vendor ", p.Vendor())
+ fmt.Println("Platform Version ", p.Version())
+ fmt.Println("Platform Extensions ", p.Extensions())
+ fmt.Println("Platform Profile ", p.Profile())
+ fmt.Println("")
+
+ devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
+ if err != nil {
+ fmt.Println("Device error (check your GPU drivers) :", err)
+ return
+ }
+
+ for _, d := range devices {
+ fmt.Println("Device OpenCL id ", i)
+ fmt.Println("Device id for mining ", len(found))
+ fmt.Println("Device Name ", d.Name())
+ fmt.Println("Vendor ", d.Vendor())
+ fmt.Println("Version ", d.Version())
+ fmt.Println("Driver version ", d.DriverVersion())
+ fmt.Println("Address bits ", d.AddressBits())
+ fmt.Println("Max clock freq ", d.MaxClockFrequency())
+ fmt.Println("Global mem size ", d.GlobalMemSize())
+ fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
+ fmt.Println("Max mem alloc size ", d.MaxMemAllocSize())
+ fmt.Println("Max compute units ", d.MaxComputeUnits())
+ fmt.Println("Max work group size ", d.MaxWorkGroupSize())
+ fmt.Println("Max work item sizes ", d.MaxWorkItemSizes())
+ fmt.Println("=============================================")
+
+ found = append(found, d)
+ }
+ }
+ if len(found) == 0 {
+ fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
+ } else {
+ var idsFormat string
+ for i := 0; i < len(found); i++ {
+ idsFormat += strconv.Itoa(i)
+ if i != len(found)-1 {
+ idsFormat += ","
+ }
+ }
+ fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found))
+ fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat)
+ }
+}
+
+// See [2]. We basically do the same here, but the Go OpenCL bindings
+// are at a slightly higher abtraction level.
+func InitCL(blockNum uint64, c *OpenCLMiner) error {
+ platforms, err := cl.GetPlatforms()
+ if err != nil {
+ return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
+ }
+
+ var devices []*cl.Device
+ for _, p := range platforms {
+ ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
+ if err != nil {
+ return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
+ }
+ for _, d := range ds {
+ devices = append(devices, d)
+ }
+ }
+
+ pow := New()
+ _ = pow.getDAG(blockNum) // generates DAG if we don't have it
+ pow.Light.getCache(blockNum) // and cache
+
+ c.ethash = pow
+ dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
+ c.dagSize = dagSize
+
+ for _, id := range c.deviceIds {
+ if id > len(devices)-1 {
+ return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
+ } else {
+ err := initCLDevice(id, devices[id], c)
+ if err != nil {
+ return err
+ }
+ }
+ }
+ if len(c.devices) == 0 {
+ return fmt.Errorf("No GPU devices found")
+ }
+ return nil
+}
+
+func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
+ devMaxAlloc := uint64(device.MaxMemAllocSize())
+ devGlobalMem := uint64(device.GlobalMemSize())
+
+ // TODO: more fine grained version logic
+ if device.Version() == "OpenCL 1.0" {
+ fmt.Println("Device OpenCL version not supported: ", device.Version())
+ return fmt.Errorf("opencl version not supported")
+ }
+
+ var cl11, cl12 bool
+ if device.Version() == "OpenCL 1.1" {
+ cl11 = true
+ }
+ if device.Version() == "OpenCL 1.2" {
+ cl12 = true
+ }
+
+ // log warnings but carry on; some device drivers report inaccurate values
+ if c.dagSize > devGlobalMem {
+ fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
+ }
+
+ if c.dagSize > devMaxAlloc {
+ fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
+ fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
+ }
+
+ fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
+
+ context, err := cl.CreateContext([]*cl.Device{device})
+ if err != nil {
+ return fmt.Errorf("failed creating context:", err)
+ }
+
+ // TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
+ queue, err := context.CreateCommandQueue(device, 0)
+ if err != nil {
+ return fmt.Errorf("command queue err:", err)
+ }
+
+ // See [4] section 3.2 and [3] "clBuildProgram".
+ // The OpenCL kernel code is compiled at run-time.
+ kvs := make(map[string]string, 4)
+ kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
+ kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
+ kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
+ kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
+ kernelCode := replaceWords(kernel, kvs)
+
+ program, err := context.CreateProgramWithSource([]string{kernelCode})
+ if err != nil {
+ return fmt.Errorf("program err:", err)
+ }
+
+ /* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
+ see AMD OpenCL programming guide section 4.2
+
+ export in shell before running:
+ export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
+ export CPU_MAX_COMPUTE_UNITS=1
+
+ buildOpts := "-g -cl-opt-disable"
+
+ */
+ buildOpts := ""
+ err = program.BuildProgram([]*cl.Device{device}, buildOpts)
+ if err != nil {
+ return fmt.Errorf("program build err:", err)
+ }
+
+ var searchKernelName, hashKernelName string
+ searchKernelName = "ethash_search"
+ hashKernelName = "ethash_hash"
+
+ searchKernel, err := program.CreateKernel(searchKernelName)
+ hashKernel, err := program.CreateKernel(hashKernelName)
+ if err != nil {
+ return fmt.Errorf("kernel err:", err)
+ }
+
+ // TODO: when this DAG size appears, patch the Go bindings
+ // (context.go) to work with uint64 as size_t
+ if c.dagSize > math.MaxInt32 {
+ fmt.Println("DAG too large for allocation.")
+ return fmt.Errorf("DAG too large for alloc")
+ }
+
+ // TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
+ // TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
+ dagBuf := *(new(*cl.MemObject))
+ dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
+ if err != nil {
+ return fmt.Errorf("allocating dag buf failed: ", err)
+ }
+
+ // write DAG to device mem
+ dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
+ _, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
+ if err != nil {
+ return fmt.Errorf("writing to dag buf failed: ", err)
+ }
+
+ searchBuffers := make([]*cl.MemObject, searchBufSize)
+ for i := 0; i < searchBufSize; i++ {
+ searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
+ if err != nil {
+ return fmt.Errorf("search buffer err:", err)
+ }
+ searchBuffers[i] = searchBuff
+ }
+
+ headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
+ if err != nil {
+ return fmt.Errorf("header buffer err:", err)
+ }
+
+ // Unique, random nonces are crucial for mining efficieny.
+ // While we do not need cryptographically secure PRNG for nonces,
+ // we want to have uniform distribution and minimal repetition of nonces.
+ // We could guarantee strict uniqueness of nonces by generating unique ranges,
+ // but a int64 seed from crypto/rand should be good enough.
+ // we then use math/rand for speed and to avoid draining OS entropy pool
+ seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
+ if err != nil {
+ return err
+ }
+ nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
+
+ deviceStruct := &OpenCLDevice{
+ deviceId: deviceId,
+ device: device,
+ openCL11: cl11,
+ openCL12: cl12,
+
+ dagBuf: dagBuf,
+ headerBuf: headerBuf,
+ searchBuffers: searchBuffers,
+
+ searchKernel: searchKernel,
+ hashKernel: hashKernel,
+
+ queue: queue,
+ ctx: context,
+
+ workGroupSize: workGroupSize,
+
+ nonceRand: nonceRand,
+ }
+ c.devices = append(c.devices, deviceStruct)
+
+ return nil
+}
+
+func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
+ c.mu.Lock()
+ newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
+ if newDagSize > c.dagSize {
+ // TODO: clean up buffers from previous DAG?
+ err := InitCL(block.NumberU64(), c)
+ if err != nil {
+ fmt.Println("OpenCL init error: ", err)
+ return 0, []byte{0}
+ }
+ }
+ defer c.mu.Unlock()
+
+ // Avoid unneeded OpenCL initialisation if we received stop while running InitCL
+ select {
+ case <-stop:
+ return 0, []byte{0}
+ default:
+ }
+
+ headerHash := block.HashNoNonce()
+ diff := block.Difficulty()
+ target256 := new(big.Int).Div(maxUint256, diff)
+ target64 := new(big.Int).Rsh(target256, 192).Uint64()
+ var zero uint32 = 0
+
+ d := c.devices[index]
+
+ _, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
+ return 0, []byte{0}
+ }
+
+ for i := 0; i < searchBufSize; i++ {
+ _, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
+ return 0, []byte{0}
+ }
+ }
+
+ // wait for all search buffers to complete
+ err = d.queue.Finish()
+ if err != nil {
+ fmt.Println("Error in Search clFinish : ", err)
+ return 0, []byte{0}
+ }
+
+ err = d.searchKernel.SetArg(1, d.headerBuf)
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+
+ err = d.searchKernel.SetArg(2, d.dagBuf)
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+
+ err = d.searchKernel.SetArg(4, target64)
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+ err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+
+ // wait on this before returning
+ var preReturnEvent *cl.Event
+ if d.openCL12 {
+ preReturnEvent, err = d.ctx.CreateUserEvent()
+ if err != nil {
+ fmt.Println("Error in Search create CL user event : ", err)
+ return 0, []byte{0}
+ }
+ }
+
+ pending := make([]pendingSearch, 0, searchBufSize)
+ var p *pendingSearch
+ searchBufIndex := uint32(0)
+ var checkNonce uint64
+ loops := int64(0)
+ prevHashRate := int32(0)
+ start := time.Now().UnixNano()
+ // we grab a single random nonce and sets this as argument to the kernel search function
+ // the device will then add each local threads gid to the nonce, creating a unique nonce
+ // for each device computing unit executing in parallel
+ initNonce := uint64(d.nonceRand.Int63())
+ for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
+ select {
+ case <-stop:
+
+ /*
+ if d.openCL12 {
+ err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
+ if err != nil {
+ fmt.Println("Error in Search WaitForEvents: ", err)
+ }
+ }
+ */
+
+ atomic.AddInt32(&c.hashRate, -prevHashRate)
+ return 0, []byte{0}
+ default:
+ }
+
+ if (loops % (1 << 7)) == 0 {
+ elapsed := time.Now().UnixNano() - start
+ // TODO: verify if this is correct hash rate calculation
+ hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
+ hashrateDiff := int32(hashes) - prevHashRate
+ prevHashRate = int32(hashes)
+ atomic.AddInt32(&c.hashRate, hashrateDiff)
+ }
+ loops++
+
+ err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+ err = d.searchKernel.SetArg(3, nonce)
+ if err != nil {
+ fmt.Println("Error in Search clSetKernelArg : ", err)
+ return 0, []byte{0}
+ }
+
+ // execute kernel
+ _, err := d.queue.EnqueueNDRangeKernel(
+ d.searchKernel,
+ []int{0},
+ []int{globalWorkSize},
+ []int{d.workGroupSize},
+ nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
+ return 0, []byte{0}
+ }
+
+ pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
+ searchBufIndex = (searchBufIndex + 1) % searchBufSize
+
+ if len(pending) == searchBufSize {
+ p = &(pending[searchBufIndex])
+ cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
+ cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
+ nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
+ return 0, []byte{0}
+ }
+
+ results := cres.ByteSlice()
+ nfound := binary.LittleEndian.Uint32(results)
+ nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
+ // OpenCL returns the offsets from the start nonce
+ for i := uint32(0); i < nfound; i++ {
+ lo := (i + 1) * SIZEOF_UINT32
+ hi := (i + 2) * SIZEOF_UINT32
+ upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
+ checkNonce = p.startNonce + upperNonce
+ if checkNonce != 0 {
+ cn := C.uint64_t(checkNonce)
+ ds := C.uint64_t(c.dagSize)
+ // We verify that the nonce is indeed a solution by
+ // executing the Ethash verification function (on the CPU).
+ ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn)
+ // TODO: return result first
+ if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 {
+ _, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
+ }
+ if d.openCL12 {
+ err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
+ if err != nil {
+ fmt.Println("Error in Search WaitForEvents: ", err)
+ }
+ }
+ return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32))
+ }
+
+ _, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
+ if err != nil {
+ fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
+ return 0, []byte{0}
+ }
+ }
+ }
+ _, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
+ if err != nil {
+ fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
+ return 0, []byte{0}
+ }
+ pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
+ }
+ }
+ if d.openCL12 {
+ err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
+ if err != nil {
+ fmt.Println("Error in Search clWaitForEvents: ", err)
+ return 0, []byte{0}
+ }
+ }
+ return 0, []byte{0}
+}
+
+func (c *OpenCLMiner) Verify(block pow.Block) bool {
+ return c.ethash.Light.Verify(block)
+}
+func (c *OpenCLMiner) GetHashrate() int64 {
+ return int64(atomic.LoadInt32(&c.hashRate))
+}
+func (c *OpenCLMiner) Turbo(on bool) {
+ // This is GPU mining. Always be turbo.
+}
+
+func replaceWords(text string, kvs map[string]string) string {
+ for k, v := range kvs {
+ text = strings.Replace(text, k, v, -1)
+ }
+ return text
+}
+
+func logErr(err error) {
+ if err != nil {
+ fmt.Println("Error in OpenCL call:", err)
+ }
+}
+
+func argErr(err error) error {
+ return fmt.Errorf("arg err: %v", err)
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
new file mode 100644
index 000000000..695ff1829
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
@@ -0,0 +1,600 @@
+package ethash
+
+/* DO NOT EDIT!!!
+
+ This code is version controlled at
+ https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
+
+ If needed change it there first, then copy over here.
+*/
+
+const kernel = `
+// author Tim Hughes <tim@twistedfury.com>
+// Tested on Radeon HD 7850
+// Hashrate: 15940347 hashes/s
+// Bandwidth: 124533 MB/s
+// search kernel should fit in <= 84 VGPRS (3 wavefronts)
+
+#define THREADS_PER_HASH (128 / 16)
+#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
+
+#define FNV_PRIME 0x01000193
+
+__constant uint2 const Keccak_f1600_RC[24] = {
+ (uint2)(0x00000001, 0x00000000),
+ (uint2)(0x00008082, 0x00000000),
+ (uint2)(0x0000808a, 0x80000000),
+ (uint2)(0x80008000, 0x80000000),
+ (uint2)(0x0000808b, 0x00000000),
+ (uint2)(0x80000001, 0x00000000),
+ (uint2)(0x80008081, 0x80000000),
+ (uint2)(0x00008009, 0x80000000),
+ (uint2)(0x0000008a, 0x00000000),
+ (uint2)(0x00000088, 0x00000000),
+ (uint2)(0x80008009, 0x00000000),
+ (uint2)(0x8000000a, 0x00000000),
+ (uint2)(0x8000808b, 0x00000000),
+ (uint2)(0x0000008b, 0x80000000),
+ (uint2)(0x00008089, 0x80000000),
+ (uint2)(0x00008003, 0x80000000),
+ (uint2)(0x00008002, 0x80000000),
+ (uint2)(0x00000080, 0x80000000),
+ (uint2)(0x0000800a, 0x00000000),
+ (uint2)(0x8000000a, 0x80000000),
+ (uint2)(0x80008081, 0x80000000),
+ (uint2)(0x00008080, 0x80000000),
+ (uint2)(0x80000001, 0x00000000),
+ (uint2)(0x80008008, 0x80000000),
+};
+
+void keccak_f1600_round(uint2* a, uint r, uint out_size)
+{
+ #if !__ENDIAN_LITTLE__
+ for (uint i = 0; i != 25; ++i)
+ a[i] = a[i].yx;
+ #endif
+
+ uint2 b[25];
+ uint2 t;
+
+ // Theta
+ b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
+ b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
+ b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
+ b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
+ b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
+ t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
+ a[0] ^= t;
+ a[5] ^= t;
+ a[10] ^= t;
+ a[15] ^= t;
+ a[20] ^= t;
+ t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
+ a[1] ^= t;
+ a[6] ^= t;
+ a[11] ^= t;
+ a[16] ^= t;
+ a[21] ^= t;
+ t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
+ a[2] ^= t;
+ a[7] ^= t;
+ a[12] ^= t;
+ a[17] ^= t;
+ a[22] ^= t;
+ t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
+ a[3] ^= t;
+ a[8] ^= t;
+ a[13] ^= t;
+ a[18] ^= t;
+ a[23] ^= t;
+ t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
+ a[4] ^= t;
+ a[9] ^= t;
+ a[14] ^= t;
+ a[19] ^= t;
+ a[24] ^= t;
+
+ // Rho Pi
+ b[0] = a[0];
+ b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
+ b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
+ b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
+ b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
+ b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
+ b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
+ b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
+ b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
+ b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
+ b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
+ b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
+ b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
+ b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
+ b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
+ b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
+ b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
+ b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
+ b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
+ b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
+ b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
+ b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
+ b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
+ b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
+ b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
+
+ // Chi
+ a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
+ a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
+ a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
+ a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
+ if (out_size >= 4)
+ {
+ a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
+ a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
+ a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
+ a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
+ a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
+ if (out_size >= 8)
+ {
+ a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
+ a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
+ a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
+ a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
+ a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
+ a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
+ a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
+ a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
+ a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
+ a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
+ a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
+ a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
+ a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
+ a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
+ a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
+ a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
+ }
+ }
+
+ // Iota
+ a[0] ^= Keccak_f1600_RC[r];
+
+ #if !__ENDIAN_LITTLE__
+ for (uint i = 0; i != 25; ++i)
+ a[i] = a[i].yx;
+ #endif
+}
+
+void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
+{
+ for (uint i = in_size; i != 25; ++i)
+ {
+ a[i] = 0;
+ }
+#if __ENDIAN_LITTLE__
+ a[in_size] ^= 0x0000000000000001;
+ a[24-out_size*2] ^= 0x8000000000000000;
+#else
+ a[in_size] ^= 0x0100000000000000;
+ a[24-out_size*2] ^= 0x0000000000000080;
+#endif
+
+ // Originally I unrolled the first and last rounds to interface
+ // better with surrounding code, however I haven't done this
+ // without causing the AMD compiler to blow up the VGPR usage.
+ uint r = 0;
+ do
+ {
+ // This dynamic branch stops the AMD compiler unrolling the loop
+ // and additionally saves about 33% of the VGPRs, enough to gain another
+ // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
+ // massage out of the compiler. It doesn't really seem to matter how
+ // much we try and help the compiler save VGPRs because it seems to throw
+ // that information away, hence the implementation of keccak here
+ // doesn't bother.
+ if (isolate)
+ {
+ keccak_f1600_round((uint2*)a, r++, 25);
+ }
+ }
+ while (r < 23);
+
+ // final round optimised for digest size
+ keccak_f1600_round((uint2*)a, r++, out_size);
+}
+
+#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
+
+#define countof(x) (sizeof(x) / sizeof(x[0]))
+
+uint fnv(uint x, uint y)
+{
+ return x * FNV_PRIME ^ y;
+}
+
+uint4 fnv4(uint4 x, uint4 y)
+{
+ return x * FNV_PRIME ^ y;
+}
+
+uint fnv_reduce(uint4 v)
+{
+ return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
+}
+
+typedef union
+{
+ ulong ulongs[32 / sizeof(ulong)];
+ uint uints[32 / sizeof(uint)];
+} hash32_t;
+
+typedef union
+{
+ ulong ulongs[64 / sizeof(ulong)];
+ uint4 uint4s[64 / sizeof(uint4)];
+} hash64_t;
+
+typedef union
+{
+ uint uints[128 / sizeof(uint)];
+ uint4 uint4s[128 / sizeof(uint4)];
+} hash128_t;
+
+hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
+{
+ hash64_t init;
+ uint const init_size = countof(init.ulongs);
+ uint const hash_size = countof(header->ulongs);
+
+ // sha3_512(header .. nonce)
+ ulong state[25];
+ copy(state, header->ulongs, hash_size);
+ state[hash_size] = nonce;
+ keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
+
+ copy(init.ulongs, state, init_size);
+ return init;
+}
+
+uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
+{
+ uint4 mix = init;
+
+ // share init0
+ if (thread_id == 0)
+ *share = mix.x;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ uint init0 = *share;
+
+ uint a = 0;
+ do
+ {
+ bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+ #pragma unroll
+ for (uint i = 0; i != 4; ++i)
+ {
+ if (update_share)
+ {
+ uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+ *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
+ }
+ } while ((a += 4) != (ACCESSES & isolate));
+
+ return fnv_reduce(mix);
+}
+
+
+
+uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
+{
+ uint4 mix = init;
+
+ // share init0
+ if (thread_id == 0)
+ *share = mix.x;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ uint init0 = *share;
+
+ uint a = 0;
+ do
+ {
+ bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+ #pragma unroll
+ for (uint i = 0; i != 4; ++i)
+ {
+ if (update_share)
+ {
+ uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+ *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
+ }
+ }
+ while ((a += 4) != (ACCESSES & isolate));
+
+ return fnv_reduce(mix);
+}
+
+
+hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
+{
+ ulong state[25];
+
+ hash32_t hash;
+ uint const hash_size = countof(hash.ulongs);
+ uint const init_size = countof(init->ulongs);
+ uint const mix_size = countof(mix->ulongs);
+
+ // keccak_256(keccak_512(header..nonce) .. mix);
+ copy(state, init->ulongs, init_size);
+ copy(state + init_size, mix->ulongs, mix_size);
+ keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
+
+ // copy out
+ copy(hash.ulongs, state, hash_size);
+ return hash;
+}
+
+hash32_t compute_hash_simple(
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong nonce,
+ uint isolate
+ )
+{
+ hash64_t init = init_hash(g_header, nonce, isolate);
+
+ hash128_t mix;
+ for (uint i = 0; i != countof(mix.uint4s); ++i)
+ {
+ mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
+ }
+
+ uint mix_val = mix.uints[0];
+ uint init0 = mix.uints[0];
+ uint a = 0;
+ do
+ {
+ uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
+ uint n = (a+1) % countof(mix.uints);
+
+ #pragma unroll
+ for (uint i = 0; i != countof(mix.uints); ++i)
+ {
+ mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
+ mix_val = i == n ? mix.uints[i] : mix_val;
+ }
+ }
+ while (++a != (ACCESSES & isolate));
+
+ // reduce to output
+ hash32_t fnv_mix;
+ for (uint i = 0; i != countof(fnv_mix.uints); ++i)
+ {
+ fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
+ }
+
+ return final_hash(&init, &fnv_mix, isolate);
+}
+
+typedef union
+{
+ struct
+ {
+ hash64_t init;
+ uint pad; // avoid lds bank conflicts
+ };
+ hash32_t mix;
+} compute_hash_share;
+
+
+hash32_t compute_hash(
+ __local compute_hash_share* share,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong nonce,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+
+ // Compute one init hash per work item.
+ hash64_t init = init_hash(g_header, nonce, isolate);
+
+ // Threads work together in this phase in groups of 8.
+ uint const thread_id = gid % THREADS_PER_HASH;
+ uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+ hash32_t mix;
+ uint i = 0;
+ do
+ {
+ // share init with other threads
+ if (i == thread_id)
+ share[hash_id].init = init;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
+
+ share[hash_id].mix.uints[thread_id] = thread_mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (i == thread_id)
+ mix = share[hash_id].mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ while (++i != (THREADS_PER_HASH & isolate));
+
+ return final_hash(&init, &mix, isolate);
+}
+
+
+hash32_t compute_hash_chunks(
+ __local compute_hash_share* share,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ __global hash128_t const* g_dag1,
+ __global hash128_t const* g_dag2,
+ __global hash128_t const* g_dag3,
+ ulong nonce,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+
+ // Compute one init hash per work item.
+ hash64_t init = init_hash(g_header, nonce, isolate);
+
+ // Threads work together in this phase in groups of 8.
+ uint const thread_id = gid % THREADS_PER_HASH;
+ uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+ hash32_t mix;
+ uint i = 0;
+ do
+ {
+ // share init with other threads
+ if (i == thread_id)
+ share[hash_id].init = init;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
+
+ share[hash_id].mix.uints[thread_id] = thread_mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (i == thread_id)
+ mix = share[hash_id].mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ while (++i != (THREADS_PER_HASH & isolate));
+
+ return final_hash(&init, &mix, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_simple(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+ g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_simple(
+ __global volatile uint* restrict g_output,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ ulong target,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+ hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+
+ if (hash.ulongs[countof(hash.ulongs)-1] < target)
+ {
+ uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
+ g_output[slot] = gid;
+ }
+}
+
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search(
+ __global volatile uint* restrict g_output,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ ulong target,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+
+ if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
+ {
+ uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+ g_output[slot] = gid;
+ }
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_chunks(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ __global hash128_t const* g_dag1,
+ __global hash128_t const* g_dag2,
+ __global hash128_t const* g_dag3,
+ ulong start_nonce,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_chunks(
+ __global volatile uint* restrict g_output,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ __global hash128_t const* g_dag1,
+ __global hash128_t const* g_dag2,
+ __global hash128_t const* g_dag3,
+ ulong start_nonce,
+ ulong target,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
+
+ if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
+ {
+ uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
+ g_output[slot] = gid;
+ }
+}
+`
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
index 1e1de989d..c19e45d1d 100644
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
@@ -92,7 +92,7 @@ func TestEthashConcurrentVerify(t *testing.T) {
defer os.RemoveAll(eth.Full.Dir)
block := &testBlock{difficulty: big.NewInt(10)}
- nonce, md := eth.Search(block, nil)
+ nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce
block.mixDigest = common.BytesToHash(md)
@@ -135,7 +135,7 @@ func TestEthashConcurrentSearch(t *testing.T) {
// launch n searches concurrently.
for i := 0; i < nsearch; i++ {
go func() {
- nonce, md := eth.Search(block, stop)
+ nonce, md := eth.Search(block, stop, 0)
select {
case found <- searchRes{n: nonce, md: md}:
case <-stop:
@@ -167,7 +167,7 @@ func TestEthashSearchAcrossEpoch(t *testing.T) {
for i := epochLength - 40; i < epochLength+40; i++ {
block := &testBlock{number: i, difficulty: big.NewInt(90)}
rand.Read(block.hashNoNonce[:])
- nonce, md := eth.Search(block, nil)
+ nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce
block.mixDigest = common.BytesToHash(md)
if !eth.Verify(block) {