aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
blob: d62a6fb71fe6c18d93e07f73833251c30171b70c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
package cl

// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #include "cl_ext.h"
// #endif
import "C"

import (
    "strings"
    "unsafe"
)

const maxDeviceCount = 64

type DeviceType uint

const (
    DeviceTypeCPU         DeviceType = C.CL_DEVICE_TYPE_CPU
    DeviceTypeGPU         DeviceType = C.CL_DEVICE_TYPE_GPU
    DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
    DeviceTypeDefault     DeviceType = C.CL_DEVICE_TYPE_DEFAULT
    DeviceTypeAll         DeviceType = C.CL_DEVICE_TYPE_ALL
)

type FPConfig int

const (
    FPConfigDenorm         FPConfig = C.CL_FP_DENORM           // denorms are supported
    FPConfigInfNaN         FPConfig = C.CL_FP_INF_NAN          // INF and NaNs are supported
    FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
    FPConfigRoundToZero    FPConfig = C.CL_FP_ROUND_TO_ZERO    // round to zero rounding mode supported
    FPConfigRoundToInf     FPConfig = C.CL_FP_ROUND_TO_INF     // round to positive and negative infinity rounding modes supported
    FPConfigFMA            FPConfig = C.CL_FP_FMA              // IEEE754-2008 fused multiply-add is supported
    FPConfigSoftFloat      FPConfig = C.CL_FP_SOFT_FLOAT       // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
)

var fpConfigNameMap = map[FPConfig]string{
    FPConfigDenorm:         "Denorm",
    FPConfigInfNaN:         "InfNaN",
    FPConfigRoundToNearest: "RoundToNearest",
    FPConfigRoundToZero:    "RoundToZero",
    FPConfigRoundToInf:     "RoundToInf",
    FPConfigFMA:            "FMA",
    FPConfigSoftFloat:      "SoftFloat",
}

func (c FPConfig) String() string {
    var parts []string
    for bit, name := range fpConfigNameMap {
        if c&bit != 0 {
            parts = append(parts, name)
        }
    }
    if parts == nil {
        return ""
    }
    return strings.Join(parts, "|")
}

func (dt DeviceType) String() string {
    var parts []string
    if dt&DeviceTypeCPU != 0 {
        parts = append(parts, "CPU")
    }
    if dt&DeviceTypeGPU != 0 {
        parts = append(parts, "GPU")
    }
    if dt&DeviceTypeAccelerator != 0 {
        parts = append(parts, "Accelerator")
    }
    if dt&DeviceTypeDefault != 0 {
        parts = append(parts, "Default")
    }
    if parts == nil {
        parts = append(parts, "None")
    }
    return strings.Join(parts, "|")
}

type Device struct {
    id C.cl_device_id
}

func buildDeviceIdList(devices []*Device) []C.cl_device_id {
    deviceIds := make([]C.cl_device_id, len(devices))
    for i, d := range devices {
        deviceIds[i] = d.id
    }
    return deviceIds
}

// Obtain the list of devices available on a platform. 'platform' refers
// to the platform returned by GetPlatforms or can be nil. If platform
// is nil, the behavior is implementation-defined.
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
    var deviceIds [maxDeviceCount]C.cl_device_id
    var numDevices C.cl_uint
    var platformId C.cl_platform_id
    if platform != nil {
        platformId = platform.id
    }
    if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
        return nil, toError(err)
    }
    if numDevices > maxDeviceCount {
        numDevices = maxDeviceCount
    }
    devices := make([]*Device, numDevices)
    for i := 0; i < int(numDevices); i++ {
        devices[i] = &Device{id: deviceIds[i]}
    }
    return devices, nil
}

func (d *Device) nullableId() C.cl_device_id {
    if d == nil {
        return nil
    }
    return d.id
}

func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
    var strC [1024]C.char
    var strN C.size_t
    if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
        if panicOnError {
            panic("Should never fail")
        }
        return "", toError(err)
    }

    // OpenCL strings are NUL-terminated, and the terminator is included in strN
    // Go strings aren't NUL-terminated, so subtract 1 from the length
    return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
}

func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
    var val C.cl_uint
    if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
        if panicOnError {
            panic("Should never fail")
        }
        return 0, toError(err)
    }
    return uint(val), nil
}

func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
    var val C.size_t
    if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
        if panicOnError {
            panic("Should never fail")
        }
        return 0, toError(err)
    }
    return int(val), nil
}

func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
    var val C.cl_ulong
    if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
        if panicOnError {
            panic("Should never fail")
        }
        return 0, toError(err)
    }
    return int64(val), nil
}

func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
    var val C.cl_bool
    if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
        if panicOnError {
            panic("Should never fail")
        }
        return false, toError(err)
    }
    return val == C.CL_TRUE, nil
}

func (d *Device) Name() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
    return str
}

func (d *Device) Vendor() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
    return str
}

func (d *Device) Extensions() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
    return str
}

func (d *Device) OpenCLCVersion() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
    return str
}

func (d *Device) Profile() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
    return str
}

func (d *Device) Version() string {
    str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
    return str
}

func (d *Device) DriverVersion() string {
    str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
    return str
}

// The default compute device address space size specified as an
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
func (d *Device) AddressBits() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
    return int(val)
}

// Size of global memory cache line in bytes.
func (d *Device) GlobalMemCachelineSize() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
    return int(val)
}

// Maximum configured clock frequency of the device in MHz.
func (d *Device) MaxClockFrequency() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
    return int(val)
}

// The number of parallel compute units on the OpenCL device.
// A work-group executes on a single compute unit. The minimum value is 1.
func (d *Device) MaxComputeUnits() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
    return int(val)
}

// Max number of arguments declared with the __constant qualifier in a kernel.
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantArgs() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
    return int(val)
}

// Max number of simultaneous image objects that can be read by a kernel.
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxReadImageArgs() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
    return int(val)
}

// Maximum number of samplers that can be used in a kernel. The minimum
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
func (d *Device) MaxSamplers() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
    return int(val)
}

// Maximum dimensions that specify the global and local work-item IDs used
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemDimensions() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
    return int(val)
}

// Max number of simultaneous image objects that can be written to by a
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxWriteImageArgs() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
    return int(val)
}

// The minimum value is the size (in bits) of the largest OpenCL built-in
// data type supported by the device (long16 in FULL profile, long16 or
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MemBaseAddrAlign() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
    return int(val)
}

func (d *Device) NativeVectorWidthChar() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
    return int(val)
}

func (d *Device) NativeVectorWidthShort() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
    return int(val)
}

func (d *Device) NativeVectorWidthInt() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
    return int(val)
}

func (d *Device) NativeVectorWidthLong() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
    return int(val)
}

func (d *Device) NativeVectorWidthFloat() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
    return int(val)
}

func (d *Device) NativeVectorWidthDouble() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
    return int(val)
}

func (d *Device) NativeVectorWidthHalf() int {
    val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
    return int(val)
}

// Max height of 2D image in pixels. The minimum value is 8192
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxHeight() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
    return int(val)
}

// Max width of 2D image or 1D image not created from a buffer object in
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxWidth() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
    return int(val)
}

// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxDepth() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
    return int(val)
}

// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxHeight() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
    return int(val)
}

// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxWidth() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
    return int(val)
}

// Max size in bytes of the arguments that can be passed to a kernel. The
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
func (d *Device) MaxParameterSize() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
    return int(val)
}

// Maximum number of work-items in a work-group executing a kernel on a
// single compute unit, using the data parallel execution model. (Refer
// to clEnqueueNDRangeKernel). The minimum value is 1.
func (d *Device) MaxWorkGroupSize() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
    return int(val)
}

// Describes the resolution of device timer. This is measured in nanoseconds.
func (d *Device) ProfilingTimerResolution() int {
    val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
    return int(val)
}

// Size of local memory arena in bytes. The minimum value is 32 KB for
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) LocalMemSize() int64 {
    val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
    return val
}

// Max size in bytes of a constant buffer allocation. The minimum value is
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantBufferSize() int64 {
    val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
    return val
}

// Max size of memory object allocation in bytes. The minimum value is max
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
// not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxMemAllocSize() int64 {
    val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
    return val
}

// Size of global device memory in bytes.
func (d *Device) GlobalMemSize() int64 {
    val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
    return val
}

func (d *Device) Available() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
    return val
}

func (d *Device) CompilerAvailable() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
    return val
}

func (d *Device) EndianLittle() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
    return val
}

// Is CL_TRUE if the device implements error correction for all
// accesses to compute device memory (global and constant). Is
// CL_FALSE if the device does not implement such error correction.
func (d *Device) ErrorCorrectionSupport() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
    return val
}

func (d *Device) HostUnifiedMemory() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
    return val
}

func (d *Device) ImageSupport() bool {
    val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
    return val
}

func (d *Device) Type() DeviceType {
    var deviceType C.cl_device_type
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
        panic("Failed to get device type")
    }
    return DeviceType(deviceType)
}

// Describes double precision floating-point capability of the OpenCL device
func (d *Device) DoubleFPConfig() FPConfig {
    var fpConfig C.cl_device_fp_config
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
        panic("Failed to get double FP config")
    }
    return FPConfig(fpConfig)
}

// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
func (d *Device) HalfFPConfig() FPConfig {
    var fpConfig C.cl_device_fp_config
    err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
    if err != C.CL_SUCCESS {
        return FPConfig(0)
    }
    return FPConfig(fpConfig)
}

// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
// can also be returned indicating no local memory support.
func (d *Device) LocalMemType() LocalMemType {
    var memType C.cl_device_local_mem_type
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
        return LocalMemType(C.CL_NONE)
    }
    return LocalMemType(memType)
}

// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
func (d *Device) ExecutionCapabilities() ExecCapability {
    var execCap C.cl_device_exec_capabilities
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
        panic("Failed to get execution capabilities")
    }
    return ExecCapability(execCap)
}

func (d *Device) GlobalMemCacheType() MemCacheType {
    var memType C.cl_device_mem_cache_type
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
        return MemCacheType(C.CL_NONE)
    }
    return MemCacheType(memType)
}

// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
//
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
//
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemSizes() []int {
    dims := d.MaxWorkItemDimensions()
    sizes := make([]C.size_t, dims)
    if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
        panic("Failed to get max work item sizes")
    }
    intSizes := make([]int, dims)
    for i, s := range sizes {
        intSizes[i] = int(s)
    }
    return intSizes
}