aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace/src
diff options
context:
space:
mode:
authorMatthew Wampler-Doty <matthew.wampler.doty@gmail.com>2015-03-01 03:58:37 +0800
committerMatthew Wampler-Doty <matthew.wampler.doty@gmail.com>2015-03-03 11:29:34 +0800
commitde9f79133faa1ff5dcd16fb4fd13d06b7799ded9 (patch)
treefcc9001d6b681ccdd1b3bbe516e3611e808341d7 /Godeps/_workspace/src
parent080823bdeebbab2bcffdaefad703896700ed2c30 (diff)
downloadgo-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar.gz
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar.bz2
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar.lz
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar.xz
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.tar.zst
go-tangerine-de9f79133faa1ff5dcd16fb4fd13d06b7799ded9.zip
Introducing ethash
Diffstat (limited to 'Godeps/_workspace/src')
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore5
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt14
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt53
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp260
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake108
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake91
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt13
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go296
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt12
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp12452
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp754
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h43
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt15
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu879
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh162
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu27
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt33
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h33
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h248
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h74
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h88
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h38
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c297
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h48
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c151
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h27
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp34
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h15
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c41
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h47
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp29
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc587
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json13
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md12
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt30
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go54
-rw-r--r--Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp233
37 files changed, 17316 insertions, 0 deletions
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
new file mode 100644
index 000000000..6bb36ed15
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
@@ -0,0 +1,5 @@
+.idea/
+.DS_Store
+*/**/*un~
+.vagrant/
+cpp-build/
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
new file mode 100644
index 000000000..ac189457f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8.2)
+project(ethash)
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
+set(ETHHASH_LIBS ethash)
+
+if (WIN32 AND WANT_CRYPTOPP)
+ add_subdirectory(cryptopp)
+endif()
+
+add_subdirectory(libethash)
+add_subdirectory(libethash-cl EXCLUDE_FROM_ALL)
+add_subdirectory(benchmark EXCLUDE_FROM_ALL)
+add_subdirectory(test EXCLUDE_FROM_ALL)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
new file mode 100644
index 000000000..e6ba85790
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+include_directories(..)
+
+set(CMAKE_BUILD_TYPE Release)
+
+if (MSVC)
+ add_definitions("/openmp")
+endif()
+
+if (NOT MPI_FOUND)
+ find_package(MPI)
+endif()
+
+if (NOT CRYPTOPP_FOUND)
+ find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+ add_definitions(-DWITH_CRYPTOPP)
+endif()
+
+if (NOT OpenCL_FOUND)
+ find_package(OpenCL)
+endif()
+if (OpenCL_FOUND)
+ add_definitions(-DWITH_OPENCL)
+ include_directories(${OpenCL_INCLUDE_DIRS})
+ list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h)
+endif()
+
+if (MPI_FOUND)
+ include_directories(${MPI_INCLUDE_PATH})
+ add_executable (Benchmark_MPI_FULL benchmark.cpp)
+ target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+ SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI")
+
+ add_executable (Benchmark_MPI_LIGHT benchmark.cpp)
+ target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+ SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI")
+endif()
+
+add_executable (Benchmark_FULL benchmark.cpp)
+target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS})
+SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL")
+
+add_executable (Benchmark_LIGHT benchmark.cpp)
+target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS})
+
+if (OpenCL_FOUND)
+ add_executable (Benchmark_CL benchmark.cpp)
+ target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl)
+ SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL")
+endif()
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
new file mode 100644
index 000000000..4c8f700c5
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
@@ -0,0 +1,260 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file benchmark.cpp
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <libethash/ethash.h>
+#include <libethash/util.h>
+#ifdef OPENCL
+#include <libethash-cl/ethash_cl_miner.h>
+#endif
+#include <vector>
+#include <algorithm>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/SHA3_cryptopp.h>
+#include <string>
+
+#else
+#include "libethash/sha3.h"
+#endif // WITH_CRYPTOPP
+
+#undef min
+#undef max
+
+#if defined(OPENCL)
+const unsigned trials = 1024*1024*32;
+#elif defined(FULL)
+const unsigned trials = 1024*1024/8;
+#else
+const unsigned trials = 1024*1024/1024;
+#endif
+uint8_t g_hashes[1024*32];
+
+static char nibbleToChar(unsigned nibble)
+{
+ return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble);
+}
+
+static uint8_t charToNibble(char chr)
+{
+ if (chr >= '0' && chr <= '9')
+ {
+ return (uint8_t) (chr - '0');
+ }
+ if (chr >= 'a' && chr <= 'z')
+ {
+ return (uint8_t) (chr - 'a' + 10);
+ }
+ if (chr >= 'A' && chr <= 'Z')
+ {
+ return (uint8_t) (chr - 'A' + 10);
+ }
+ return 0;
+}
+
+static std::vector<uint8_t> hexStringToBytes(char const* str)
+{
+ std::vector<uint8_t> bytes(strlen(str) >> 1);
+ for (unsigned i = 0; i != bytes.size(); ++i)
+ {
+ bytes[i] = charToNibble(str[i*2 | 0]) << 4;
+ bytes[i] |= charToNibble(str[i*2 | 1]);
+ }
+ return bytes;
+}
+
+static std::string bytesToHexString(uint8_t const* bytes, unsigned size)
+{
+ std::string str;
+ for (unsigned i = 0; i != size; ++i)
+ {
+ str += nibbleToChar(bytes[i] >> 4);
+ str += nibbleToChar(bytes[i] & 0xf);
+ }
+ return str;
+}
+
+extern "C" int main(void)
+{
+ // params for ethash
+ ethash_params params;
+ ethash_params_init(&params, 0);
+ //params.full_size = 262147 * 4096; // 1GBish;
+ //params.full_size = 32771 * 4096; // 128MBish;
+ //params.full_size = 8209 * 4096; // 8MBish;
+ //params.cache_size = 8209*4096;
+ //params.cache_size = 2053*4096;
+ uint8_t seed[32], previous_hash[32];
+
+ memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32);
+ memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32);
+
+ // allocate page aligned buffer for dataset
+#ifdef FULL
+ void* full_mem_buf = malloc(params.full_size + 4095);
+ void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095);
+#endif
+ void* cache_mem_buf = malloc(params.cache_size + 63);
+ void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63);
+
+ ethash_cache cache;
+ cache.mem = cache_mem;
+
+ // compute cache or full data
+ {
+ clock_t startTime = clock();
+ ethash_mkcache(&cache, &params, seed);
+ clock_t time = clock() - startTime;
+
+ uint8_t cache_hash[32];
+ SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size);
+ debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data());
+
+ // print a couple of test hashes
+ {
+ const clock_t startTime = clock();
+ ethash_return_value hash;
+ ethash_light(&hash, &cache, &params, previous_hash, 0);
+ const clock_t time = clock() - startTime;
+ debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+ }
+
+ #ifdef FULL
+ startTime = clock();
+ ethash_compute_full_data(full_mem, &params, &cache);
+ time = clock() - startTime;
+ debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+ #endif // FULL
+ }
+
+#ifdef OPENCL
+ ethash_cl_miner miner;
+ {
+ const clock_t startTime = clock();
+ if (!miner.init(params, seed))
+ exit(-1);
+ const clock_t time = clock() - startTime;
+ debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+ }
+#endif
+
+
+#ifdef FULL
+ {
+ const clock_t startTime = clock();
+ ethash_return_value hash;
+ ethash_full(&hash, full_mem, &params, previous_hash, 0);
+ const clock_t time = clock() - startTime;
+ debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+ }
+#endif
+
+#ifdef OPENCL
+ // validate 1024 hashes against CPU
+ miner.hash(g_hashes, previous_hash, 0, 1024);
+ for (unsigned i = 0; i != 1024; ++i)
+ {
+ ethash_return_value hash;
+ ethash_light(&hash, &cache, &params, previous_hash, i);
+ if (memcmp(hash.result, g_hashes + 32*i, 32) != 0)
+ {
+ debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str());
+ static unsigned c = 0;
+ if (++c == 16)
+ {
+ exit(-1);
+ }
+ }
+ }
+#endif
+
+
+ clock_t startTime = clock();
+ unsigned hash_count = trials;
+
+ #ifdef OPENCL
+ {
+ struct search_hook : ethash_cl_miner::search_hook
+ {
+ unsigned hash_count;
+ std::vector<uint64_t> nonce_vec;
+
+ virtual bool found(uint64_t const* nonces, uint32_t count)
+ {
+ nonce_vec.assign(nonces, nonces + count);
+ return false;
+ }
+
+ virtual bool searched(uint64_t start_nonce, uint32_t count)
+ {
+ // do nothing
+ hash_count += count;
+ return hash_count >= trials;
+ }
+ };
+ search_hook hook;
+ hook.hash_count = 0;
+
+ miner.search(previous_hash, 0x000000ffffffffff, hook);
+
+ for (unsigned i = 0; i != hook.nonce_vec.size(); ++i)
+ {
+ uint64_t nonce = hook.nonce_vec[i];
+ ethash_return_value hash;
+ ethash_light(&hash, &cache, &params, previous_hash, nonce);
+ debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str());
+ }
+
+ hash_count = hook.hash_count;
+ }
+ #else
+ {
+ //#pragma omp parallel for
+ for (int nonce = 0; nonce < trials; ++nonce)
+ {
+ ethash_return_value hash;
+ #ifdef FULL
+ ethash_full(&hash, full_mem, &params, previous_hash, nonce);
+ #else
+ ethash_light(&hash, &cache, &params, previous_hash, nonce);
+ #endif // FULL
+ }
+ }
+ #endif
+
+ clock_t time = std::max((clock_t)1u, clock() - startTime);
+
+ unsigned read_size = ACCESSES * MIX_BYTES;
+ debugf(
+ "hashrate: %8u, bw: %6u MB/s\n",
+ (unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time),
+ (unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024))
+ );
+
+ free(cache_mem_buf);
+#ifdef FULL
+ free(full_mem_buf);
+#endif
+
+ return 0;
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
new file mode 100644
index 000000000..5ca01e446
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
@@ -0,0 +1,108 @@
+# Module for locating the Crypto++ encryption library.
+#
+# Customizable variables:
+# CRYPTOPP_ROOT_DIR
+# This variable points to the CryptoPP root directory. On Windows the
+# library location typically will have to be provided explicitly using the
+# -D command-line option. The directory should include the include/cryptopp,
+# lib and/or bin sub-directories.
+#
+# Read-only variables:
+# CRYPTOPP_FOUND
+# Indicates whether the library has been found.
+#
+# CRYPTOPP_INCLUDE_DIRS
+# Points to the CryptoPP include directory.
+#
+# CRYPTOPP_LIBRARIES
+# Points to the CryptoPP libraries that should be passed to
+# target_link_libararies.
+#
+#
+# Copyright (c) 2012 Sergiu Dotenco
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+INCLUDE (FindPackageHandleStandardArgs)
+
+FIND_PATH (CRYPTOPP_ROOT_DIR
+ NAMES cryptopp/cryptlib.h include/cryptopp/cryptlib.h
+ PATHS ENV CRYPTOPPROOT
+ DOC "CryptoPP root directory")
+
+# Re-use the previous path:
+FIND_PATH (CRYPTOPP_INCLUDE_DIR
+ NAMES cryptopp/cryptlib.h
+ HINTS ${CRYPTOPP_ROOT_DIR}
+ PATH_SUFFIXES include
+ DOC "CryptoPP include directory")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_DEBUG
+ NAMES cryptlibd cryptoppd
+ HINTS ${CRYPTOPP_ROOT_DIR}
+ PATH_SUFFIXES lib
+ DOC "CryptoPP debug library")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_RELEASE
+ NAMES cryptlib cryptopp
+ HINTS ${CRYPTOPP_ROOT_DIR}
+ PATH_SUFFIXES lib
+ DOC "CryptoPP release library")
+
+IF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+ SET (CRYPTOPP_LIBRARY
+ optimized ${CRYPTOPP_LIBRARY_RELEASE}
+ debug ${CRYPTOPP_LIBRARY_DEBUG} CACHE DOC "CryptoPP library")
+ELSEIF (CRYPTOPP_LIBRARY_RELEASE)
+ SET (CRYPTOPP_LIBRARY ${CRYPTOPP_LIBRARY_RELEASE} CACHE DOC
+ "CryptoPP library")
+ENDIF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+
+IF (CRYPTOPP_INCLUDE_DIR)
+ SET (_CRYPTOPP_VERSION_HEADER ${CRYPTOPP_INCLUDE_DIR}/cryptopp/config.h)
+
+ IF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+ FILE (STRINGS ${_CRYPTOPP_VERSION_HEADER} _CRYPTOPP_VERSION_TMP REGEX
+ "^#define CRYPTOPP_VERSION[ \t]+[0-9]+$")
+
+ STRING (REGEX REPLACE
+ "^#define CRYPTOPP_VERSION[ \t]+([0-9]+)" "\\1" _CRYPTOPP_VERSION_TMP
+ ${_CRYPTOPP_VERSION_TMP})
+
+ STRING (REGEX REPLACE "([0-9]+)[0-9][0-9]" "\\1" CRYPTOPP_VERSION_MAJOR
+ ${_CRYPTOPP_VERSION_TMP})
+ STRING (REGEX REPLACE "[0-9]([0-9])[0-9]" "\\1" CRYPTOPP_VERSION_MINOR
+ ${_CRYPTOPP_VERSION_TMP})
+ STRING (REGEX REPLACE "[0-9][0-9]([0-9])" "\\1" CRYPTOPP_VERSION_PATCH
+ ${_CRYPTOPP_VERSION_TMP})
+
+ SET (CRYPTOPP_VERSION_COUNT 3)
+ SET (CRYPTOPP_VERSION
+ ${CRYPTOPP_VERSION_MAJOR}.${CRYPTOPP_VERSION_MINOR}.${CRYPTOPP_VERSION_PATCH})
+ ENDIF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+ENDIF (CRYPTOPP_INCLUDE_DIR)
+
+SET (CRYPTOPP_INCLUDE_DIRS ${CRYPTOPP_INCLUDE_DIR})
+SET (CRYPTOPP_LIBRARIES ${CRYPTOPP_LIBRARY})
+
+MARK_AS_ADVANCED (CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY CRYPTOPP_LIBRARY_DEBUG
+ CRYPTOPP_LIBRARY_RELEASE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (CryptoPP REQUIRED_VARS CRYPTOPP_ROOT_DIR
+ CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY VERSION_VAR CRYPTOPP_VERSION) \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
new file mode 100644
index 000000000..cc567c95e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
@@ -0,0 +1,91 @@
+#
+# This file taken from FindOpenCL project @ http://gitorious.com/findopencl
+#
+# - Try to find OpenCL
+# This module tries to find an OpenCL implementation on your system. It supports
+# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too.
+#
+# Once done this will define
+# OPENCL_FOUND - system has OpenCL
+# OPENCL_INCLUDE_DIRS - the OpenCL include directory
+# OPENCL_LIBRARIES - link these to use OpenCL
+#
+# WIN32 should work, but is untested
+
+FIND_PACKAGE( PackageHandleStandardArgs )
+
+SET (OPENCL_VERSION_STRING "0.1.0")
+SET (OPENCL_VERSION_MAJOR 0)
+SET (OPENCL_VERSION_MINOR 1)
+SET (OPENCL_VERSION_PATCH 0)
+
+IF (APPLE)
+
+ FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
+ FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
+ FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX")
+
+ELSE (APPLE)
+
+ IF (WIN32)
+
+ FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h)
+ FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp)
+
+ # The AMD SDK currently installs both x86 and x86_64 libraries
+ # This is only a hack to find out architecture
+ IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+ SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64")
+ SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64")
+ ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
+ SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86")
+ SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86")
+ ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+
+ # find out if the user asked for a 64-bit build, and use the corresponding
+ # 64 or 32 bit NVIDIA library paths to the search:
+ STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR})
+ IF("${ISWIN64}" STREQUAL "Win64")
+ FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64)
+ ELSE("${ISWIN64}" STREQUAL "Win64")
+ FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32)
+ ENDIF("${ISWIN64}" STREQUAL "Win64")
+
+ GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+
+ # On Win32 search relative to the library
+ FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+ FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+
+ ELSE (WIN32)
+
+ # Unix style platforms
+ FIND_LIBRARY(OPENCL_LIBRARIES OpenCL
+ ENV LD_LIBRARY_PATH
+ )
+
+ GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
+ GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+
+ # The AMD SDK currently does not place its headers
+ # in /usr/include, therefore also search relative
+ # to the library
+ FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+ FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+
+ ENDIF (WIN32)
+
+ENDIF (APPLE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+
+IF( _OPENCL_CPP_INCLUDE_DIRS )
+ SET( OPENCL_HAS_CPP_BINDINGS TRUE )
+ LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
+ # This is often the same, so clean up
+ LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
+ENDIF( _OPENCL_CPP_INCLUDE_DIRS )
+
+MARK_AS_ADVANCED(
+ OPENCL_INCLUDE_DIRS
+) \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
new file mode 100644
index 000000000..4cd9f36c6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(LIBRARY cryptopp)
+
+include_directories(../../cryptopp)
+
+# todo, subset
+file(GLOB HEADERS "../../cryptopp/*.h")
+file(GLOB SOURCE "../../cryptopp/*.cpp")
+
+add_library(${LIBRARY} ${HEADERS} ${SOURCE})
+
+set(CRYPTOPP_INCLUDE_DIRS "../.." PARENT_SCOPE)
+set(CRYPTOPP_LIBRARIES ${LIBRARY} PARENT_SCOPE)
+set(CRYPTOPP_FOUND TRUE PARENT_SCOPE)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
new file mode 100644
index 000000000..32d3f0264
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
@@ -0,0 +1,296 @@
+package ethash
+
+/*
+#cgo CFLAGS: -std=gnu99 -Wall
+#include "libethash/ethash.h"
+#include "libethash/util.c"
+#include "libethash/internal.c"
+#include "libethash/sha3.c"
+*/
+import "C"
+
+import (
+ "bytes"
+ "encoding/binary"
+ "log"
+ "math/big"
+ "math/rand"
+ "sync"
+ "time"
+ "unsafe"
+
+ "github.com/ethereum/go-ethereum/logger"
+ "github.com/ethereum/go-ethereum/pow"
+)
+
+var powlogger = logger.NewLogger("POW")
+
+type DAG struct {
+ SeedBlockNum uint64
+ dag unsafe.Pointer // full GB of memory for dag
+}
+
+type ParamsAndCache struct {
+ params *C.ethash_params
+ cache *C.ethash_cache
+ SeedBlockNum uint64
+}
+
+type Ethash struct {
+ turbo bool
+ HashRate int64
+ chainManager pow.ChainManager
+ dag *DAG
+ paramsAndCache *ParamsAndCache
+ nextdag unsafe.Pointer
+ ret *C.ethash_return_value
+ dagMutex *sync.Mutex
+ cacheMutex *sync.Mutex
+}
+
+func parseNonce(nonce []byte) (uint64, error) {
+ nonceBuf := bytes.NewBuffer(nonce)
+ nonceInt, err := binary.ReadUvarint(nonceBuf)
+ if err != nil {
+ return 0, err
+ }
+ return nonceInt, nil
+}
+
+const epochLength uint64 = 30000
+
+func getSeedBlockNum(blockNum uint64) uint64 {
+ var seedBlockNum uint64 = 0
+ if blockNum >= 2*epochLength {
+ seedBlockNum = ((blockNum / epochLength) - 1) * epochLength
+ }
+ return seedBlockNum
+}
+
+func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsAndCache {
+ seedBlockNum := getSeedBlockNum(blockNum)
+ paramsAndCache := &ParamsAndCache{
+ params: new(C.ethash_params),
+ cache: new(C.ethash_cache),
+ SeedBlockNum: seedBlockNum,
+ }
+ C.ethash_params_init(paramsAndCache.params, C.uint32_t(seedBlockNum))
+ paramsAndCache.cache.mem = C.malloc(paramsAndCache.params.cache_size)
+ seedHash := chainManager.GetBlockByNumber(seedBlockNum).Header().Hash()
+ log.Println("Params", paramsAndCache.params)
+
+ log.Println("Making Cache")
+ start := time.Now()
+ C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)((unsafe.Pointer)(&seedHash[0])))
+ log.Println("Took:", time.Since(start))
+ return paramsAndCache
+}
+
+func (pow *Ethash) updateCache() {
+ pow.cacheMutex.Lock()
+ seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+ if pow.paramsAndCache.SeedBlockNum != seedNum {
+ pow.paramsAndCache = makeParamsAndCache(pow.chainManager, pow.chainManager.CurrentBlock().NumberU64())
+ }
+ pow.cacheMutex.Unlock()
+}
+
+func makeDAG(p *ParamsAndCache) *DAG {
+ d := &DAG{
+ dag: C.malloc(p.params.full_size),
+ SeedBlockNum: p.SeedBlockNum,
+ }
+ C.ethash_compute_full_data(d.dag, p.params, p.cache)
+ return d
+}
+
+func (pow *Ethash) updateDAG() {
+ pow.cacheMutex.Lock()
+ pow.dagMutex.Lock()
+
+ seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+ if pow.dag == nil || pow.dag.SeedBlockNum != seedNum {
+ pow.dag = nil
+ log.Println("Making Dag")
+ start := time.Now()
+ pow.dag = makeDAG(pow.paramsAndCache)
+ log.Println("Took:", time.Since(start))
+ }
+
+ pow.dagMutex.Unlock()
+ pow.cacheMutex.Unlock()
+}
+
+func New(chainManager pow.ChainManager) *Ethash {
+ return &Ethash{
+ turbo: false,
+ paramsAndCache: makeParamsAndCache(chainManager, chainManager.CurrentBlock().NumberU64()),
+ chainManager: chainManager,
+ dag: nil,
+ ret: new(C.ethash_return_value),
+ cacheMutex: new(sync.Mutex),
+ dagMutex: new(sync.Mutex),
+ }
+}
+
+func (pow *Ethash) DAGSize() uint64 {
+ return uint64(pow.paramsAndCache.params.full_size)
+}
+
+func (pow *Ethash) CacheSize() uint64 {
+ return uint64(pow.paramsAndCache.params.cache_size)
+}
+
+func (pow *Ethash) GetSeedHash(blockNum uint64) []byte {
+ return pow.chainManager.GetBlockByNumber(getSeedBlockNum(blockNum)).Header().Hash()
+}
+
+func (pow *Ethash) Stop() {
+ pow.cacheMutex.Lock()
+ pow.dagMutex.Lock()
+ if pow.paramsAndCache.cache != nil {
+ C.free(pow.paramsAndCache.cache.mem)
+ }
+ if pow.dag != nil {
+ C.free(pow.dag.dag)
+ }
+ pow.dagMutex.Unlock()
+ pow.cacheMutex.Unlock()
+}
+
+func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
+ pow.updateDAG()
+
+ // Not very elegant, multiple mining instances are not supported
+ pow.dagMutex.Lock()
+ pow.cacheMutex.Lock()
+ defer pow.cacheMutex.Unlock()
+ defer pow.dagMutex.Unlock()
+
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+ miningHash := block.HashNoNonce()
+ diff := block.Difficulty()
+ log.Println("difficulty", diff)
+ i := int64(0)
+ start := time.Now().UnixNano()
+ t := time.Now()
+
+ nonce := uint64(r.Int63())
+
+ for {
+ select {
+ case <-stop:
+ powlogger.Infoln("Breaking from mining")
+ pow.HashRate = 0
+ pow.dagMutex.Unlock()
+ return nil, nil, nil
+ default:
+ i++
+
+ if time.Since(t) > (1 * time.Second) {
+ elapsed := time.Now().UnixNano() - start
+ hashes := ((float64(1e9) / float64(elapsed)) * float64(i)) / 1000
+ pow.HashRate = int64(hashes)
+ powlogger.Infoln("Hashing @", pow.HashRate, "khash")
+
+ t = time.Now()
+ }
+
+ cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+ cnonce := C.uint64_t(nonce)
+ log.Printf("seed hash, nonce: %x %x\n", miningHash, nonce)
+ // pow.hash is the output/return of ethash_full
+ C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+ res := C.ethash_check_difficulty((*C.uint8_t)(&pow.ret.result[0]), (*C.uint8_t)(unsafe.Pointer(&diff.Bytes()[0])))
+ if res == 1 {
+ mixDigest := C.GoBytes(unsafe.Pointer(&pow.ret.mix_hash[0]), 32)
+ // We don't really nead 32 bytes here
+ buf := make([]byte, 32)
+ binary.PutUvarint(buf, nonce)
+ return buf, mixDigest, pow.GetSeedHash(block.NumberU64())
+ }
+ nonce += 1
+ }
+
+ if !pow.turbo {
+ time.Sleep(20 * time.Microsecond)
+ }
+ }
+}
+
+func (pow *Ethash) Verify(block pow.Block) bool {
+ // Make sure the SeedHash is set correctly
+ if bytes.Compare(block.SeedHash(), pow.GetSeedHash(block.NumberU64())) != 0 {
+ log.Println("Block had wrong SeedHash")
+ log.Println("Expected: ", pow.GetSeedHash(block.NumberU64()))
+ log.Println("Actual: ", block.SeedHash())
+ return false
+ }
+
+ nonceInt, err := parseNonce(block.Nonce())
+ if err != nil {
+ log.Println("nonce to int err:", err)
+ return false
+ }
+ return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), nonceInt)
+}
+
+func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, blockNum uint64, nonce uint64) bool {
+ // First check: make sure header, mixDigest, nonce are correct without hitting the DAG
+ // This is to prevent DOS attacks
+ chash := (*C.uint8_t)(unsafe.Pointer(&hash))
+ cnonce := C.uint64_t(nonce)
+ cmixDigest := (*C.uint8_t)(unsafe.Pointer(&mixDigest))
+ cdifficulty := (*C.uint8_t)(unsafe.Pointer(&difficulty.Bytes()[0]))
+ if C.ethash_quick_check_difficulty(chash, cnonce, cmixDigest, cdifficulty) != 1 {
+ log.Println("Failed to pass quick check. Are you sure that the mix digest is correct?")
+ return false
+ }
+
+ var pAc *ParamsAndCache
+ // If its an old block (doesn't use the current cache)
+ // get the cache for it but don't update (so we don't need the mutex)
+ // Otherwise, it's the current block or a future.
+ // If current, updateCache will do nothing.
+ if getSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum {
+ pAc = makeParamsAndCache(pow.chainManager, blockNum)
+ } else {
+ pow.updateCache()
+ pow.cacheMutex.Lock()
+ defer pow.cacheMutex.Unlock()
+ pAc = pow.paramsAndCache
+ }
+
+ C.ethash_light(pow.ret, pAc.cache, pAc.params, chash, cnonce)
+ res := C.ethash_check_difficulty((*C.uint8_t)(unsafe.Pointer(&pow.ret.result[0])), cdifficulty)
+ return res == 1
+}
+
+func (pow *Ethash) GetHashrate() int64 {
+ return pow.HashRate
+}
+
+func (pow *Ethash) Turbo(on bool) {
+ pow.turbo = on
+}
+
+func (pow *Ethash) FullHash(nonce uint64, miningHash []byte) []byte {
+ pow.updateDAG()
+ pow.dagMutex.Lock()
+ defer pow.dagMutex.Unlock()
+ cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+ cnonce := C.uint64_t(nonce)
+ log.Println("seed hash, nonce:", miningHash, nonce)
+ // pow.hash is the output/return of ethash_full
+ C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+ ghash_full := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+ return ghash_full
+}
+
+func (pow *Ethash) LightHash(nonce uint64, miningHash []byte) []byte {
+ cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+ cnonce := C.uint64_t(nonce)
+ C.ethash_light(pow.ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce)
+ ghash_light := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+ return ghash_light
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
new file mode 100644
index 000000000..19d2fecbf
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LIBRARY ethash-cl)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT OPENCL_FOUND)
+ find_package(OpenCL)
+endif()
+if (OPENCL_FOUND)
+ include_directories(${OPENCL_INCLUDE_DIRS})
+ include_directories(..)
+ add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h)
+ TARGET_LINK_LIBRARIES(${LIBRARY} ${OPENCL_LIBRARIES} ethash)
+endif() \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
new file mode 100644
index 000000000..38fac1962
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
@@ -0,0 +1,12452 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2013 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/*! \file
+ *
+ * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and
+ * OpenCL 1.2 (rev 15)
+ * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes
+ *
+ * Additions and fixes from:
+ * Brian Cole, March 3rd 2010 and April 2012
+ * Matt Gruenke, April 2012.
+ * Bruce Merry, February 2013.
+ * Tom Deakin and Simon McIntosh-Smith, July 2013
+ *
+ * \version 1.2.6
+ * \date August 2013
+ *
+ * Optional extension support
+ *
+ * cl
+ * cl_ext_device_fission
+ * #define USE_CL_DEVICE_FISSION
+ */
+
+/*! \mainpage
+ * \section intro Introduction
+ * For many large applications C++ is the language of choice and so it seems
+ * reasonable to define C++ bindings for OpenCL.
+ *
+ *
+ * The interface is contained with a single C++ header file \em cl.hpp and all
+ * definitions are contained within the namespace \em cl. There is no additional
+ * requirement to include \em cl.h and to use either the C++ or original C
+ * bindings it is enough to simply include \em cl.hpp.
+ *
+ * The bindings themselves are lightweight and correspond closely to the
+ * underlying C API. Using the C++ bindings introduces no additional execution
+ * overhead.
+ *
+ * For detail documentation on the bindings see:
+ *
+ * The OpenCL C++ Wrapper API 1.2 (revision 09)
+ * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf
+ *
+ * \section example Example
+ *
+ * The following example shows a general use case for the C++
+ * bindings, including support for the optional exception feature and
+ * also the supplied vector and string classes, see following sections for
+ * decriptions of these features.
+ *
+ * \code
+ * #define __CL_ENABLE_EXCEPTIONS
+ *
+ * #if defined(__APPLE__) || defined(__MACOSX)
+ * #include <OpenCL/cl.hpp>
+ * #else
+ * #include <CL/cl.hpp>
+ * #endif
+ * #include <cstdio>
+ * #include <cstdlib>
+ * #include <iostream>
+ *
+ * const char * helloStr = "__kernel void "
+ * "hello(void) "
+ * "{ "
+ * " "
+ * "} ";
+ *
+ * int
+ * main(void)
+ * {
+ * cl_int err = CL_SUCCESS;
+ * try {
+ *
+ * std::vector<cl::Platform> platforms;
+ * cl::Platform::get(&platforms);
+ * if (platforms.size() == 0) {
+ * std::cout << "Platform size 0\n";
+ * return -1;
+ * }
+ *
+ * cl_context_properties properties[] =
+ * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
+ * cl::Context context(CL_DEVICE_TYPE_CPU, properties);
+ *
+ * std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
+ *
+ * cl::Program::Sources source(1,
+ * std::make_pair(helloStr,strlen(helloStr)));
+ * cl::Program program_ = cl::Program(context, source);
+ * program_.build(devices);
+ *
+ * cl::Kernel kernel(program_, "hello", &err);
+ *
+ * cl::Event event;
+ * cl::CommandQueue queue(context, devices[0], 0, &err);
+ * queue.enqueueNDRangeKernel(
+ * kernel,
+ * cl::NullRange,
+ * cl::NDRange(4,4),
+ * cl::NullRange,
+ * NULL,
+ * &event);
+ *
+ * event.wait();
+ * }
+ * catch (cl::Error err) {
+ * std::cerr
+ * << "ERROR: "
+ * << err.what()
+ * << "("
+ * << err.err()
+ * << ")"
+ * << std::endl;
+ * }
+ *
+ * return EXIT_SUCCESS;
+ * }
+ *
+ * \endcode
+ *
+ */
+#ifndef CL_HPP_
+#define CL_HPP_
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include <malloc.h>
+#include <iterator>
+#include <intrin.h>
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+#include <exception>
+#endif // #if defined(__CL_ENABLE_EXCEPTIONS)
+
+#pragma push_macro("max")
+#undef max
+#if defined(USE_DX_INTEROP)
+#include <CL/cl_d3d10.h>
+#include <CL/cl_dx9_media_sharing.h>
+#endif
+#endif // _WIN32
+
+//
+#if defined(USE_CL_DEVICE_FISSION)
+#include <CL/cl_ext.h>
+#endif
+
+#if defined(__APPLE__) || defined(__MACOSX)
+#include <OpenGL/OpenGL.h>
+#include <OpenCL/opencl.h>
+#include <libkern/OSAtomic.h>
+#else
+#include <GL/gl.h>
+#include <CL/opencl.h>
+#endif // !__APPLE__
+
+// To avoid accidentally taking ownership of core OpenCL types
+// such as cl_kernel constructors are made explicit
+// under OpenCL 1.2
+#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define __CL_EXPLICIT_CONSTRUCTORS explicit
+#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define __CL_EXPLICIT_CONSTRUCTORS
+#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+// Define deprecated prefixes and suffixes to ensure compilation
+// in case they are not pre-defined
+#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED)
+#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+
+#if !defined(CL_CALLBACK)
+#define CL_CALLBACK
+#endif //CL_CALLBACK
+
+#include <utility>
+#include <limits>
+
+#if !defined(__NO_STD_VECTOR)
+#include <vector>
+#endif
+
+#if !defined(__NO_STD_STRING)
+#include <string>
+#endif
+
+#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
+#include <alloca.h>
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#endif // linux
+
+#include <cstring>
+
+
+/*! \namespace cl
+ *
+ * \brief The OpenCL C++ bindings are defined within this namespace.
+ *
+ */
+namespace cl {
+
+class Memory;
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+#define __INIT_CL_EXT_FCN_PTR(name) \
+ if(!pfn_##name) { \
+ pfn_##name = (PFN_##name) \
+ clGetExtensionFunctionAddress(#name); \
+ if(!pfn_##name) { \
+ } \
+ }
+#endif // #if defined(CL_VERSION_1_1)
+
+#if defined(CL_VERSION_1_2)
+#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \
+ if(!pfn_##name) { \
+ pfn_##name = (PFN_##name) \
+ clGetExtensionFunctionAddressForPlatform(platform, #name); \
+ if(!pfn_##name) { \
+ } \
+ }
+#endif // #if defined(CL_VERSION_1_1)
+
+class Program;
+class Device;
+class Context;
+class CommandQueue;
+class Memory;
+class Buffer;
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+/*! \brief Exception class
+ *
+ * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined.
+ */
+class Error : public std::exception
+{
+private:
+ cl_int err_;
+ const char * errStr_;
+public:
+ /*! \brief Create a new CL error exception for a given error code
+ * and corresponding message.
+ *
+ * \param err error code value.
+ *
+ * \param errStr a descriptive string that must remain in scope until
+ * handling of the exception has concluded. If set, it
+ * will be returned by what().
+ */
+ Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
+ {}
+
+ ~Error() throw() {}
+
+ /*! \brief Get error string associated with exception
+ *
+ * \return A memory pointer to the error message string.
+ */
+ virtual const char * what() const throw ()
+ {
+ if (errStr_ == NULL) {
+ return "empty";
+ }
+ else {
+ return errStr_;
+ }
+ }
+
+ /*! \brief Get error code associated with exception
+ *
+ * \return The error code.
+ */
+ cl_int err(void) const { return err_; }
+};
+
+#define __ERR_STR(x) #x
+#else
+#define __ERR_STR(x) NULL
+#endif // __CL_ENABLE_EXCEPTIONS
+
+
+namespace detail
+{
+#if defined(__CL_ENABLE_EXCEPTIONS)
+static inline cl_int errHandler (
+ cl_int err,
+ const char * errStr = NULL)
+{
+ if (err != CL_SUCCESS) {
+ throw Error(err, errStr);
+ }
+ return err;
+}
+#else
+static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+{
+ (void) errStr; // suppress unused variable warning
+ return err;
+}
+#endif // __CL_ENABLE_EXCEPTIONS
+}
+
+
+
+//! \cond DOXYGEN_DETAIL
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo)
+#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo)
+#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs)
+#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs)
+#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo)
+#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo)
+#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo)
+#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo)
+#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo)
+#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo)
+#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo)
+#if defined(CL_VERSION_1_2)
+#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo)
+#endif // #if defined(CL_VERSION_1_2)
+#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo)
+#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo)
+#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo)
+#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo)
+
+#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext)
+#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType)
+#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats)
+
+#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer)
+#define __COPY_ERR __ERR_STR(cl::copy)
+#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer)
+#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
+#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
+#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo)
+#if defined(CL_VERSION_1_2)
+#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage)
+#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture)
+#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions)
+#endif // #if defined(CL_VERSION_1_2)
+#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler)
+#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
+
+#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent)
+#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus)
+#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback)
+#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents)
+
+#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel)
+#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg)
+#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource)
+#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary)
+#if defined(CL_VERSION_1_2)
+#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels)
+#endif // #if defined(CL_VERSION_1_2)
+#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram)
+#if defined(CL_VERSION_1_2)
+#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram)
+
+#endif // #if defined(CL_VERSION_1_2)
+#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram)
+
+#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue)
+#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty)
+#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer)
+#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect)
+#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer)
+#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect)
+#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer)
+#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect)
+#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer)
+#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage)
+#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage)
+#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage)
+#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage)
+#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer)
+#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage)
+#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer)
+#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage)
+#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject)
+#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel)
+#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask)
+#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel)
+#if defined(CL_VERSION_1_2)
+#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects)
+#endif // #if defined(CL_VERSION_1_2)
+
+#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects)
+#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects)
+
+
+#define __RETAIN_ERR __ERR_STR(Retain Object)
+#define __RELEASE_ERR __ERR_STR(Release Object)
+#define __FLUSH_ERR __ERR_STR(clFlush)
+#define __FINISH_ERR __ERR_STR(clFinish)
+#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error)
+
+/**
+ * CL 1.2 version that uses device fission.
+ */
+#if defined(CL_VERSION_1_2)
+#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices)
+#else
+#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT)
+#endif // #if defined(CL_VERSION_1_2)
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker)
+#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents)
+#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier)
+#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler)
+#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D)
+#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D)
+#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D)
+#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D)
+#endif // #if defined(CL_VERSION_1_1)
+
+#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
+//! \endcond
+
+/**
+ * CL 1.2 marker and barrier commands
+ */
+#if defined(CL_VERSION_1_2)
+#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList)
+#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList)
+#endif // #if defined(CL_VERSION_1_2)
+
+#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
+typedef std::string STRING_CLASS;
+#elif !defined(__USE_DEV_STRING)
+
+/*! \class string
+ * \brief Simple string class, that provides a limited subset of std::string
+ * functionality but avoids many of the issues that come with that class.
+
+ * \note Deprecated. Please use std::string as default or
+ * re-define the string class to match the std::string
+ * interface by defining STRING_CLASS
+ */
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+{
+private:
+ ::size_t size_;
+ char * str_;
+public:
+ //! \brief Constructs an empty string, allocating no memory.
+ string(void) : size_(0), str_(NULL)
+ {
+ }
+
+ /*! \brief Constructs a string populated from an arbitrary value of
+ * specified size.
+ *
+ * An extra '\0' is added, in case none was contained in str.
+ *
+ * \param str the initial value of the string instance. Note that '\0'
+ * characters receive no special treatment. If NULL,
+ * the string is left empty, with a size of 0.
+ *
+ * \param size the number of characters to copy from str.
+ */
+ string(const char * str, ::size_t size) :
+ size_(size),
+ str_(NULL)
+ {
+ if( size > 0 ) {
+ str_ = new char[size_+1];
+ if (str_ != NULL) {
+ memcpy(str_, str, size_ * sizeof(char));
+ str_[size_] = '\0';
+ }
+ else {
+ size_ = 0;
+ }
+ }
+ }
+
+ /*! \brief Constructs a string populated from a null-terminated value.
+ *
+ * \param str the null-terminated initial value of the string instance.
+ * If NULL, the string is left empty, with a size of 0.
+ */
+ string(const char * str) :
+ size_(0),
+ str_(NULL)
+ {
+ if( str ) {
+ size_= ::strlen(str);
+ }
+ if( size_ > 0 ) {
+ str_ = new char[size_ + 1];
+ if (str_ != NULL) {
+ memcpy(str_, str, (size_ + 1) * sizeof(char));
+ }
+ }
+ }
+
+ void resize( ::size_t n )
+ {
+ if( size_ == n ) {
+ return;
+ }
+ if (n == 0) {
+ if( str_ ) {
+ delete [] str_;
+ }
+ str_ = NULL;
+ size_ = 0;
+ }
+ else {
+ char *newString = new char[n + 1];
+ int copySize = n;
+ if( size_ < n ) {
+ copySize = size_;
+ }
+ size_ = n;
+
+ if(str_) {
+ memcpy(newString, str_, (copySize + 1) * sizeof(char));
+ }
+ if( copySize < size_ ) {
+ memset(newString + copySize, 0, size_ - copySize);
+ }
+ newString[size_] = '\0';
+
+ delete [] str_;
+ str_ = newString;
+ }
+ }
+
+ const char& operator[] ( ::size_t pos ) const
+ {
+ return str_[pos];
+ }
+
+ char& operator[] ( ::size_t pos )
+ {
+ return str_[pos];
+ }
+
+ /*! \brief Copies the value of another string to this one.
+ *
+ * \param rhs the string to copy.
+ *
+ * \returns a reference to the modified instance.
+ */
+ string& operator=(const string& rhs)
+ {
+ if (this == &rhs) {
+ return *this;
+ }
+
+ if( str_ != NULL ) {
+ delete [] str_;
+ str_ = NULL;
+ size_ = 0;
+ }
+
+ if (rhs.size_ == 0 || rhs.str_ == NULL) {
+ str_ = NULL;
+ size_ = 0;
+ }
+ else {
+ str_ = new char[rhs.size_ + 1];
+ size_ = rhs.size_;
+
+ if (str_ != NULL) {
+ memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
+ }
+ else {
+ size_ = 0;
+ }
+ }
+
+ return *this;
+ }
+
+ /*! \brief Constructs a string by copying the value of another instance.
+ *
+ * \param rhs the string to copy.
+ */
+ string(const string& rhs) :
+ size_(0),
+ str_(NULL)
+ {
+ *this = rhs;
+ }
+
+ //! \brief Destructor - frees memory used to hold the current value.
+ ~string()
+ {
+ delete[] str_;
+ str_ = NULL;
+ }
+
+ //! \brief Queries the length of the string, excluding any added '\0's.
+ ::size_t size(void) const { return size_; }
+
+ //! \brief Queries the length of the string, excluding any added '\0's.
+ ::size_t length(void) const { return size(); }
+
+ /*! \brief Returns a pointer to the private copy held by this instance,
+ * or "" if empty/unset.
+ */
+ const char * c_str(void) const { return (str_) ? str_ : "";}
+};
+typedef cl::string STRING_CLASS;
+#endif // #elif !defined(__USE_DEV_STRING)
+
+#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+#define VECTOR_CLASS std::vector
+#elif !defined(__USE_DEV_VECTOR)
+#define VECTOR_CLASS cl::vector
+
+#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
+#define __MAX_DEFAULT_VECTOR_SIZE 10
+#endif
+
+/*! \class vector
+ * \brief Fixed sized vector implementation that mirroring
+ *
+ * \note Deprecated. Please use std::vector as default or
+ * re-define the vector class to match the std::vector
+ * interface by defining VECTOR_CLASS
+
+ * \note Not recommended for use with custom objects as
+ * current implementation will construct N elements
+ *
+ * std::vector functionality.
+ * \brief Fixed sized vector compatible with std::vector.
+ *
+ * \note
+ * This differs from std::vector<> not just in memory allocation,
+ * but also in terms of when members are constructed, destroyed,
+ * and assigned instead of being copy constructed.
+ *
+ * \param T type of element contained in the vector.
+ *
+ * \param N maximum size of the vector.
+ */
+template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+{
+private:
+ T data_[N];
+ unsigned int size_;
+
+public:
+ //! \brief Constructs an empty vector with no memory allocated.
+ vector() :
+ size_(static_cast<unsigned int>(0))
+ {}
+
+ //! \brief Deallocates the vector's memory and destroys all of its elements.
+ ~vector()
+ {
+ clear();
+ }
+
+ //! \brief Returns the number of elements currently contained.
+ unsigned int size(void) const
+ {
+ return size_;
+ }
+
+ /*! \brief Empties the vector of all elements.
+ * \note
+ * This does not deallocate memory but will invoke destructors
+ * on contained elements.
+ */
+ void clear()
+ {
+ while(!empty()) {
+ pop_back();
+ }
+ }
+
+ /*! \brief Appends an element after the last valid element.
+ * Calling this on a vector that has reached capacity will throw an
+ * exception if exceptions are enabled.
+ */
+ void push_back (const T& x)
+ {
+ if (size() < N) {
+ new (&data_[size_]) T(x);
+ size_++;
+ } else {
+ detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+ }
+ }
+
+ /*! \brief Removes the last valid element from the vector.
+ * Calling this on an empty vector will throw an exception
+ * if exceptions are enabled.
+ */
+ void pop_back(void)
+ {
+ if (size_ != 0) {
+ --size_;
+ data_[size_].~T();
+ } else {
+ detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+ }
+ }
+
+ /*! \brief Constructs with a value copied from another.
+ *
+ * \param vec the vector to copy.
+ */
+ vector(const vector<T, N>& vec) :
+ size_(vec.size_)
+ {
+ if (size_ != 0) {
+ assign(vec.begin(), vec.end());
+ }
+ }
+
+ /*! \brief Constructs with a specified number of initial elements.
+ *
+ * \param size number of initial elements.
+ *
+ * \param val value of initial elements.
+ */
+ vector(unsigned int size, const T& val = T()) :
+ size_(0)
+ {
+ for (unsigned int i = 0; i < size; i++) {
+ push_back(val);
+ }
+ }
+
+ /*! \brief Overwrites the current content with that copied from another
+ * instance.
+ *
+ * \param rhs vector to copy.
+ *
+ * \returns a reference to this.
+ */
+ vector<T, N>& operator=(const vector<T, N>& rhs)
+ {
+ if (this == &rhs) {
+ return *this;
+ }
+
+ if (rhs.size_ != 0) {
+ assign(rhs.begin(), rhs.end());
+ } else {
+ clear();
+ }
+
+ return *this;
+ }
+
+ /*! \brief Tests equality against another instance.
+ *
+ * \param vec the vector against which to compare.
+ */
+ bool operator==(vector<T,N> &vec)
+ {
+ if (size() != vec.size()) {
+ return false;
+ }
+
+ for( unsigned int i = 0; i < size(); ++i ) {
+ if( operator[](i) != vec[i] ) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ //! \brief Conversion operator to T*.
+ operator T* () { return data_; }
+
+ //! \brief Conversion operator to const T*.
+ operator const T* () const { return data_; }
+
+ //! \brief Tests whether this instance has any elements.
+ bool empty (void) const
+ {
+ return size_==0;
+ }
+
+ //! \brief Returns the maximum number of elements this instance can hold.
+ unsigned int max_size (void) const
+ {
+ return N;
+ }
+
+ //! \brief Returns the maximum number of elements this instance can hold.
+ unsigned int capacity () const
+ {
+ return N;
+ }
+
+ /*! \brief Returns a reference to a given element.
+ *
+ * \param index which element to access. *
+ * \note
+ * The caller is responsible for ensuring index is >= 0 and < size().
+ */
+ T& operator[](int index)
+ {
+ return data_[index];
+ }
+
+ /*! \brief Returns a const reference to a given element.
+ *
+ * \param index which element to access.
+ *
+ * \note
+ * The caller is responsible for ensuring index is >= 0 and < size().
+ */
+ const T& operator[](int index) const
+ {
+ return data_[index];
+ }
+
+ /*! \brief Assigns elements of the vector based on a source iterator range.
+ *
+ * \param start Beginning iterator of source range
+ * \param end Enditerator of source range
+ *
+ * \note
+ * Will throw an exception if exceptions are enabled and size exceeded.
+ */
+ template<class I>
+ void assign(I start, I end)
+ {
+ clear();
+ while(start != end) {
+ push_back(*start);
+ start++;
+ }
+ }
+
+ /*! \class iterator
+ * \brief Const iterator class for vectors
+ */
+ class iterator
+ {
+ private:
+ const vector<T,N> *vec_;
+ int index_;
+
+ /**
+ * Internal iterator constructor to capture reference
+ * to the vector it iterates over rather than taking
+ * the vector by copy.
+ */
+ iterator (const vector<T,N> &vec, int index) :
+ vec_(&vec)
+ {
+ if( !vec.empty() ) {
+ index_ = index;
+ } else {
+ index_ = -1;
+ }
+ }
+
+ public:
+ iterator(void) :
+ index_(-1),
+ vec_(NULL)
+ {
+ }
+
+ iterator(const iterator& rhs) :
+ vec_(rhs.vec_),
+ index_(rhs.index_)
+ {
+ }
+
+ ~iterator(void) {}
+
+ static iterator begin(const cl::vector<T,N> &vec)
+ {
+ iterator i(vec, 0);
+
+ return i;
+ }
+
+ static iterator end(const cl::vector<T,N> &vec)
+ {
+ iterator i(vec, vec.size());
+
+ return i;
+ }
+
+ bool operator==(iterator i)
+ {
+ return ((vec_ == i.vec_) &&
+ (index_ == i.index_));
+ }
+
+ bool operator!=(iterator i)
+ {
+ return (!(*this==i));
+ }
+
+ iterator& operator++()
+ {
+ ++index_;
+ return *this;
+ }
+
+ iterator operator++(int)
+ {
+ iterator retVal(*this);
+ ++index_;
+ return retVal;
+ }
+
+ iterator& operator--()
+ {
+ --index_;
+ return *this;
+ }
+
+ iterator operator--(int)
+ {
+ iterator retVal(*this);
+ --index_;
+ return retVal;
+ }
+
+ const T& operator *() const
+ {
+ return (*vec_)[index_];
+ }
+ };
+
+ iterator begin(void)
+ {
+ return iterator::begin(*this);
+ }
+
+ iterator begin(void) const
+ {
+ return iterator::begin(*this);
+ }
+
+ iterator end(void)
+ {
+ return iterator::end(*this);
+ }
+
+ iterator end(void) const
+ {
+ return iterator::end(*this);
+ }
+
+ T& front(void)
+ {
+ return data_[0];
+ }
+
+ T& back(void)
+ {
+ return data_[size_];
+ }
+
+ const T& front(void) const
+ {
+ return data_[0];
+ }
+
+ const T& back(void) const
+ {
+ return data_[size_-1];
+ }
+};
+#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+
+
+
+
+
+namespace detail {
+#define __DEFAULT_NOT_INITIALIZED 1
+#define __DEFAULT_BEING_INITIALIZED 2
+#define __DEFAULT_INITIALIZED 4
+
+ /*
+ * Compare and exchange primitives are needed for handling of defaults
+ */
+ inline int compare_exchange(volatile int * dest, int exchange, int comparand)
+ {
+#ifdef _WIN32
+ return (int)(InterlockedCompareExchange(
+ (volatile long*)dest,
+ (long)exchange,
+ (long)comparand));
+#elif defined(__APPLE__) || defined(__MACOSX)
+ return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest);
+#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX)
+ return (__sync_val_compare_and_swap(
+ dest,
+ comparand,
+ exchange));
+#endif // !_WIN32
+ }
+
+ inline void fence() { _mm_mfence(); }
+}; // namespace detail
+
+
+/*! \brief class used to interface between C++ and
+ * OpenCL C calls that require arrays of size_t values, whose
+ * size is known statically.
+ */
+template <int N>
+class size_t
+{
+private:
+ ::size_t data_[N];
+
+public:
+ //! \brief Initialize size_t to all 0s
+ size_t()
+ {
+ for( int i = 0; i < N; ++i ) {
+ data_[i] = 0;
+ }
+ }
+
+ ::size_t& operator[](int index)
+ {
+ return data_[index];
+ }
+
+ const ::size_t& operator[](int index) const
+ {
+ return data_[index];
+ }
+
+ //! \brief Conversion operator to T*.
+ operator ::size_t* () { return data_; }
+
+ //! \brief Conversion operator to const T*.
+ operator const ::size_t* () const { return data_; }
+};
+
+namespace detail {
+
+// Generic getInfoHelper. The final parameter is used to guide overload
+// resolution: the actual parameter passed is an int, which makes this
+// a worse conversion sequence than a specialization that declares the
+// parameter as an int.
+template<typename Functor, typename T>
+inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long)
+{
+ return f(name, sizeof(T), param, NULL);
+}
+
+// Specialized getInfoHelper for VECTOR_CLASS params
+template <typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<T>* param, long)
+{
+ ::size_t required;
+ cl_int err = f(name, 0, NULL, &required);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ T* value = (T*) alloca(required);
+ err = f(name, required, value, NULL);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ param->assign(&value[0], &value[required/sizeof(T)]);
+ return CL_SUCCESS;
+}
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simplify specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ */
+template <typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<T>* param, int, typename T::cl_type = 0)
+{
+ ::size_t required;
+ cl_int err = f(name, 0, NULL, &required);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ typename T::cl_type * value = (typename T::cl_type *) alloca(required);
+ err = f(name, required, value, NULL);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ ::size_t elements = required / sizeof(typename T::cl_type);
+ param->assign(&value[0], &value[elements]);
+ for (::size_t i = 0; i < elements; i++)
+ {
+ if (value[i] != NULL)
+ {
+ err = (*param)[i].retain();
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+ }
+ }
+ return CL_SUCCESS;
+}
+
+// Specialized for getInfo<CL_PROGRAM_BINARIES>
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<char *>* param, int)
+{
+ cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
+
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for STRING_CLASS params
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long)
+{
+ ::size_t required;
+ cl_int err = f(name, 0, NULL, &required);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ char* value = (char*) alloca(required);
+ err = f(name, required, value, NULL);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ *param = value;
+ return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for cl::size_t params
+template <typename Func, ::size_t N>
+inline cl_int getInfoHelper(Func f, cl_uint name, size_t<N>* param, long)
+{
+ ::size_t required;
+ cl_int err = f(name, 0, NULL, &required);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ ::size_t* value = (::size_t*) alloca(required);
+ err = f(name, required, value, NULL);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+
+ for(int i = 0; i < N; ++i) {
+ (*param)[i] = value[i];
+ }
+
+ return CL_SUCCESS;
+}
+
+template<typename T> struct ReferenceHandler;
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simplify specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ */
+template<typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0)
+{
+ typename T::cl_type value;
+ cl_int err = f(name, sizeof(value), &value, NULL);
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+ *param = value;
+ if (value != NULL)
+ {
+ err = param->retain();
+ if (err != CL_SUCCESS) {
+ return err;
+ }
+ }
+ return CL_SUCCESS;
+}
+
+#define __PARAM_NAME_INFO_1_0(F) \
+ F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
+ F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
+ F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
+ F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
+ F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
+ \
+ F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
+ F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
+ F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
+ F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
+ F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
+ F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
+ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
+ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
+ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
+ F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \
+ F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
+ F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
+ F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
+ F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
+ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
+ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\
+ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
+ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
+ F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
+ F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
+ F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
+ F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
+ F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
+ F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
+ F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
+ F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
+ F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
+ F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
+ F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
+ F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
+ F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
+ F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
+ F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
+ F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
+ F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
+ F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
+ \
+ F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
+ F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
+ F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
+ \
+ F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
+ F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
+ F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
+ F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \
+ \
+ F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
+ F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
+ F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
+ F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
+ \
+ F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
+ F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
+ F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
+ F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
+ F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
+ F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
+ F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
+ \
+ F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
+ F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
+ F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
+ F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
+ F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
+ F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
+ F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
+ \
+ F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
+ F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
+ F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \
+ F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \
+ F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \
+ \
+ F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
+ F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
+ F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
+ F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<Device>) \
+ F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
+ F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
+ F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
+ \
+ F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
+ F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
+ F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
+ \
+ F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
+ F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
+ F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
+ F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
+ F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
+ \
+ F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
+ F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
+ F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
+ \
+ F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
+ F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
+ F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
+ F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)
+
+#if defined(CL_VERSION_1_1)
+#define __PARAM_NAME_INFO_1_1(F) \
+ F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
+ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
+ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
+ F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
+ F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
+ F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
+ F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \
+ \
+ F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
+ F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
+ \
+ F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
+ F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
+ \
+ F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
+#endif // CL_VERSION_1_1
+
+
+#if defined(CL_VERSION_1_2)
+#define __PARAM_NAME_INFO_1_2(F) \
+ F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \
+ \
+ F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \
+ F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \
+ \
+ F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \
+ \
+ F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \
+ \
+ F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \
+ F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \
+ F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \
+ F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \
+ \
+ F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \
+ F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS<cl_device_partition_property>) \
+ F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS<cl_device_partition_property>) \
+ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \
+ F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \
+ F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \
+ F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS)
+#endif // #if defined(CL_VERSION_1_2)
+
+#if defined(USE_CL_DEVICE_FISSION)
+#define __PARAM_NAME_DEVICE_FISSION(F) \
+ F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
+ F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+ F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
+ F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
+#endif // USE_CL_DEVICE_FISSION
+
+template <typename enum_type, cl_int Name>
+struct param_traits {};
+
+#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \
+struct token; \
+template<> \
+struct param_traits<detail:: token,param_name> \
+{ \
+ enum { value = param_name }; \
+ typedef T param_type; \
+};
+
+__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS)
+#if defined(CL_VERSION_1_1)
+__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS)
+#endif // CL_VERSION_1_1
+#if defined(CL_VERSION_1_2)
+__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS)
+#endif // CL_VERSION_1_1
+
+#if defined(USE_CL_DEVICE_FISSION)
+__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS);
+#endif // USE_CL_DEVICE_FISSION
+
+#ifdef CL_PLATFORM_ICD_SUFFIX_KHR
+__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS)
+#endif
+
+#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong)
+#endif
+
+#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>)
+#endif
+#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_SIMD_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint)
+#endif
+
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_WARP_SIZE_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_GPU_OVERLAP_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool)
+#endif
+#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool)
+#endif
+#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool)
+#endif
+
+// Convenience functions
+
+template <typename Func, typename T>
+inline cl_int
+getInfo(Func f, cl_uint name, T* param)
+{
+ return getInfoHelper(f, name, param, 0);
+}
+
+template <typename Func, typename Arg0>
+struct GetInfoFunctor0
+{
+ Func f_; const Arg0& arg0_;
+ cl_int operator ()(
+ cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+ { return f_(arg0_, param, size, value, size_ret); }
+};
+
+template <typename Func, typename Arg0, typename Arg1>
+struct GetInfoFunctor1
+{
+ Func f_; const Arg0& arg0_; const Arg1& arg1_;
+ cl_int operator ()(
+ cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+ { return f_(arg0_, arg1_, param, size, value, size_ret); }
+};
+
+template <typename Func, typename Arg0, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
+{
+ GetInfoFunctor0<Func, Arg0> f0 = { f, arg0 };
+ return getInfoHelper(f0, name, param, 0);
+}
+
+template <typename Func, typename Arg0, typename Arg1, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
+{
+ GetInfoFunctor1<Func, Arg0, Arg1> f0 = { f, arg0, arg1 };
+ return getInfoHelper(f0, name, param, 0);
+}
+
+template<typename T>
+struct ReferenceHandler
+{ };
+
+#if defined(CL_VERSION_1_2)
+/**
+ * OpenCL 1.2 devices do have retain/release.
+ */
+template <>
+struct ReferenceHandler<cl_device_id>
+{
+ /**
+ * Retain the device.
+ * \param device A valid device created using createSubDevices
+ * \return
+ * CL_SUCCESS if the function executed successfully.
+ * CL_INVALID_DEVICE if device was not a valid subdevice
+ * CL_OUT_OF_RESOURCES
+ * CL_OUT_OF_HOST_MEMORY
+ */
+ static cl_int retain(cl_device_id device)
+ { return ::clRetainDevice(device); }
+ /**
+ * Retain the device.
+ * \param device A valid device created using createSubDevices
+ * \return
+ * CL_SUCCESS if the function executed successfully.
+ * CL_INVALID_DEVICE if device was not a valid subdevice
+ * CL_OUT_OF_RESOURCES
+ * CL_OUT_OF_HOST_MEMORY
+ */
+ static cl_int release(cl_device_id device)
+ { return ::clReleaseDevice(device); }
+};
+#else // #if defined(CL_VERSION_1_2)
+/**
+ * OpenCL 1.1 devices do not have retain/release.
+ */
+template <>
+struct ReferenceHandler<cl_device_id>
+{
+ // cl_device_id does not have retain().
+ static cl_int retain(cl_device_id)
+ { return CL_SUCCESS; }
+ // cl_device_id does not have release().
+ static cl_int release(cl_device_id)
+ { return CL_SUCCESS; }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+template <>
+struct ReferenceHandler<cl_platform_id>
+{
+ // cl_platform_id does not have retain().
+ static cl_int retain(cl_platform_id)
+ { return CL_SUCCESS; }
+ // cl_platform_id does not have release().
+ static cl_int release(cl_platform_id)
+ { return CL_SUCCESS; }
+};
+
+template <>
+struct ReferenceHandler<cl_context>
+{
+ static cl_int retain(cl_context context)
+ { return ::clRetainContext(context); }
+ static cl_int release(cl_context context)
+ { return ::clReleaseContext(context); }
+};
+
+template <>
+struct ReferenceHandler<cl_command_queue>
+{
+ static cl_int retain(cl_command_queue queue)
+ { return ::clRetainCommandQueue(queue); }
+ static cl_int release(cl_command_queue queue)
+ { return ::clReleaseCommandQueue(queue); }
+};
+
+template <>
+struct ReferenceHandler<cl_mem>
+{
+ static cl_int retain(cl_mem memory)
+ { return ::clRetainMemObject(memory); }
+ static cl_int release(cl_mem memory)
+ { return ::clReleaseMemObject(memory); }
+};
+
+template <>
+struct ReferenceHandler<cl_sampler>
+{
+ static cl_int retain(cl_sampler sampler)
+ { return ::clRetainSampler(sampler); }
+ static cl_int release(cl_sampler sampler)
+ { return ::clReleaseSampler(sampler); }
+};
+
+template <>
+struct ReferenceHandler<cl_program>
+{
+ static cl_int retain(cl_program program)
+ { return ::clRetainProgram(program); }
+ static cl_int release(cl_program program)
+ { return ::clReleaseProgram(program); }
+};
+
+template <>
+struct ReferenceHandler<cl_kernel>
+{
+ static cl_int retain(cl_kernel kernel)
+ { return ::clRetainKernel(kernel); }
+ static cl_int release(cl_kernel kernel)
+ { return ::clReleaseKernel(kernel); }
+};
+
+template <>
+struct ReferenceHandler<cl_event>
+{
+ static cl_int retain(cl_event event)
+ { return ::clRetainEvent(event); }
+ static cl_int release(cl_event event)
+ { return ::clReleaseEvent(event); }
+};
+
+
+// Extracts version number with major in the upper 16 bits, minor in the lower 16
+static cl_uint getVersion(const char *versionInfo)
+{
+ int highVersion = 0;
+ int lowVersion = 0;
+ int index = 7;
+ while(versionInfo[index] != '.' ) {
+ highVersion *= 10;
+ highVersion += versionInfo[index]-'0';
+ ++index;
+ }
+ ++index;
+ while(versionInfo[index] != ' ' ) {
+ lowVersion *= 10;
+ lowVersion += versionInfo[index]-'0';
+ ++index;
+ }
+ return (highVersion << 16) | lowVersion;
+}
+
+static cl_uint getPlatformVersion(cl_platform_id platform)
+{
+ ::size_t size = 0;
+ clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size);
+ char *versionInfo = (char *) alloca(size);
+ clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size);
+ return getVersion(versionInfo);
+}
+
+static cl_uint getDevicePlatformVersion(cl_device_id device)
+{
+ cl_platform_id platform;
+ clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
+ return getPlatformVersion(platform);
+}
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+static cl_uint getContextPlatformVersion(cl_context context)
+{
+ // The platform cannot be queried directly, so we first have to grab a
+ // device and obtain its context
+ ::size_t size = 0;
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
+ if (size == 0)
+ return 0;
+ cl_device_id *devices = (cl_device_id *) alloca(size);
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL);
+ return getDevicePlatformVersion(devices[0]);
+}
+#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+template <typename T>
+class Wrapper
+{
+public:
+ typedef T cl_type;
+
+protected:
+ cl_type object_;
+
+public:
+ Wrapper() : object_(NULL) { }
+
+ Wrapper(const cl_type &obj) : object_(obj) { }
+
+ ~Wrapper()
+ {
+ if (object_ != NULL) { release(); }
+ }
+
+ Wrapper(const Wrapper<cl_type>& rhs)
+ {
+ object_ = rhs.object_;
+ if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+ }
+
+ Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+ {
+ if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+ object_ = rhs.object_;
+ if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+ return *this;
+ }
+
+ Wrapper<cl_type>& operator = (const cl_type &rhs)
+ {
+ if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+ object_ = rhs;
+ return *this;
+ }
+
+ cl_type operator ()() const { return object_; }
+
+ cl_type& operator ()() { return object_; }
+
+protected:
+ template<typename Func, typename U>
+ friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type);
+
+ cl_int retain() const
+ {
+ return ReferenceHandler<cl_type>::retain(object_);
+ }
+
+ cl_int release() const
+ {
+ return ReferenceHandler<cl_type>::release(object_);
+ }
+};
+
+template <>
+class Wrapper<cl_device_id>
+{
+public:
+ typedef cl_device_id cl_type;
+
+protected:
+ cl_type object_;
+ bool referenceCountable_;
+
+ static bool isReferenceCountable(cl_device_id device)
+ {
+ bool retVal = false;
+ if (device != NULL) {
+ int version = getDevicePlatformVersion(device);
+ if(version > ((1 << 16) + 1)) {
+ retVal = true;
+ }
+ }
+ return retVal;
+ }
+
+public:
+ Wrapper() : object_(NULL), referenceCountable_(false)
+ {
+ }
+
+ Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false)
+ {
+ referenceCountable_ = isReferenceCountable(obj);
+ }
+
+ ~Wrapper()
+ {
+ if (object_ != NULL) { release(); }
+ }
+
+ Wrapper(const Wrapper<cl_type>& rhs)
+ {
+ object_ = rhs.object_;
+ referenceCountable_ = isReferenceCountable(object_);
+ if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+ }
+
+ Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+ {
+ if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+ object_ = rhs.object_;
+ referenceCountable_ = rhs.referenceCountable_;
+ if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+ return *this;
+ }
+
+ Wrapper<cl_type>& operator = (const cl_type &rhs)
+ {
+ if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+ object_ = rhs;
+ referenceCountable_ = isReferenceCountable(object_);
+ return *this;
+ }
+
+ cl_type operator ()() const { return object_; }
+
+ cl_type& operator ()() { return object_; }
+
+protected:
+ template<typename Func, typename U>
+ friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type);
+
+ template<typename Func, typename U>
+ friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS<U>*, int, typename U::cl_type);
+
+ cl_int retain() const
+ {
+ if( referenceCountable_ ) {
+ return ReferenceHandler<cl_type>::retain(object_);
+ }
+ else {
+ return CL_SUCCESS;
+ }
+ }
+
+ cl_int release() const
+ {
+ if( referenceCountable_ ) {
+ return ReferenceHandler<cl_type>::release(object_);
+ }
+ else {
+ return CL_SUCCESS;
+ }
+ }
+};
+
+} // namespace detail
+//! \endcond
+
+/*! \stuct ImageFormat
+ * \brief Adds constructors and member functions for cl_image_format.
+ *
+ * \see cl_image_format
+ */
+struct ImageFormat : public cl_image_format
+{
+ //! \brief Default constructor - performs no initialization.
+ ImageFormat(){}
+
+ //! \brief Initializing constructor.
+ ImageFormat(cl_channel_order order, cl_channel_type type)
+ {
+ image_channel_order = order;
+ image_channel_data_type = type;
+ }
+
+ //! \brief Assignment operator.
+ ImageFormat& operator = (const ImageFormat& rhs)
+ {
+ if (this != &rhs) {
+ this->image_channel_data_type = rhs.image_channel_data_type;
+ this->image_channel_order = rhs.image_channel_order;
+ }
+ return *this;
+ }
+};
+
+/*! \brief Class interface for cl_device_id.
+ *
+ * \note Copies of these objects are inexpensive, since they don't 'own'
+ * any underlying resources or data structures.
+ *
+ * \see cl_device_id
+ */
+class Device : public detail::Wrapper<cl_device_id>
+{
+public:
+ //! \brief Default constructor - initializes to NULL.
+ Device() : detail::Wrapper<cl_type>() { }
+
+ /*! \brief Copy constructor.
+ *
+ * This simply copies the device ID value, which is an inexpensive operation.
+ */
+ Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
+
+ /*! \brief Constructor from cl_device_id.
+ *
+ * This simply copies the device ID value, which is an inexpensive operation.
+ */
+ Device(const cl_device_id &device) : detail::Wrapper<cl_type>(device) { }
+
+ /*! \brief Returns the first device on the default context.
+ *
+ * \see Context::getDefault()
+ */
+ static Device getDefault(cl_int * err = NULL);
+
+ /*! \brief Assignment operator from Device.
+ *
+ * This simply copies the device ID value, which is an inexpensive operation.
+ */
+ Device& operator = (const Device& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_device_id.
+ *
+ * This simply copies the device ID value, which is an inexpensive operation.
+ */
+ Device& operator = (const cl_device_id& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetDeviceInfo().
+ template <typename T>
+ cl_int getInfo(cl_device_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetDeviceInfo, object_, name, param),
+ __GET_DEVICE_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetDeviceInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_device_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_device_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ /**
+ * CL 1.2 version
+ */
+#if defined(CL_VERSION_1_2)
+ //! \brief Wrapper for clCreateSubDevicesEXT().
+ cl_int createSubDevices(
+ const cl_device_partition_property * properties,
+ VECTOR_CLASS<Device>* devices)
+ {
+ cl_uint n = 0;
+ cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_SUB_DEVICES);
+ }
+
+ cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+ err = clCreateSubDevices(object_, properties, n, ids, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_SUB_DEVICES);
+ }
+
+ devices->assign(&ids[0], &ids[n]);
+ return CL_SUCCESS;
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+/**
+ * CL 1.1 version that uses device fission.
+ */
+#if defined(CL_VERSION_1_1)
+#if defined(USE_CL_DEVICE_FISSION)
+ cl_int createSubDevices(
+ const cl_device_partition_property_ext * properties,
+ VECTOR_CLASS<Device>* devices)
+ {
+ typedef CL_API_ENTRY cl_int
+ ( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
+ cl_device_id /*in_device*/,
+ const cl_device_partition_property_ext * /* properties */,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*out_devices*/,
+ cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+ static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
+ __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
+
+ cl_uint n = 0;
+ cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_SUB_DEVICES);
+ }
+
+ cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+ err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_SUB_DEVICES);
+ }
+
+ devices->assign(&ids[0], &ids[n]);
+ return CL_SUCCESS;
+ }
+#endif // #if defined(USE_CL_DEVICE_FISSION)
+#endif // #if defined(CL_VERSION_1_1)
+};
+
+/*! \brief Class interface for cl_platform_id.
+ *
+ * \note Copies of these objects are inexpensive, since they don't 'own'
+ * any underlying resources or data structures.
+ *
+ * \see cl_platform_id
+ */
+class Platform : public detail::Wrapper<cl_platform_id>
+{
+public:
+ //! \brief Default constructor - initializes to NULL.
+ Platform() : detail::Wrapper<cl_type>() { }
+
+ /*! \brief Copy constructor.
+ *
+ * This simply copies the platform ID value, which is an inexpensive operation.
+ */
+ Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
+
+ /*! \brief Constructor from cl_platform_id.
+ *
+ * This simply copies the platform ID value, which is an inexpensive operation.
+ */
+ Platform(const cl_platform_id &platform) : detail::Wrapper<cl_type>(platform) { }
+
+ /*! \brief Assignment operator from Platform.
+ *
+ * This simply copies the platform ID value, which is an inexpensive operation.
+ */
+ Platform& operator = (const Platform& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_platform_id.
+ *
+ * This simply copies the platform ID value, which is an inexpensive operation.
+ */
+ Platform& operator = (const cl_platform_id& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetPlatformInfo().
+ cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetPlatformInfo, object_, name, param),
+ __GET_PLATFORM_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetPlatformInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_platform_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_platform_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ /*! \brief Gets a list of devices for this platform.
+ *
+ * Wraps clGetDeviceIDs().
+ */
+ cl_int getDevices(
+ cl_device_type type,
+ VECTOR_CLASS<Device>* devices) const
+ {
+ cl_uint n = 0;
+ if( devices == NULL ) {
+ return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
+ }
+ cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+ }
+
+ cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+ err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+ }
+
+ devices->assign(&ids[0], &ids[n]);
+ return CL_SUCCESS;
+ }
+
+#if defined(USE_DX_INTEROP)
+ /*! \brief Get the list of available D3D10 devices.
+ *
+ * \param d3d_device_source.
+ *
+ * \param d3d_object.
+ *
+ * \param d3d_device_set.
+ *
+ * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device
+ * values returned in devices can be used to identify a specific OpenCL
+ * device. If \a devices argument is NULL, this argument is ignored.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * The application can query specific capabilities of the OpenCL device(s)
+ * returned by cl::getDevices. This can be used by the application to
+ * determine which device(s) to use.
+ *
+ * \note In the case that exceptions are enabled and a return value
+ * other than CL_SUCCESS is generated, then cl::Error exception is
+ * generated.
+ */
+ cl_int getDevices(
+ cl_d3d10_device_source_khr d3d_device_source,
+ void * d3d_object,
+ cl_d3d10_device_set_khr d3d_device_set,
+ VECTOR_CLASS<Device>* devices) const
+ {
+ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
+ cl_platform_id platform,
+ cl_d3d10_device_source_khr d3d_device_source,
+ void * d3d_object,
+ cl_d3d10_device_set_khr d3d_device_set,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint* num_devices);
+
+ if( devices == NULL ) {
+ return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
+ }
+
+ static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
+ __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR);
+
+ cl_uint n = 0;
+ cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
+ object_,
+ d3d_device_source,
+ d3d_object,
+ d3d_device_set,
+ 0,
+ NULL,
+ &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+ }
+
+ cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+ err = pfn_clGetDeviceIDsFromD3D10KHR(
+ object_,
+ d3d_device_source,
+ d3d_object,
+ d3d_device_set,
+ n,
+ ids,
+ NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+ }
+
+ devices->assign(&ids[0], &ids[n]);
+ return CL_SUCCESS;
+ }
+#endif
+
+ /*! \brief Gets a list of available platforms.
+ *
+ * Wraps clGetPlatformIDs().
+ */
+ static cl_int get(
+ VECTOR_CLASS<Platform>* platforms)
+ {
+ cl_uint n = 0;
+
+ if( platforms == NULL ) {
+ return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
+ }
+
+ cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ }
+
+ cl_platform_id* ids = (cl_platform_id*) alloca(
+ n * sizeof(cl_platform_id));
+ err = ::clGetPlatformIDs(n, ids, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ }
+
+ platforms->assign(&ids[0], &ids[n]);
+ return CL_SUCCESS;
+ }
+
+ /*! \brief Gets the first available platform.
+ *
+ * Wraps clGetPlatformIDs(), returning the first result.
+ */
+ static cl_int get(
+ Platform * platform)
+ {
+ cl_uint n = 0;
+
+ if( platform == NULL ) {
+ return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
+ }
+
+ cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ }
+
+ cl_platform_id* ids = (cl_platform_id*) alloca(
+ n * sizeof(cl_platform_id));
+ err = ::clGetPlatformIDs(n, ids, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ }
+
+ *platform = ids[0];
+ return CL_SUCCESS;
+ }
+
+ /*! \brief Gets the first available platform, returning it by value.
+ *
+ * Wraps clGetPlatformIDs(), returning the first result.
+ */
+ static Platform get(
+ cl_int * errResult = NULL)
+ {
+ Platform platform;
+ cl_uint n = 0;
+ cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+ if (err != CL_SUCCESS) {
+ detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ if (errResult != NULL) {
+ *errResult = err;
+ }
+ }
+
+ cl_platform_id* ids = (cl_platform_id*) alloca(
+ n * sizeof(cl_platform_id));
+ err = ::clGetPlatformIDs(n, ids, NULL);
+
+ if (err != CL_SUCCESS) {
+ detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+ }
+
+ if (errResult != NULL) {
+ *errResult = err;
+ }
+
+ return ids[0];
+ }
+
+ static Platform getDefault(
+ cl_int *errResult = NULL )
+ {
+ return get(errResult);
+ }
+
+
+#if defined(CL_VERSION_1_2)
+ //! \brief Wrapper for clUnloadCompiler().
+ cl_int
+ unloadCompiler()
+ {
+ return ::clUnloadPlatformCompiler(object_);
+ }
+#endif // #if defined(CL_VERSION_1_2)
+}; // class Platform
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+/**
+ * Unload the OpenCL compiler.
+ * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead.
+ */
+inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int
+UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+inline cl_int
+UnloadCompiler()
+{
+ return ::clUnloadCompiler();
+}
+#endif // #if defined(CL_VERSION_1_1)
+
+/*! \brief Class interface for cl_context.
+ *
+ * \note Copies of these objects are shallow, meaning that the copy will refer
+ * to the same underlying cl_context as the original. For details, see
+ * clRetainContext() and clReleaseContext().
+ *
+ * \see cl_context
+ */
+class Context
+ : public detail::Wrapper<cl_context>
+{
+private:
+ static volatile int default_initialized_;
+ static Context default_;
+ static volatile cl_int default_error_;
+public:
+ /*! \brief Destructor.
+ *
+ * This calls clReleaseContext() on the value held by this instance.
+ */
+ ~Context() { }
+
+ /*! \brief Constructs a context including a list of specified devices.
+ *
+ * Wraps clCreateContext().
+ */
+ Context(
+ const VECTOR_CLASS<Device>& devices,
+ cl_context_properties* properties = NULL,
+ void (CL_CALLBACK * notifyFptr)(
+ const char *,
+ const void *,
+ ::size_t,
+ void *) = NULL,
+ void* data = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ ::size_t numDevices = devices.size();
+ cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+ for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+ deviceIDs[deviceIndex] = (devices[deviceIndex])();
+ }
+
+ object_ = ::clCreateContext(
+ properties, (cl_uint) numDevices,
+ deviceIDs,
+ notifyFptr, data, &error);
+
+ detail::errHandler(error, __CREATE_CONTEXT_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Context(
+ const Device& device,
+ cl_context_properties* properties = NULL,
+ void (CL_CALLBACK * notifyFptr)(
+ const char *,
+ const void *,
+ ::size_t,
+ void *) = NULL,
+ void* data = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ cl_device_id deviceID = device();
+
+ object_ = ::clCreateContext(
+ properties, 1,
+ &deviceID,
+ notifyFptr, data, &error);
+
+ detail::errHandler(error, __CREATE_CONTEXT_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /*! \brief Constructs a context including all or a subset of devices of a specified type.
+ *
+ * Wraps clCreateContextFromType().
+ */
+ Context(
+ cl_device_type type,
+ cl_context_properties* properties = NULL,
+ void (CL_CALLBACK * notifyFptr)(
+ const char *,
+ const void *,
+ ::size_t,
+ void *) = NULL,
+ void* data = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+#if !defined(__APPLE__) || !defined(__MACOS)
+ cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 };
+
+ if (properties == NULL) {
+ // Get a valid platform ID as we cannot send in a blank one
+ VECTOR_CLASS<Platform> platforms;
+ error = Platform::get(&platforms);
+ if (error != CL_SUCCESS) {
+ detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ return;
+ }
+
+ // Check the platforms we found for a device of our specified type
+ cl_context_properties platform_id = 0;
+ for (unsigned int i = 0; i < platforms.size(); i++) {
+
+ VECTOR_CLASS<Device> devices;
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+ try {
+#endif
+
+ error = platforms[i].getDevices(type, &devices);
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+ } catch (Error) {}
+ // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type
+ // We do error checking next anyway, and can throw there if needed
+#endif
+
+ // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND
+ if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) {
+ detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ if (devices.size() > 0) {
+ platform_id = (cl_context_properties)platforms[i]();
+ break;
+ }
+ }
+
+ if (platform_id == 0) {
+ detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR);
+ if (err != NULL) {
+ *err = CL_DEVICE_NOT_FOUND;
+ }
+ return;
+ }
+
+ prop[1] = platform_id;
+ properties = &prop[0];
+ }
+#endif
+ object_ = ::clCreateContextFromType(
+ properties, type, notifyFptr, data, &error);
+
+ detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT.
+ *
+ * \note All calls to this function return the same cl_context as the first.
+ */
+ static Context getDefault(cl_int * err = NULL)
+ {
+ int state = detail::compare_exchange(
+ &default_initialized_,
+ __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
+
+ if (state & __DEFAULT_INITIALIZED) {
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+ }
+
+ if (state & __DEFAULT_BEING_INITIALIZED) {
+ // Assume writes will propagate eventually...
+ while(default_initialized_ != __DEFAULT_INITIALIZED) {
+ detail::fence();
+ }
+
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+ }
+
+ cl_int error;
+ default_ = Context(
+ CL_DEVICE_TYPE_DEFAULT,
+ NULL,
+ NULL,
+ NULL,
+ &error);
+
+ detail::fence();
+
+ default_error_ = error;
+ // Assume writes will propagate eventually...
+ default_initialized_ = __DEFAULT_INITIALIZED;
+
+ detail::fence();
+
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Context() : detail::Wrapper<cl_type>() { }
+
+ /*! \brief Copy constructor.
+ *
+ * This calls clRetainContext() on the parameter's cl_context.
+ */
+ Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
+
+ /*! \brief Constructor from cl_context - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the cl_context
+ * into the new Context object.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper<cl_type>(context) { }
+
+ /*! \brief Assignment operator from Context.
+ *
+ * This calls clRetainContext() on the parameter and clReleaseContext() on
+ * the previous value held by this instance.
+ */
+ Context& operator = (const Context& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_context - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the rhs and calls
+ * clReleaseContext() on the value previously held by this instance.
+ */
+ Context& operator = (const cl_context& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetContextInfo().
+ template <typename T>
+ cl_int getInfo(cl_context_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetContextInfo, object_, name, param),
+ __GET_CONTEXT_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetContextInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_context_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_context_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ /*! \brief Gets a list of supported image formats.
+ *
+ * Wraps clGetSupportedImageFormats().
+ */
+ cl_int getSupportedImageFormats(
+ cl_mem_flags flags,
+ cl_mem_object_type type,
+ VECTOR_CLASS<ImageFormat>* formats) const
+ {
+ cl_uint numEntries;
+ cl_int err = ::clGetSupportedImageFormats(
+ object_,
+ flags,
+ type,
+ 0,
+ NULL,
+ &numEntries);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+ }
+
+ ImageFormat* value = (ImageFormat*)
+ alloca(numEntries * sizeof(ImageFormat));
+ err = ::clGetSupportedImageFormats(
+ object_,
+ flags,
+ type,
+ numEntries,
+ (cl_image_format*) value,
+ NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+ }
+
+ formats->assign(&value[0], &value[numEntries]);
+ return CL_SUCCESS;
+ }
+};
+
+inline Device Device::getDefault(cl_int * err)
+{
+ cl_int error;
+ Device device;
+
+ Context context = Context::getDefault(&error);
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+ if (error != CL_SUCCESS) {
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+ else {
+ device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+ if (err != NULL) {
+ *err = CL_SUCCESS;
+ }
+ }
+
+ return device;
+}
+
+
+#ifdef _WIN32
+__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__declspec(selectany) Context Context::default_;
+__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS;
+#else
+__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__attribute__((weak)) Context Context::default_;
+__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS;
+#endif
+
+/*! \brief Class interface for cl_event.
+ *
+ * \note Copies of these objects are shallow, meaning that the copy will refer
+ * to the same underlying cl_event as the original. For details, see
+ * clRetainEvent() and clReleaseEvent().
+ *
+ * \see cl_event
+ */
+class Event : public detail::Wrapper<cl_event>
+{
+public:
+ /*! \brief Destructor.
+ *
+ * This calls clReleaseEvent() on the value held by this instance.
+ */
+ ~Event() { }
+
+ //! \brief Default constructor - initializes to NULL.
+ Event() : detail::Wrapper<cl_type>() { }
+
+ /*! \brief Copy constructor.
+ *
+ * This calls clRetainEvent() on the parameter's cl_event.
+ */
+ Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
+
+ /*! \brief Constructor from cl_event - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the cl_event
+ * into the new Event object.
+ */
+ Event(const cl_event& event) : detail::Wrapper<cl_type>(event) { }
+
+ /*! \brief Assignment operator from cl_event - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the rhs and calls
+ * clReleaseEvent() on the value previously held by this instance.
+ */
+ Event& operator = (const Event& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_event.
+ *
+ * This calls clRetainEvent() on the parameter and clReleaseEvent() on
+ * the previous value held by this instance.
+ */
+ Event& operator = (const cl_event& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetEventInfo().
+ template <typename T>
+ cl_int getInfo(cl_event_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetEventInfo, object_, name, param),
+ __GET_EVENT_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetEventInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_event_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_event_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ //! \brief Wrapper for clGetEventProfilingInfo().
+ template <typename T>
+ cl_int getProfilingInfo(cl_profiling_info name, T* param) const
+ {
+ return detail::errHandler(detail::getInfo(
+ &::clGetEventProfilingInfo, object_, name, param),
+ __GET_EVENT_PROFILE_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetEventProfilingInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_profiling_info, name>::param_type
+ getProfilingInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_profiling_info, name>::param_type param;
+ cl_int result = getProfilingInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ /*! \brief Blocks the calling thread until this event completes.
+ *
+ * Wraps clWaitForEvents().
+ */
+ cl_int wait() const
+ {
+ return detail::errHandler(
+ ::clWaitForEvents(1, &object_),
+ __WAIT_FOR_EVENTS_ERR);
+ }
+
+#if defined(CL_VERSION_1_1)
+ /*! \brief Registers a user callback function for a specific command execution status.
+ *
+ * Wraps clSetEventCallback().
+ */
+ cl_int setCallback(
+ cl_int type,
+ void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),
+ void * user_data = NULL)
+ {
+ return detail::errHandler(
+ ::clSetEventCallback(
+ object_,
+ type,
+ pfn_notify,
+ user_data),
+ __SET_EVENT_CALLBACK_ERR);
+ }
+#endif
+
+ /*! \brief Blocks the calling thread until every event specified is complete.
+ *
+ * Wraps clWaitForEvents().
+ */
+ static cl_int
+ waitForEvents(const VECTOR_CLASS<Event>& events)
+ {
+ return detail::errHandler(
+ ::clWaitForEvents(
+ (cl_uint) events.size(), (cl_event*)&events.front()),
+ __WAIT_FOR_EVENTS_ERR);
+ }
+};
+
+#if defined(CL_VERSION_1_1)
+/*! \brief Class interface for user events (a subset of cl_event's).
+ *
+ * See Event for details about copy semantics, etc.
+ */
+class UserEvent : public Event
+{
+public:
+ /*! \brief Constructs a user event on a given context.
+ *
+ * Wraps clCreateUserEvent().
+ */
+ UserEvent(
+ const Context& context,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateUserEvent(
+ context(),
+ &error);
+
+ detail::errHandler(error, __CREATE_USER_EVENT_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ UserEvent() : Event() { }
+
+ //! \brief Copy constructor - performs shallow copy.
+ UserEvent(const UserEvent& event) : Event(event) { }
+
+ //! \brief Assignment Operator - performs shallow copy.
+ UserEvent& operator = (const UserEvent& rhs)
+ {
+ if (this != &rhs) {
+ Event::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Sets the execution status of a user event object.
+ *
+ * Wraps clSetUserEventStatus().
+ */
+ cl_int setStatus(cl_int status)
+ {
+ return detail::errHandler(
+ ::clSetUserEventStatus(object_,status),
+ __SET_USER_EVENT_STATUS_ERR);
+ }
+};
+#endif
+
+/*! \brief Blocks the calling thread until every event specified is complete.
+ *
+ * Wraps clWaitForEvents().
+ */
+inline static cl_int
+WaitForEvents(const VECTOR_CLASS<Event>& events)
+{
+ return detail::errHandler(
+ ::clWaitForEvents(
+ (cl_uint) events.size(), (cl_event*)&events.front()),
+ __WAIT_FOR_EVENTS_ERR);
+}
+
+/*! \brief Class interface for cl_mem.
+ *
+ * \note Copies of these objects are shallow, meaning that the copy will refer
+ * to the same underlying cl_mem as the original. For details, see
+ * clRetainMemObject() and clReleaseMemObject().
+ *
+ * \see cl_mem
+ */
+class Memory : public detail::Wrapper<cl_mem>
+{
+public:
+
+ /*! \brief Destructor.
+ *
+ * This calls clReleaseMemObject() on the value held by this instance.
+ */
+ ~Memory() {}
+
+ //! \brief Default constructor - initializes to NULL.
+ Memory() : detail::Wrapper<cl_type>() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * This calls clRetainMemObject() on the parameter's cl_mem.
+ */
+ Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the cl_mem
+ * into the new Memory object.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper<cl_type>(memory) { }
+
+ /*! \brief Assignment operator from Memory.
+ *
+ * This calls clRetainMemObject() on the parameter and clReleaseMemObject()
+ * on the previous value held by this instance.
+ */
+ Memory& operator = (const Memory& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_mem - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the rhs and calls
+ * clReleaseMemObject() on the value previously held by this instance.
+ */
+ Memory& operator = (const cl_mem& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetMemObjectInfo().
+ template <typename T>
+ cl_int getInfo(cl_mem_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetMemObjectInfo, object_, name, param),
+ __GET_MEM_OBJECT_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetMemObjectInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_mem_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_mem_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+#if defined(CL_VERSION_1_1)
+ /*! \brief Registers a callback function to be called when the memory object
+ * is no longer needed.
+ *
+ * Wraps clSetMemObjectDestructorCallback().
+ *
+ * Repeated calls to this function, for a given cl_mem value, will append
+ * to the list of functions called (in reverse order) when memory object's
+ * resources are freed and the memory object is deleted.
+ *
+ * \note
+ * The registered callbacks are associated with the underlying cl_mem
+ * value - not the Memory class instance.
+ */
+ cl_int setDestructorCallback(
+ void (CL_CALLBACK * pfn_notify)(cl_mem, void *),
+ void * user_data = NULL)
+ {
+ return detail::errHandler(
+ ::clSetMemObjectDestructorCallback(
+ object_,
+ pfn_notify,
+ user_data),
+ __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
+ }
+#endif
+
+};
+
+// Pre-declare copy functions
+class Buffer;
+template< typename IteratorType >
+cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer );
+template< typename IteratorType >
+cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator );
+template< typename IteratorType >
+cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer );
+template< typename IteratorType >
+cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator );
+
+
+/*! \brief Class interface for Buffer Memory Objects.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Buffer : public Memory
+{
+public:
+
+ /*! \brief Constructs a Buffer in a specified context.
+ *
+ * Wraps clCreateBuffer().
+ *
+ * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
+ * specified. Note alignment & exclusivity requirements.
+ */
+ Buffer(
+ const Context& context,
+ cl_mem_flags flags,
+ ::size_t size,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /*! \brief Constructs a Buffer in the default context.
+ *
+ * Wraps clCreateBuffer().
+ *
+ * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
+ * specified. Note alignment & exclusivity requirements.
+ *
+ * \see Context::getDefault()
+ */
+ Buffer(
+ cl_mem_flags flags,
+ ::size_t size,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ Context context = Context::getDefault(err);
+
+ object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /*!
+ * \brief Construct a Buffer from a host container via iterators.
+ * IteratorType must be random access.
+ * If useHostPtr is specified iterators must represent contiguous data.
+ */
+ template< typename IteratorType >
+ Buffer(
+ IteratorType startIterator,
+ IteratorType endIterator,
+ bool readOnly,
+ bool useHostPtr = false,
+ cl_int* err = NULL)
+ {
+ typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+ cl_int error;
+
+ cl_mem_flags flags = 0;
+ if( readOnly ) {
+ flags |= CL_MEM_READ_ONLY;
+ }
+ else {
+ flags |= CL_MEM_READ_WRITE;
+ }
+ if( useHostPtr ) {
+ flags |= CL_MEM_USE_HOST_PTR;
+ }
+
+ ::size_t size = sizeof(DataType)*(endIterator - startIterator);
+
+ Context context = Context::getDefault(err);
+
+ if( useHostPtr ) {
+ object_ = ::clCreateBuffer(context(), flags, size, static_cast<DataType*>(&*startIterator), &error);
+ } else {
+ object_ = ::clCreateBuffer(context(), flags, size, 0, &error);
+ }
+
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ if( !useHostPtr ) {
+ error = cl::copy(startIterator, endIterator, *this);
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+ }
+
+ /*!
+ * \brief Construct a Buffer from a host container via iterators using a specified context.
+ * IteratorType must be random access.
+ * If useHostPtr is specified iterators must represent contiguous data.
+ */
+ template< typename IteratorType >
+ Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator,
+ bool readOnly, bool useHostPtr = false, cl_int* err = NULL);
+
+ //! \brief Default constructor - initializes to NULL.
+ Buffer() : Memory() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Buffer(const Buffer& buffer) : Memory(buffer) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { }
+
+ /*! \brief Assignment from Buffer - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Buffer& operator = (const Buffer& rhs)
+ {
+ if (this != &rhs) {
+ Memory::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Buffer& operator = (const cl_mem& rhs)
+ {
+ Memory::operator=(rhs);
+ return *this;
+ }
+
+#if defined(CL_VERSION_1_1)
+ /*! \brief Creates a new buffer object from this.
+ *
+ * Wraps clCreateSubBuffer().
+ */
+ Buffer createSubBuffer(
+ cl_mem_flags flags,
+ cl_buffer_create_type buffer_create_type,
+ const void * buffer_create_info,
+ cl_int * err = NULL)
+ {
+ Buffer result;
+ cl_int error;
+ result.object_ = ::clCreateSubBuffer(
+ object_,
+ flags,
+ buffer_create_type,
+ buffer_create_info,
+ &error);
+
+ detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ return result;
+ }
+#endif
+};
+
+#if defined (USE_DX_INTEROP)
+/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's.
+ *
+ * This is provided to facilitate interoperability with Direct3D.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class BufferD3D10 : public Buffer
+{
+public:
+ typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
+ cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer,
+ cl_int* errcode_ret);
+
+ /*! \brief Constructs a BufferD3D10, in a specified context, from a
+ * given ID3D10Buffer.
+ *
+ * Wraps clCreateFromD3D10BufferKHR().
+ */
+ BufferD3D10(
+ const Context& context,
+ cl_mem_flags flags,
+ ID3D10Buffer* bufobj,
+ cl_int * err = NULL)
+ {
+ static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
+
+#if defined(CL_VERSION_1_2)
+ vector<cl_context_properties> props = context.getInfo<CL_CONTEXT_PROPERTIES>();
+ cl_platform platform = -1;
+ for( int i = 0; i < props.size(); ++i ) {
+ if( props[i] == CL_CONTEXT_PLATFORM ) {
+ platform = props[i+1];
+ }
+ }
+ __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR);
+#endif
+#if defined(CL_VERSION_1_1)
+ __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
+#endif
+
+ cl_int error;
+ object_ = pfn_clCreateFromD3D10BufferKHR(
+ context(),
+ flags,
+ bufobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ BufferD3D10() : Buffer() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { }
+
+ /*! \brief Assignment from BufferD3D10 - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferD3D10& operator = (const BufferD3D10& rhs)
+ {
+ if (this != &rhs) {
+ Buffer::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferD3D10& operator = (const cl_mem& rhs)
+ {
+ Buffer::operator=(rhs);
+ return *this;
+ }
+};
+#endif
+
+/*! \brief Class interface for GL Buffer Memory Objects.
+ *
+ * This is provided to facilitate interoperability with OpenGL.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class BufferGL : public Buffer
+{
+public:
+ /*! \brief Constructs a BufferGL in a specified context, from a given
+ * GL buffer.
+ *
+ * Wraps clCreateFromGLBuffer().
+ */
+ BufferGL(
+ const Context& context,
+ cl_mem_flags flags,
+ GLuint bufobj,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateFromGLBuffer(
+ context(),
+ flags,
+ bufobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ BufferGL() : Buffer() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { }
+
+ /*! \brief Assignment from BufferGL - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferGL& operator = (const BufferGL& rhs)
+ {
+ if (this != &rhs) {
+ Buffer::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferGL& operator = (const cl_mem& rhs)
+ {
+ Buffer::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetGLObjectInfo().
+ cl_int getObjectInfo(
+ cl_gl_object_type *type,
+ GLuint * gl_object_name)
+ {
+ return detail::errHandler(
+ ::clGetGLObjectInfo(object_,type,gl_object_name),
+ __GET_GL_OBJECT_INFO_ERR);
+ }
+};
+
+/*! \brief Class interface for GL Render Buffer Memory Objects.
+ *
+ * This is provided to facilitate interoperability with OpenGL.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class BufferRenderGL : public Buffer
+{
+public:
+ /*! \brief Constructs a BufferRenderGL in a specified context, from a given
+ * GL Renderbuffer.
+ *
+ * Wraps clCreateFromGLRenderbuffer().
+ */
+ BufferRenderGL(
+ const Context& context,
+ cl_mem_flags flags,
+ GLuint bufobj,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateFromGLRenderbuffer(
+ context(),
+ flags,
+ bufobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ BufferRenderGL() : Buffer() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { }
+
+ /*! \brief Assignment from BufferGL - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferRenderGL& operator = (const BufferRenderGL& rhs)
+ {
+ if (this != &rhs) {
+ Buffer::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ BufferRenderGL& operator = (const cl_mem& rhs)
+ {
+ Buffer::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetGLObjectInfo().
+ cl_int getObjectInfo(
+ cl_gl_object_type *type,
+ GLuint * gl_object_name)
+ {
+ return detail::errHandler(
+ ::clGetGLObjectInfo(object_,type,gl_object_name),
+ __GET_GL_OBJECT_INFO_ERR);
+ }
+};
+
+/*! \brief C++ base class for Image Memory objects.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Image : public Memory
+{
+protected:
+ //! \brief Default constructor - initializes to NULL.
+ Image() : Memory() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image(const Image& image) : Memory(image) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { }
+
+ /*! \brief Assignment from Image - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image& operator = (const Image& rhs)
+ {
+ if (this != &rhs) {
+ Memory::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image& operator = (const cl_mem& rhs)
+ {
+ Memory::operator=(rhs);
+ return *this;
+ }
+
+public:
+ //! \brief Wrapper for clGetImageInfo().
+ template <typename T>
+ cl_int getImageInfo(cl_image_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetImageInfo, object_, name, param),
+ __GET_IMAGE_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetImageInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_image_info, name>::param_type
+ getImageInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_image_info, name>::param_type param;
+ cl_int result = getImageInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+};
+
+#if defined(CL_VERSION_1_2)
+/*! \brief Class interface for 1D Image Memory objects.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Image1D : public Image
+{
+public:
+ /*! \brief Constructs a 1D Image in a specified context.
+ *
+ * Wraps clCreateImage().
+ */
+ Image1D(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t width,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE1D,
+ width,
+ 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ host_ptr,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Image1D() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image1D(const Image1D& image1D) : Image(image1D) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { }
+
+ /*! \brief Assignment from Image1D - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image1D& operator = (const Image1D& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image1D& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+
+/*! \class Image1DBuffer
+ * \brief Image interface for 1D buffer images.
+ */
+class Image1DBuffer : public Image
+{
+public:
+ Image1DBuffer(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t width,
+ const Buffer &buffer,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ width,
+ 0, 0, 0, 0, 0, 0, 0,
+ buffer()
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ NULL,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Image1DBuffer() { }
+
+ Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { }
+
+ __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { }
+
+ Image1DBuffer& operator = (const Image1DBuffer& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ Image1DBuffer& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+
+/*! \class Image1DArray
+ * \brief Image interface for arrays of 1D images.
+ */
+class Image1DArray : public Image
+{
+public:
+ Image1DArray(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t arraySize,
+ ::size_t width,
+ ::size_t rowPitch,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE1D_ARRAY,
+ width,
+ 0, 0, // height, depth (unused)
+ arraySize,
+ rowPitch,
+ 0, 0, 0, 0
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ host_ptr,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Image1DArray() { }
+
+ Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { }
+
+ __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { }
+
+ Image1DArray& operator = (const Image1DArray& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ Image1DArray& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+
+/*! \brief Class interface for 2D Image Memory objects.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Image2D : public Image
+{
+public:
+ /*! \brief Constructs a 1D Image in a specified context.
+ *
+ * Wraps clCreateImage().
+ */
+ Image2D(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t width,
+ ::size_t height,
+ ::size_t row_pitch = 0,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ bool useCreateImage;
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ // Run-time decision based on the actual platform
+ {
+ cl_uint version = detail::getContextPlatformVersion(context());
+ useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above
+ }
+#elif defined(CL_VERSION_1_2)
+ useCreateImage = true;
+#else
+ useCreateImage = false;
+#endif
+
+#if defined(CL_VERSION_1_2)
+ if (useCreateImage)
+ {
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE2D,
+ width,
+ height,
+ 0, 0, // depth, array size (unused)
+ row_pitch,
+ 0, 0, 0, 0
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ host_ptr,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+#endif // #if defined(CL_VERSION_1_2)
+#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ if (!useCreateImage)
+ {
+ object_ = ::clCreateImage2D(
+ context(), flags,&format, width, height, row_pitch, host_ptr, &error);
+
+ detail::errHandler(error, __CREATE_IMAGE2D_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Image2D() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2D(const Image2D& image2D) : Image(image2D) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { }
+
+ /*! \brief Assignment from Image2D - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2D& operator = (const Image2D& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2D& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+
+
+#if !defined(CL_VERSION_1_2)
+/*! \brief Class interface for GL 2D Image Memory objects.
+ *
+ * This is provided to facilitate interoperability with OpenGL.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ * \note Deprecated for OpenCL 1.2. Please use ImageGL instead.
+ */
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D
+{
+public:
+ /*! \brief Constructs an Image2DGL in a specified context, from a given
+ * GL Texture.
+ *
+ * Wraps clCreateFromGLTexture2D().
+ */
+ Image2DGL(
+ const Context& context,
+ cl_mem_flags flags,
+ GLenum target,
+ GLint miplevel,
+ GLuint texobj,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateFromGLTexture2D(
+ context(),
+ flags,
+ target,
+ miplevel,
+ texobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Image2DGL() : Image2D() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2DGL(const Image2DGL& image) : Image2D(image) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { }
+
+ /*! \brief Assignment from Image2DGL - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2DGL& operator = (const Image2DGL& rhs)
+ {
+ if (this != &rhs) {
+ Image2D::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image2DGL& operator = (const cl_mem& rhs)
+ {
+ Image2D::operator=(rhs);
+ return *this;
+ }
+};
+#endif // #if !defined(CL_VERSION_1_2)
+
+#if defined(CL_VERSION_1_2)
+/*! \class Image2DArray
+ * \brief Image interface for arrays of 2D images.
+ */
+class Image2DArray : public Image
+{
+public:
+ Image2DArray(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t arraySize,
+ ::size_t width,
+ ::size_t height,
+ ::size_t rowPitch,
+ ::size_t slicePitch,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE2D_ARRAY,
+ width,
+ height,
+ 0, // depth (unused)
+ arraySize,
+ rowPitch,
+ slicePitch,
+ 0, 0, 0
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ host_ptr,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Image2DArray() { }
+
+ Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { }
+
+ __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { }
+
+ Image2DArray& operator = (const Image2DArray& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ Image2DArray& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+/*! \brief Class interface for 3D Image Memory objects.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Image3D : public Image
+{
+public:
+ /*! \brief Constructs a 3D Image in a specified context.
+ *
+ * Wraps clCreateImage().
+ */
+ Image3D(
+ const Context& context,
+ cl_mem_flags flags,
+ ImageFormat format,
+ ::size_t width,
+ ::size_t height,
+ ::size_t depth,
+ ::size_t row_pitch = 0,
+ ::size_t slice_pitch = 0,
+ void* host_ptr = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ bool useCreateImage;
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ // Run-time decision based on the actual platform
+ {
+ cl_uint version = detail::getContextPlatformVersion(context());
+ useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above
+ }
+#elif defined(CL_VERSION_1_2)
+ useCreateImage = true;
+#else
+ useCreateImage = false;
+#endif
+
+#if defined(CL_VERSION_1_2)
+ if (useCreateImage)
+ {
+ cl_image_desc desc =
+ {
+ CL_MEM_OBJECT_IMAGE3D,
+ width,
+ height,
+ depth,
+ 0, // array size (unused)
+ row_pitch,
+ slice_pitch,
+ 0, 0, 0
+ };
+ object_ = ::clCreateImage(
+ context(),
+ flags,
+ &format,
+ &desc,
+ host_ptr,
+ &error);
+
+ detail::errHandler(error, __CREATE_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+#endif // #if defined(CL_VERSION_1_2)
+#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ if (!useCreateImage)
+ {
+ object_ = ::clCreateImage3D(
+ context(), flags, &format, width, height, depth, row_pitch,
+ slice_pitch, host_ptr, &error);
+
+ detail::errHandler(error, __CREATE_IMAGE3D_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Image3D() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3D(const Image3D& image3D) : Image(image3D) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { }
+
+ /*! \brief Assignment from Image3D - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3D& operator = (const Image3D& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3D& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+
+#if !defined(CL_VERSION_1_2)
+/*! \brief Class interface for GL 3D Image Memory objects.
+ *
+ * This is provided to facilitate interoperability with OpenGL.
+ *
+ * See Memory for details about copy semantics, etc.
+ *
+ * \see Memory
+ */
+class Image3DGL : public Image3D
+{
+public:
+ /*! \brief Constructs an Image3DGL in a specified context, from a given
+ * GL Texture.
+ *
+ * Wraps clCreateFromGLTexture3D().
+ */
+ Image3DGL(
+ const Context& context,
+ cl_mem_flags flags,
+ GLenum target,
+ GLint miplevel,
+ GLuint texobj,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateFromGLTexture3D(
+ context(),
+ flags,
+ target,
+ miplevel,
+ texobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ //! \brief Default constructor - initializes to NULL.
+ Image3DGL() : Image3D() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3DGL(const Image3DGL& image) : Image3D(image) { }
+
+ /*! \brief Constructor from cl_mem - takes ownership.
+ *
+ * See Memory for further details.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { }
+
+ /*! \brief Assignment from Image3DGL - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3DGL& operator = (const Image3DGL& rhs)
+ {
+ if (this != &rhs) {
+ Image3D::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment from cl_mem - performs shallow copy.
+ *
+ * See Memory for further details.
+ */
+ Image3DGL& operator = (const cl_mem& rhs)
+ {
+ Image3D::operator=(rhs);
+ return *this;
+ }
+};
+#endif // #if !defined(CL_VERSION_1_2)
+
+#if defined(CL_VERSION_1_2)
+/*! \class ImageGL
+ * \brief general image interface for GL interop.
+ * We abstract the 2D and 3D GL images into a single instance here
+ * that wraps all GL sourced images on the grounds that setup information
+ * was performed by OpenCL anyway.
+ */
+class ImageGL : public Image
+{
+public:
+ ImageGL(
+ const Context& context,
+ cl_mem_flags flags,
+ GLenum target,
+ GLint miplevel,
+ GLuint texobj,
+ cl_int * err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateFromGLTexture(
+ context(),
+ flags,
+ target,
+ miplevel,
+ texobj,
+ &error);
+
+ detail::errHandler(error, __CREATE_GL_TEXTURE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ ImageGL() : Image() { }
+
+ ImageGL(const ImageGL& image) : Image(image) { }
+
+ __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { }
+
+ ImageGL& operator = (const ImageGL& rhs)
+ {
+ if (this != &rhs) {
+ Image::operator=(rhs);
+ }
+ return *this;
+ }
+
+ ImageGL& operator = (const cl_mem& rhs)
+ {
+ Image::operator=(rhs);
+ return *this;
+ }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+/*! \brief Class interface for cl_sampler.
+ *
+ * \note Copies of these objects are shallow, meaning that the copy will refer
+ * to the same underlying cl_sampler as the original. For details, see
+ * clRetainSampler() and clReleaseSampler().
+ *
+ * \see cl_sampler
+ */
+class Sampler : public detail::Wrapper<cl_sampler>
+{
+public:
+ /*! \brief Destructor.
+ *
+ * This calls clReleaseSampler() on the value held by this instance.
+ */
+ ~Sampler() { }
+
+ //! \brief Default constructor - initializes to NULL.
+ Sampler() { }
+
+ /*! \brief Constructs a Sampler in a specified context.
+ *
+ * Wraps clCreateSampler().
+ */
+ Sampler(
+ const Context& context,
+ cl_bool normalized_coords,
+ cl_addressing_mode addressing_mode,
+ cl_filter_mode filter_mode,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateSampler(
+ context(),
+ normalized_coords,
+ addressing_mode,
+ filter_mode,
+ &error);
+
+ detail::errHandler(error, __CREATE_SAMPLER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * This calls clRetainSampler() on the parameter's cl_sampler.
+ */
+ Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+
+ /*! \brief Constructor from cl_sampler - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the cl_sampler
+ * into the new Sampler object.
+ */
+ Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+
+ /*! \brief Assignment operator from Sampler.
+ *
+ * This calls clRetainSampler() on the parameter and clReleaseSampler()
+ * on the previous value held by this instance.
+ */
+ Sampler& operator = (const Sampler& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_sampler - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the rhs and calls
+ * clReleaseSampler() on the value previously held by this instance.
+ */
+ Sampler& operator = (const cl_sampler& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ //! \brief Wrapper for clGetSamplerInfo().
+ template <typename T>
+ cl_int getInfo(cl_sampler_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetSamplerInfo, object_, name, param),
+ __GET_SAMPLER_INFO_ERR);
+ }
+
+ //! \brief Wrapper for clGetSamplerInfo() that returns by value.
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_sampler_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_sampler_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+};
+
+class Program;
+class CommandQueue;
+class Kernel;
+
+//! \brief Class interface for specifying NDRange values.
+class NDRange
+{
+private:
+ size_t<3> sizes_;
+ cl_uint dimensions_;
+
+public:
+ //! \brief Default constructor - resulting range has zero dimensions.
+ NDRange()
+ : dimensions_(0)
+ { }
+
+ //! \brief Constructs one-dimensional range.
+ NDRange(::size_t size0)
+ : dimensions_(1)
+ {
+ sizes_[0] = size0;
+ }
+
+ //! \brief Constructs two-dimensional range.
+ NDRange(::size_t size0, ::size_t size1)
+ : dimensions_(2)
+ {
+ sizes_[0] = size0;
+ sizes_[1] = size1;
+ }
+
+ //! \brief Constructs three-dimensional range.
+ NDRange(::size_t size0, ::size_t size1, ::size_t size2)
+ : dimensions_(3)
+ {
+ sizes_[0] = size0;
+ sizes_[1] = size1;
+ sizes_[2] = size2;
+ }
+
+ /*! \brief Conversion operator to const ::size_t *.
+ *
+ * \returns a pointer to the size of the first dimension.
+ */
+ operator const ::size_t*() const {
+ return (const ::size_t*) sizes_;
+ }
+
+ //! \brief Queries the number of dimensions in the range.
+ ::size_t dimensions() const { return dimensions_; }
+};
+
+//! \brief A zero-dimensional range.
+static const NDRange NullRange;
+
+//! \brief Local address wrapper for use with Kernel::setArg
+struct LocalSpaceArg
+{
+ ::size_t size_;
+};
+
+namespace detail {
+
+template <typename T>
+struct KernelArgumentHandler
+{
+ static ::size_t size(const T&) { return sizeof(T); }
+ static T* ptr(T& value) { return &value; }
+};
+
+template <>
+struct KernelArgumentHandler<LocalSpaceArg>
+{
+ static ::size_t size(const LocalSpaceArg& value) { return value.size_; }
+ static void* ptr(LocalSpaceArg&) { return NULL; }
+};
+
+}
+//! \endcond
+
+/*! __local
+ * \brief Helper function for generating LocalSpaceArg objects.
+ * Deprecated. Replaced with Local.
+ */
+inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg
+__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+inline LocalSpaceArg
+__local(::size_t size)
+{
+ LocalSpaceArg ret = { size };
+ return ret;
+}
+
+/*! Local
+ * \brief Helper function for generating LocalSpaceArg objects.
+ */
+inline LocalSpaceArg
+Local(::size_t size)
+{
+ LocalSpaceArg ret = { size };
+ return ret;
+}
+
+//class KernelFunctor;
+
+/*! \brief Class interface for cl_kernel.
+ *
+ * \note Copies of these objects are shallow, meaning that the copy will refer
+ * to the same underlying cl_kernel as the original. For details, see
+ * clRetainKernel() and clReleaseKernel().
+ *
+ * \see cl_kernel
+ */
+class Kernel : public detail::Wrapper<cl_kernel>
+{
+public:
+ inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
+
+ /*! \brief Destructor.
+ *
+ * This calls clReleaseKernel() on the value held by this instance.
+ */
+ ~Kernel() { }
+
+ //! \brief Default constructor - initializes to NULL.
+ Kernel() { }
+
+ /*! \brief Copy constructor - performs shallow copy.
+ *
+ * This calls clRetainKernel() on the parameter's cl_kernel.
+ */
+ Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+
+ /*! \brief Constructor from cl_kernel - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the cl_kernel
+ * into the new Kernel object.
+ */
+ __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+
+ /*! \brief Assignment operator from Kernel.
+ *
+ * This calls clRetainKernel() on the parameter and clReleaseKernel()
+ * on the previous value held by this instance.
+ */
+ Kernel& operator = (const Kernel& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ /*! \brief Assignment operator from cl_kernel - takes ownership.
+ *
+ * This effectively transfers ownership of a refcount on the rhs and calls
+ * clReleaseKernel() on the value previously held by this instance.
+ */
+ Kernel& operator = (const cl_kernel& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ template <typename T>
+ cl_int getInfo(cl_kernel_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetKernelInfo, object_, name, param),
+ __GET_KERNEL_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_kernel_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_kernel_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+#if defined(CL_VERSION_1_2)
+ template <typename T>
+ cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param),
+ __GET_KERNEL_ARG_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_kernel_arg_info, name>::param_type
+ getArgInfo(cl_uint argIndex, cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_kernel_arg_info, name>::param_type param;
+ cl_int result = getArgInfo(argIndex, name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+ template <typename T>
+ cl_int getWorkGroupInfo(
+ const Device& device, cl_kernel_work_group_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(
+ &::clGetKernelWorkGroupInfo, object_, device(), name, param),
+ __GET_KERNEL_WORK_GROUP_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
+ getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_kernel_work_group_info, name>::param_type param;
+ cl_int result = getWorkGroupInfo(device, name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ template <typename T>
+ cl_int setArg(cl_uint index, T value)
+ {
+ return detail::errHandler(
+ ::clSetKernelArg(
+ object_,
+ index,
+ detail::KernelArgumentHandler<T>::size(value),
+ detail::KernelArgumentHandler<T>::ptr(value)),
+ __SET_KERNEL_ARGS_ERR);
+ }
+
+ cl_int setArg(cl_uint index, ::size_t size, void* argPtr)
+ {
+ return detail::errHandler(
+ ::clSetKernelArg(object_, index, size, argPtr),
+ __SET_KERNEL_ARGS_ERR);
+ }
+};
+
+/*! \class Program
+ * \brief Program interface that implements cl_program.
+ */
+class Program : public detail::Wrapper<cl_program>
+{
+public:
+ typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
+ typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
+
+ Program(
+ const STRING_CLASS& source,
+ bool build = false,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ const char * strings = source.c_str();
+ const ::size_t length = source.size();
+
+ Context context = Context::getDefault(err);
+
+ object_ = ::clCreateProgramWithSource(
+ context(), (cl_uint)1, &strings, &length, &error);
+
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+
+ if (error == CL_SUCCESS && build) {
+
+ error = ::clBuildProgram(
+ object_,
+ 0,
+ NULL,
+ "",
+ NULL,
+ NULL);
+
+ detail::errHandler(error, __BUILD_PROGRAM_ERR);
+ }
+
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Program(
+ const Context& context,
+ const STRING_CLASS& source,
+ bool build = false,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ const char * strings = source.c_str();
+ const ::size_t length = source.size();
+
+ object_ = ::clCreateProgramWithSource(
+ context(), (cl_uint)1, &strings, &length, &error);
+
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+
+ if (error == CL_SUCCESS && build) {
+
+ error = ::clBuildProgram(
+ object_,
+ 0,
+ NULL,
+ "",
+ NULL,
+ NULL);
+
+ detail::errHandler(error, __BUILD_PROGRAM_ERR);
+ }
+
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ Program(
+ const Context& context,
+ const Sources& sources,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ const ::size_t n = (::size_t)sources.size();
+ ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+ const char** strings = (const char**) alloca(n * sizeof(const char*));
+
+ for (::size_t i = 0; i < n; ++i) {
+ strings[i] = sources[(int)i].first;
+ lengths[i] = sources[(int)i].second;
+ }
+
+ object_ = ::clCreateProgramWithSource(
+ context(), (cl_uint)n, strings, lengths, &error);
+
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ /**
+ * Construct a program object from a list of devices and a per-device list of binaries.
+ * \param context A valid OpenCL context in which to construct the program.
+ * \param devices A vector of OpenCL device objects for which the program will be created.
+ * \param binaries A vector of pairs of a pointer to a binary object and its length.
+ * \param binaryStatus An optional vector that on completion will be resized to
+ * match the size of binaries and filled with values to specify if each binary
+ * was successfully loaded.
+ * Set to CL_SUCCESS if the binary was successfully loaded.
+ * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL.
+ * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device.
+ * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors:
+ * CL_INVALID_CONTEXT if context is not a valid context.
+ * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices;
+ * or if any entry in binaries is NULL or has length 0.
+ * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context.
+ * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device.
+ * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host.
+ */
+ Program(
+ const Context& context,
+ const VECTOR_CLASS<Device>& devices,
+ const Binaries& binaries,
+ VECTOR_CLASS<cl_int>* binaryStatus = NULL,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ const ::size_t numDevices = devices.size();
+
+ // Catch size mismatch early and return
+ if(binaries.size() != numDevices) {
+ error = CL_INVALID_VALUE;
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ return;
+ }
+
+ ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t));
+ const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**));
+
+ for (::size_t i = 0; i < numDevices; ++i) {
+ images[i] = (const unsigned char*)binaries[i].first;
+ lengths[i] = binaries[(int)i].second;
+ }
+
+ cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+ for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+ deviceIDs[deviceIndex] = (devices[deviceIndex])();
+ }
+
+ if(binaryStatus) {
+ binaryStatus->resize(numDevices);
+ }
+
+ object_ = ::clCreateProgramWithBinary(
+ context(), (cl_uint) devices.size(),
+ deviceIDs,
+ lengths, images, binaryStatus != NULL
+ ? &binaryStatus->front()
+ : NULL, &error);
+
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+
+#if defined(CL_VERSION_1_2)
+ /**
+ * Create program using builtin kernels.
+ * \param kernelNames Semi-colon separated list of builtin kernel names
+ */
+ Program(
+ const Context& context,
+ const VECTOR_CLASS<Device>& devices,
+ const STRING_CLASS& kernelNames,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+
+ ::size_t numDevices = devices.size();
+ cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+ for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+ deviceIDs[deviceIndex] = (devices[deviceIndex])();
+ }
+
+ object_ = ::clCreateProgramWithBuiltInKernels(
+ context(),
+ (cl_uint) devices.size(),
+ deviceIDs,
+ kernelNames.c_str(),
+ &error);
+
+ detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+ Program() { }
+
+ Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
+
+ __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper<cl_type>(program) { }
+
+ Program& operator = (const Program& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ Program& operator = (const cl_program& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ cl_int build(
+ const VECTOR_CLASS<Device>& devices,
+ const char* options = NULL,
+ void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+ void* data = NULL) const
+ {
+ ::size_t numDevices = devices.size();
+ cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+ for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+ deviceIDs[deviceIndex] = (devices[deviceIndex])();
+ }
+
+ return detail::errHandler(
+ ::clBuildProgram(
+ object_,
+ (cl_uint)
+ devices.size(),
+ deviceIDs,
+ options,
+ notifyFptr,
+ data),
+ __BUILD_PROGRAM_ERR);
+ }
+
+ cl_int build(
+ const char* options = NULL,
+ void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+ void* data = NULL) const
+ {
+ return detail::errHandler(
+ ::clBuildProgram(
+ object_,
+ 0,
+ NULL,
+ options,
+ notifyFptr,
+ data),
+ __BUILD_PROGRAM_ERR);
+ }
+
+#if defined(CL_VERSION_1_2)
+ cl_int compile(
+ const char* options = NULL,
+ void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+ void* data = NULL) const
+ {
+ return detail::errHandler(
+ ::clCompileProgram(
+ object_,
+ 0,
+ NULL,
+ options,
+ 0,
+ NULL,
+ NULL,
+ notifyFptr,
+ data),
+ __COMPILE_PROGRAM_ERR);
+ }
+#endif
+
+ template <typename T>
+ cl_int getInfo(cl_program_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(&::clGetProgramInfo, object_, name, param),
+ __GET_PROGRAM_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_program_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_program_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ template <typename T>
+ cl_int getBuildInfo(
+ const Device& device, cl_program_build_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(
+ &::clGetProgramBuildInfo, object_, device(), name, param),
+ __GET_PROGRAM_BUILD_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_program_build_info, name>::param_type
+ getBuildInfo(const Device& device, cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_program_build_info, name>::param_type param;
+ cl_int result = getBuildInfo(device, name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
+ {
+ cl_uint numKernels;
+ cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+ }
+
+ Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
+ err = ::clCreateKernelsInProgram(
+ object_, numKernels, (cl_kernel*) value, NULL);
+ if (err != CL_SUCCESS) {
+ return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+ }
+
+ kernels->assign(&value[0], &value[numKernels]);
+ return CL_SUCCESS;
+ }
+};
+
+#if defined(CL_VERSION_1_2)
+inline Program linkProgram(
+ Program input1,
+ Program input2,
+ const char* options = NULL,
+ void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+ void* data = NULL,
+ cl_int* err = NULL)
+{
+ cl_int err_local = CL_SUCCESS;
+
+ cl_program programs[2] = { input1(), input2() };
+
+ Context ctx = input1.getInfo<CL_PROGRAM_CONTEXT>();
+
+ cl_program prog = ::clLinkProgram(
+ ctx(),
+ 0,
+ NULL,
+ options,
+ 2,
+ programs,
+ notifyFptr,
+ data,
+ &err_local);
+
+ detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
+ if (err != NULL) {
+ *err = err_local;
+ }
+
+ return Program(prog);
+}
+
+inline Program linkProgram(
+ VECTOR_CLASS<Program> inputPrograms,
+ const char* options = NULL,
+ void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+ void* data = NULL,
+ cl_int* err = NULL)
+{
+ cl_int err_local = CL_SUCCESS;
+
+ cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program));
+
+ if (programs != NULL) {
+ for (unsigned int i = 0; i < inputPrograms.size(); i++) {
+ programs[i] = inputPrograms[i]();
+ }
+ }
+
+ cl_program prog = ::clLinkProgram(
+ Context::getDefault()(),
+ 0,
+ NULL,
+ options,
+ (cl_uint)inputPrograms.size(),
+ programs,
+ notifyFptr,
+ data,
+ &err_local);
+
+ detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
+ if (err != NULL) {
+ *err = err_local;
+ }
+
+ return Program(prog);
+}
+#endif
+
+template<>
+inline VECTOR_CLASS<char *> cl::Program::getInfo<CL_PROGRAM_BINARIES>(cl_int* err) const
+{
+ VECTOR_CLASS< ::size_t> sizes = getInfo<CL_PROGRAM_BINARY_SIZES>();
+ VECTOR_CLASS<char *> binaries;
+ for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s)
+ {
+ char *ptr = NULL;
+ if (*s != 0)
+ ptr = new char[*s];
+ binaries.push_back(ptr);
+ }
+
+ cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries);
+ if (err != NULL) {
+ *err = result;
+ }
+ return binaries;
+}
+
+inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
+{
+ cl_int error;
+
+ object_ = ::clCreateKernel(program(), name, &error);
+ detail::errHandler(error, __CREATE_KERNEL_ERR);
+
+ if (err != NULL) {
+ *err = error;
+ }
+
+}
+
+/*! \class CommandQueue
+ * \brief CommandQueue interface for cl_command_queue.
+ */
+class CommandQueue : public detail::Wrapper<cl_command_queue>
+{
+private:
+ static volatile int default_initialized_;
+ static CommandQueue default_;
+ static volatile cl_int default_error_;
+public:
+ CommandQueue(
+ cl_command_queue_properties properties,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+
+ Context context = Context::getDefault(&error);
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+ if (error != CL_SUCCESS) {
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+ else {
+ Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+
+ object_ = ::clCreateCommandQueue(
+ context(), device(), properties, &error);
+
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+ }
+ /*!
+ * \brief Constructs a CommandQueue for an implementation defined device in the given context
+ */
+ explicit CommandQueue(
+ const Context& context,
+ cl_command_queue_properties properties = 0,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ VECTOR_CLASS<cl::Device> devices;
+ error = context.getInfo(CL_CONTEXT_DEVICES, &devices);
+
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+ if (error != CL_SUCCESS)
+ {
+ if (err != NULL) {
+ *err = error;
+ }
+ return;
+ }
+
+ object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error);
+
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+ if (err != NULL) {
+ *err = error;
+ }
+
+ }
+
+ CommandQueue(
+ const Context& context,
+ const Device& device,
+ cl_command_queue_properties properties = 0,
+ cl_int* err = NULL)
+ {
+ cl_int error;
+ object_ = ::clCreateCommandQueue(
+ context(), device(), properties, &error);
+
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ static CommandQueue getDefault(cl_int * err = NULL)
+ {
+ int state = detail::compare_exchange(
+ &default_initialized_,
+ __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
+
+ if (state & __DEFAULT_INITIALIZED) {
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+ }
+
+ if (state & __DEFAULT_BEING_INITIALIZED) {
+ // Assume writes will propagate eventually...
+ while(default_initialized_ != __DEFAULT_INITIALIZED) {
+ detail::fence();
+ }
+
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+ }
+
+ cl_int error;
+
+ Context context = Context::getDefault(&error);
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+ if (error != CL_SUCCESS) {
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+ else {
+ Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+
+ default_ = CommandQueue(context, device, 0, &error);
+
+ detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+
+ detail::fence();
+
+ default_error_ = error;
+ // Assume writes will propagate eventually...
+ default_initialized_ = __DEFAULT_INITIALIZED;
+
+ detail::fence();
+
+ if (err != NULL) {
+ *err = default_error_;
+ }
+ return default_;
+
+ }
+
+ CommandQueue() { }
+
+ CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+
+ CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+
+ CommandQueue& operator = (const CommandQueue& rhs)
+ {
+ if (this != &rhs) {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ }
+ return *this;
+ }
+
+ CommandQueue& operator = (const cl_command_queue& rhs)
+ {
+ detail::Wrapper<cl_type>::operator=(rhs);
+ return *this;
+ }
+
+ template <typename T>
+ cl_int getInfo(cl_command_queue_info name, T* param) const
+ {
+ return detail::errHandler(
+ detail::getInfo(
+ &::clGetCommandQueueInfo, object_, name, param),
+ __GET_COMMAND_QUEUE_INFO_ERR);
+ }
+
+ template <cl_int name> typename
+ detail::param_traits<detail::cl_command_queue_info, name>::param_type
+ getInfo(cl_int* err = NULL) const
+ {
+ typename detail::param_traits<
+ detail::cl_command_queue_info, name>::param_type param;
+ cl_int result = getInfo(name, &param);
+ if (err != NULL) {
+ *err = result;
+ }
+ return param;
+ }
+
+ cl_int enqueueReadBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ ::size_t offset,
+ ::size_t size,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueReadBuffer(
+ object_, buffer(), blocking, offset, size,
+ ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_READ_BUFFER_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueWriteBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ ::size_t offset,
+ ::size_t size,
+ const void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueWriteBuffer(
+ object_, buffer(), blocking, offset, size,
+ ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_WRITE_BUFFER_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueCopyBuffer(
+ const Buffer& src,
+ const Buffer& dst,
+ ::size_t src_offset,
+ ::size_t dst_offset,
+ ::size_t size,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueCopyBuffer(
+ object_, src(), dst(), src_offset, dst_offset, size,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQEUE_COPY_BUFFER_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueReadBufferRect(
+ const Buffer& buffer,
+ cl_bool blocking,
+ const size_t<3>& buffer_offset,
+ const size_t<3>& host_offset,
+ const size_t<3>& region,
+ ::size_t buffer_row_pitch,
+ ::size_t buffer_slice_pitch,
+ ::size_t host_row_pitch,
+ ::size_t host_slice_pitch,
+ void *ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueReadBufferRect(
+ object_,
+ buffer(),
+ blocking,
+ (const ::size_t *)buffer_offset,
+ (const ::size_t *)host_offset,
+ (const ::size_t *)region,
+ buffer_row_pitch,
+ buffer_slice_pitch,
+ host_row_pitch,
+ host_slice_pitch,
+ ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_READ_BUFFER_RECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueWriteBufferRect(
+ const Buffer& buffer,
+ cl_bool blocking,
+ const size_t<3>& buffer_offset,
+ const size_t<3>& host_offset,
+ const size_t<3>& region,
+ ::size_t buffer_row_pitch,
+ ::size_t buffer_slice_pitch,
+ ::size_t host_row_pitch,
+ ::size_t host_slice_pitch,
+ void *ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueWriteBufferRect(
+ object_,
+ buffer(),
+ blocking,
+ (const ::size_t *)buffer_offset,
+ (const ::size_t *)host_offset,
+ (const ::size_t *)region,
+ buffer_row_pitch,
+ buffer_slice_pitch,
+ host_row_pitch,
+ host_slice_pitch,
+ ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_WRITE_BUFFER_RECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueCopyBufferRect(
+ const Buffer& src,
+ const Buffer& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ ::size_t src_row_pitch,
+ ::size_t src_slice_pitch,
+ ::size_t dst_row_pitch,
+ ::size_t dst_slice_pitch,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueCopyBufferRect(
+ object_,
+ src(),
+ dst(),
+ (const ::size_t *)src_origin,
+ (const ::size_t *)dst_origin,
+ (const ::size_t *)region,
+ src_row_pitch,
+ src_slice_pitch,
+ dst_row_pitch,
+ dst_slice_pitch,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQEUE_COPY_BUFFER_RECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+#if defined(CL_VERSION_1_2)
+ /**
+ * Enqueue a command to fill a buffer object with a pattern
+ * of a given size. The pattern is specified a as vector.
+ * \tparam PatternType The datatype of the pattern field.
+ * The pattern type must be an accepted OpenCL data type.
+ */
+ template<typename PatternType>
+ cl_int enqueueFillBuffer(
+ const Buffer& buffer,
+ PatternType pattern,
+ ::size_t offset,
+ ::size_t size,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueFillBuffer(
+ object_,
+ buffer(),
+ static_cast<void*>(&pattern),
+ sizeof(PatternType),
+ offset,
+ size,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_FILL_BUFFER_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+ cl_int enqueueReadImage(
+ const Image& image,
+ cl_bool blocking,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ ::size_t row_pitch,
+ ::size_t slice_pitch,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueReadImage(
+ object_, image(), blocking, (const ::size_t *) origin,
+ (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_READ_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueWriteImage(
+ const Image& image,
+ cl_bool blocking,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ ::size_t row_pitch,
+ ::size_t slice_pitch,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueWriteImage(
+ object_, image(), blocking, (const ::size_t *) origin,
+ (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_WRITE_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueCopyImage(
+ const Image& src,
+ const Image& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueCopyImage(
+ object_, src(), dst(), (const ::size_t *) src_origin,
+ (const ::size_t *)dst_origin, (const ::size_t *) region,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_COPY_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+#if defined(CL_VERSION_1_2)
+ /**
+ * Enqueue a command to fill an image object with a specified color.
+ * \param fillColor is the color to use to fill the image.
+ * This is a four component RGBA floating-point color value if
+ * the image channel data type is not an unnormalized signed or
+ * unsigned data type.
+ */
+ cl_int enqueueFillImage(
+ const Image& image,
+ cl_float4 fillColor,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueFillImage(
+ object_,
+ image(),
+ static_cast<void*>(&fillColor),
+ (const ::size_t *) origin,
+ (const ::size_t *) region,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_FILL_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ /**
+ * Enqueue a command to fill an image object with a specified color.
+ * \param fillColor is the color to use to fill the image.
+ * This is a four component RGBA signed integer color value if
+ * the image channel data type is an unnormalized signed integer
+ * type.
+ */
+ cl_int enqueueFillImage(
+ const Image& image,
+ cl_int4 fillColor,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueFillImage(
+ object_,
+ image(),
+ static_cast<void*>(&fillColor),
+ (const ::size_t *) origin,
+ (const ::size_t *) region,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_FILL_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ /**
+ * Enqueue a command to fill an image object with a specified color.
+ * \param fillColor is the color to use to fill the image.
+ * This is a four component RGBA unsigned integer color value if
+ * the image channel data type is an unnormalized unsigned integer
+ * type.
+ */
+ cl_int enqueueFillImage(
+ const Image& image,
+ cl_uint4 fillColor,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueFillImage(
+ object_,
+ image(),
+ static_cast<void*>(&fillColor),
+ (const ::size_t *) origin,
+ (const ::size_t *) region,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_FILL_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+ cl_int enqueueCopyImageToBuffer(
+ const Image& src,
+ const Buffer& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& region,
+ ::size_t dst_offset,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueCopyImageToBuffer(
+ object_, src(), dst(), (const ::size_t *) src_origin,
+ (const ::size_t *) region, dst_offset,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueCopyBufferToImage(
+ const Buffer& src,
+ const Image& dst,
+ ::size_t src_offset,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueCopyBufferToImage(
+ object_, src(), dst(), src_offset,
+ (const ::size_t *) dst_origin, (const ::size_t *) region,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ void* enqueueMapBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ cl_map_flags flags,
+ ::size_t offset,
+ ::size_t size,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL,
+ cl_int* err = NULL) const
+ {
+ cl_int error;
+ void * result = ::clEnqueueMapBuffer(
+ object_, buffer(), blocking, flags, offset, size,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (cl_event*) event,
+ &error);
+
+ detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ return result;
+ }
+
+ void* enqueueMapImage(
+ const Image& buffer,
+ cl_bool blocking,
+ cl_map_flags flags,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ ::size_t * row_pitch,
+ ::size_t * slice_pitch,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL,
+ cl_int* err = NULL) const
+ {
+ cl_int error;
+ void * result = ::clEnqueueMapImage(
+ object_, buffer(), blocking, flags,
+ (const ::size_t *) origin, (const ::size_t *) region,
+ row_pitch, slice_pitch,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (cl_event*) event,
+ &error);
+
+ detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ return result;
+ }
+
+ cl_int enqueueUnmapMemObject(
+ const Memory& memory,
+ void* mapped_ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueUnmapMemObject(
+ object_, memory(), mapped_ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+#if defined(CL_VERSION_1_2)
+ /**
+ * Enqueues a marker command which waits for either a list of events to complete,
+ * or all previously enqueued commands to complete.
+ *
+ * Enqueues a marker command which waits for either a list of events to complete,
+ * or if the list is empty it waits for all commands previously enqueued in command_queue
+ * to complete before it completes. This command returns an event which can be waited on,
+ * i.e. this event can be waited on to insure that all events either in the event_wait_list
+ * or all previously enqueued commands, queued before this command to command_queue,
+ * have completed.
+ */
+ cl_int enqueueMarkerWithWaitList(
+ const VECTOR_CLASS<Event> *events = 0,
+ Event *event = 0)
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueMarkerWithWaitList(
+ object_,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_MARKER_WAIT_LIST_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ /**
+ * A synchronization point that enqueues a barrier operation.
+ *
+ * Enqueues a barrier command which waits for either a list of events to complete,
+ * or if the list is empty it waits for all commands previously enqueued in command_queue
+ * to complete before it completes. This command blocks command execution, that is, any
+ * following commands enqueued after it do not execute until it completes. This command
+ * returns an event which can be waited on, i.e. this event can be waited on to insure that
+ * all events either in the event_wait_list or all previously enqueued commands, queued
+ * before this command to command_queue, have completed.
+ */
+ cl_int enqueueBarrierWithWaitList(
+ const VECTOR_CLASS<Event> *events = 0,
+ Event *event = 0)
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueBarrierWithWaitList(
+ object_,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_BARRIER_WAIT_LIST_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ /**
+ * Enqueues a command to indicate with which device a set of memory objects
+ * should be associated.
+ */
+ cl_int enqueueMigrateMemObjects(
+ const VECTOR_CLASS<Memory> &memObjects,
+ cl_mem_migration_flags flags,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL
+ )
+ {
+ cl_event tmp;
+
+ cl_mem* localMemObjects = static_cast<cl_mem*>(alloca(memObjects.size() * sizeof(cl_mem)));
+ for( int i = 0; i < (int)memObjects.size(); ++i ) {
+ localMemObjects[i] = memObjects[i]();
+ }
+
+
+ cl_int err = detail::errHandler(
+ ::clEnqueueMigrateMemObjects(
+ object_,
+ (cl_uint)memObjects.size(),
+ static_cast<const cl_mem*>(localMemObjects),
+ flags,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+#endif // #if defined(CL_VERSION_1_2)
+
+ cl_int enqueueNDRangeKernel(
+ const Kernel& kernel,
+ const NDRange& offset,
+ const NDRange& global,
+ const NDRange& local = NullRange,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueNDRangeKernel(
+ object_, kernel(), (cl_uint) global.dimensions(),
+ offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
+ (const ::size_t*) global,
+ local.dimensions() != 0 ? (const ::size_t*) local : NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_NDRANGE_KERNEL_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueTask(
+ const Kernel& kernel,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueTask(
+ object_, kernel(),
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_TASK_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueNativeKernel(
+ void (CL_CALLBACK *userFptr)(void *),
+ std::pair<void*, ::size_t> args,
+ const VECTOR_CLASS<Memory>* mem_objects = NULL,
+ const VECTOR_CLASS<const void*>* mem_locs = NULL,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0)
+ ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem))
+ : NULL;
+
+ if (mems != NULL) {
+ for (unsigned int i = 0; i < mem_objects->size(); i++) {
+ mems[i] = ((*mem_objects)[i])();
+ }
+ }
+
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueNativeKernel(
+ object_, userFptr, args.first, args.second,
+ (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+ mems,
+ (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_NATIVE_KERNEL);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+ CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ {
+ return detail::errHandler(
+ ::clEnqueueMarker(object_, (cl_event*) event),
+ __ENQUEUE_MARKER_ERR);
+ }
+
+ CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ {
+ return detail::errHandler(
+ ::clEnqueueWaitForEvents(
+ object_,
+ (cl_uint) events.size(),
+ (const cl_event*) &events.front()),
+ __ENQUEUE_WAIT_FOR_EVENTS_ERR);
+ }
+#endif // #if defined(CL_VERSION_1_1)
+
+ cl_int enqueueAcquireGLObjects(
+ const VECTOR_CLASS<Memory>* mem_objects = NULL,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueAcquireGLObjects(
+ object_,
+ (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+ (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_ACQUIRE_GL_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueReleaseGLObjects(
+ const VECTOR_CLASS<Memory>* mem_objects = NULL,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueReleaseGLObjects(
+ object_,
+ (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+ (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_RELEASE_GL_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+#if defined (USE_DX_INTEROP)
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
+ cl_command_queue command_queue, cl_uint num_objects,
+ const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list, cl_event* event);
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
+ cl_command_queue command_queue, cl_uint num_objects,
+ const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list, cl_event* event);
+
+ cl_int enqueueAcquireD3D10Objects(
+ const VECTOR_CLASS<Memory>* mem_objects = NULL,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
+#if defined(CL_VERSION_1_2)
+ cl_context context = getInfo<CL_QUEUE_CONTEXT>();
+ cl::Device device(getInfo<CL_QUEUE_DEVICE>());
+ cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
+ __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR);
+#endif
+#if defined(CL_VERSION_1_1)
+ __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
+#endif
+
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ pfn_clEnqueueAcquireD3D10ObjectsKHR(
+ object_,
+ (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+ (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_ACQUIRE_GL_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+
+ cl_int enqueueReleaseD3D10Objects(
+ const VECTOR_CLASS<Memory>* mem_objects = NULL,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL) const
+ {
+ static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
+#if defined(CL_VERSION_1_2)
+ cl_context context = getInfo<CL_QUEUE_CONTEXT>();
+ cl::Device device(getInfo<CL_QUEUE_DEVICE>());
+ cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
+ __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR);
+#endif // #if defined(CL_VERSION_1_2)
+#if defined(CL_VERSION_1_1)
+ __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
+#endif // #if defined(CL_VERSION_1_1)
+
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ pfn_clEnqueueReleaseD3D10ObjectsKHR(
+ object_,
+ (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+ (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_RELEASE_GL_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+ }
+#endif
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+ CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ {
+ return detail::errHandler(
+ ::clEnqueueBarrier(object_),
+ __ENQUEUE_BARRIER_ERR);
+ }
+#endif // #if defined(CL_VERSION_1_1)
+
+ cl_int flush() const
+ {
+ return detail::errHandler(::clFlush(object_), __FLUSH_ERR);
+ }
+
+ cl_int finish() const
+ {
+ return detail::errHandler(::clFinish(object_), __FINISH_ERR);
+ }
+};
+
+#ifdef _WIN32
+__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__declspec(selectany) CommandQueue CommandQueue::default_;
+__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
+#else
+__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__attribute__((weak)) CommandQueue CommandQueue::default_;
+__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
+#endif
+
+template< typename IteratorType >
+Buffer::Buffer(
+ const Context &context,
+ IteratorType startIterator,
+ IteratorType endIterator,
+ bool readOnly,
+ bool useHostPtr,
+ cl_int* err)
+{
+ typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+ cl_int error;
+
+ cl_mem_flags flags = 0;
+ if( readOnly ) {
+ flags |= CL_MEM_READ_ONLY;
+ }
+ else {
+ flags |= CL_MEM_READ_WRITE;
+ }
+ if( useHostPtr ) {
+ flags |= CL_MEM_USE_HOST_PTR;
+ }
+
+ ::size_t size = sizeof(DataType)*(endIterator - startIterator);
+
+ if( useHostPtr ) {
+ object_ = ::clCreateBuffer(context(), flags, size, static_cast<DataType*>(&*startIterator), &error);
+ } else {
+ object_ = ::clCreateBuffer(context(), flags, size, 0, &error);
+ }
+
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ if( !useHostPtr ) {
+ CommandQueue queue(context, 0, &error);
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ error = cl::copy(queue, startIterator, endIterator, *this);
+ detail::errHandler(error, __CREATE_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ }
+}
+
+inline cl_int enqueueReadBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ ::size_t offset,
+ ::size_t size,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event);
+}
+
+inline cl_int enqueueWriteBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ ::size_t offset,
+ ::size_t size,
+ const void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event);
+}
+
+inline void* enqueueMapBuffer(
+ const Buffer& buffer,
+ cl_bool blocking,
+ cl_map_flags flags,
+ ::size_t offset,
+ ::size_t size,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL,
+ cl_int* err = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+ detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+
+ void * result = ::clEnqueueMapBuffer(
+ queue(), buffer(), blocking, flags, offset, size,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (cl_event*) event,
+ &error);
+
+ detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+ if (err != NULL) {
+ *err = error;
+ }
+ return result;
+}
+
+inline cl_int enqueueUnmapMemObject(
+ const Memory& memory,
+ void* mapped_ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+ detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ cl_event tmp;
+ cl_int err = detail::errHandler(
+ ::clEnqueueUnmapMemObject(
+ queue(), memory(), mapped_ptr,
+ (events != NULL) ? (cl_uint) events->size() : 0,
+ (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+ (event != NULL) ? &tmp : NULL),
+ __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+ if (event != NULL && err == CL_SUCCESS)
+ *event = tmp;
+
+ return err;
+}
+
+inline cl_int enqueueCopyBuffer(
+ const Buffer& src,
+ const Buffer& dst,
+ ::size_t src_offset,
+ ::size_t dst_offset,
+ ::size_t size,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Host to Device.
+ * Uses default command queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer )
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+ if (error != CL_SUCCESS)
+ return error;
+
+ return cl::copy(queue, startIterator, endIterator, buffer);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Device to Host.
+ * Uses default command queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator )
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+ if (error != CL_SUCCESS)
+ return error;
+
+ return cl::copy(queue, buffer, startIterator, endIterator);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Host to Device.
+ * Uses specified queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer )
+{
+ typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+ cl_int error;
+
+ ::size_t length = endIterator-startIterator;
+ ::size_t byteLength = length*sizeof(DataType);
+
+ DataType *pointer =
+ static_cast<DataType*>(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error));
+ // if exceptions enabled, enqueueMapBuffer will throw
+ if( error != CL_SUCCESS ) {
+ return error;
+ }
+#if defined(_MSC_VER)
+ std::copy(
+ startIterator,
+ endIterator,
+ stdext::checked_array_iterator<DataType*>(
+ pointer, length));
+#else
+ std::copy(startIterator, endIterator, pointer);
+#endif
+ Event endEvent;
+ error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
+ // if exceptions enabled, enqueueUnmapMemObject will throw
+ if( error != CL_SUCCESS ) {
+ return error;
+ }
+ endEvent.wait();
+ return CL_SUCCESS;
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Device to Host.
+ * Uses specified queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator )
+{
+ typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+ cl_int error;
+
+ ::size_t length = endIterator-startIterator;
+ ::size_t byteLength = length*sizeof(DataType);
+
+ DataType *pointer =
+ static_cast<DataType*>(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error));
+ // if exceptions enabled, enqueueMapBuffer will throw
+ if( error != CL_SUCCESS ) {
+ return error;
+ }
+ std::copy(pointer, pointer + length, startIterator);
+ Event endEvent;
+ error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
+ // if exceptions enabled, enqueueUnmapMemObject will throw
+ if( error != CL_SUCCESS ) {
+ return error;
+ }
+ endEvent.wait();
+ return CL_SUCCESS;
+}
+
+#if defined(CL_VERSION_1_1)
+inline cl_int enqueueReadBufferRect(
+ const Buffer& buffer,
+ cl_bool blocking,
+ const size_t<3>& buffer_offset,
+ const size_t<3>& host_offset,
+ const size_t<3>& region,
+ ::size_t buffer_row_pitch,
+ ::size_t buffer_slice_pitch,
+ ::size_t host_row_pitch,
+ ::size_t host_slice_pitch,
+ void *ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueReadBufferRect(
+ buffer,
+ blocking,
+ buffer_offset,
+ host_offset,
+ region,
+ buffer_row_pitch,
+ buffer_slice_pitch,
+ host_row_pitch,
+ host_slice_pitch,
+ ptr,
+ events,
+ event);
+}
+
+inline cl_int enqueueWriteBufferRect(
+ const Buffer& buffer,
+ cl_bool blocking,
+ const size_t<3>& buffer_offset,
+ const size_t<3>& host_offset,
+ const size_t<3>& region,
+ ::size_t buffer_row_pitch,
+ ::size_t buffer_slice_pitch,
+ ::size_t host_row_pitch,
+ ::size_t host_slice_pitch,
+ void *ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueWriteBufferRect(
+ buffer,
+ blocking,
+ buffer_offset,
+ host_offset,
+ region,
+ buffer_row_pitch,
+ buffer_slice_pitch,
+ host_row_pitch,
+ host_slice_pitch,
+ ptr,
+ events,
+ event);
+}
+
+inline cl_int enqueueCopyBufferRect(
+ const Buffer& src,
+ const Buffer& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ ::size_t src_row_pitch,
+ ::size_t src_slice_pitch,
+ ::size_t dst_row_pitch,
+ ::size_t dst_slice_pitch,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueCopyBufferRect(
+ src,
+ dst,
+ src_origin,
+ dst_origin,
+ region,
+ src_row_pitch,
+ src_slice_pitch,
+ dst_row_pitch,
+ dst_slice_pitch,
+ events,
+ event);
+}
+#endif
+
+inline cl_int enqueueReadImage(
+ const Image& image,
+ cl_bool blocking,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ ::size_t row_pitch,
+ ::size_t slice_pitch,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueReadImage(
+ image,
+ blocking,
+ origin,
+ region,
+ row_pitch,
+ slice_pitch,
+ ptr,
+ events,
+ event);
+}
+
+inline cl_int enqueueWriteImage(
+ const Image& image,
+ cl_bool blocking,
+ const size_t<3>& origin,
+ const size_t<3>& region,
+ ::size_t row_pitch,
+ ::size_t slice_pitch,
+ void* ptr,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueWriteImage(
+ image,
+ blocking,
+ origin,
+ region,
+ row_pitch,
+ slice_pitch,
+ ptr,
+ events,
+ event);
+}
+
+inline cl_int enqueueCopyImage(
+ const Image& src,
+ const Image& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueCopyImage(
+ src,
+ dst,
+ src_origin,
+ dst_origin,
+ region,
+ events,
+ event);
+}
+
+inline cl_int enqueueCopyImageToBuffer(
+ const Image& src,
+ const Buffer& dst,
+ const size_t<3>& src_origin,
+ const size_t<3>& region,
+ ::size_t dst_offset,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueCopyImageToBuffer(
+ src,
+ dst,
+ src_origin,
+ region,
+ dst_offset,
+ events,
+ event);
+}
+
+inline cl_int enqueueCopyBufferToImage(
+ const Buffer& src,
+ const Image& dst,
+ ::size_t src_offset,
+ const size_t<3>& dst_origin,
+ const size_t<3>& region,
+ const VECTOR_CLASS<Event>* events = NULL,
+ Event* event = NULL)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.enqueueCopyBufferToImage(
+ src,
+ dst,
+ src_offset,
+ dst_origin,
+ region,
+ events,
+ event);
+}
+
+
+inline cl_int flush(void)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+ return queue.flush();
+}
+
+inline cl_int finish(void)
+{
+ cl_int error;
+ CommandQueue queue = CommandQueue::getDefault(&error);
+
+ if (error != CL_SUCCESS) {
+ return error;
+ }
+
+
+ return queue.finish();
+}
+
+// Kernel Functor support
+// New interface as of September 2011
+// Requires the C++11 std::tr1::function (note do not support TR1)
+// Visual Studio 2010 and GCC 4.2
+
+struct EnqueueArgs
+{
+ CommandQueue queue_;
+ const NDRange offset_;
+ const NDRange global_;
+ const NDRange local_;
+ VECTOR_CLASS<Event> events_;
+
+ EnqueueArgs(NDRange global) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange)
+ {
+
+ }
+
+ EnqueueArgs(NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(local)
+ {
+
+ }
+
+ EnqueueArgs(NDRange offset, NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(offset),
+ global_(global),
+ local_(local)
+ {
+
+ }
+
+ EnqueueArgs(Event e, NDRange global) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(Event e, NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(local)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(offset),
+ global_(global),
+ local_(local)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange),
+ events_(events)
+ {
+
+ }
+
+ EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(NullRange),
+ global_(global),
+ local_(local),
+ events_(events)
+ {
+
+ }
+
+ EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) :
+ queue_(CommandQueue::getDefault()),
+ offset_(offset),
+ global_(global),
+ local_(local),
+ events_(events)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, NDRange global) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(local)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(offset),
+ global_(global),
+ local_(local)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, Event e, NDRange global) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(local)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(offset),
+ global_(global),
+ local_(local)
+ {
+ events_.push_back(e);
+ }
+
+ EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(NullRange),
+ events_(events)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(NullRange),
+ global_(global),
+ local_(local),
+ events_(events)
+ {
+
+ }
+
+ EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) :
+ queue_(queue),
+ offset_(offset),
+ global_(global),
+ local_(local),
+ events_(events)
+ {
+
+ }
+};
+
+namespace detail {
+
+class NullType {};
+
+template<int index, typename T0>
+struct SetArg
+{
+ static void set (Kernel kernel, T0 arg)
+ {
+ kernel.setArg(index, arg);
+ }
+};
+
+template<int index>
+struct SetArg<index, NullType>
+{
+ static void set (Kernel, NullType)
+ {
+ }
+};
+
+template <
+ typename T0, typename T1, typename T2, typename T3,
+ typename T4, typename T5, typename T6, typename T7,
+ typename T8, typename T9, typename T10, typename T11,
+ typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19,
+ typename T20, typename T21, typename T22, typename T23,
+ typename T24, typename T25, typename T26, typename T27,
+ typename T28, typename T29, typename T30, typename T31
+>
+class KernelFunctorGlobal
+{
+private:
+ Kernel kernel_;
+
+public:
+ KernelFunctorGlobal(
+ Kernel kernel) :
+ kernel_(kernel)
+ {}
+
+ KernelFunctorGlobal(
+ const Program& program,
+ const STRING_CLASS name,
+ cl_int * err = NULL) :
+ kernel_(program, name.c_str(), err)
+ {}
+
+ Event operator() (
+ const EnqueueArgs& args,
+ T0 t0,
+ T1 t1 = NullType(),
+ T2 t2 = NullType(),
+ T3 t3 = NullType(),
+ T4 t4 = NullType(),
+ T5 t5 = NullType(),
+ T6 t6 = NullType(),
+ T7 t7 = NullType(),
+ T8 t8 = NullType(),
+ T9 t9 = NullType(),
+ T10 t10 = NullType(),
+ T11 t11 = NullType(),
+ T12 t12 = NullType(),
+ T13 t13 = NullType(),
+ T14 t14 = NullType(),
+ T15 t15 = NullType(),
+ T16 t16 = NullType(),
+ T17 t17 = NullType(),
+ T18 t18 = NullType(),
+ T19 t19 = NullType(),
+ T20 t20 = NullType(),
+ T21 t21 = NullType(),
+ T22 t22 = NullType(),
+ T23 t23 = NullType(),
+ T24 t24 = NullType(),
+ T25 t25 = NullType(),
+ T26 t26 = NullType(),
+ T27 t27 = NullType(),
+ T28 t28 = NullType(),
+ T29 t29 = NullType(),
+ T30 t30 = NullType(),
+ T31 t31 = NullType()
+ )
+ {
+ Event event;
+ SetArg<0, T0>::set(kernel_, t0);
+ SetArg<1, T1>::set(kernel_, t1);
+ SetArg<2, T2>::set(kernel_, t2);
+ SetArg<3, T3>::set(kernel_, t3);
+ SetArg<4, T4>::set(kernel_, t4);
+ SetArg<5, T5>::set(kernel_, t5);
+ SetArg<6, T6>::set(kernel_, t6);
+ SetArg<7, T7>::set(kernel_, t7);
+ SetArg<8, T8>::set(kernel_, t8);
+ SetArg<9, T9>::set(kernel_, t9);
+ SetArg<10, T10>::set(kernel_, t10);
+ SetArg<11, T11>::set(kernel_, t11);
+ SetArg<12, T12>::set(kernel_, t12);
+ SetArg<13, T13>::set(kernel_, t13);
+ SetArg<14, T14>::set(kernel_, t14);
+ SetArg<15, T15>::set(kernel_, t15);
+ SetArg<16, T16>::set(kernel_, t16);
+ SetArg<17, T17>::set(kernel_, t17);
+ SetArg<18, T18>::set(kernel_, t18);
+ SetArg<19, T19>::set(kernel_, t19);
+ SetArg<20, T20>::set(kernel_, t20);
+ SetArg<21, T21>::set(kernel_, t21);
+ SetArg<22, T22>::set(kernel_, t22);
+ SetArg<23, T23>::set(kernel_, t23);
+ SetArg<24, T24>::set(kernel_, t24);
+ SetArg<25, T25>::set(kernel_, t25);
+ SetArg<26, T26>::set(kernel_, t26);
+ SetArg<27, T27>::set(kernel_, t27);
+ SetArg<28, T28>::set(kernel_, t28);
+ SetArg<29, T29>::set(kernel_, t29);
+ SetArg<30, T30>::set(kernel_, t30);
+ SetArg<31, T31>::set(kernel_, t31);
+
+ args.queue_.enqueueNDRangeKernel(
+ kernel_,
+ args.offset_,
+ args.global_,
+ args.local_,
+ &args.events_,
+ &event);
+
+ return event;
+ }
+
+};
+
+//------------------------------------------------------------------------------------------------------
+
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26,
+ typename T27,
+ typename T28,
+ typename T29,
+ typename T30,
+ typename T31>
+struct functionImplementation_
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ T30,
+ T31> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ T30,
+ T31);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26,
+ T27 arg27,
+ T28 arg28,
+ T29 arg29,
+ T30 arg30,
+ T31 arg31)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26,
+ arg27,
+ arg28,
+ arg29,
+ arg30,
+ arg31);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26,
+ typename T27,
+ typename T28,
+ typename T29,
+ typename T30>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ T30,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ T30,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ T30);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26,
+ T27 arg27,
+ T28 arg28,
+ T29 arg29,
+ T30 arg30)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26,
+ arg27,
+ arg28,
+ arg29,
+ arg30);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26,
+ typename T27,
+ typename T28,
+ typename T29>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ T29);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26,
+ T27 arg27,
+ T28 arg28,
+ T29 arg29)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26,
+ arg27,
+ arg28,
+ arg29);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26,
+ typename T27,
+ typename T28>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ T28);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26,
+ T27 arg27,
+ T28 arg28)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26,
+ arg27,
+ arg28);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26,
+ typename T27>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ T27);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26,
+ T27 arg27)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26,
+ arg27);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25,
+ typename T26>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ T26);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25,
+ T26 arg26)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25,
+ arg26);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24,
+ typename T25>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ T25);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24,
+ T25 arg25)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24,
+ arg25);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23,
+ typename T24>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ T24);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23,
+ T24 arg24)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23,
+ arg24);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22,
+ typename T23>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ T23);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22,
+ T23 arg23)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22,
+ arg23);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21,
+ typename T22>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ T22);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21,
+ T22 arg22)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21,
+ arg22);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20,
+ typename T21>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ T21);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20,
+ T21 arg21)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20,
+ arg21);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19,
+ typename T20>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ T20);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19,
+ T20 arg20)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19,
+ arg20);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18,
+ typename T19>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ T19);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18,
+ T19 arg19)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18,
+ arg19);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17,
+ typename T18>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ T18);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17,
+ T18 arg18)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17,
+ arg18);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16,
+ typename T17>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ T17);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16,
+ T17 arg17)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16,
+ arg17);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15,
+ typename T16>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ T16);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15,
+ T16 arg16)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15,
+ arg16);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14,
+ typename T15>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ T15);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14,
+ T15 arg15)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14,
+ arg15);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13,
+ typename T14>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ T14);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13,
+ T14 arg14)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13,
+ arg14);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12,
+ typename T13>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ T13);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12,
+ T13 arg13)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12,
+ arg13);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11,
+ typename T12>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ T12);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11,
+ T12 arg12)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11,
+ arg12);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10,
+ typename T11>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ T11);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10,
+ T11 arg11)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10,
+ arg11);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9,
+ typename T10>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ T10);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9,
+ T10 arg10)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9,
+ arg10);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8,
+ typename T9>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ T9);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8,
+ T9 arg9)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8,
+ arg9);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7,
+ typename T8>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ T8);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7,
+ T8 arg8)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7,
+ arg8);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6,
+ typename T7>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ T7);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6,
+ T7 arg7)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6,
+ arg7);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5,
+ typename T6>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ T6);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5,
+ T6 arg6)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5,
+ arg6);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ T5);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4,
+ T5 arg5)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4,
+ arg5);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ T4,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3,
+ T4);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3,
+ T4 arg4)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3,
+ arg4);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ T3,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ T3,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2,
+ T3);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2,
+ T3 arg3)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2,
+ arg3);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1,
+ typename T2>
+struct functionImplementation_
+< T0,
+ T1,
+ T2,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ T2,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1,
+ T2);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1,
+ T2 arg2)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1,
+ arg2);
+ }
+
+
+};
+
+template<
+ typename T0,
+ typename T1>
+struct functionImplementation_
+< T0,
+ T1,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ T1,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0,
+ T1);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0,
+ T1 arg1)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0,
+ arg1);
+ }
+
+
+};
+
+template<
+ typename T0>
+struct functionImplementation_
+< T0,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType>
+{
+ typedef detail::KernelFunctorGlobal<
+ T0,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType,
+ NullType> FunctorType;
+
+ FunctorType functor_;
+
+ functionImplementation_(const FunctorType &functor) :
+ functor_(functor)
+ {
+
+ #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1))
+ // Fail variadic expansion for dev11
+ static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+ #endif
+
+ }
+
+ //! \brief Return type of the functor
+ typedef Event result_type;
+
+ //! \brief Function signature of kernel functor with no event dependency.
+ typedef Event type_(
+ const EnqueueArgs&,
+ T0);
+
+ Event operator()(
+ const EnqueueArgs& enqueueArgs,
+ T0 arg0)
+ {
+ return functor_(
+ enqueueArgs,
+ arg0);
+ }
+
+
+};
+
+
+
+
+
+} // namespace detail
+
+//----------------------------------------------------------------------------------------------
+
+template <
+ typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType,
+ typename T3 = detail::NullType, typename T4 = detail::NullType,
+ typename T5 = detail::NullType, typename T6 = detail::NullType,
+ typename T7 = detail::NullType, typename T8 = detail::NullType,
+ typename T9 = detail::NullType, typename T10 = detail::NullType,
+ typename T11 = detail::NullType, typename T12 = detail::NullType,
+ typename T13 = detail::NullType, typename T14 = detail::NullType,
+ typename T15 = detail::NullType, typename T16 = detail::NullType,
+ typename T17 = detail::NullType, typename T18 = detail::NullType,
+ typename T19 = detail::NullType, typename T20 = detail::NullType,
+ typename T21 = detail::NullType, typename T22 = detail::NullType,
+ typename T23 = detail::NullType, typename T24 = detail::NullType,
+ typename T25 = detail::NullType, typename T26 = detail::NullType,
+ typename T27 = detail::NullType, typename T28 = detail::NullType,
+ typename T29 = detail::NullType, typename T30 = detail::NullType,
+ typename T31 = detail::NullType
+>
+struct make_kernel :
+ public detail::functionImplementation_<
+ T0, T1, T2, T3,
+ T4, T5, T6, T7,
+ T8, T9, T10, T11,
+ T12, T13, T14, T15,
+ T16, T17, T18, T19,
+ T20, T21, T22, T23,
+ T24, T25, T26, T27,
+ T28, T29, T30, T31
+ >
+{
+public:
+ typedef detail::KernelFunctorGlobal<
+ T0, T1, T2, T3,
+ T4, T5, T6, T7,
+ T8, T9, T10, T11,
+ T12, T13, T14, T15,
+ T16, T17, T18, T19,
+ T20, T21, T22, T23,
+ T24, T25, T26, T27,
+ T28, T29, T30, T31
+ > FunctorType;
+
+ make_kernel(
+ const Program& program,
+ const STRING_CLASS name,
+ cl_int * err = NULL) :
+ detail::functionImplementation_<
+ T0, T1, T2, T3,
+ T4, T5, T6, T7,
+ T8, T9, T10, T11,
+ T12, T13, T14, T15,
+ T16, T17, T18, T19,
+ T20, T21, T22, T23,
+ T24, T25, T26, T27,
+ T28, T29, T30, T31
+ >(
+ FunctorType(program, name, err))
+ {}
+
+ make_kernel(
+ const Kernel kernel) :
+ detail::functionImplementation_<
+ T0, T1, T2, T3,
+ T4, T5, T6, T7,
+ T8, T9, T10, T11,
+ T12, T13, T14, T15,
+ T16, T17, T18, T19,
+ T20, T21, T22, T23,
+ T24, T25, T26, T27,
+ T28, T29, T30, T31
+ >(
+ FunctorType(kernel))
+ {}
+};
+
+
+//----------------------------------------------------------------------------------------------------------------------
+
+#undef __ERR_STR
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#undef __GET_DEVICE_INFO_ERR
+#undef __GET_PLATFORM_INFO_ERR
+#undef __GET_DEVICE_IDS_ERR
+#undef __GET_CONTEXT_INFO_ERR
+#undef __GET_EVENT_INFO_ERR
+#undef __GET_EVENT_PROFILE_INFO_ERR
+#undef __GET_MEM_OBJECT_INFO_ERR
+#undef __GET_IMAGE_INFO_ERR
+#undef __GET_SAMPLER_INFO_ERR
+#undef __GET_KERNEL_INFO_ERR
+#undef __GET_KERNEL_ARG_INFO_ERR
+#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
+#undef __GET_PROGRAM_INFO_ERR
+#undef __GET_PROGRAM_BUILD_INFO_ERR
+#undef __GET_COMMAND_QUEUE_INFO_ERR
+
+#undef __CREATE_CONTEXT_ERR
+#undef __CREATE_CONTEXT_FROM_TYPE_ERR
+#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
+
+#undef __CREATE_BUFFER_ERR
+#undef __CREATE_SUBBUFFER_ERR
+#undef __CREATE_IMAGE2D_ERR
+#undef __CREATE_IMAGE3D_ERR
+#undef __CREATE_SAMPLER_ERR
+#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
+
+#undef __CREATE_USER_EVENT_ERR
+#undef __SET_USER_EVENT_STATUS_ERR
+#undef __SET_EVENT_CALLBACK_ERR
+#undef __SET_PRINTF_CALLBACK_ERR
+
+#undef __WAIT_FOR_EVENTS_ERR
+
+#undef __CREATE_KERNEL_ERR
+#undef __SET_KERNEL_ARGS_ERR
+#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
+#undef __CREATE_PROGRAM_WITH_BINARY_ERR
+#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR
+#undef __BUILD_PROGRAM_ERR
+#undef __CREATE_KERNELS_IN_PROGRAM_ERR
+
+#undef __CREATE_COMMAND_QUEUE_ERR
+#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
+#undef __ENQUEUE_READ_BUFFER_ERR
+#undef __ENQUEUE_WRITE_BUFFER_ERR
+#undef __ENQUEUE_READ_BUFFER_RECT_ERR
+#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
+#undef __ENQEUE_COPY_BUFFER_ERR
+#undef __ENQEUE_COPY_BUFFER_RECT_ERR
+#undef __ENQUEUE_READ_IMAGE_ERR
+#undef __ENQUEUE_WRITE_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
+#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
+#undef __ENQUEUE_MAP_BUFFER_ERR
+#undef __ENQUEUE_MAP_IMAGE_ERR
+#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
+#undef __ENQUEUE_NDRANGE_KERNEL_ERR
+#undef __ENQUEUE_TASK_ERR
+#undef __ENQUEUE_NATIVE_KERNEL
+
+#undef __CL_EXPLICIT_CONSTRUCTORS
+
+#undef __UNLOAD_COMPILER_ERR
+#endif //__CL_USER_OVERRIDE_ERROR_STRINGS
+
+#undef __CL_FUNCTION_TYPE
+
+// Extensions
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_VERSION_1_1)
+#undef __INIT_CL_EXT_FCN_PTR
+#endif // #if defined(CL_VERSION_1_1)
+#undef __CREATE_SUB_DEVICES
+
+#if defined(USE_CL_DEVICE_FISSION)
+#undef __PARAM_NAME_DEVICE_FISSION
+#endif // USE_CL_DEVICE_FISSION
+
+#undef __DEFAULT_NOT_INITIALIZED
+#undef __DEFAULT_BEING_INITIALIZED
+#undef __DEFAULT_INITIALIZED
+
+} // namespace cl
+
+#ifdef _WIN32
+#pragma pop_macro("max")
+#endif // _WIN32
+
+#endif // CL_HPP_
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
new file mode 100644
index 000000000..11b29333c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
@@ -0,0 +1,754 @@
+/*
+ This file is part of c-ethash.
+
+ c-ethash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ c-ethash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash_cl_miner.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <assert.h>
+#include <queue>
+#include "ethash_cl_miner.h"
+#include <libethash/util.h>
+
+#undef min
+#undef max
+
+#define HASH_BYTES 32
+
+static char const ethash_inner_code[] = R"(
+
+// author Tim Hughes <tim@twistedfury.com>
+// Tested on Radeon HD 7850
+// Hashrate: 15940347 hashes/s
+// Bandwidth: 124533 MB/s
+// search kernel should fit in <= 84 VGPRS (3 wavefronts)
+
+#define THREADS_PER_HASH (128 / 16)
+#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
+
+#define FNV_PRIME 0x01000193
+
+__constant uint2 const Keccak_f1600_RC[24] = {
+ (uint2)(0x00000001, 0x00000000),
+ (uint2)(0x00008082, 0x00000000),
+ (uint2)(0x0000808a, 0x80000000),
+ (uint2)(0x80008000, 0x80000000),
+ (uint2)(0x0000808b, 0x00000000),
+ (uint2)(0x80000001, 0x00000000),
+ (uint2)(0x80008081, 0x80000000),
+ (uint2)(0x00008009, 0x80000000),
+ (uint2)(0x0000008a, 0x00000000),
+ (uint2)(0x00000088, 0x00000000),
+ (uint2)(0x80008009, 0x00000000),
+ (uint2)(0x8000000a, 0x00000000),
+ (uint2)(0x8000808b, 0x00000000),
+ (uint2)(0x0000008b, 0x80000000),
+ (uint2)(0x00008089, 0x80000000),
+ (uint2)(0x00008003, 0x80000000),
+ (uint2)(0x00008002, 0x80000000),
+ (uint2)(0x00000080, 0x80000000),
+ (uint2)(0x0000800a, 0x00000000),
+ (uint2)(0x8000000a, 0x80000000),
+ (uint2)(0x80008081, 0x80000000),
+ (uint2)(0x00008080, 0x80000000),
+ (uint2)(0x80000001, 0x00000000),
+ (uint2)(0x80008008, 0x80000000),
+};
+
+void keccak_f1600_round(uint2* a, uint r, uint out_size)
+{
+ #if !__ENDIAN_LITTLE__
+ for (uint i = 0; i != 25; ++i)
+ a[i] = a[i].yx;
+ #endif
+
+ uint2 b[25];
+ uint2 t;
+
+ // Theta
+ b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
+ b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
+ b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
+ b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
+ b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
+ t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
+ a[0] ^= t;
+ a[5] ^= t;
+ a[10] ^= t;
+ a[15] ^= t;
+ a[20] ^= t;
+ t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
+ a[1] ^= t;
+ a[6] ^= t;
+ a[11] ^= t;
+ a[16] ^= t;
+ a[21] ^= t;
+ t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
+ a[2] ^= t;
+ a[7] ^= t;
+ a[12] ^= t;
+ a[17] ^= t;
+ a[22] ^= t;
+ t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
+ a[3] ^= t;
+ a[8] ^= t;
+ a[13] ^= t;
+ a[18] ^= t;
+ a[23] ^= t;
+ t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
+ a[4] ^= t;
+ a[9] ^= t;
+ a[14] ^= t;
+ a[19] ^= t;
+ a[24] ^= t;
+
+ // Rho Pi
+ b[0] = a[0];
+ b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
+ b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
+ b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
+ b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
+ b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
+ b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
+ b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
+ b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
+ b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
+ b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
+ b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
+ b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
+ b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
+ b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
+ b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
+ b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
+ b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
+ b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
+ b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
+ b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
+ b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
+ b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
+ b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
+ b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
+
+ // Chi
+ a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
+ a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
+ a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
+ a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
+ if (out_size >= 4)
+ {
+ a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
+ a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
+ a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
+ a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
+ a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
+ if (out_size >= 8)
+ {
+ a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
+ a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
+ a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
+ a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
+ a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
+ a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
+ a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
+ a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
+ a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
+ a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
+ a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
+ a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
+ a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
+ a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
+ a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
+ a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
+ }
+ }
+
+ // Iota
+ a[0] ^= Keccak_f1600_RC[r];
+
+ #if !__ENDIAN_LITTLE__
+ for (uint i = 0; i != 25; ++i)
+ a[i] = a[i].yx;
+ #endif
+}
+
+void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
+{
+ for (uint i = in_size; i != 25; ++i)
+ {
+ a[i] = 0;
+ }
+#if __ENDIAN_LITTLE__
+ a[in_size] ^= 0x0000000000000001;
+ a[24-out_size*2] ^= 0x8000000000000000;
+#else
+ a[in_size] ^= 0x0100000000000000;
+ a[24-out_size*2] ^= 0x0000000000000080;
+#endif
+
+ // Originally I unrolled the first and last rounds to interface
+ // better with surrounding code, however I haven't done this
+ // without causing the AMD compiler to blow up the VGPR usage.
+ uint r = 0;
+ do
+ {
+ // This dynamic branch stops the AMD compiler unrolling the loop
+ // and additionally saves about 33% of the VGPRs, enough to gain another
+ // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
+ // massage out of the compiler. It doesn't really seem to matter how
+ // much we try and help the compiler save VGPRs because it seems to throw
+ // that information away, hence the implementation of keccak here
+ // doesn't bother.
+ if (isolate)
+ {
+ keccak_f1600_round((uint2*)a, r++, 25);
+ }
+ }
+ while (r < 23);
+
+ // final round optimised for digest size
+ keccak_f1600_round((uint2*)a, r++, out_size);
+}
+
+#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
+
+#define countof(x) (sizeof(x) / sizeof(x[0]))
+
+uint fnv(uint x, uint y)
+{
+ return x * FNV_PRIME ^ y;
+}
+
+uint4 fnv4(uint4 x, uint4 y)
+{
+ return x * FNV_PRIME ^ y;
+}
+
+uint fnv_reduce(uint4 v)
+{
+ return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
+}
+
+typedef union
+{
+ ulong ulongs[32 / sizeof(ulong)];
+ uint uints[32 / sizeof(uint)];
+} hash32_t;
+
+typedef union
+{
+ ulong ulongs[64 / sizeof(ulong)];
+ uint4 uint4s[64 / sizeof(uint4)];
+} hash64_t;
+
+typedef union
+{
+ uint uints[128 / sizeof(uint)];
+ uint4 uint4s[128 / sizeof(uint4)];
+} hash128_t;
+
+hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
+{
+ hash64_t init;
+ uint const init_size = countof(init.ulongs);
+ uint const hash_size = countof(header->ulongs);
+
+ // sha3_512(header .. nonce)
+ ulong state[25];
+ copy(state, header->ulongs, hash_size);
+ state[hash_size] = nonce;
+ keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
+
+ copy(init.ulongs, state, init_size);
+ return init;
+}
+
+uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
+{
+ uint4 mix = init;
+
+ // share init0
+ if (thread_id == 0)
+ *share = mix.x;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ uint init0 = *share;
+
+ uint a = 0;
+ do
+ {
+ bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+ #pragma unroll
+ for (uint i = 0; i != 4; ++i)
+ {
+ if (update_share)
+ {
+ uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+ *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
+ }
+ }
+ while ((a += 4) != (ACCESSES & isolate));
+
+ return fnv_reduce(mix);
+}
+
+hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
+{
+ ulong state[25];
+
+ hash32_t hash;
+ uint const hash_size = countof(hash.ulongs);
+ uint const init_size = countof(init->ulongs);
+ uint const mix_size = countof(mix->ulongs);
+
+ // keccak_256(keccak_512(header..nonce) .. mix);
+ copy(state, init->ulongs, init_size);
+ copy(state + init_size, mix->ulongs, mix_size);
+ keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
+
+ // copy out
+ copy(hash.ulongs, state, hash_size);
+ return hash;
+}
+
+hash32_t compute_hash_simple(
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong nonce,
+ uint isolate
+ )
+{
+ hash64_t init = init_hash(g_header, nonce, isolate);
+
+ hash128_t mix;
+ for (uint i = 0; i != countof(mix.uint4s); ++i)
+ {
+ mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
+ }
+
+ uint mix_val = mix.uints[0];
+ uint init0 = mix.uints[0];
+ uint a = 0;
+ do
+ {
+ uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
+ uint n = (a+1) % countof(mix.uints);
+
+ #pragma unroll
+ for (uint i = 0; i != countof(mix.uints); ++i)
+ {
+ mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
+ mix_val = i == n ? mix.uints[i] : mix_val;
+ }
+ }
+ while (++a != (ACCESSES & isolate));
+
+ // reduce to output
+ hash32_t fnv_mix;
+ for (uint i = 0; i != countof(fnv_mix.uints); ++i)
+ {
+ fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
+ }
+
+ return final_hash(&init, &fnv_mix, isolate);
+}
+
+typedef union
+{
+ struct
+ {
+ hash64_t init;
+ uint pad; // avoid lds bank conflicts
+ };
+ hash32_t mix;
+} compute_hash_share;
+
+hash32_t compute_hash(
+ __local compute_hash_share* share,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong nonce,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+
+ // Compute one init hash per work item.
+ hash64_t init = init_hash(g_header, nonce, isolate);
+
+ // Threads work together in this phase in groups of 8.
+ uint const thread_id = gid % THREADS_PER_HASH;
+ uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+ hash32_t mix;
+ uint i = 0;
+ do
+ {
+ // share init with other threads
+ if (i == thread_id)
+ share[hash_id].init = init;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
+
+ share[hash_id].mix.uints[thread_id] = thread_mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (i == thread_id)
+ mix = share[hash_id].mix;
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ while (++i != (THREADS_PER_HASH & isolate));
+
+ return final_hash(&init, &mix, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_simple(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+ g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_simple(
+ __global volatile uint* restrict g_output,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ ulong target,
+ uint isolate
+ )
+{
+ uint const gid = get_global_id(0);
+ hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+
+ if (hash.ulongs[countof(hash.ulongs)-1] < target)
+ {
+ uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+ g_output[slot] = gid;
+ }
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search(
+ __global volatile uint* restrict g_output,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ ulong target,
+ uint isolate
+ )
+{
+ __local compute_hash_share share[HASHES_PER_LOOP];
+
+ uint const gid = get_global_id(0);
+ hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+
+ if (hash.ulongs[countof(hash.ulongs)-1] < target)
+ {
+ uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+ g_output[slot] = gid;
+ }
+}
+
+)";
+
+static void add_definition(std::string& source, char const* id, unsigned value)
+{
+ char buf[256];
+ sprintf(buf, "#define %s %uu\n", id, value);
+ source.insert(source.begin(), buf, buf + strlen(buf));
+}
+
+ethash_cl_miner::ethash_cl_miner()
+{
+}
+
+bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size)
+{
+ // store params
+ m_params = params;
+
+ // get all platforms
+ std::vector<cl::Platform> platforms;
+ cl::Platform::get(&platforms);
+ if (platforms.empty())
+ {
+ debugf("No OpenCL platforms found.\n");
+ return false;
+ }
+
+ // use default platform
+ debugf("Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str());
+
+ // get GPU device of the default platform
+ std::vector<cl::Device> devices;
+ platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+ if (devices.empty())
+ {
+ debugf("No OpenCL devices found.\n");
+ return false;
+ }
+
+ // use default device
+ cl::Device& device = devices[0];
+ debugf("Using device: %s\n", device.getInfo<CL_DEVICE_NAME>().c_str());
+
+ // create context
+ m_context = cl::Context({device});
+ m_queue = cl::CommandQueue(m_context, device);
+
+ // use requested workgroup size, but we require multiple of 8
+ m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
+
+ // patch source code
+ std::string code = ethash_inner_code;
+ add_definition(code, "GROUP_SIZE", m_workgroup_size);
+ add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES));
+ add_definition(code, "ACCESSES", ACCESSES);
+ add_definition(code, "MAX_OUTPUTS", c_max_search_results);
+ //debugf("%s", code.c_str());
+
+ // create miner OpenCL program
+ cl::Program::Sources sources;
+ sources.push_back({code.c_str(), code.size()});
+
+ cl::Program program(m_context, sources);
+ try
+ {
+ program.build({device});
+ }
+ catch (cl::Error err)
+ {
+ debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
+ return false;
+ }
+ m_hash_kernel = cl::Kernel(program, "ethash_hash");
+ m_search_kernel = cl::Kernel(program, "ethash_search");
+
+ // create buffer for dag
+ m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size);
+
+ // create buffer for header
+ m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
+
+ // compute dag on CPU
+ {
+ void* cache_mem = malloc(params.cache_size + 63);
+ ethash_cache cache;
+ cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63);
+ ethash_mkcache(&cache, &params, seed);
+
+ // if this throws then it's because we probably need to subdivide the dag uploads for compatibility
+ void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size);
+ ethash_compute_full_data(dag_ptr, &params, &cache);
+ m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
+
+ free(cache_mem);
+ }
+
+ // create mining buffers
+ for (unsigned i = 0; i != c_num_buffers; ++i)
+ {
+ m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size);
+ m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
+ }
+ return true;
+}
+
+void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
+{
+ struct pending_batch
+ {
+ unsigned base;
+ unsigned count;
+ unsigned buf;
+ };
+ std::queue<pending_batch> pending;
+
+ // update header constant buffer
+ m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
+
+ /*
+ __kernel void ethash_combined_hash(
+ __global hash32_t* g_hashes,
+ __constant hash32_t const* g_header,
+ __global hash128_t const* g_dag,
+ ulong start_nonce,
+ uint isolate
+ )
+ */
+ m_hash_kernel.setArg(1, m_header);
+ m_hash_kernel.setArg(2, m_dag);
+ m_hash_kernel.setArg(3, nonce);
+ m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
+
+ unsigned buf = 0;
+ for (unsigned i = 0; i < count || !pending.empty(); )
+ {
+ // how many this batch
+ if (i < count)
+ {
+ unsigned const this_count = std::min(count - i, c_hash_batch_size);
+ unsigned const batch_count = std::max(this_count, m_workgroup_size);
+
+ // supply output hash buffer to kernel
+ m_hash_kernel.setArg(0, m_hash_buf[buf]);
+
+ // execute it!
+ clock_t start_time = clock();
+ m_queue.enqueueNDRangeKernel(
+ m_hash_kernel,
+ cl::NullRange,
+ cl::NDRange(batch_count),
+ cl::NDRange(m_workgroup_size)
+ );
+ m_queue.flush();
+
+ pending.push({i, this_count, buf});
+ i += this_count;
+ buf = (buf + 1) % c_num_buffers;
+ }
+
+ // read results
+ if (i == count || pending.size() == c_num_buffers)
+ {
+ pending_batch const& batch = pending.front();
+
+ // could use pinned host pointer instead, but this path isn't that important.
+ uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES);
+ memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES);
+ m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
+
+ pending.pop();
+ }
+ }
+}
+
+
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
+{
+ struct pending_batch
+ {
+ uint64_t start_nonce;
+ unsigned buf;
+ };
+ std::queue<pending_batch> pending;
+
+ static uint32_t const c_zero = 0;
+
+ // update header constant buffer
+ m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
+ for (unsigned i = 0; i != c_num_buffers; ++i)
+ {
+ m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
+ }
+ cl::Event pre_return_event;
+ m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
+
+ /*
+ __kernel void ethash_combined_search(
+ __global hash32_t* g_hashes, // 0
+ __constant hash32_t const* g_header, // 1
+ __global hash128_t const* g_dag, // 2
+ ulong start_nonce, // 3
+ ulong target, // 4
+ uint isolate // 5
+ )
+ */
+ m_search_kernel.setArg(1, m_header);
+ m_search_kernel.setArg(2, m_dag);
+
+ // pass these to stop the compiler unrolling the loops
+ m_search_kernel.setArg(4, target);
+ m_search_kernel.setArg(5, ~0u);
+
+
+ unsigned buf = 0;
+ for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
+ {
+ // supply output buffer to kernel
+ m_search_kernel.setArg(0, m_search_buf[buf]);
+ m_search_kernel.setArg(3, start_nonce);
+
+ // execute it!
+ m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
+
+ pending.push({start_nonce, buf});
+ buf = (buf + 1) % c_num_buffers;
+
+ // read results
+ if (pending.size() == c_num_buffers)
+ {
+ pending_batch const& batch = pending.front();
+
+ // could use pinned host pointer instead
+ uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
+ unsigned num_found = std::min(results[0], c_max_search_results);
+
+ uint64_t nonces[c_max_search_results];
+ for (unsigned i = 0; i != num_found; ++i)
+ {
+ nonces[i] = batch.start_nonce + results[i+1];
+ }
+
+ m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
+
+ bool exit = num_found && hook.found(nonces, num_found);
+ exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
+ if (exit)
+ break;
+
+ pending.pop();
+ }
+ }
+
+ // not safe to return until this is ready
+ pre_return_event.wait();
+}
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
new file mode 100644
index 000000000..f37100d91
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#define __CL_ENABLE_EXCEPTIONS
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#include "cl.hpp"
+#include <time.h>
+#include <libethash/ethash.h>
+
+class ethash_cl_miner
+{
+public:
+ struct search_hook
+ {
+ // reports progress, return true to abort
+ virtual bool found(uint64_t const* nonces, uint32_t count) = 0;
+ virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
+ };
+
+public:
+ ethash_cl_miner();
+
+ bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64);
+
+ void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
+ void search(uint8_t const* header, uint64_t target, search_hook& hook);
+
+private:
+ static unsigned const c_max_search_results = 63;
+ static unsigned const c_num_buffers = 2;
+ static unsigned const c_hash_batch_size = 1024;
+ static unsigned const c_search_batch_size = 1024*256;
+
+ ethash_params m_params;
+ cl::Context m_context;
+ cl::CommandQueue m_queue;
+ cl::Kernel m_hash_kernel;
+ cl::Kernel m_search_kernel;
+ cl::Buffer m_dag;
+ cl::Buffer m_header;
+ cl::Buffer m_hash_buf[c_num_buffers];
+ cl::Buffer m_search_buf[c_num_buffers];
+ unsigned m_workgroup_size;
+}; \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
new file mode 100644
index 000000000..b30ed3e2d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
@@ -0,0 +1,15 @@
+find_package(CUDA)
+
+# Pass options to NVCC
+
+
+if (CUDA_FOUND)
+set(CUDA_NVCC_FLAGS " -gencode;arch=compute_30,code=sm_30;
+ -gencode;arch=compute_20,code=sm_20;
+ -gencode;arch=compute_11,code=sm_11;
+ -gencode;arch=compute_12,code=sm_12;
+ -gencode;arch=compute_13,code=sm_13;")
+cuda_add_executable(
+ ethash-cuda
+ libethash.cu)
+endif() \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
new file mode 100644
index 000000000..f06653f2d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
@@ -0,0 +1,879 @@
+/*
+ Copyright 2009 NVIDIA Corporation. All rights reserved.
+
+ NOTICE TO LICENSEE:
+
+ This source code and/or documentation ("Licensed Deliverables") are subject
+ to NVIDIA intellectual property rights under U.S. and international Copyright
+ laws.
+
+ These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL
+ to NVIDIA and is being provided under the terms and conditions of a form of
+ NVIDIA software license agreement by and between NVIDIA and Licensee ("License
+ Agreement") or electronically accepted by Licensee. Notwithstanding any terms
+ or conditions to the contrary in the License Agreement, reproduction or
+ disclosure of the Licensed Deliverables to any third party without the express
+ written consent of NVIDIA is prohibited.
+
+ NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT,
+ NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED
+ DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED
+ WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE
+ LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY
+ TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL
+ NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.
+
+ U.S. Government End Users. These Licensed Deliverables are a "commercial item"
+ as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
+ "commercial computer software" and "commercial computer software documentation"
+ as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the
+ U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212
+ and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government
+ End Users acquire the Licensed Deliverables with only those rights set forth
+ herein.
+
+ Any use of the Licensed Deliverables in individual and commercial software must
+ include, in the user documentation and internal comments to the code, the above
+ Disclaimer and U.S. Government End Users Notice.
+ */
+
+/*
+ * cuPrintf.cu
+ *
+ * This is a printf command callable from within a kernel. It is set
+ * up so that output is sent to a memory buffer, which is emptied from
+ * the host side - but only after a cudaThreadSynchronize() on the host.
+ *
+ * Currently, there is a limitation of around 200 characters of output
+ * and no more than 10 arguments to a single cuPrintf() call. Issue
+ * multiple calls if longer format strings are required.
+ *
+ * It requires minimal setup, and is *NOT* optimised for performance.
+ * For example, writes are not coalesced - this is because there is an
+ * assumption that people will not want to printf from every single one
+ * of thousands of threads, but only from individual threads at a time.
+ *
+ * Using this is simple - it requires one host-side call to initialise
+ * everything, and then kernels can call cuPrintf at will. Sample code
+ * is the easiest way to demonstrate:
+ *
+ #include "cuPrintf.cu"
+
+ __global__ void testKernel(int val)
+ {
+ cuPrintf("Value is: %d\n", val);
+ }
+
+ int main()
+ {
+ cudaPrintfInit();
+ testKernel<<< 2, 3 >>>(10);
+ cudaPrintfDisplay(stdout, true);
+ cudaPrintfEnd();
+ return 0;
+ }
+ *
+ * See the header file, "cuPrintf.cuh" for more info, especially
+ * arguments to cudaPrintfInit() and cudaPrintfDisplay();
+ */
+
+#ifndef CUPRINTF_CU
+#define CUPRINTF_CU
+
+#include "cuPrintf.cuh"
+#if __CUDA_ARCH__ > 100 // Atomics only used with > sm_10 architecture
+#include <sm_11_atomic_functions.h>
+#endif
+
+// This is the smallest amount of memory, per-thread, which is allowed.
+// It is also the largest amount of space a single printf() can take up
+const static int CUPRINTF_MAX_LEN = 256;
+
+// This structure is used internally to track block/thread output restrictions.
+typedef struct __align__(8) {
+ int threadid; // CUPRINTF_UNRESTRICTED for unrestricted
+ int blockid; // CUPRINTF_UNRESTRICTED for unrestricted
+} cuPrintfRestriction;
+
+// The main storage is in a global print buffer, which has a known
+// start/end/length. These are atomically updated so it works as a
+// circular buffer.
+// Since the only control primitive that can be used is atomicAdd(),
+// we cannot wrap the pointer as such. The actual address must be
+// calculated from printfBufferPtr by mod-ing with printfBufferLength.
+// For sm_10 architecture, we must subdivide the buffer per-thread
+// since we do not even have an atomic primitive.
+__constant__ static char *globalPrintfBuffer = NULL; // Start of circular buffer (set up by host)
+__constant__ static int printfBufferLength = 0; // Size of circular buffer (set up by host)
+__device__ static cuPrintfRestriction restrictRules; // Output restrictions
+__device__ volatile static char *printfBufferPtr = NULL; // Current atomically-incremented non-wrapped offset
+
+// This is the header preceeding all printf entries.
+// NOTE: It *must* be size-aligned to the maximum entity size (size_t)
+typedef struct __align__(8) {
+ unsigned short magic; // Magic number says we're valid
+ unsigned short fmtoffset; // Offset of fmt string into buffer
+ unsigned short blockid; // Block ID of author
+ unsigned short threadid; // Thread ID of author
+} cuPrintfHeader;
+
+// Special header for sm_10 architecture
+#define CUPRINTF_SM10_MAGIC 0xC810 // Not a valid ascii character
+typedef struct __align__(16) {
+ unsigned short magic; // sm_10 specific magic number
+ unsigned short unused;
+ unsigned int thread_index; // thread ID for this buffer
+ unsigned int thread_buf_len; // per-thread buffer length
+ unsigned int offset; // most recent printf's offset
+} cuPrintfHeaderSM10;
+
+
+// Because we can't write an element which is not aligned to its bit-size,
+// we have to align all sizes and variables on maximum-size boundaries.
+// That means sizeof(double) in this case, but we'll use (long long) for
+// better arch<1.3 support
+#define CUPRINTF_ALIGN_SIZE sizeof(long long)
+
+// All our headers are prefixed with a magic number so we know they're ready
+#define CUPRINTF_SM11_MAGIC (unsigned short)0xC811 // Not a valid ascii character
+
+
+//
+// getNextPrintfBufPtr
+//
+// Grabs a block of space in the general circular buffer, using an
+// atomic function to ensure that it's ours. We handle wrapping
+// around the circular buffer and return a pointer to a place which
+// can be written to.
+//
+// Important notes:
+// 1. We always grab CUPRINTF_MAX_LEN bytes
+// 2. Because of 1, we never worry about wrapping around the end
+// 3. Because of 1, printfBufferLength *must* be a factor of CUPRINTF_MAX_LEN
+//
+// This returns a pointer to the place where we own.
+//
+__device__ static char *getNextPrintfBufPtr()
+{
+ // Initialisation check
+ if(!printfBufferPtr)
+ return NULL;
+
+ // Thread/block restriction check
+ if((restrictRules.blockid != CUPRINTF_UNRESTRICTED) && (restrictRules.blockid != (blockIdx.x + gridDim.x*blockIdx.y)))
+ return NULL;
+ if((restrictRules.threadid != CUPRINTF_UNRESTRICTED) && (restrictRules.threadid != (threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z)))
+ return NULL;
+
+ // Conditional section, dependent on architecture
+#if __CUDA_ARCH__ == 100
+ // For sm_10 architectures, we have no atomic add - this means we must split the
+ // entire available buffer into per-thread blocks. Inefficient, but what can you do.
+ int thread_count = (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z);
+ int thread_index = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z +
+ (blockIdx.x + gridDim.x*blockIdx.y) * (blockDim.x * blockDim.y * blockDim.z);
+
+ // Find our own block of data and go to it. Make sure the per-thread length
+ // is a precise multiple of CUPRINTF_MAX_LEN, otherwise we risk size and
+ // alignment issues! We must round down, of course.
+ unsigned int thread_buf_len = printfBufferLength / thread_count;
+ thread_buf_len &= ~(CUPRINTF_MAX_LEN-1);
+
+ // We *must* have a thread buffer length able to fit at least two printfs (one header, one real)
+ if(thread_buf_len < (CUPRINTF_MAX_LEN * 2))
+ return NULL;
+
+ // Now address our section of the buffer. The first item is a header.
+ char *myPrintfBuffer = globalPrintfBuffer + (thread_buf_len * thread_index);
+ cuPrintfHeaderSM10 hdr = *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer;
+ if(hdr.magic != CUPRINTF_SM10_MAGIC)
+ {
+ // If our header is not set up, initialise it
+ hdr.magic = CUPRINTF_SM10_MAGIC;
+ hdr.thread_index = thread_index;
+ hdr.thread_buf_len = thread_buf_len;
+ hdr.offset = 0; // Note we start at 0! We pre-increment below.
+ *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer = hdr; // Write back the header
+
+ // For initial setup purposes, we might need to init thread0's header too
+ // (so that cudaPrintfDisplay() below will work). This is only run once.
+ cuPrintfHeaderSM10 *tophdr = (cuPrintfHeaderSM10 *)(void *)globalPrintfBuffer;
+ tophdr->thread_buf_len = thread_buf_len;
+ }
+
+ // Adjust the offset by the right amount, and wrap it if need be
+ unsigned int offset = hdr.offset + CUPRINTF_MAX_LEN;
+ if(offset >= hdr.thread_buf_len)
+ offset = CUPRINTF_MAX_LEN;
+
+ // Write back the new offset for next time and return a pointer to it
+ ((cuPrintfHeaderSM10 *)(void *)myPrintfBuffer)->offset = offset;
+ return myPrintfBuffer + offset;
+#else
+ // Much easier with an atomic operation!
+ size_t offset = atomicAdd((unsigned int *)&printfBufferPtr, CUPRINTF_MAX_LEN) - (size_t)globalPrintfBuffer;
+ offset %= printfBufferLength;
+ return globalPrintfBuffer + offset;
+#endif
+}
+
+
+//
+// writePrintfHeader
+//
+// Inserts the header for containing our UID, fmt position and
+// block/thread number. We generate it dynamically to avoid
+// issues arising from requiring pre-initialisation.
+//
+__device__ static void writePrintfHeader(char *ptr, char *fmtptr)
+{
+ if(ptr)
+ {
+ cuPrintfHeader header;
+ header.magic = CUPRINTF_SM11_MAGIC;
+ header.fmtoffset = (unsigned short)(fmtptr - ptr);
+ header.blockid = blockIdx.x + gridDim.x*blockIdx.y;
+ header.threadid = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z;
+ *(cuPrintfHeader *)(void *)ptr = header;
+ }
+}
+
+
+//
+// cuPrintfStrncpy
+//
+// This special strncpy outputs an aligned length value, followed by the
+// string. It then zero-pads the rest of the string until a 64-aligned
+// boundary. The length *includes* the padding. A pointer to the byte
+// just after the \0 is returned.
+//
+// This function could overflow CUPRINTF_MAX_LEN characters in our buffer.
+// To avoid it, we must count as we output and truncate where necessary.
+//
+__device__ static char *cuPrintfStrncpy(char *dest, const char *src, int n, char *end)
+{
+ // Initialisation and overflow check
+ if(!dest || !src || (dest >= end))
+ return NULL;
+
+ // Prepare to write the length specifier. We're guaranteed to have
+ // at least "CUPRINTF_ALIGN_SIZE" bytes left because we only write out in
+ // chunks that size, and CUPRINTF_MAX_LEN is aligned with CUPRINTF_ALIGN_SIZE.
+ int *lenptr = (int *)(void *)dest;
+ int len = 0;
+ dest += CUPRINTF_ALIGN_SIZE;
+
+ // Now copy the string
+ while(n--)
+ {
+ if(dest >= end) // Overflow check
+ break;
+
+ len++;
+ *dest++ = *src;
+ if(*src++ == '\0')
+ break;
+ }
+
+ // Now write out the padding bytes, and we have our length.
+ while((dest < end) && (((long)dest & (CUPRINTF_ALIGN_SIZE-1)) != 0))
+ {
+ len++;
+ *dest++ = 0;
+ }
+ *lenptr = len;
+ return (dest < end) ? dest : NULL; // Overflow means return NULL
+}
+
+
+//
+// copyArg
+//
+// This copies a length specifier and then the argument out to the
+// data buffer. Templates let the compiler figure all this out at
+// compile-time, making life much simpler from the programming
+// point of view. I'm assuimg all (const char *) is a string, and
+// everything else is the variable it points at. I'd love to see
+// a better way of doing it, but aside from parsing the format
+// string I can't think of one.
+//
+// The length of the data type is inserted at the beginning (so that
+// the display can distinguish between float and double), and the
+// pointer to the end of the entry is returned.
+//
+__device__ static char *copyArg(char *ptr, const char *arg, char *end)
+{
+ // Initialisation check
+ if(!ptr || !arg)
+ return NULL;
+
+ // strncpy does all our work. We just terminate.
+ if((ptr = cuPrintfStrncpy(ptr, arg, CUPRINTF_MAX_LEN, end)) != NULL)
+ *ptr = 0;
+
+ return ptr;
+}
+
+template <typename T>
+__device__ static char *copyArg(char *ptr, T &arg, char *end)
+{
+ // Initisalisation and overflow check. Alignment rules mean that
+ // we're at least CUPRINTF_ALIGN_SIZE away from "end", so we only need
+ // to check that one offset.
+ if(!ptr || ((ptr+CUPRINTF_ALIGN_SIZE) >= end))
+ return NULL;
+
+ // Write the length and argument
+ *(int *)(void *)ptr = sizeof(arg);
+ ptr += CUPRINTF_ALIGN_SIZE;
+ *(T *)(void *)ptr = arg;
+ ptr += CUPRINTF_ALIGN_SIZE;
+ *ptr = 0;
+
+ return ptr;
+}
+
+
+//
+// cuPrintf
+//
+// Templated printf functions to handle multiple arguments.
+// Note we return the total amount of data copied, not the number
+// of characters output. But then again, who ever looks at the
+// return from printf() anyway?
+//
+// The format is to grab a block of circular buffer space, the
+// start of which will hold a header and a pointer to the format
+// string. We then write in all the arguments, and finally the
+// format string itself. This is to make it easy to prevent
+// overflow of our buffer (we support up to 10 arguments, each of
+// which can be 12 bytes in length - that means that only the
+// format string (or a %s) can actually overflow; so the overflow
+// check need only be in the strcpy function.
+//
+// The header is written at the very last because that's what
+// makes it look like we're done.
+//
+// Errors, which are basically lack-of-initialisation, are ignored
+// in the called functions because NULL pointers are passed around
+//
+
+// All printf variants basically do the same thing, setting up the
+// buffer, writing all arguments, then finalising the header. For
+// clarity, we'll pack the code into some big macros.
+#define CUPRINTF_PREAMBLE \
+ char *start, *end, *bufptr, *fmtstart; \
+ if((start = getNextPrintfBufPtr()) == NULL) return 0; \
+ end = start + CUPRINTF_MAX_LEN; \
+ bufptr = start + sizeof(cuPrintfHeader);
+
+// Posting an argument is easy
+#define CUPRINTF_ARG(argname) \
+ bufptr = copyArg(bufptr, argname, end);
+
+// After args are done, record start-of-fmt and write the fmt and header
+#define CUPRINTF_POSTAMBLE \
+ fmtstart = bufptr; \
+ end = cuPrintfStrncpy(bufptr, fmt, CUPRINTF_MAX_LEN, end); \
+ writePrintfHeader(start, end ? fmtstart : NULL); \
+ return end ? (int)(end - start) : 0;
+
+__device__ int cuPrintf(const char *fmt)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+ CUPRINTF_ARG(arg6);
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+ CUPRINTF_ARG(arg6);
+ CUPRINTF_ARG(arg7);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+ CUPRINTF_ARG(arg6);
+ CUPRINTF_ARG(arg7);
+ CUPRINTF_ARG(arg8);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+ CUPRINTF_ARG(arg6);
+ CUPRINTF_ARG(arg7);
+ CUPRINTF_ARG(arg8);
+ CUPRINTF_ARG(arg9);
+
+ CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10)
+{
+ CUPRINTF_PREAMBLE;
+
+ CUPRINTF_ARG(arg1);
+ CUPRINTF_ARG(arg2);
+ CUPRINTF_ARG(arg3);
+ CUPRINTF_ARG(arg4);
+ CUPRINTF_ARG(arg5);
+ CUPRINTF_ARG(arg6);
+ CUPRINTF_ARG(arg7);
+ CUPRINTF_ARG(arg8);
+ CUPRINTF_ARG(arg9);
+ CUPRINTF_ARG(arg10);
+
+ CUPRINTF_POSTAMBLE;
+}
+#undef CUPRINTF_PREAMBLE
+#undef CUPRINTF_ARG
+#undef CUPRINTF_POSTAMBLE
+
+
+//
+// cuPrintfRestrict
+//
+// Called to restrict output to a given thread/block.
+// We store the info in "restrictRules", which is set up at
+// init time by the host. It's not the cleanest way to do this
+// because it means restrictions will last between
+// invocations, but given the output-pointer continuity,
+// I feel this is reasonable.
+//
+__device__ void cuPrintfRestrict(int threadid, int blockid)
+{
+ int thread_count = blockDim.x * blockDim.y * blockDim.z;
+ if(((threadid < thread_count) && (threadid >= 0)) || (threadid == CUPRINTF_UNRESTRICTED))
+ restrictRules.threadid = threadid;
+
+ int block_count = gridDim.x * gridDim.y;
+ if(((blockid < block_count) && (blockid >= 0)) || (blockid == CUPRINTF_UNRESTRICTED))
+ restrictRules.blockid = blockid;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+
+#include <stdio.h>
+static FILE *printf_fp;
+
+static char *printfbuf_start=NULL;
+static char *printfbuf_device=NULL;
+static int printfbuf_len=0;
+
+
+//
+// outputPrintfData
+//
+// Our own internal function, which takes a pointer to a data buffer
+// and passes it through libc's printf for output.
+//
+// We receive the formate string and a pointer to where the data is
+// held. We then run through and print it out.
+//
+// Returns 0 on failure, 1 on success
+//
+static int outputPrintfData(char *fmt, char *data)
+{
+ // Format string is prefixed by a length that we don't need
+ fmt += CUPRINTF_ALIGN_SIZE;
+
+ // Now run through it, printing everything we can. We must
+ // run to every % character, extract only that, and use printf
+ // to format it.
+ char *p = strchr(fmt, '%');
+ while(p != NULL)
+ {
+ // Print up to the % character
+ *p = '\0';
+ fputs(fmt, printf_fp);
+ *p = '%'; // Put back the %
+
+ // Now handle the format specifier
+ char *format = p++; // Points to the '%'
+ p += strcspn(p, "%cdiouxXeEfgGaAnps");
+ if(*p == '\0') // If no format specifier, print the whole thing
+ {
+ fmt = format;
+ break;
+ }
+
+ // Cut out the format bit and use printf to print it. It's prefixed
+ // by its length.
+ int arglen = *(int *)data;
+ if(arglen > CUPRINTF_MAX_LEN)
+ {
+ fputs("Corrupt printf buffer data - aborting\n", printf_fp);
+ return 0;
+ }
+
+ data += CUPRINTF_ALIGN_SIZE;
+
+ char specifier = *p++;
+ char c = *p; // Store for later
+ *p = '\0';
+ switch(specifier)
+ {
+ // These all take integer arguments
+ case 'c':
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ case 'p':
+ fprintf(printf_fp, format, *((int *)data));
+ break;
+
+ // These all take double arguments
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'g':
+ case 'G':
+ case 'a':
+ case 'A':
+ if(arglen == 4) // Float vs. Double thing
+ fprintf(printf_fp, format, *((float *)data));
+ else
+ fprintf(printf_fp, format, *((double *)data));
+ break;
+
+ // Strings are handled in a special way
+ case 's':
+ fprintf(printf_fp, format, (char *)data);
+ break;
+
+ // % is special
+ case '%':
+ fprintf(printf_fp, "%%");
+ break;
+
+ // Everything else is just printed out as-is
+ default:
+ fprintf(printf_fp, format);
+ break;
+ }
+ data += CUPRINTF_ALIGN_SIZE; // Move on to next argument
+ *p = c; // Restore what we removed
+ fmt = p; // Adjust fmt string to be past the specifier
+ p = strchr(fmt, '%'); // and get the next specifier
+ }
+
+ // Print out the last of the string
+ fputs(fmt, printf_fp);
+ return 1;
+}
+
+
+//
+// doPrintfDisplay
+//
+// This runs through the blocks of CUPRINTF_MAX_LEN-sized data, calling the
+// print function above to display them. We've got this separate from
+// cudaPrintfDisplay() below so we can handle the SM_10 architecture
+// partitioning.
+//
+static int doPrintfDisplay(int headings, int clear, char *bufstart, char *bufend, char *bufptr, char *endptr)
+{
+ // Grab, piece-by-piece, each output element until we catch
+ // up with the circular buffer end pointer
+ int printf_count=0;
+ char printfbuf_local[CUPRINTF_MAX_LEN+1];
+ printfbuf_local[CUPRINTF_MAX_LEN] = '\0';
+
+ while(bufptr != endptr)
+ {
+ // Wrap ourselves at the end-of-buffer
+ if(bufptr == bufend)
+ bufptr = bufstart;
+
+ // Adjust our start pointer to within the circular buffer and copy a block.
+ cudaMemcpy(printfbuf_local, bufptr, CUPRINTF_MAX_LEN, cudaMemcpyDeviceToHost);
+
+ // If the magic number isn't valid, then this write hasn't gone through
+ // yet and we'll wait until it does (or we're past the end for non-async printfs).
+ cuPrintfHeader *hdr = (cuPrintfHeader *)printfbuf_local;
+ if((hdr->magic != CUPRINTF_SM11_MAGIC) || (hdr->fmtoffset >= CUPRINTF_MAX_LEN))
+ {
+ //fprintf(printf_fp, "Bad magic number in printf header\n");
+ break;
+ }
+
+ // Extract all the info and get this printf done
+ if(headings)
+ fprintf(printf_fp, "[%d, %d]: ", hdr->blockid, hdr->threadid);
+ if(hdr->fmtoffset == 0)
+ fprintf(printf_fp, "printf buffer overflow\n");
+ else if(!outputPrintfData(printfbuf_local+hdr->fmtoffset, printfbuf_local+sizeof(cuPrintfHeader)))
+ break;
+ printf_count++;
+
+ // Clear if asked
+ if(clear)
+ cudaMemset(bufptr, 0, CUPRINTF_MAX_LEN);
+
+ // Now advance our start location, because we're done, and keep copying
+ bufptr += CUPRINTF_MAX_LEN;
+ }
+
+ return printf_count;
+}
+
+
+//
+// cudaPrintfInit
+//
+// Takes a buffer length to allocate, creates the memory on the device and
+// returns a pointer to it for when a kernel is called. It's up to the caller
+// to free it.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen)
+{
+ // Fix up bufferlen to be a multiple of CUPRINTF_MAX_LEN
+ bufferLen = (bufferLen < CUPRINTF_MAX_LEN) ? CUPRINTF_MAX_LEN : bufferLen;
+ if((bufferLen % CUPRINTF_MAX_LEN) > 0)
+ bufferLen += (CUPRINTF_MAX_LEN - (bufferLen % CUPRINTF_MAX_LEN));
+ printfbuf_len = (int)bufferLen;
+
+ // Allocate a print buffer on the device and zero it
+ if(cudaMalloc((void **)&printfbuf_device, printfbuf_len) != cudaSuccess)
+ return cudaErrorInitializationError;
+ cudaMemset(printfbuf_device, 0, printfbuf_len);
+ printfbuf_start = printfbuf_device; // Where we start reading from
+
+ // No restrictions to begin with
+ cuPrintfRestriction restrict;
+ restrict.threadid = restrict.blockid = CUPRINTF_UNRESTRICTED;
+ cudaMemcpyToSymbol(restrictRules, &restrict, sizeof(restrict));
+
+ // Initialise the buffer and the respective lengths/pointers.
+ cudaMemcpyToSymbol(globalPrintfBuffer, &printfbuf_device, sizeof(char *));
+ cudaMemcpyToSymbol(printfBufferPtr, &printfbuf_device, sizeof(char *));
+ cudaMemcpyToSymbol(printfBufferLength, &printfbuf_len, sizeof(printfbuf_len));
+
+ return cudaSuccess;
+}
+
+
+//
+// cudaPrintfEnd
+//
+// Frees up the memory which we allocated
+//
+extern "C" void cudaPrintfEnd()
+{
+ if(!printfbuf_start || !printfbuf_device)
+ return;
+
+ cudaFree(printfbuf_device);
+ printfbuf_start = printfbuf_device = NULL;
+}
+
+
+//
+// cudaPrintfDisplay
+//
+// Each call to this function dumps the entire current contents
+// of the printf buffer to the pre-specified FILE pointer. The
+// circular "start" pointer is advanced so that subsequent calls
+// dumps only new stuff.
+//
+// In the case of async memory access (via streams), call this
+// repeatedly to keep trying to empty the buffer. If it's a sync
+// access, then the whole buffer should empty in one go.
+//
+// Arguments:
+// outputFP - File descriptor to output to (NULL => stdout)
+// showThreadID - If true, prints [block,thread] before each line
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP, bool showThreadID)
+{
+ printf_fp = (FILE *)((outputFP == NULL) ? stdout : outputFP);
+
+ // For now, we force "synchronous" mode which means we're not concurrent
+ // with kernel execution. This also means we don't need clearOnPrint.
+ // If you're patching it for async operation, here's where you want it.
+ bool sync_printfs = true;
+ bool clearOnPrint = false;
+
+ // Initialisation check
+ if(!printfbuf_start || !printfbuf_device || !printf_fp)
+ return cudaErrorMissingConfiguration;
+
+ // To determine which architecture we're using, we read the
+ // first short from the buffer - it'll be the magic number
+ // relating to the version.
+ unsigned short magic;
+ cudaMemcpy(&magic, printfbuf_device, sizeof(unsigned short), cudaMemcpyDeviceToHost);
+
+ // For SM_10 architecture, we've split our buffer into one-per-thread.
+ // That means we must do each thread block separately. It'll require
+ // extra reading. We also, for now, don't support async printfs because
+ // that requires tracking one start pointer per thread.
+ if(magic == CUPRINTF_SM10_MAGIC)
+ {
+ sync_printfs = true;
+ clearOnPrint = false;
+ int blocklen = 0;
+ char *blockptr = printfbuf_device;
+ while(blockptr < (printfbuf_device + printfbuf_len))
+ {
+ cuPrintfHeaderSM10 hdr;
+ cudaMemcpy(&hdr, blockptr, sizeof(hdr), cudaMemcpyDeviceToHost);
+
+ // We get our block-size-step from the very first header
+ if(hdr.thread_buf_len != 0)
+ blocklen = hdr.thread_buf_len;
+
+ // No magic number means no printfs from this thread
+ if(hdr.magic != CUPRINTF_SM10_MAGIC)
+ {
+ if(blocklen == 0)
+ {
+ fprintf(printf_fp, "No printf headers found at all!\n");
+ break; // No valid headers!
+ }
+ blockptr += blocklen;
+ continue;
+ }
+
+ // "offset" is non-zero then we can print the block contents
+ if(hdr.offset > 0)
+ {
+ // For synchronous printfs, we must print from endptr->bufend, then from start->end
+ if(sync_printfs)
+ doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+hdr.offset+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len);
+ doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.offset+CUPRINTF_MAX_LEN);
+ }
+
+ // Move on to the next block and loop again
+ blockptr += hdr.thread_buf_len;
+ }
+ }
+ // For SM_11 and up, everything is a single buffer and it's simple
+ else if(magic == CUPRINTF_SM11_MAGIC)
+ {
+ // Grab the current "end of circular buffer" pointer.
+ char *printfbuf_end = NULL;
+ cudaMemcpyFromSymbol(&printfbuf_end, printfBufferPtr, sizeof(char *));
+
+ // Adjust our starting and ending pointers to within the block
+ char *bufptr = ((printfbuf_start - printfbuf_device) % printfbuf_len) + printfbuf_device;
+ char *endptr = ((printfbuf_end - printfbuf_device) % printfbuf_len) + printfbuf_device;
+
+ // For synchronous (i.e. after-kernel-exit) printf display, we have to handle circular
+ // buffer wrap carefully because we could miss those past "end".
+ if(sync_printfs)
+ doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, endptr, printfbuf_device+printfbuf_len);
+ doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, bufptr, endptr);
+
+ printfbuf_start = printfbuf_end;
+ }
+ else
+ ;//printf("Bad magic number in cuPrintf buffer header\n");
+
+ // If we were synchronous, then we must ensure that the memory is cleared on exit
+ // otherwise another kernel launch with a different grid size could conflict.
+ if(sync_printfs)
+ cudaMemset(printfbuf_device, 0, printfbuf_len);
+
+ return cudaSuccess;
+}
+
+// Cleanup
+#undef CUPRINTF_MAX_LEN
+#undef CUPRINTF_ALIGN_SIZE
+#undef CUPRINTF_SM10_MAGIC
+#undef CUPRINTF_SM11_MAGIC
+
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
new file mode 100644
index 000000000..cf3fe4868
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
@@ -0,0 +1,162 @@
+/*
+ Copyright 2009 NVIDIA Corporation. All rights reserved.
+
+ NOTICE TO LICENSEE:
+
+ This source code and/or documentation ("Licensed Deliverables") are subject
+ to NVIDIA intellectual property rights under U.S. and international Copyright
+ laws.
+
+ These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL
+ to NVIDIA and is being provided under the terms and conditions of a form of
+ NVIDIA software license agreement by and between NVIDIA and Licensee ("License
+ Agreement") or electronically accepted by Licensee. Notwithstanding any terms
+ or conditions to the contrary in the License Agreement, reproduction or
+ disclosure of the Licensed Deliverables to any third party without the express
+ written consent of NVIDIA is prohibited.
+
+ NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT,
+ NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED
+ DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED
+ WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE
+ LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY
+ TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL
+ NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.
+
+ U.S. Government End Users. These Licensed Deliverables are a "commercial item"
+ as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
+ "commercial computer software" and "commercial computer software documentation"
+ as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the
+ U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212
+ and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government
+ End Users acquire the Licensed Deliverables with only those rights set forth
+ herein.
+
+ Any use of the Licensed Deliverables in individual and commercial software must
+ include, in the user documentation and internal comments to the code, the above
+ Disclaimer and U.S. Government End Users Notice.
+ */
+
+#ifndef CUPRINTF_H
+#define CUPRINTF_H
+
+/*
+ * This is the header file supporting cuPrintf.cu and defining both
+ * the host and device-side interfaces. See that file for some more
+ * explanation and sample use code. See also below for details of the
+ * host-side interfaces.
+ *
+ * Quick sample code:
+ *
+ #include "cuPrintf.cu"
+
+ __global__ void testKernel(int val)
+ {
+ cuPrintf("Value is: %d\n", val);
+ }
+
+ int main()
+ {
+ cudaPrintfInit();
+ testKernel<<< 2, 3 >>>(10);
+ cudaPrintfDisplay(stdout, true);
+ cudaPrintfEnd();
+ return 0;
+ }
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+// DEVICE SIDE
+// External function definitions for device-side code
+
+// Abuse of templates to simulate varargs
+__device__ int cuPrintf(const char *fmt);
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
+
+
+//
+// cuPrintfRestrict
+//
+// Called to restrict output to a given thread/block. Pass
+// the constant CUPRINTF_UNRESTRICTED to unrestrict output
+// for thread/block IDs. Note you can therefore allow
+// "all printfs from block 3" or "printfs from thread 2
+// on all blocks", or "printfs only from block 1, thread 5".
+//
+// Arguments:
+// threadid - Thread ID to allow printfs from
+// blockid - Block ID to allow printfs from
+//
+// NOTE: Restrictions last between invocations of
+// kernels unless cudaPrintfInit() is called again.
+//
+#define CUPRINTF_UNRESTRICTED -1
+__device__ void cuPrintfRestrict(int threadid, int blockid);
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+// External function definitions for host-side code
+
+//
+// cudaPrintfInit
+//
+// Call this once to initialise the printf system. If the output
+// file or buffer size needs to be changed, call cudaPrintfEnd()
+// before re-calling cudaPrintfInit().
+//
+// The default size for the buffer is 1 megabyte. For CUDA
+// architecture 1.1 and above, the buffer is filled linearly and
+// is completely used; however for architecture 1.0, the buffer
+// is divided into as many segments are there are threads, even
+// if some threads do not call cuPrintf().
+//
+// Arguments:
+// bufferLen - Length, in bytes, of total space to reserve
+// (in device global memory) for output.
+//
+// Returns:
+// cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together
+
+//
+// cudaPrintfEnd
+//
+// Cleans up all memories allocated by cudaPrintfInit().
+// Call this at exit, or before calling cudaPrintfInit() again.
+//
+extern "C" void cudaPrintfEnd();
+
+//
+// cudaPrintfDisplay
+//
+// Dumps the contents of the output buffer to the specified
+// file pointer. If the output pointer is not specified,
+// the default "stdout" is used.
+//
+// Arguments:
+// outputFP - A file pointer to an output stream.
+// showThreadID - If "true", output strings are prefixed
+// by "[blockid, threadid] " at output.
+//
+// Returns:
+// cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
+
+#endif // CUPRINTF_H
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
new file mode 100644
index 000000000..3e53c8853
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
@@ -0,0 +1,27 @@
+#include "cuPrintf.cu"
+#include <stdio.h>
+
+__global__ void device_greetings(void)
+{
+ cuPrintf("Hello, world from the device!\n");
+}
+
+int main(void)
+{
+ // greet from the host
+ printf("Hello, world from the host!\n");
+
+ // initialize cuPrintf
+ cudaPrintfInit();
+
+ // launch a kernel with a single thread to greet from the device
+ device_greetings<<<1,1>>>();
+
+ // display the device's greeting
+ cudaPrintfDisplay();
+
+ // clean up after cuPrintf
+ cudaPrintfEnd();
+
+ return 0;
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
new file mode 100644
index 000000000..bef63ef0a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
@@ -0,0 +1,33 @@
+set(LIBRARY ethash)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT MSVC)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
+endif()
+
+set(FILES util.c
+ util.h
+ internal.c
+ ethash.h
+ endian.h
+ compiler.h
+ fnv.h
+ data_sizes.h)
+
+if (NOT CRYPTOPP_FOUND)
+ find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+ add_definitions(-DWITH_CRYPTOPP)
+ include_directories( ${CRYPTOPP_INCLUDE_DIRS} )
+ list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h)
+else()
+ list(APPEND FILES sha3.c sha3.h)
+endif()
+
+add_library(${LIBRARY} ${FILES})
+
+if (CRYPTOPP_FOUND)
+ TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES})
+endif() \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
new file mode 100644
index 000000000..9695871cd
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
@@ -0,0 +1,33 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file compiler.h
+ * @date 2014
+ */
+#pragma once
+
+// Visual Studio doesn't support the inline keyword in C mode
+#if defined(_MSC_VER) && !defined(__cplusplus)
+#define inline __inline
+#endif
+
+// pretend restrict is a standard keyword
+#if defined(_MSC_VER)
+#define restrict __restrict
+#else
+#define restrict __restrict__
+#endif
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
new file mode 100644
index 000000000..40417cb71
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
@@ -0,0 +1,248 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software FoundationUUU,either version 3 of the LicenseUUU,or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be usefulU,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If notUUU,see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file nth_prime.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+// TODO: Update this after ~7 years
+
+#pragma once
+
+#include <stdint.h>
+//#include <Security/Security.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetDataSizes[n_] := Module[{
+// DAGSizeBytesInit = 2^30,
+// MixBytes = 128,
+// DAGGrowth = 113000000,
+// j = 0},
+// Reap[
+// While[j < n,
+// Module[{i =
+// Floor[(DAGSizeBytesInit + DAGGrowth * j) / MixBytes]},
+// While[! PrimeQ[i], i--];
+// Sow[i*MixBytes]; j++]]]][[2]][[1]]
+
+static const size_t dag_sizes[] = {
+ 1073739904U, 1186739584U, 1299741568U, 1412741248U, 1525741696U,
+ 1638736768U, 1751741312U, 1864740736U, 1977740672U, 2090740864U,
+ 2203740544U, 2316741248U, 2429739392U, 2542740352U, 2655741824U,
+ 2768739712U, 2881740416U, 2994741632U, 3107740544U, 3220741504U,
+ 3333738112U, 3446741632U, 3559741312U, 3672740224U, 3785740928U,
+ 3898738304U, 4011741824U, 4124739712U, 4237735808U, 4350740864U,
+ 4463741824U, 4576741504U, 4689741184U, 4802739328U, 4915741568U,
+ 5028740224U, 5141740672U, 5254738304U, 5367741824U, 5480737664U,
+ 5593738112U, 5706741632U, 5819740544U, 5932734592U, 6045739904U,
+ 6158740096U, 6271740032U, 6384731776U, 6497732992U, 6610740352U,
+ 6723741056U, 6836741504U, 6949740416U, 7062740096U, 7175741824U,
+ 7288740224U, 7401741184U, 7514741632U, 7627741568U, 7740739712U,
+ 7853739136U, 7966740352U, 8079741568U, 8192739712U, 8305738624U,
+ 8418740864U, 8531740288U, 8644740736U, 8757735808U, 8870738816U,
+ 8983739264U, 9096740992U, 9209740928U, 9322739584U, 9435741824U,
+ 9548741504U, 9661739392U, 9774738304U, 9887741312U, 10000738688U,
+ 10113739136U, 10226741632U, 10339739776U, 10452741248U, 10565740928U,
+ 10678736512U, 10791734656U, 10904741248U, 11017738112U, 11130741632U,
+ 11243741312U, 11356739456U, 11469740416U, 11582734976U, 11695739008U,
+ 11808741248U, 11921734784U, 12034739072U, 12147741568U, 12260737408U,
+ 12373741696U, 12486738304U, 12599740544U, 12712740224U, 12825741184U,
+ 12938736256U, 13051741312U, 13164737408U, 13277738368U, 13390738048U,
+ 13503741824U, 13616741504U, 13729737088U, 13842740096U, 13955741312U,
+ 14068741504U, 14181740416U, 14294741632U, 14407739776U, 14520740224U,
+ 14633740928U, 14746736512U, 14859741824U, 14972740736U, 15085740928U,
+ 15198738304U, 15311732096U, 15424740736U, 15537739904U, 15650741632U,
+ 15763741568U, 15876737152U, 15989741696U, 16102740608U, 16215741056U,
+ 16328741248U, 16441740416U, 16554737792U, 16667740288U, 16780740992U,
+ 16893738112U, 17006741632U, 17119739008U, 17232735616U, 17345739392U,
+ 17458740352U, 17571736192U, 17684739712U, 17797739392U, 17910740096U,
+ 18023741312U, 18136740736U, 18249738112U, 18362738816U, 18475735424U,
+ 18588740224U, 18701738368U, 18814736768U, 18927737216U, 19040739968U,
+ 19153739648U, 19266736768U, 19379737984U, 19492739456U, 19605738368U,
+ 19718740352U, 19831741312U, 19944736384U, 20057741696U, 20170741376U,
+ 20283741824U, 20396737408U, 20509741696U, 20622741376U, 20735739008U,
+ 20848741504U, 20961740672U, 21074739328U, 21187740032U, 21300739456U,
+ 21413741696U, 21526740608U, 21639741824U, 21752737408U, 21865741696U,
+ 21978741376U, 22091741824U, 22204738432U, 22317740672U, 22430740096U,
+ 22543736704U, 22656741248U, 22769739904U, 22882739584U, 22995740288U,
+ 23108740736U, 23221740928U, 23334741376U, 23447737216U, 23560740992U,
+ 23673741184U, 23786740864U, 23899737728U, 24012741248U, 24125734784U,
+ 24238736512U, 24351741824U, 24464740736U, 24577737088U, 24690741632U,
+ 24803739776U, 24916740736U, 25029740416U, 25142740864U, 25255741568U,
+ 25368741248U, 25481740672U, 25594741376U, 25707741568U, 25820741504U,
+ 25933730432U, 26046739072U, 26159741824U, 26272741504U, 26385740672U,
+ 26498740096U, 26611741568U, 26724740992U, 26837739904U, 26950735232U,
+ 27063738496U, 27176741248U, 27289741184U, 27402740864U, 27515740544U,
+ 27628737152U, 27741740672U, 27854741632U, 27967740544U, 28080739712U,
+ 28193738368U, 28306741376U, 28419737728U, 28532739968U, 28645739648U,
+ 28758740096U, 28871741312U, 28984739456U, 29097740416U, 29210740864U,
+ 29323741312U, 29436740224U, 29549741696U, 29662738304U, 29775741568U,
+ 29888741504U, 30001740928U, 30114737024U, 30227735168U, 30340737664U,
+ 30453738368U, 30566737024U, 30679733632U, 30792740224U, 30905740928U,
+ 31018740352U, 31131740032U, 31244738944U, 31357737344U, 31470741376U,
+ 31583740544U, 31696740224U, 31809738112U, 31922739328U, 32035737472U,
+ 32148740992U, 32261741696U, 32374740352U, 32487741824U, 32600740736U,
+ 32713739648U, 32826740608U, 32939729792U, 33052740992U, 33165740672U,
+ 33278739584U, 33391741312U, 33504739712U, 33617740928U, 33730740608U,
+ 33843738496U, 33956739968U, 34069741696U, 34182739328U, 34295741824U,
+ 34408739968U, 34521740672U, 34634736512U, 34747741568U, 34860741248U,
+ 34973739392U, 35086738304U, 35199741056U, 35312736896U, 35425741184U,
+ 35538741376U, 35651740288U, 35764737152U, 35877741184U, 35990739584U,
+ 36103740544U, 36216740992U, 36329739392U, 36442737536U, 36555741568U,
+ 36668740736U, 36781741184U, 36894737024U, 37007741312U, 37120739456U,
+ 37233741184U, 37346736256U, 37459736192U, 37572734336U, 37685739904U,
+ 37798740352U, 37911737728U, 38024741504U, 38137739648U, 38250740608U,
+ 38363741824U, 38476740992U, 38589741184U, 38702740096U, 38815741312U,
+ 38928741248U, 39041738368U, 39154739584U, 39267741824U, 39380739712U,
+ 39493735808U, 39606741632U, 39719741312U, 39832741504U, 39945739648U,
+ 40058740352U, 40171740032U, 40284740992U, 40397740672U, 40510740352U,
+ 40623740288U, 40736738176U, 40849737856U, 40962741376U, 41075739776U,
+ 41188737664U, 41301735808U, 41414738048U, 41527741312U, 41640740992U,
+ 41753739904U, 41866739072U, 41979738496U, 42092740736U, 42205739648U,
+ 42318740608U, 42431741312U, 42544738688U, 42657741184U, 42770738048U,
+ 42883741568U, 42996741248U, 43109740928U, 43222736512U, 43335741056U,
+ 43448730496U, 43561740416U, 43674741632U, 43787740544U, 43900741504U,
+ 44013739648U, 44126740864U, 44239740544U, 44352741248U, 44465738368U,
+ 44578735232U, 44691739264U, 44804741504U, 44917741696U, 45030741376U,
+ 45143741824U, 45256740992U, 45369739136U, 45482740096U, 45595739776U,
+ 45708739712U, 45821740672U, 45934741376U, 46047741056U, 46160741248U,
+ 46273737088U, 46386740864U, 46499739008U, 46612739968U, 46725735296U,
+ 46838740864U, 46951741568U, 47064737152U, 47177741696U, 47290741376U,
+ 47403738752U, 47516741248U, 47629739648U, 47742741632U, 47855737984U,
+ 47968740224U, 48081738368U, 48194741632U, 48307739264U, 48420739712U,
+ 48533739136U, 48646738304U, 48759741824U, 48872741504U, 48985739392U,
+ 49098741376U, 49211741056U, 49324740992U, 49437738368U, 49550740864U,
+ 49663735424U, 49776737408U, 49889740672U, 50002738816U, 50115738752U,
+ 50228739712U, 50341741696U, 50454736768U, 50567738752U, 50680739968U,
+ 50793736832U, 50906734976U, 51019741568U, 51132739456U, 51245741696U,
+ 51358741376U, 51471741056U, 51584738944U, 51697734272U, 51810739072U,
+ 51923736448U, 52036740736U, 52149741184U, 52262737024U, 52375738496U,
+ 52488740992U, 52601739136U, 52714740352U, 52827736448U, 52940738176U,
+ 53053741696U, 53166740864U, 53279741824U, 53392741504U, 53505739136U,
+ 53618739584U, 53731741312U, 53844741248U, 53957741696U, 54070741376U,
+ 54183740288U, 54296741504U, 54409741696U, 54522739072U, 54635737472U,
+ 54748741504U, 54861736064U, 54974740096U, 55087741568U, 55200733568U,
+ 55313741696U, 55426734464U, 55539741056U, 55652741504U, 55765741184U,
+ 55878741376U, 55991730304U, 56104740992U, 56217740672U, 56330731648U,
+ 56443737472U, 56556724352U, 56669740672U, 56782739072U, 56895740032U,
+ 57008741248U, 57121741696U, 57234740096U, 57347741312U, 57460741504U
+};
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetCacheSizes[n_] := Module[{
+// DAGSizeBytesInit = 2^30,
+// MixBytes = 128,
+// DAGGrowth = 113000000,
+// HashBytes = 64,
+// DAGParents = 1024,
+// j = 0},
+// Reap[
+// While[j < n,
+// Module[{i = Floor[(DAGSizeBytesInit + DAGGrowth * j) / (DAGParents * HashBytes)]},
+// While[! PrimeQ[i], i--];
+// Sow[i*HashBytes]; j++]]]][[2]][[1]]
+
+const size_t cache_sizes[] = {
+ 1048384U, 1158208U, 1268416U, 1377856U, 1489856U, 1599296U, 1710656U,
+ 1820608U, 1930816U, 2041024U, 2151872U, 2261696U, 2371904U, 2482624U,
+ 2593216U, 2703296U, 2814016U, 2924224U, 3034816U, 3144896U, 3255488U,
+ 3365312U, 3475904U, 3586624U, 3696064U, 3806272U, 3917504U, 4027456U,
+ 4138304U, 4248512U, 4359104U, 4469312U, 4579264U, 4689728U, 4797376U,
+ 4909888U, 5020096U, 5131328U, 5241664U, 5351744U, 5461312U, 5572544U,
+ 5683264U, 5793472U, 5903552U, 6014144U, 6121664U, 6235072U, 6344896U,
+ 6454592U, 6565952U, 6675904U, 6786112U, 6896704U, 7006784U, 7117888U,
+ 7228096U, 7338304U, 7448768U, 7557952U, 7669184U, 7779776U, 7889216U,
+ 8000192U, 8110912U, 8220736U, 8331712U, 8441536U, 8552384U, 8662592U,
+ 8772928U, 8883136U, 8993728U, 9103168U, 9214528U, 9323968U, 9434816U,
+ 9545152U, 9655616U, 9766336U, 9876544U, 9986624U, 10097344U, 10207424U,
+ 10316864U, 10427968U, 10538432U, 10649152U, 10758976U, 10869568U, 10979776U,
+ 11089472U, 11200832U, 11309632U, 11420608U, 11531584U, 11641792U, 11751104U,
+ 11862976U, 11973184U, 12083264U, 12193856U, 12304064U, 12414656U, 12524608U,
+ 12635072U, 12745792U, 12855616U, 12965824U, 13076416U, 13187008U, 13297216U,
+ 13407808U, 13518016U, 13627072U, 13738688U, 13848256U, 13959488U, 14069696U,
+ 14180288U, 14290624U, 14399552U, 14511424U, 14621504U, 14732096U, 14841664U,
+ 14951744U, 15062336U, 15172672U, 15283264U, 15393088U, 15504448U, 15614272U,
+ 15723712U, 15834944U, 15945152U, 16055744U, 16165696U, 16277056U, 16387136U,
+ 16494784U, 16607936U, 16718272U, 16828736U, 16938176U, 17048384U, 17159872U,
+ 17266624U, 17380544U, 17490496U, 17600192U, 17711296U, 17821376U, 17931968U,
+ 18041152U, 18152896U, 18261952U, 18373568U, 18483392U, 18594112U, 18703936U,
+ 18814912U, 18924992U, 19034944U, 19145408U, 19256128U, 19366208U, 19477184U,
+ 19587136U, 19696576U, 19808192U, 19916992U, 20028352U, 20137664U, 20249024U,
+ 20358848U, 20470336U, 20580544U, 20689472U, 20801344U, 20911424U, 21020096U,
+ 21130688U, 21242176U, 21352384U, 21462208U, 21573824U, 21683392U, 21794624U,
+ 21904448U, 22013632U, 22125248U, 22235968U, 22344512U, 22456768U, 22566848U,
+ 22677056U, 22786496U, 22897984U, 23008064U, 23118272U, 23228992U, 23338816U,
+ 23449408U, 23560256U, 23670464U, 23780672U, 23891264U, 24001216U, 24110656U,
+ 24221888U, 24332608U, 24442688U, 24552512U, 24662464U, 24773696U, 24884032U,
+ 24994496U, 25105216U, 25215296U, 25324864U, 25435712U, 25546432U, 25655744U,
+ 25767232U, 25876672U, 25986368U, 26098112U, 26207936U, 26318912U, 26428736U,
+ 26539712U, 26650048U, 26760256U, 26869184U, 26979776U, 27091136U, 27201728U,
+ 27311552U, 27422272U, 27532352U, 27642304U, 27752896U, 27863744U, 27973952U,
+ 28082752U, 28194752U, 28305344U, 28415168U, 28524992U, 28636352U, 28746304U,
+ 28857152U, 28967104U, 29077184U, 29187904U, 29298496U, 29408576U, 29518912U,
+ 29628992U, 29739968U, 29850176U, 29960512U, 30070336U, 30180544U, 30290752U,
+ 30398912U, 30512192U, 30622784U, 30732992U, 30842176U, 30953536U, 31063744U,
+ 31174336U, 31284544U, 31395136U, 31504448U, 31615552U, 31725632U, 31835072U,
+ 31946176U, 32057024U, 32167232U, 32277568U, 32387008U, 32497984U, 32608832U,
+ 32719168U, 32829376U, 32939584U, 33050048U, 33160768U, 33271232U, 33381184U,
+ 33491648U, 33601856U, 33712576U, 33822016U, 33932992U, 34042816U, 34153024U,
+ 34263104U, 34373824U, 34485056U, 34594624U, 34704832U, 34816064U, 34926272U,
+ 35036224U, 35146816U, 35255104U, 35367104U, 35478208U, 35588416U, 35698496U,
+ 35808832U, 35918656U, 36029888U, 36139456U, 36250688U, 36360512U, 36471104U,
+ 36581696U, 36691136U, 36802112U, 36912448U, 37022912U, 37132864U, 37242944U,
+ 37354048U, 37464512U, 37574848U, 37684928U, 37794752U, 37904704U, 38015552U,
+ 38125888U, 38236864U, 38345792U, 38457152U, 38567744U, 38678336U, 38787776U,
+ 38897216U, 39009088U, 39117632U, 39230144U, 39340352U, 39450304U, 39560384U,
+ 39671488U, 39781312U, 39891392U, 40002112U, 40112704U, 40223168U, 40332608U,
+ 40443968U, 40553792U, 40664768U, 40774208U, 40884416U, 40993984U, 41105984U,
+ 41215424U, 41326528U, 41436992U, 41546048U, 41655872U, 41768128U, 41878336U,
+ 41988928U, 42098752U, 42209344U, 42319168U, 42429248U, 42540352U, 42649792U,
+ 42761024U, 42871616U, 42981824U, 43092032U, 43201856U, 43312832U, 43423552U,
+ 43533632U, 43643584U, 43753792U, 43864384U, 43974976U, 44084032U, 44195392U,
+ 44306368U, 44415296U, 44526016U, 44637248U, 44746816U, 44858048U, 44967872U,
+ 45078848U, 45188288U, 45299264U, 45409216U, 45518272U, 45630272U, 45740224U,
+ 45850432U, 45960896U, 46069696U, 46182208U, 46292416U, 46402624U, 46512064U,
+ 46623296U, 46733888U, 46843712U, 46953664U, 47065024U, 47175104U, 47285696U,
+ 47395904U, 47506496U, 47615296U, 47726912U, 47837632U, 47947712U, 48055232U,
+ 48168128U, 48277952U, 48387392U, 48499648U, 48609472U, 48720064U, 48830272U,
+ 48940096U, 49050944U, 49160896U, 49271744U, 49381568U, 49492288U, 49602752U,
+ 49712576U, 49822016U, 49934272U, 50042816U, 50154304U, 50264128U, 50374336U,
+ 50484416U, 50596288U, 50706752U, 50816704U, 50927168U, 51035456U, 51146944U,
+ 51258176U, 51366976U, 51477824U, 51589568U, 51699776U, 51809728U, 51920576U,
+ 52030016U, 52140736U, 52251328U, 52361152U, 52470592U, 52582592U, 52691776U,
+ 52803136U, 52912576U, 53020736U, 53132224U, 53242688U, 53354816U, 53465536U,
+ 53575232U, 53685568U, 53796544U, 53906752U, 54016832U, 54126656U, 54236992U,
+ 54347456U, 54457408U, 54569024U, 54679232U, 54789184U, 54899776U, 55008832U,
+ 55119296U, 55231168U, 55341248U, 55451584U, 55562048U, 55672256U, 55782208U,
+ 55893184U, 56002112U, 56113216U
+};
+
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
new file mode 100644
index 000000000..9ca842e47
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdint.h>
+#include "compiler.h"
+
+static const uint8_t BitReverseTable256[] =
+ {
+ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+ 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+ 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+ 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+ 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+ 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+ 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+ 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+ 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+ 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+ 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+ 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+ 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+ 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+ 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+ 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+ };
+
+static inline uint32_t bitfn_swap32(uint32_t a) {
+ return (BitReverseTable256[a & 0xff] << 24) |
+ (BitReverseTable256[(a >> 8) & 0xff] << 16) |
+ (BitReverseTable256[(a >> 16) & 0xff] << 8) |
+ (BitReverseTable256[(a >> 24) & 0xff]);
+}
+
+static inline uint64_t bitfn_swap64(uint64_t a) {
+ return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) |
+ (((uint64_t) bitfn_swap32((uint32_t) a)) << 32);
+}
+
+#if defined(__MINGW32__) || defined(_WIN32)
+ # define LITTLE_ENDIAN 1234
+ # define BYTE_ORDER LITTLE_ENDIAN
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
+ # include <sys/endian.h>
+#elif defined(__OpenBSD__) || defined(__SVR4)
+ # include <sys/types.h>
+#elif defined(__APPLE__)
+# include <machine/endian.h>
+#elif defined( BSD ) && (BSD >= 199103)
+ # include <machine/endian.h>
+#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ )
+ # define LITTLE_ENDIAN 1234
+ # define BYTE_ORDER LITTLE_ENDIAN
+#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ )
+ # define BIG_ENDIAN 1234
+ # define BYTE_ORDER BIG_ENDIAN
+#else
+
+# include <endian.h>
+
+#endif
+
+
+#if LITTLE_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) (x)
+#define fix_endian64(x) (x)
+
+#elif BIG_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) bitfn_swap32(x)
+#define fix_endian64(x) bitfn_swap64(x)
+
+#else
+# error "endian not supported"
+#endif // BYTE_ORDER \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
new file mode 100644
index 000000000..62edd0082
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
@@ -0,0 +1,88 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash.h
+* @date 2015
+*/
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stddef.h>
+#include "compiler.h"
+
+#define REVISION 18
+#define DAGSIZE_BYTES_INIT 1073741824U
+#define DAG_GROWTH 113000000U
+#define EPOCH_LENGTH 30000U
+#define MIX_BYTES 128
+#define DAG_PARENTS 256
+#define CACHE_ROUNDS 3
+#define ACCESSES 64
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ethash_params {
+ size_t full_size; // Size of full data set (in bytes, multiple of mix size (128)).
+ size_t cache_size; // Size of compute cache (in bytes, multiple of node size (64)).
+} ethash_params;
+
+typedef struct ethash_return_value {
+ uint8_t result[32];
+ uint8_t mix_hash[32];
+} ethash_return_value;
+
+size_t const ethash_get_datasize(const uint32_t block_number);
+size_t const ethash_get_cachesize(const uint32_t block_number);
+
+// initialize the parameters
+static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) {
+ params->full_size = ethash_get_datasize(block_number);
+ params->cache_size = ethash_get_cachesize(block_number);
+}
+
+typedef struct ethash_cache {
+ void *mem;
+} ethash_cache;
+
+void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]);
+void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache);
+void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+
+static inline int ethash_check_difficulty(
+ const uint8_t hash[32],
+ const uint8_t difficulty[32]) {
+ // Difficulty is big endian
+ for (int i = 0; i < 32; i++) {
+ if (hash[i] == difficulty[i]) continue;
+ return hash[i] < difficulty[i];
+ }
+ return 0;
+}
+
+int ethash_quick_check_difficulty(
+ const uint8_t header_hash[32],
+ const uint64_t nonce,
+ const uint8_t mix_hash[32],
+ const uint8_t difficulty[32]);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
new file mode 100644
index 000000000..edabeaae2
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
@@ -0,0 +1,38 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file fnv.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FNV_PRIME 0x01000193
+
+static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) {
+ return x*FNV_PRIME ^ y;
+}
+
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
new file mode 100644
index 000000000..cc48717d0
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
@@ -0,0 +1,297 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file dash.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @author Matthew Wampler-Doty
+* @date 2015
+*/
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include "ethash.h"
+#include "fnv.h"
+#include "endian.h"
+#include "internal.h"
+#include "data_sizes.h"
+
+#ifdef WITH_CRYPTOPP
+
+#include "SHA3_cryptopp.h"
+
+#else
+#include "sha3.h"
+#endif // WITH_CRYPTOPP
+
+size_t const ethash_get_datasize(const uint32_t block_number) {
+ assert(block_number / EPOCH_LENGTH < 500);
+ return dag_sizes[block_number / EPOCH_LENGTH];
+}
+
+size_t const ethash_get_cachesize(const uint32_t block_number) {
+ assert(block_number / EPOCH_LENGTH < 500);
+ return cache_sizes[block_number / EPOCH_LENGTH];
+}
+
+// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014)
+// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf
+// SeqMemoHash(s, R, N)
+void static ethash_compute_cache_nodes(
+ node *const nodes,
+ ethash_params const *params,
+ const uint8_t seed[32]) {
+ assert((params->cache_size % sizeof(node)) == 0);
+ uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node));
+
+ SHA3_512(nodes[0].bytes, seed, 32);
+
+ for (unsigned i = 1; i != num_nodes; ++i) {
+ SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64);
+ }
+
+ for (unsigned j = 0; j != CACHE_ROUNDS; j++) {
+ for (unsigned i = 0; i != num_nodes; i++) {
+ uint32_t const idx = nodes[i].words[0] % num_nodes;
+ node data;
+ data = nodes[(num_nodes - 1 + i) % num_nodes];
+ for (unsigned w = 0; w != NODE_WORDS; ++w)
+ {
+ data.words[w] ^= nodes[idx].words[w];
+ }
+ SHA3_512(nodes[i].bytes, data.bytes, sizeof(data));
+ }
+ }
+
+ // now perform endian conversion
+#if BYTE_ORDER != LITTLE_ENDIAN
+ for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w)
+ {
+ nodes->words[w] = fix_endian32(nodes->words[w]);
+ }
+#endif
+}
+
+void ethash_mkcache(
+ ethash_cache *cache,
+ ethash_params const *params,
+ const uint8_t seed[32]) {
+ node *nodes = (node *) cache->mem;
+ ethash_compute_cache_nodes(nodes, params, seed);
+}
+
+void ethash_calculate_dag_item(
+ node *const ret,
+ const unsigned node_index,
+ const struct ethash_params *params,
+ const struct ethash_cache *cache) {
+
+ uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node));
+ node const *cache_nodes = (node const *) cache->mem;
+ node const *init = &cache_nodes[node_index % num_parent_nodes];
+
+ memcpy(ret, init, sizeof(node));
+ ret->words[0] ^= node_index;
+ SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+
+#if defined(_M_X64) && ENABLE_SSE
+ __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME);
+ __m128i xmm0 = ret->xmm[0];
+ __m128i xmm1 = ret->xmm[1];
+ __m128i xmm2 = ret->xmm[2];
+ __m128i xmm3 = ret->xmm[3];
+#endif
+
+ for (unsigned i = 0; i != DAG_PARENTS; ++i)
+ {
+ uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes;
+ node const *parent = &cache_nodes[parent_index];
+
+ #if defined(_M_X64) && ENABLE_SSE
+ {
+ xmm0 = _mm_mullo_epi32(xmm0, fnv_prime);
+ xmm1 = _mm_mullo_epi32(xmm1, fnv_prime);
+ xmm2 = _mm_mullo_epi32(xmm2, fnv_prime);
+ xmm3 = _mm_mullo_epi32(xmm3, fnv_prime);
+ xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]);
+ xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]);
+ xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]);
+ xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]);
+
+ // have to write to ret as values are used to compute index
+ ret->xmm[0] = xmm0;
+ ret->xmm[1] = xmm1;
+ ret->xmm[2] = xmm2;
+ ret->xmm[3] = xmm3;
+ }
+ #else
+ {
+ for (unsigned w = 0; w != NODE_WORDS; ++w) {
+ ret->words[w] = fnv_hash(ret->words[w], parent->words[w]);
+ }
+ }
+ #endif
+ }
+
+ SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+}
+
+void ethash_compute_full_data(
+ void *mem,
+ ethash_params const *params,
+ ethash_cache const *cache) {
+ assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0);
+ assert((params->full_size % sizeof(node)) == 0);
+ node *full_nodes = mem;
+
+ // now compute full nodes
+ for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) {
+ ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache);
+ }
+}
+
+static void ethash_hash(
+ ethash_return_value * ret,
+ node const *full_nodes,
+ ethash_cache const *cache,
+ ethash_params const *params,
+ const uint8_t header_hash[32],
+ const uint64_t nonce) {
+
+ assert((params->full_size % MIX_WORDS) == 0);
+
+ // pack hash and nonce together into first 40 bytes of s_mix
+ assert(sizeof(node)*8 == 512);
+ node s_mix[MIX_NODES + 1];
+ memcpy(s_mix[0].bytes, header_hash, 32);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+ s_mix[0].double_words[4] = fix_endian64(nonce);
+#else
+ s_mix[0].double_words[4] = nonce;
+#endif
+
+ // compute sha3-512 hash and replicate across mix
+ SHA3_512(s_mix->bytes, s_mix->bytes, 40);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+ for (unsigned w = 0; w != 16; ++w) {
+ s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]);
+ }
+#endif
+
+ node* const mix = s_mix + 1;
+ for (unsigned w = 0; w != MIX_WORDS; ++w) {
+ mix->words[w] = s_mix[0].words[w % NODE_WORDS];
+ }
+
+ unsigned const
+ page_size = sizeof(uint32_t) * MIX_WORDS,
+ num_full_pages = (unsigned)(params->full_size / page_size);
+
+
+ for (unsigned i = 0; i != ACCESSES; ++i)
+ {
+ uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages;
+
+ for (unsigned n = 0; n != MIX_NODES; ++n)
+ {
+ const node * dag_node = &full_nodes[MIX_NODES * index + n];
+
+ if (!full_nodes) {
+ node tmp_node;
+ ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache);
+ dag_node = &tmp_node;
+ }
+
+ #if defined(_M_X64) && ENABLE_SSE
+ {
+ __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME);
+ __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]);
+ __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]);
+ __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]);
+ __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]);
+ mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]);
+ mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]);
+ mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]);
+ mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]);
+ }
+ #else
+ {
+ for (unsigned w = 0; w != NODE_WORDS; ++w) {
+ mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]);
+ }
+ }
+ #endif
+ }
+
+ }
+
+ // compress mix
+ for (unsigned w = 0; w != MIX_WORDS; w += 4)
+ {
+ uint32_t reduction = mix->words[w+0];
+ reduction = reduction*FNV_PRIME ^ mix->words[w+1];
+ reduction = reduction*FNV_PRIME ^ mix->words[w+2];
+ reduction = reduction*FNV_PRIME ^ mix->words[w+3];
+ mix->words[w/4] = reduction;
+ }
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+ for (unsigned w = 0; w != MIX_WORDS/4; ++w) {
+ mix->words[w] = fix_endian32(mix->words[w]);
+ }
+#endif
+
+ memcpy(ret->mix_hash, mix->bytes, 32);
+ // final Keccak hash
+ SHA3_256(ret->result, s_mix->bytes, 64+32); // Keccak-256(s + compressed_mix)
+}
+
+void ethash_quick_hash(
+ uint8_t return_hash[32],
+ const uint8_t header_hash[32],
+ const uint64_t nonce,
+ const uint8_t mix_hash[32]) {
+
+ uint8_t buf[64+32];
+ memcpy(buf, header_hash, 32);
+#if BYTE_ORDER != LITTLE_ENDIAN
+ nonce = fix_endian64(nonce);
+#endif
+ memcpy(&(buf[32]), &nonce, 8);
+ SHA3_512(buf, buf, 40);
+ memcpy(&(buf[64]), mix_hash, 32);
+ SHA3_256(return_hash, buf, 64+32);
+}
+
+int ethash_quick_check_difficulty(
+ const uint8_t header_hash[32],
+ const uint64_t nonce,
+ const uint8_t mix_hash[32],
+ const uint8_t difficulty[32]) {
+ uint8_t return_hash[32];
+ ethash_quick_hash(return_hash, header_hash, nonce, mix_hash);
+ return ethash_check_difficulty(return_hash, difficulty);
+}
+
+void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+ ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce);
+}
+
+void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+ ethash_hash(ret, NULL, cache, params, previous_hash, nonce);
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
new file mode 100644
index 000000000..bcbacdaa4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
@@ -0,0 +1,48 @@
+#pragma once
+#include "compiler.h"
+#include "endian.h"
+#include "ethash.h"
+
+#define ENABLE_SSE 1
+
+#if defined(_M_X64) && ENABLE_SSE
+#include <smmintrin.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compile time settings
+#define NODE_WORDS (64/4)
+#define MIX_WORDS (MIX_BYTES/4)
+#define MIX_NODES (MIX_WORDS / NODE_WORDS)
+#include <stdint.h>
+
+typedef union node {
+ uint8_t bytes[NODE_WORDS * 4];
+ uint32_t words[NODE_WORDS];
+ uint64_t double_words[NODE_WORDS / 2];
+
+#if defined(_M_X64) && ENABLE_SSE
+ __m128i xmm[NODE_WORDS/4];
+#endif
+
+} node;
+
+void ethash_calculate_dag_item(
+ node *const ret,
+ const unsigned node_index,
+ ethash_params const *params,
+ ethash_cache const *cache
+);
+
+void ethash_quick_hash(
+ uint8_t return_hash[32],
+ const uint8_t header_hash[32],
+ const uint64_t nonce,
+ const uint8_t mix_hash[32]);
+
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
new file mode 100644
index 000000000..0c28230b8
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
@@ -0,0 +1,151 @@
+/** libkeccak-tiny
+*
+* A single-file implementation of SHA-3 and SHAKE.
+*
+* Implementor: David Leon Gil
+* License: CC0, attribution kindly requested. Blame taken too,
+* but not liability.
+*/
+#include "sha3.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/******** The Keccak-f[1600] permutation ********/
+
+/*** Constants. ***/
+static const uint8_t rho[24] = \
+ { 1, 3, 6, 10, 15, 21,
+ 28, 36, 45, 55, 2, 14,
+ 27, 41, 56, 8, 25, 43,
+ 62, 18, 39, 61, 20, 44};
+static const uint8_t pi[24] = \
+ {10, 7, 11, 17, 18, 3,
+ 5, 16, 8, 21, 24, 4,
+ 15, 23, 19, 13, 12, 2,
+ 20, 14, 22, 9, 6, 1};
+static const uint64_t RC[24] = \
+ {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
+ 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
+ 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
+ 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
+ 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
+ 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
+
+/*** Helper macros to unroll the permutation. ***/
+#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
+#define REPEAT6(e) e e e e e e
+#define REPEAT24(e) REPEAT6(e e e e)
+#define REPEAT5(e) e e e e e
+#define FOR5(v, s, e) \
+ v = 0; \
+ REPEAT5(e; v += s;)
+
+/*** Keccak-f[1600] ***/
+static inline void keccakf(void* state) {
+ uint64_t* a = (uint64_t*)state;
+ uint64_t b[5] = {0};
+ uint64_t t = 0;
+ uint8_t x, y;
+
+ for (int i = 0; i < 24; i++) {
+ // Theta
+ FOR5(x, 1,
+ b[x] = 0;
+ FOR5(y, 5,
+ b[x] ^= a[x + y]; ))
+ FOR5(x, 1,
+ FOR5(y, 5,
+ a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
+ // Rho and pi
+ t = a[1];
+ x = 0;
+ REPEAT24(b[0] = a[pi[x]];
+ a[pi[x]] = rol(t, rho[x]);
+ t = b[0];
+ x++; )
+ // Chi
+ FOR5(y,
+ 5,
+ FOR5(x, 1,
+ b[x] = a[y + x];)
+ FOR5(x, 1,
+ a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
+ // Iota
+ a[0] ^= RC[i];
+ }
+}
+
+/******** The FIPS202-defined functions. ********/
+
+/*** Some helper macros. ***/
+
+#define _(S) do { S } while (0)
+#define FOR(i, ST, L, S) \
+ _(for (size_t i = 0; i < L; i += ST) { S; })
+#define mkapply_ds(NAME, S) \
+ static inline void NAME(uint8_t* dst, \
+ const uint8_t* src, \
+ size_t len) { \
+ FOR(i, 1, len, S); \
+ }
+#define mkapply_sd(NAME, S) \
+ static inline void NAME(const uint8_t* src, \
+ uint8_t* dst, \
+ size_t len) { \
+ FOR(i, 1, len, S); \
+ }
+
+mkapply_ds(xorin, dst[i] ^= src[i]) // xorin
+mkapply_sd(setout, dst[i] = src[i]) // setout
+
+#define P keccakf
+#define Plen 200
+
+// Fold P*F over the full blocks of an input.
+#define foldP(I, L, F) \
+ while (L >= rate) { \
+ F(a, I, rate); \
+ P(a); \
+ I += rate; \
+ L -= rate; \
+ }
+
+/** The sponge-based hash construction. **/
+static inline int hash(uint8_t* out, size_t outlen,
+ const uint8_t* in, size_t inlen,
+ size_t rate, uint8_t delim) {
+ if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) {
+ return -1;
+ }
+ uint8_t a[Plen] = {0};
+ // Absorb input.
+ foldP(in, inlen, xorin);
+ // Xor in the DS and pad frame.
+ a[inlen] ^= delim;
+ a[rate - 1] ^= 0x80;
+ // Xor in the last block.
+ xorin(a, in, inlen);
+ // Apply P
+ P(a);
+ // Squeeze output.
+ foldP(out, outlen, setout);
+ setout(a, out, outlen);
+ memset(a, 0, 200);
+ return 0;
+}
+
+#define defsha3(bits) \
+ int sha3_##bits(uint8_t* out, size_t outlen, \
+ const uint8_t* in, size_t inlen) { \
+ if (outlen > (bits/8)) { \
+ return -1; \
+ } \
+ return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01); \
+ }
+
+/*** FIPS202 SHA3 FOFs ***/
+defsha3(256)
+defsha3(512) \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
new file mode 100644
index 000000000..36a0a5301
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "compiler.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+#define decsha3(bits) \
+ int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+decsha3(256)
+decsha3(512)
+
+static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) {
+ sha3_256(ret, 32, data, size);
+}
+
+static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) {
+ sha3_512(ret, 64, data, size);
+}
+
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
new file mode 100644
index 000000000..9454ce04a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
@@ -0,0 +1,34 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file sha3.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+#include <stdint.h>
+#include <cryptopp/sha3.h>
+
+extern "C" {
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) {
+ CryptoPP::SHA3_256().CalculateDigest(ret, data, size);
+}
+
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) {
+ CryptoPP::SHA3_512().CalculateDigest(ret, data, size);
+}
+} \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
new file mode 100644
index 000000000..f910960e1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "compiler.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size);
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size);
+
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
new file mode 100644
index 000000000..fbf268b7d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
@@ -0,0 +1,41 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.c
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#include <stdarg.h>
+#include <stdio.h>
+#include "util.h"
+
+#ifdef _MSC_VER
+
+// foward declare without all of Windows.h
+__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString);
+
+void debugf(const char *str, ...)
+{
+ va_list args;
+ va_start(args, str);
+
+ char buf[1<<16];
+ _vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args);
+ buf[sizeof(buf)-1] = '\0';
+ OutputDebugStringA(buf);
+}
+
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
new file mode 100644
index 000000000..2f59076f6
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
@@ -0,0 +1,47 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.h
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+void debugf(const char *str, ...);
+#else
+#define debugf printf
+#endif
+
+static inline uint32_t min_u32(uint32_t a, uint32_t b)
+{
+ return a < b ? a : b;
+}
+
+static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_)
+{
+ return x < min_ ? min_ : (x > max_ ? max_ : x);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
new file mode 100644
index 000000000..642c33cb3
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
@@ -0,0 +1,29 @@
+{
+ "targets":
+ [{
+ "target_name": "ethash",
+ "sources": [
+ './ethash.cc',
+ '../libethash/ethash.h',
+ '../libethash/util.c',
+ '../libethash/util.h',
+ '../libethash/blum_blum_shub.h',
+ '../libethash/blum_blum_shub.c',
+ '../libethash/sha3.h',
+ '../libethash/sha3.c',
+ '../libethash/internal.h',
+ '../libethash/internal.c'
+ ],
+ "include_dirs": [
+ "../",
+ "<!(node -e \"require('nan')\")"
+ ],
+ "cflags": [
+ "-Wall",
+ "-Wno-maybe-uninitialized",
+ "-Wno-uninitialized",
+ "-Wno-unused-function",
+ "-Wextra"
+ ]
+ }]
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
new file mode 100644
index 000000000..6ab9730be
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
@@ -0,0 +1,587 @@
+#include <nan.h>
+#include <iostream>
+#include <node.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "../libethash/ethash.h"
+
+using namespace v8;
+
+class EthashValidator : public NanAsyncWorker {
+ public:
+ // Constructor
+ EthashValidator(NanCallback *callback, const unsigned blocknumber, const unsigned char * seed)
+ : NanAsyncWorker(callback), blocknumber(blocknumber), seed(seed) {}
+ // Destructor
+ ~EthashValidator() {
+ free(this->cache);
+ free(this->params);
+ }
+
+ // Executed inside the worker-thread.
+ // It is not safe to access V8, or V8 data structures
+ // here, so everything we need for input and output
+ // should go on `this`.
+ void Execute () {
+
+ /* this->result = secp256k1_ecdsa_sign(this->msg, this->sig , &this->sig_len, this->pk, NULL, NULL); */
+ }
+
+ // Executed when the async work is complete
+ // this function will be run inside the main event loop
+ // so it is safe to use V8 again
+ void HandleOKCallback () {
+ NanScope();
+ Handle<Value> argv[] = {
+ NanNew<Number>(this->result)
+ };
+ callback->Call(2, argv);
+ }
+
+ protected:
+ const unsigned blocknumber;
+ const unsigned char * seed;
+ ethash_params * params;
+ ethash_cache * cache;
+ bool result;
+ bool ready = 0;
+};
+
+/* class CompactSignWorker : public SignWorker { */
+/* public: */
+/* CompactSignWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *pk ) */
+/* : SignWorker(callback, msg, pk){} */
+
+/* void Execute () { */
+/* this->result = secp256k1_ecdsa_sign_compact(this->msg, this->sig , this->pk, NULL, NULL, &this->sig_len); */
+/* } */
+
+/* void HandleOKCallback () { */
+/* NanScope(); */
+/* Handle<Value> argv[] = { */
+/* NanNew<Number>(this->result), */
+/* NanNewBufferHandle((char *)this->sig, 64), */
+/* NanNew<Number>(this->sig_len) */
+/* }; */
+/* callback->Call(3, argv); */
+/* } */
+/* }; */
+
+/* class RecoverWorker : public NanAsyncWorker { */
+/* public: */
+/* // Constructor */
+/* RecoverWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int compressed, int rec_id) */
+/* : NanAsyncWorker(callback), msg(msg), sig(sig), compressed(compressed), rec_id(rec_id) {} */
+/* // Destructor */
+/* ~RecoverWorker() {} */
+
+/* void Execute () { */
+/* if(this->compressed == 1){ */
+/* this->pubkey = new unsigned char[33]; */
+/* }else{ */
+/* this->pubkey = new unsigned char[65]; */
+/* } */
+
+/* this->result = secp256k1_ecdsa_recover_compact(this->msg, this->sig, this->pubkey, &this->pubkey_len, this->compressed, this->rec_id); */
+/* } */
+
+/* void HandleOKCallback () { */
+/* NanScope(); */
+/* Handle<Value> argv[] = { */
+/* NanNew<Number>(this->result), */
+/* NanNewBufferHandle((char *)this->pubkey, this->pubkey_len) */
+/* }; */
+/* callback->Call(2, argv); */
+/* } */
+
+/* protected: */
+/* const unsigned char * msg; */
+/* const unsigned char * sig; */
+/* int compressed; */
+/* int rec_id; */
+/* int result; */
+/* unsigned char * pubkey; */
+/* int pubkey_len; */
+/* }; */
+
+/* class VerifyWorker : public NanAsyncWorker { */
+/* public: */
+/* // Constructor */
+/* VerifyWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int sig_len, const unsigned char *pub_key, int pub_key_len) */
+/* : NanAsyncWorker(callback), msg(msg), sig(sig), sig_len(sig_len), pub_key(pub_key), pub_key_len(pub_key_len) {} */
+/* // Destructor */
+/* ~VerifyWorker() {} */
+
+/* void Execute () { */
+/* this->result = secp256k1_ecdsa_verify(this->msg, this->sig, this->sig_len, this->pub_key, this->pub_key_len); */
+/* } */
+
+/* void HandleOKCallback () { */
+/* NanScope(); */
+/* Handle<Value> argv[] = { */
+/* NanNew<Number>(this->result), */
+/* }; */
+/* callback->Call(1, argv); */
+/* } */
+
+/* protected: */
+/* int result; */
+/* const unsigned char * msg; */
+/* const unsigned char * sig; */
+/* int sig_len; */
+/* const unsigned char * pub_key; */
+/* int pub_key_len; */
+/* }; */
+
+/* NAN_METHOD(Verify){ */
+/* NanScope(); */
+
+/* Local<Object> pub_buf = args[0].As<Object>(); */
+/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/* int pub_len = node::Buffer::Length(args[0]); */
+
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/* Local<Object> sig_buf = args[2].As<Object>(); */
+/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/* int sig_len = node::Buffer::Length(args[2]); */
+
+/* int result = secp256k1_ecdsa_verify(msg_data, sig_data, sig_len, pub_data, pub_len ); */
+
+/* NanReturnValue(NanNew<Number>(result)); */
+/* } */
+
+/* NAN_METHOD(Verify_Async){ */
+/* NanScope(); */
+
+/* Local<Object> pub_buf = args[0].As<Object>(); */
+/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/* int pub_len = node::Buffer::Length(args[0]); */
+
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/* Local<Object> sig_buf = args[2].As<Object>(); */
+/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/* int sig_len = node::Buffer::Length(args[2]); */
+
+/* Local<Function> callback = args[3].As<Function>(); */
+/* NanCallback* nanCallback = new NanCallback(callback); */
+
+/* VerifyWorker* worker = new VerifyWorker(nanCallback, msg_data, sig_data, sig_len, pub_data, pub_len); */
+/* NanAsyncQueueWorker(worker); */
+
+/* NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Local<Object> pk_buf = args[0].As<Object>(); */
+/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/* int sec_len = node::Buffer::Length(args[0]); */
+/* //the second argument is the message that we are signing */
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/* unsigned char sig[72]; */
+/* int sig_len = 72; */
+/* int msg_len = node::Buffer::Length(args[1]); */
+
+/* if(sec_len != 32){ */
+/* return NanThrowError("the secret key needs tobe 32 bytes"); */
+/* } */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* int result = secp256k1_ecdsa_sign(msg_data, sig , &sig_len, pk_data, NULL, NULL); */
+
+/* if(result == 1){ */
+/* NanReturnValue(NanNewBufferHandle((char *)sig, sig_len)); */
+/* }else{ */
+/* return NanThrowError("nonce invalid, try another one"); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Sign_Async){ */
+
+/* NanScope(); */
+/* //the first argument should be the private key as a buffer */
+/* Local<Object> sec_buf = args[0].As<Object>(); */
+/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/* int sec_len = node::Buffer::Length(args[0]); */
+/* //the second argument is the message that we are signing */
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/* Local<Function> callback = args[2].As<Function>(); */
+/* NanCallback* nanCallback = new NanCallback(callback); */
+
+/* int msg_len = node::Buffer::Length(args[1]); */
+
+/* if(sec_len != 32){ */
+/* return NanThrowError("the secret key needs tobe 32 bytes"); */
+/* } */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* SignWorker* worker = new SignWorker(nanCallback, msg_data, sec_data); */
+/* NanAsyncQueueWorker(worker); */
+
+/* NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact){ */
+
+/* NanScope(); */
+
+/* Local<Object> seckey_buf = args[0].As<Object>(); */
+/* const unsigned char *seckey_data = (unsigned char *) node::Buffer::Data(seckey_buf); */
+/* int sec_len = node::Buffer::Length(args[0]); */
+
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+/* int msg_len = node::Buffer::Length(args[1]); */
+
+/* if(sec_len != 32){ */
+/* return NanThrowError("the secret key needs tobe 32 bytes"); */
+/* } */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* unsigned char sig[64]; */
+/* int rec_id; */
+
+/* //TODO: change the nonce */
+/* int valid_nonce = secp256k1_ecdsa_sign_compact(msg_data, sig, seckey_data, NULL, NULL, &rec_id ); */
+
+/* Local<Array> array = NanNew<Array>(3); */
+/* array->Set(0, NanNew<Integer>(valid_nonce)); */
+/* array->Set(1, NanNew<Integer>(rec_id)); */
+/* array->Set(2, NanNewBufferHandle((char *)sig, 64)); */
+
+/* NanReturnValue(array); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact_Async){ */
+/* NanScope(); */
+/* //the first argument should be the private key as a buffer */
+/* Local<Object> sec_buf = args[0].As<Object>(); */
+/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/* int sec_len = node::Buffer::Length(args[0]); */
+
+/* //the second argument is the message that we are signing */
+/* Local<Object> msg_buf = args[1].As<Object>(); */
+/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+
+/* Local<Function> callback = args[2].As<Function>(); */
+/* NanCallback* nanCallback = new NanCallback(callback); */
+
+/* int msg_len = node::Buffer::Length(args[1]); */
+
+/* if(sec_len != 32){ */
+/* return NanThrowError("the secret key needs tobe 32 bytes"); */
+/* } */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* CompactSignWorker* worker = new CompactSignWorker(nanCallback, msg_data, sec_data); */
+/* NanAsyncQueueWorker(worker); */
+
+/* NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Recover_Compact){ */
+
+/* NanScope(); */
+
+/* Local<Object> msg_buf = args[0].As<Object>(); */
+/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/* int msg_len = node::Buffer::Length(args[0]); */
+
+/* Local<Object> sig_buf = args[1].As<Object>(); */
+/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+
+/* Local<Number> compressed = args[2].As<Number>(); */
+/* int int_compressed = compressed->IntegerValue(); */
+
+/* Local<Number> rec_id = args[3].As<Number>(); */
+/* int int_rec_id = rec_id->IntegerValue(); */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* unsigned char pubKey[65]; */
+
+/* int pubKeyLen; */
+
+/* int result = secp256k1_ecdsa_recover_compact(msg, sig, pubKey, &pubKeyLen, int_compressed, int_rec_id); */
+/* if(result == 1){ */
+/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/* }else{ */
+
+/* NanReturnValue(NanFalse()); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Recover_Compact_Async){ */
+
+/* NanScope(); */
+
+/* //the message */
+/* Local<Object> msg_buf = args[0].As<Object>(); */
+/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/* int msg_len = node::Buffer::Length(args[0]); */
+
+/* //the signature length */
+/* Local<Object> sig_buf = args[1].As<Object>(); */
+/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+/* //todo sig len needs tobe 64 */
+/* int sig_len = node::Buffer::Length(args[1]); */
+
+/* //to compress or not? */
+/* Local<Number> compressed = args[2].As<Number>(); */
+/* int int_compressed = compressed->IntegerValue(); */
+
+/* //the rec_id */
+/* Local<Number> rec_id = args[3].As<Number>(); */
+/* int int_rec_id = rec_id->IntegerValue(); */
+
+/* //the callback */
+/* Local<Function> callback = args[4].As<Function>(); */
+/* NanCallback* nanCallback = new NanCallback(callback); */
+
+/* if(sig_len != 64){ */
+/* return NanThrowError("the signature needs to be 64 bytes"); */
+/* } */
+
+/* if(msg_len == 0){ */
+/* return NanThrowError("messgae cannot be null"); */
+/* } */
+
+/* RecoverWorker* worker = new RecoverWorker(nanCallback, msg, sig, int_compressed, int_rec_id); */
+/* NanAsyncQueueWorker(worker); */
+
+/* NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Seckey_Verify){ */
+/* NanScope(); */
+
+/* const unsigned char *data = (const unsigned char*) node::Buffer::Data(args[0]); */
+/* int result = secp256k1_ec_seckey_verify(data); */
+/* NanReturnValue(NanNew<Number>(result)); */
+/* } */
+
+/* NAN_METHOD(Pubkey_Verify){ */
+
+/* NanScope(); */
+
+/* Local<Object> pub_buf = args[0].As<Object>(); */
+/* const unsigned char *pub_key = (unsigned char *) node::Buffer::Data(pub_buf); */
+/* int pub_key_len = node::Buffer::Length(args[0]); */
+
+/* int result = secp256k1_ec_pubkey_verify(pub_key, pub_key_len); */
+
+/* NanReturnValue(NanNew<Number>(result)); */
+/* } */
+
+/* NAN_METHOD(Pubkey_Create){ */
+/* NanScope(); */
+
+/* Handle<Object> pk_buf = args[0].As<Object>(); */
+/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/* int pk_len = node::Buffer::Length(args[0]); */
+
+/* Local<Number> l_compact = args[1].As<Number>(); */
+/* int compact = l_compact->IntegerValue(); */
+/* int pubKeyLen; */
+
+/* if(pk_len != 32){ */
+/* return NanThrowError("the secert key need to be 32 bytes"); */
+/* } */
+
+/* unsigned char *pubKey; */
+/* if(compact == 1){ */
+/* pubKey = new unsigned char[33]; */
+/* }else{ */
+/* pubKey = new unsigned char[65]; */
+/* } */
+
+/* int results = secp256k1_ec_pubkey_create(pubKey,&pubKeyLen, pk_data, compact ); */
+/* if(results == 0){ */
+/* return NanThrowError("secret was invalid, try again."); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Decompress){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Local<Object> pk_buf = args[0].As<Object>(); */
+/* unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/* int pk_len = node::Buffer::Length(args[0]); */
+
+/* int results = secp256k1_ec_pubkey_decompress(pk_data, &pk_len); */
+
+/* if(results == 0){ */
+/* return NanThrowError("invalid public key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)pk_data, pk_len)); */
+/* } */
+/* } */
+
+
+/* NAN_METHOD(Privkey_Import){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> pk_buf = args[0].As<Object>(); */
+/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/* int pk_len = node::Buffer::Length(args[0]); */
+
+/* unsigned char sec_key[32]; */
+/* int results = secp256k1_ec_privkey_import(sec_key, pk_data, pk_len); */
+
+/* if(results == 0){ */
+/* return NanThrowError("invalid private key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)sec_key, 32)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Privkey_Export){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> sk_buf = args[0].As<Object>(); */
+/* const unsigned char *sk_data = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/* Local<Number> l_compressed = args[1].As<Number>(); */
+/* int compressed = l_compressed->IntegerValue(); */
+
+/* unsigned char *privKey; */
+/* int pk_len; */
+/* int results = secp256k1_ec_privkey_export(sk_data, privKey, &pk_len, compressed); */
+/* if(results == 0){ */
+/* return NanThrowError("invalid private key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)privKey, pk_len)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Add){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> sk_buf = args[0].As<Object>(); */
+/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/* Handle<Object> tweak_buf = args[1].As<Object>(); */
+/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/* int results = secp256k1_ec_privkey_tweak_add(sk, tweak); */
+/* if(results == 0){ */
+/* return NanThrowError("invalid key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Mul){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> sk_buf = args[0].As<Object>(); */
+/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/* Handle<Object> tweak_buf = args[1].As<Object>(); */
+/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/* int results = secp256k1_ec_privkey_tweak_mul(sk, tweak); */
+/* if(results == 0){ */
+/* return NanThrowError("invalid key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Add){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> pk_buf = args[0].As<Object>(); */
+/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/* int pk_len = node::Buffer::Length(args[0]); */
+
+/* Handle<Object> tweak_buf = args[1].As<Object>(); */
+/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/* int results = secp256k1_ec_pubkey_tweak_add(pk, pk_len, tweak); */
+/* if(results == 0){ */
+/* return NanThrowError("invalid key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/* } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Mul){ */
+/* NanScope(); */
+
+/* //the first argument should be the private key as a buffer */
+/* Handle<Object> pk_buf = args[0].As<Object>(); */
+/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/* int pk_len = node::Buffer::Length(args[0]); */
+
+/* Handle<Object> tweak_buf = args[1].As<Object>(); */
+/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/* int results = secp256k1_ec_pubkey_tweak_mul(pk, pk_len, tweak); */
+/* if(results == 0){ */
+/* return NanThrowError("invalid key"); */
+/* }else{ */
+/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/* } */
+/* } */
+
+void Init(Handle<Object> exports) {
+
+ /* secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY); */
+ /* exports->Set(NanNew("seckeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+ /* exports->Set(NanNew("sign"), NanNew<FunctionTemplate>(Sign)->GetFunction()); */
+ /* exports->Set(NanNew("signAsync"), NanNew<FunctionTemplate>(Sign_Async)->GetFunction()); */
+ /* exports->Set(NanNew("signCompact"), NanNew<FunctionTemplate>(Sign_Compact)->GetFunction()); */
+ /* exports->Set(NanNew("signCompactAsync"), NanNew<FunctionTemplate>(Sign_Compact_Async)->GetFunction()); */
+ /* exports->Set(NanNew("recoverCompact"), NanNew<FunctionTemplate>(Recover_Compact)->GetFunction()); */
+ /* exports->Set(NanNew("recoverCompactAsync"), NanNew<FunctionTemplate>(Recover_Compact_Async)->GetFunction()); */
+ /* exports->Set(NanNew("verify"), NanNew<FunctionTemplate>(Verify)->GetFunction()); */
+ /* exports->Set(NanNew("verifyAsync"), NanNew<FunctionTemplate>(Verify_Async)->GetFunction()); */
+ /* exports->Set(NanNew("secKeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+ /* exports->Set(NanNew("pubKeyVerify"), NanNew<FunctionTemplate>(Pubkey_Verify)->GetFunction()); */
+ /* exports->Set(NanNew("pubKeyCreate"), NanNew<FunctionTemplate>(Pubkey_Create)->GetFunction()); */
+ /* exports->Set(NanNew("pubKeyDecompress"), NanNew<FunctionTemplate>(Pubkey_Decompress)->GetFunction()); */
+ /* exports->Set(NanNew("privKeyExport"), NanNew<FunctionTemplate>(Privkey_Export)->GetFunction()); */
+ /* exports->Set(NanNew("privKeyImport"), NanNew<FunctionTemplate>(Privkey_Import)->GetFunction()); */
+ /* exports->Set(NanNew("privKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+ /* exports->Set(NanNew("privKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+ /* exports->Set(NanNew("pubKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+ /* exports->Set(NanNew("pubKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+}
+
+NODE_MODULE(secp256k1, Init)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
new file mode 100644
index 000000000..690ea3263
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
@@ -0,0 +1,13 @@
+{
+ "name": "node-ethash",
+ "version": "1.0.0",
+ "description": "",
+ "main": "index.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1",
+ "install": "node-gyp rebuild"
+ },
+ "author": "",
+ "license": "ISC",
+ "gypfile": true
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
new file mode 100644
index 000000000..bb46cdd6e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
@@ -0,0 +1,12 @@
+# To Develop
+`npm install -g node-gyp`
+`npm install .`
+
+
+# To rebuild
+`node-gyp rebuild`
+
+
+# notes
+
+nan is good https://github.com/rvagg/nan
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
new file mode 100644
index 000000000..431af6a99
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
@@ -0,0 +1,30 @@
+IF( NOT Boost_FOUND )
+ find_package(Boost COMPONENTS unit_test_framework)
+ENDIF()
+
+IF( Boost_FOUND )
+ include_directories( ${Boost_INCLUDE_DIR} )
+ include_directories(..)
+
+ link_directories ( ${Boost_LIBRARY_DIRS} )
+ file(GLOB HEADERS "*.h")
+ ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK)
+
+ if (NOT CRYPTOPP_FOUND)
+ find_package (CryptoPP)
+ endif()
+
+ if (CRYPTOPP_FOUND)
+ add_definitions(-DWITH_CRYPTOPP)
+ endif()
+
+ add_executable (Test test.cpp ${HEADERS})
+ target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS})
+
+ if (CRYPTOPP_FOUND)
+ TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES})
+ endif()
+
+ enable_testing ()
+ add_test(NAME ethash COMMAND Test)
+ENDIF()
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
new file mode 100644
index 000000000..d734954e1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
@@ -0,0 +1,54 @@
+package ethashTest
+
+import (
+ "bytes"
+ "crypto/rand"
+ "log"
+ "math/big"
+ "testing"
+
+ "github.com/ethereum/ethash"
+ "github.com/ethereum/go-ethereum/core"
+ "github.com/ethereum/go-ethereum/ethdb"
+)
+
+func TestEthash(t *testing.T) {
+ seedHash := make([]byte, 32)
+ _, err := rand.Read(seedHash)
+ if err != nil {
+ panic(err)
+ }
+
+ db, err := ethdb.NewMemDatabase()
+ if err != nil {
+ panic(err)
+ }
+
+ blockProcessor, err := core.NewCanonical(5, db)
+ if err != nil {
+ panic(err)
+ }
+
+ log.Println("Block Number: ", blockProcessor.ChainManager().CurrentBlock().Number())
+
+ e := ethash.New(blockProcessor.ChainManager())
+
+ miningHash := make([]byte, 32)
+ if _, err := rand.Read(miningHash); err != nil {
+ panic(err)
+ }
+ diff := big.NewInt(10000)
+ log.Println("difficulty", diff)
+
+ nonce := uint64(0)
+
+ ghash_full := e.FullHash(nonce, miningHash)
+ log.Printf("ethash full (on nonce): %x %x\n", ghash_full, nonce)
+
+ ghash_light := e.LightHash(nonce, miningHash)
+ log.Printf("ethash light (on nonce): %x %x\n", ghash_light, nonce)
+
+ if bytes.Compare(ghash_full, ghash_light) != 0 {
+ t.Errorf("full: %x, light: %x", ghash_full, ghash_light)
+ }
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
new file mode 100644
index 000000000..bec44e138
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
@@ -0,0 +1,233 @@
+#include <iomanip>
+#include <libethash/fnv.h>
+#include <libethash/ethash.h>
+#include <libethash/internal.h>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/sha3_cryptopp.h>
+#else
+#include <libethash/sha3.h>
+#endif // WITH_CRYPTOPP
+
+#define BOOST_TEST_MODULE Daggerhashimoto
+#define BOOST_TEST_MAIN
+
+#include <boost/test/unit_test.hpp>
+#include <libethash/ethash.h>
+#include <iostream>
+
+std::string bytesToHexString(const uint8_t *str, const size_t s) {
+ std::ostringstream ret;
+
+ for (int i = 0; i < s; ++i)
+ ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i];
+
+ return ret.str();
+}
+
+BOOST_AUTO_TEST_CASE(fnv_hash_check) {
+ uint32_t x = 1235U;
+ const uint32_t
+ y = 9999999U,
+ expected = (FNV_PRIME * x) ^ y;
+
+ x = fnv_hash(x, y);
+
+ BOOST_REQUIRE_MESSAGE(x == expected,
+ "\nexpected: " << expected << "\n"
+ << "actual: " << x << "\n");
+
+}
+
+BOOST_AUTO_TEST_CASE(SHA256_check) {
+ uint8_t input[32], out[32];
+ memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+ SHA3_256(out, input, 32);
+ const std::string
+ expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7",
+ actual = bytesToHexString(out, 32);
+ BOOST_REQUIRE_MESSAGE(expected == actual,
+ "\nexpected: " << expected.c_str() << "\n"
+ << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(SHA512_check) {
+ uint8_t input[64], out[64];
+ memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64);
+ SHA3_512(out, input, 64);
+ const std::string
+ expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405",
+ actual = bytesToHexString(out, 64);
+ BOOST_REQUIRE_MESSAGE(expected == actual,
+ "\nexpected: " << expected.c_str() << "\n"
+ << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) {
+ ethash_params params;
+ ethash_params_init(&params, 0);
+ BOOST_REQUIRE_MESSAGE(params.full_size < DAGSIZE_BYTES_INIT,
+ "\nfull size: " << params.full_size << "\n"
+ << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+ BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT,
+ "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n"
+ << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+ BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32,
+ "\ncache size: " << params.cache_size << "\n"
+ << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) {
+ ethash_params params;
+ ethash_params_init(&params, 0);
+ const uint32_t expected_full_size = 1073739904;
+ const uint32_t expected_cache_size = 1048384;
+ BOOST_REQUIRE_MESSAGE(params.full_size == expected_full_size,
+ "\nexpected: " << expected_cache_size << "\n"
+ << "actual: " << params.full_size << "\n");
+ BOOST_REQUIRE_MESSAGE(params.cache_size == expected_cache_size,
+ "\nexpected: " << expected_cache_size << "\n"
+ << "actual: " << params.cache_size << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_check) {
+ ethash_params params;
+ ethash_params_init(&params, 1971000);
+ const uint64_t nine_month_size = (uint64_t) 8*DAGSIZE_BYTES_INIT;
+ BOOST_REQUIRE_MESSAGE(params.full_size < nine_month_size,
+ "\nfull size: " << params.full_size << "\n"
+ << "should be less than or equal to: " << nine_month_size << "\n");
+ BOOST_REQUIRE_MESSAGE(params.full_size + DAGSIZE_BYTES_INIT / 4 > nine_month_size,
+ "\nfull size + DAGSIZE_BYTES_INIT / 4: " << params.full_size + DAGSIZE_BYTES_INIT / 4 << "\n"
+ << "should be greater than or equal to: " << nine_month_size << "\n");
+ BOOST_REQUIRE_MESSAGE(params.cache_size < nine_month_size / 1024,
+ "\nactual cache size: " << params.cache_size << "\n"
+ << "expected: " << nine_month_size / 1024 << "\n");
+ BOOST_REQUIRE_MESSAGE(params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 > nine_month_size / 1024 ,
+ "\ncache size + DAGSIZE_BYTES_INIT / 4 / 1024: " << params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 << "\n"
+ << "actual: " << nine_month_size / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(light_and_full_client_checks) {
+ ethash_params params;
+ uint8_t seed[32], hash[32];
+ ethash_return_value light_out, full_out;
+ memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+ memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+ ethash_params_init(&params, 0);
+ params.cache_size = 1024;
+ params.full_size = 1024 * 32;
+ ethash_cache cache;
+ cache.mem = alloca(params.cache_size);
+ ethash_mkcache(&cache, &params, seed);
+ node * full_mem = (node *) alloca(params.full_size);
+ ethash_compute_full_data(full_mem, &params, &cache);
+
+ {
+ const std::string
+ expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b",
+ actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size);
+
+ BOOST_REQUIRE_MESSAGE(expected == actual,
+ "\nexpected: " << expected.c_str() << "\n"
+ << "actual: " << actual.c_str() << "\n");
+ }
+
+
+
+ {
+ node node;
+ ethash_calculate_dag_item(&node, 0, &params, &cache);
+ const std::string
+ actual = bytesToHexString((uint8_t const *) &node, sizeof(node)),
+ expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597";
+ BOOST_REQUIRE_MESSAGE(actual == expected,
+ "\n" << "expected: " << expected.c_str() << "\n"
+ << "actual: " << actual.c_str() << "\n");
+ }
+
+ {
+ for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) {
+ for (uint32_t j = 0; j < 32; ++j) {
+ node expected_node;
+ ethash_calculate_dag_item(&expected_node, j, &params, &cache);
+ const std::string
+ actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)),
+ expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node));
+ BOOST_REQUIRE_MESSAGE(actual == expected,
+ "\ni: " << j << "\n"
+ << "expected: " << expected.c_str() << "\n"
+ << "actual: " << actual.c_str() << "\n");
+ }
+ }
+ }
+
+ {
+ uint64_t nonce = 0x7c7c597c;
+ ethash_full(&full_out, full_mem, &params, hash, nonce);
+ ethash_light(&light_out, &cache, &params, hash, nonce);
+ const std::string
+ light_result_string = bytesToHexString(light_out.result, 32),
+ full_result_string = bytesToHexString(full_out.result, 32);
+ BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+ "\nlight result: " << light_result_string.c_str() << "\n"
+ << "full result: " << full_result_string.c_str() << "\n");
+ const std::string
+ light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+ full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+ BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+ "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+ << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+ uint8_t check_hash[32];
+ ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash);
+ const std::string check_hash_string = bytesToHexString(check_hash, 32);
+ BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string,
+ "\ncheck hash string: " << check_hash_string.c_str() << "\n"
+ << "full result: " << full_result_string.c_str() << "\n");
+ }
+ {
+ ethash_full(&full_out, full_mem, &params, hash, 5);
+ std::string
+ light_result_string = bytesToHexString(light_out.result, 32),
+ full_result_string = bytesToHexString(full_out.result, 32);
+
+ BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string,
+ "\nlight result and full result should differ: " << light_result_string.c_str() << "\n");
+
+ ethash_light(&light_out, &cache, &params, hash, 5);
+ light_result_string = bytesToHexString(light_out.result, 32);
+ BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+ "\nlight result and full result should be the same\n"
+ << "light result: " << light_result_string.c_str() << "\n"
+ << "full result: " << full_result_string.c_str() << "\n");
+ std::string
+ light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+ full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+ BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+ "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+ << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+ }
+}
+
+BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) {
+ uint8_t hash[32], target[32];
+ memset(hash, 0, 32);
+ memset(target, 0, 32);
+
+ memcpy(hash, "11111111111111111111111111111111", 32);
+ memcpy(target, "22222222222222222222222222222222", 32);
+ BOOST_REQUIRE_MESSAGE(
+ ethash_check_difficulty(hash, target),
+ "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+ BOOST_REQUIRE_MESSAGE(
+ !ethash_check_difficulty(hash, hash),
+ "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n");
+ memcpy(target, "11111111111111111111111111111112", 32);
+ BOOST_REQUIRE_MESSAGE(
+ ethash_check_difficulty(hash, target),
+ "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+ memcpy(target, "11111111111111111111111111111110", 32);
+ BOOST_REQUIRE_MESSAGE(
+ !ethash_check_difficulty(hash, target),
+ "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n");
+} \ No newline at end of file