From de9f79133faa1ff5dcd16fb4fd13d06b7799ded9 Mon Sep 17 00:00:00 2001 From: Matthew Wampler-Doty Date: Sat, 28 Feb 2015 14:58:37 -0500 Subject: Introducing ethash --- Godeps/Godeps.json | 5 + .../src/github.com/ethereum/ethash/.gitignore | 5 + .../src/github.com/ethereum/ethash/CMakeLists.txt | 14 + .../ethereum/ethash/benchmark/CMakeLists.txt | 53 + .../ethereum/ethash/benchmark/benchmark.cpp | 260 + .../ethash/cmake/modules/FindCryptoPP.cmake | 108 + .../ethereum/ethash/cmake/modules/FindOpenCL.cmake | 91 + .../ethereum/ethash/cryptopp/CMakeLists.txt | 13 + .../src/github.com/ethereum/ethash/ethash.go | 296 + .../ethereum/ethash/libethash-cl/CMakeLists.txt | 12 + .../github.com/ethereum/ethash/libethash-cl/cl.hpp | 12452 +++++++++++++++++++ .../ethash/libethash-cl/ethash_cl_miner.cpp | 754 ++ .../ethereum/ethash/libethash-cl/ethash_cl_miner.h | 43 + .../ethereum/ethash/libethash-cuda/CMakeLists.txt | 15 + .../ethereum/ethash/libethash-cuda/cuPrintf.cu | 879 ++ .../ethereum/ethash/libethash-cuda/cuPrintf.cuh | 162 + .../ethereum/ethash/libethash-cuda/libethash.cu | 27 + .../ethereum/ethash/libethash/CMakeLists.txt | 33 + .../ethereum/ethash/libethash/compiler.h | 33 + .../ethereum/ethash/libethash/data_sizes.h | 248 + .../github.com/ethereum/ethash/libethash/endian.h | 74 + .../github.com/ethereum/ethash/libethash/ethash.h | 88 + .../src/github.com/ethereum/ethash/libethash/fnv.h | 38 + .../ethereum/ethash/libethash/internal.c | 297 + .../ethereum/ethash/libethash/internal.h | 48 + .../github.com/ethereum/ethash/libethash/sha3.c | 151 + .../github.com/ethereum/ethash/libethash/sha3.h | 27 + .../ethereum/ethash/libethash/sha3_cryptopp.cpp | 34 + .../ethereum/ethash/libethash/sha3_cryptopp.h | 15 + .../github.com/ethereum/ethash/libethash/util.c | 41 + .../github.com/ethereum/ethash/libethash/util.h | 47 + .../ethereum/ethash/node-ethash/binding.gyp | 29 + .../ethereum/ethash/node-ethash/ethash.cc | 587 + .../ethereum/ethash/node-ethash/package.json | 13 + .../ethereum/ethash/node-ethash/readme.md | 12 + .../github.com/ethereum/ethash/test/CMakeLists.txt | 30 + .../ethereum/ethash/test/go/ethash_test.go | 54 + .../src/github.com/ethereum/ethash/test/test.cpp | 233 + 38 files changed, 17321 insertions(+) create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go create mode 100644 Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp (limited to 'Godeps') diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index b66ea932f..a1025c85d 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -15,6 +15,11 @@ "Comment": "null-15", "Rev": "12e4b4183793ac4b061921e7980845e750679fd0" }, + { + "ImportPath": "github.com/ethereum/ethash", + "Comment": "v17-23-g2561e13", + "Rev": "2561e1322a7e8e3d4a2cc903c44b1e96340bcb27" + }, { "ImportPath": "github.com/ethereum/serpent-go", "Rev": "5767a0dbd759d313df3f404dadb7f98d7ab51443" diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore new file mode 100644 index 000000000..6bb36ed15 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore @@ -0,0 +1,5 @@ +.idea/ +.DS_Store +*/**/*un~ +.vagrant/ +cpp-build/ diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt new file mode 100644 index 000000000..ac189457f --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 2.8.2) +project(ethash) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") +set(ETHHASH_LIBS ethash) + +if (WIN32 AND WANT_CRYPTOPP) + add_subdirectory(cryptopp) +endif() + +add_subdirectory(libethash) +add_subdirectory(libethash-cl EXCLUDE_FROM_ALL) +add_subdirectory(benchmark EXCLUDE_FROM_ALL) +add_subdirectory(test EXCLUDE_FROM_ALL) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt new file mode 100644 index 000000000..e6ba85790 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +include_directories(..) + +set(CMAKE_BUILD_TYPE Release) + +if (MSVC) + add_definitions("/openmp") +endif() + +if (NOT MPI_FOUND) + find_package(MPI) +endif() + +if (NOT CRYPTOPP_FOUND) + find_package(CryptoPP 5.6.2) +endif() + +if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) +endif() + +if (NOT OpenCL_FOUND) + find_package(OpenCL) +endif() +if (OpenCL_FOUND) + add_definitions(-DWITH_OPENCL) + include_directories(${OpenCL_INCLUDE_DIRS}) + list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h) +endif() + +if (MPI_FOUND) + include_directories(${MPI_INCLUDE_PATH}) + add_executable (Benchmark_MPI_FULL benchmark.cpp) + target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES}) + SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI") + + add_executable (Benchmark_MPI_LIGHT benchmark.cpp) + target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES}) + SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI") +endif() + +add_executable (Benchmark_FULL benchmark.cpp) +target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS}) +SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL") + +add_executable (Benchmark_LIGHT benchmark.cpp) +target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS}) + +if (OpenCL_FOUND) + add_executable (Benchmark_CL benchmark.cpp) + target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl) + SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL") +endif() + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp new file mode 100644 index 000000000..4c8f700c5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file benchmark.cpp + * @author Tim Hughes + * @date 2015 + */ + +#include +#include +#include +#include +#include +#ifdef OPENCL +#include +#endif +#include +#include + +#ifdef WITH_CRYPTOPP +#include +#include + +#else +#include "libethash/sha3.h" +#endif // WITH_CRYPTOPP + +#undef min +#undef max + +#if defined(OPENCL) +const unsigned trials = 1024*1024*32; +#elif defined(FULL) +const unsigned trials = 1024*1024/8; +#else +const unsigned trials = 1024*1024/1024; +#endif +uint8_t g_hashes[1024*32]; + +static char nibbleToChar(unsigned nibble) +{ + return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble); +} + +static uint8_t charToNibble(char chr) +{ + if (chr >= '0' && chr <= '9') + { + return (uint8_t) (chr - '0'); + } + if (chr >= 'a' && chr <= 'z') + { + return (uint8_t) (chr - 'a' + 10); + } + if (chr >= 'A' && chr <= 'Z') + { + return (uint8_t) (chr - 'A' + 10); + } + return 0; +} + +static std::vector hexStringToBytes(char const* str) +{ + std::vector bytes(strlen(str) >> 1); + for (unsigned i = 0; i != bytes.size(); ++i) + { + bytes[i] = charToNibble(str[i*2 | 0]) << 4; + bytes[i] |= charToNibble(str[i*2 | 1]); + } + return bytes; +} + +static std::string bytesToHexString(uint8_t const* bytes, unsigned size) +{ + std::string str; + for (unsigned i = 0; i != size; ++i) + { + str += nibbleToChar(bytes[i] >> 4); + str += nibbleToChar(bytes[i] & 0xf); + } + return str; +} + +extern "C" int main(void) +{ + // params for ethash + ethash_params params; + ethash_params_init(¶ms, 0); + //params.full_size = 262147 * 4096; // 1GBish; + //params.full_size = 32771 * 4096; // 128MBish; + //params.full_size = 8209 * 4096; // 8MBish; + //params.cache_size = 8209*4096; + //params.cache_size = 2053*4096; + uint8_t seed[32], previous_hash[32]; + + memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32); + memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32); + + // allocate page aligned buffer for dataset +#ifdef FULL + void* full_mem_buf = malloc(params.full_size + 4095); + void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095); +#endif + void* cache_mem_buf = malloc(params.cache_size + 63); + void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63); + + ethash_cache cache; + cache.mem = cache_mem; + + // compute cache or full data + { + clock_t startTime = clock(); + ethash_mkcache(&cache, ¶ms, seed); + clock_t time = clock() - startTime; + + uint8_t cache_hash[32]; + SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size); + debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data()); + + // print a couple of test hashes + { + const clock_t startTime = clock(); + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, 0); + const clock_t time = clock() - startTime; + debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); + } + + #ifdef FULL + startTime = clock(); + ethash_compute_full_data(full_mem, ¶ms, &cache); + time = clock() - startTime; + debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); + #endif // FULL + } + +#ifdef OPENCL + ethash_cl_miner miner; + { + const clock_t startTime = clock(); + if (!miner.init(params, seed)) + exit(-1); + const clock_t time = clock() - startTime; + debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC)); + } +#endif + + +#ifdef FULL + { + const clock_t startTime = clock(); + ethash_return_value hash; + ethash_full(&hash, full_mem, ¶ms, previous_hash, 0); + const clock_t time = clock() - startTime; + debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data()); + } +#endif + +#ifdef OPENCL + // validate 1024 hashes against CPU + miner.hash(g_hashes, previous_hash, 0, 1024); + for (unsigned i = 0; i != 1024; ++i) + { + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, i); + if (memcmp(hash.result, g_hashes + 32*i, 32) != 0) + { + debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str()); + static unsigned c = 0; + if (++c == 16) + { + exit(-1); + } + } + } +#endif + + + clock_t startTime = clock(); + unsigned hash_count = trials; + + #ifdef OPENCL + { + struct search_hook : ethash_cl_miner::search_hook + { + unsigned hash_count; + std::vector nonce_vec; + + virtual bool found(uint64_t const* nonces, uint32_t count) + { + nonce_vec.assign(nonces, nonces + count); + return false; + } + + virtual bool searched(uint64_t start_nonce, uint32_t count) + { + // do nothing + hash_count += count; + return hash_count >= trials; + } + }; + search_hook hook; + hook.hash_count = 0; + + miner.search(previous_hash, 0x000000ffffffffff, hook); + + for (unsigned i = 0; i != hook.nonce_vec.size(); ++i) + { + uint64_t nonce = hook.nonce_vec[i]; + ethash_return_value hash; + ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); + debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str()); + } + + hash_count = hook.hash_count; + } + #else + { + //#pragma omp parallel for + for (int nonce = 0; nonce < trials; ++nonce) + { + ethash_return_value hash; + #ifdef FULL + ethash_full(&hash, full_mem, ¶ms, previous_hash, nonce); + #else + ethash_light(&hash, &cache, ¶ms, previous_hash, nonce); + #endif // FULL + } + } + #endif + + clock_t time = std::max((clock_t)1u, clock() - startTime); + + unsigned read_size = ACCESSES * MIX_BYTES; + debugf( + "hashrate: %8u, bw: %6u MB/s\n", + (unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time), + (unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024)) + ); + + free(cache_mem_buf); +#ifdef FULL + free(full_mem_buf); +#endif + + return 0; +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake new file mode 100644 index 000000000..5ca01e446 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake @@ -0,0 +1,108 @@ +# Module for locating the Crypto++ encryption library. +# +# Customizable variables: +# CRYPTOPP_ROOT_DIR +# This variable points to the CryptoPP root directory. On Windows the +# library location typically will have to be provided explicitly using the +# -D command-line option. The directory should include the include/cryptopp, +# lib and/or bin sub-directories. +# +# Read-only variables: +# CRYPTOPP_FOUND +# Indicates whether the library has been found. +# +# CRYPTOPP_INCLUDE_DIRS +# Points to the CryptoPP include directory. +# +# CRYPTOPP_LIBRARIES +# Points to the CryptoPP libraries that should be passed to +# target_link_libararies. +# +# +# Copyright (c) 2012 Sergiu Dotenco +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +INCLUDE (FindPackageHandleStandardArgs) + +FIND_PATH (CRYPTOPP_ROOT_DIR + NAMES cryptopp/cryptlib.h include/cryptopp/cryptlib.h + PATHS ENV CRYPTOPPROOT + DOC "CryptoPP root directory") + +# Re-use the previous path: +FIND_PATH (CRYPTOPP_INCLUDE_DIR + NAMES cryptopp/cryptlib.h + HINTS ${CRYPTOPP_ROOT_DIR} + PATH_SUFFIXES include + DOC "CryptoPP include directory") + +FIND_LIBRARY (CRYPTOPP_LIBRARY_DEBUG + NAMES cryptlibd cryptoppd + HINTS ${CRYPTOPP_ROOT_DIR} + PATH_SUFFIXES lib + DOC "CryptoPP debug library") + +FIND_LIBRARY (CRYPTOPP_LIBRARY_RELEASE + NAMES cryptlib cryptopp + HINTS ${CRYPTOPP_ROOT_DIR} + PATH_SUFFIXES lib + DOC "CryptoPP release library") + +IF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE) + SET (CRYPTOPP_LIBRARY + optimized ${CRYPTOPP_LIBRARY_RELEASE} + debug ${CRYPTOPP_LIBRARY_DEBUG} CACHE DOC "CryptoPP library") +ELSEIF (CRYPTOPP_LIBRARY_RELEASE) + SET (CRYPTOPP_LIBRARY ${CRYPTOPP_LIBRARY_RELEASE} CACHE DOC + "CryptoPP library") +ENDIF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE) + +IF (CRYPTOPP_INCLUDE_DIR) + SET (_CRYPTOPP_VERSION_HEADER ${CRYPTOPP_INCLUDE_DIR}/cryptopp/config.h) + + IF (EXISTS ${_CRYPTOPP_VERSION_HEADER}) + FILE (STRINGS ${_CRYPTOPP_VERSION_HEADER} _CRYPTOPP_VERSION_TMP REGEX + "^#define CRYPTOPP_VERSION[ \t]+[0-9]+$") + + STRING (REGEX REPLACE + "^#define CRYPTOPP_VERSION[ \t]+([0-9]+)" "\\1" _CRYPTOPP_VERSION_TMP + ${_CRYPTOPP_VERSION_TMP}) + + STRING (REGEX REPLACE "([0-9]+)[0-9][0-9]" "\\1" CRYPTOPP_VERSION_MAJOR + ${_CRYPTOPP_VERSION_TMP}) + STRING (REGEX REPLACE "[0-9]([0-9])[0-9]" "\\1" CRYPTOPP_VERSION_MINOR + ${_CRYPTOPP_VERSION_TMP}) + STRING (REGEX REPLACE "[0-9][0-9]([0-9])" "\\1" CRYPTOPP_VERSION_PATCH + ${_CRYPTOPP_VERSION_TMP}) + + SET (CRYPTOPP_VERSION_COUNT 3) + SET (CRYPTOPP_VERSION + ${CRYPTOPP_VERSION_MAJOR}.${CRYPTOPP_VERSION_MINOR}.${CRYPTOPP_VERSION_PATCH}) + ENDIF (EXISTS ${_CRYPTOPP_VERSION_HEADER}) +ENDIF (CRYPTOPP_INCLUDE_DIR) + +SET (CRYPTOPP_INCLUDE_DIRS ${CRYPTOPP_INCLUDE_DIR}) +SET (CRYPTOPP_LIBRARIES ${CRYPTOPP_LIBRARY}) + +MARK_AS_ADVANCED (CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY CRYPTOPP_LIBRARY_DEBUG + CRYPTOPP_LIBRARY_RELEASE) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS (CryptoPP REQUIRED_VARS CRYPTOPP_ROOT_DIR + CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY VERSION_VAR CRYPTOPP_VERSION) \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake new file mode 100644 index 000000000..cc567c95e --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake @@ -0,0 +1,91 @@ +# +# This file taken from FindOpenCL project @ http://gitorious.com/findopencl +# +# - Try to find OpenCL +# This module tries to find an OpenCL implementation on your system. It supports +# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. +# +# Once done this will define +# OPENCL_FOUND - system has OpenCL +# OPENCL_INCLUDE_DIRS - the OpenCL include directory +# OPENCL_LIBRARIES - link these to use OpenCL +# +# WIN32 should work, but is untested + +FIND_PACKAGE( PackageHandleStandardArgs ) + +SET (OPENCL_VERSION_STRING "0.1.0") +SET (OPENCL_VERSION_MAJOR 0) +SET (OPENCL_VERSION_MINOR 1) +SET (OPENCL_VERSION_PATCH 0) + +IF (APPLE) + + FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") + FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") + FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") + +ELSE (APPLE) + + IF (WIN32) + + FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) + FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) + + # The AMD SDK currently installs both x86 and x86_64 libraries + # This is only a hack to find out architecture + IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) + SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") + SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64") + ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") + SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") + SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86") + ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) + + # find out if the user asked for a 64-bit build, and use the corresponding + # 64 or 32 bit NVIDIA library paths to the search: + STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR}) + IF("${ISWIN64}" STREQUAL "Win64") + FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64) + ELSE("${ISWIN64}" STREQUAL "Win64") + FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32) + ENDIF("${ISWIN64}" STREQUAL "Win64") + + GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) + + # On Win32 search relative to the library + FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) + FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include) + + ELSE (WIN32) + + # Unix style platforms + FIND_LIBRARY(OPENCL_LIBRARIES OpenCL + ENV LD_LIBRARY_PATH + ) + + GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) + GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) + + # The AMD SDK currently does not place its headers + # in /usr/include, therefore also search relative + # to the library + FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") + FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") + + ENDIF (WIN32) + +ENDIF (APPLE) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) + +IF( _OPENCL_CPP_INCLUDE_DIRS ) + SET( OPENCL_HAS_CPP_BINDINGS TRUE ) + LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) + # This is often the same, so clean up + LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) +ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) + +MARK_AS_ADVANCED( + OPENCL_INCLUDE_DIRS +) \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt new file mode 100644 index 000000000..4cd9f36c6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt @@ -0,0 +1,13 @@ +set(LIBRARY cryptopp) + +include_directories(../../cryptopp) + +# todo, subset +file(GLOB HEADERS "../../cryptopp/*.h") +file(GLOB SOURCE "../../cryptopp/*.cpp") + +add_library(${LIBRARY} ${HEADERS} ${SOURCE}) + +set(CRYPTOPP_INCLUDE_DIRS "../.." PARENT_SCOPE) +set(CRYPTOPP_LIBRARIES ${LIBRARY} PARENT_SCOPE) +set(CRYPTOPP_FOUND TRUE PARENT_SCOPE) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go new file mode 100644 index 000000000..32d3f0264 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go @@ -0,0 +1,296 @@ +package ethash + +/* +#cgo CFLAGS: -std=gnu99 -Wall +#include "libethash/ethash.h" +#include "libethash/util.c" +#include "libethash/internal.c" +#include "libethash/sha3.c" +*/ +import "C" + +import ( + "bytes" + "encoding/binary" + "log" + "math/big" + "math/rand" + "sync" + "time" + "unsafe" + + "github.com/ethereum/go-ethereum/logger" + "github.com/ethereum/go-ethereum/pow" +) + +var powlogger = logger.NewLogger("POW") + +type DAG struct { + SeedBlockNum uint64 + dag unsafe.Pointer // full GB of memory for dag +} + +type ParamsAndCache struct { + params *C.ethash_params + cache *C.ethash_cache + SeedBlockNum uint64 +} + +type Ethash struct { + turbo bool + HashRate int64 + chainManager pow.ChainManager + dag *DAG + paramsAndCache *ParamsAndCache + nextdag unsafe.Pointer + ret *C.ethash_return_value + dagMutex *sync.Mutex + cacheMutex *sync.Mutex +} + +func parseNonce(nonce []byte) (uint64, error) { + nonceBuf := bytes.NewBuffer(nonce) + nonceInt, err := binary.ReadUvarint(nonceBuf) + if err != nil { + return 0, err + } + return nonceInt, nil +} + +const epochLength uint64 = 30000 + +func getSeedBlockNum(blockNum uint64) uint64 { + var seedBlockNum uint64 = 0 + if blockNum >= 2*epochLength { + seedBlockNum = ((blockNum / epochLength) - 1) * epochLength + } + return seedBlockNum +} + +func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsAndCache { + seedBlockNum := getSeedBlockNum(blockNum) + paramsAndCache := &ParamsAndCache{ + params: new(C.ethash_params), + cache: new(C.ethash_cache), + SeedBlockNum: seedBlockNum, + } + C.ethash_params_init(paramsAndCache.params, C.uint32_t(seedBlockNum)) + paramsAndCache.cache.mem = C.malloc(paramsAndCache.params.cache_size) + seedHash := chainManager.GetBlockByNumber(seedBlockNum).Header().Hash() + log.Println("Params", paramsAndCache.params) + + log.Println("Making Cache") + start := time.Now() + C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)((unsafe.Pointer)(&seedHash[0]))) + log.Println("Took:", time.Since(start)) + return paramsAndCache +} + +func (pow *Ethash) updateCache() { + pow.cacheMutex.Lock() + seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) + if pow.paramsAndCache.SeedBlockNum != seedNum { + pow.paramsAndCache = makeParamsAndCache(pow.chainManager, pow.chainManager.CurrentBlock().NumberU64()) + } + pow.cacheMutex.Unlock() +} + +func makeDAG(p *ParamsAndCache) *DAG { + d := &DAG{ + dag: C.malloc(p.params.full_size), + SeedBlockNum: p.SeedBlockNum, + } + C.ethash_compute_full_data(d.dag, p.params, p.cache) + return d +} + +func (pow *Ethash) updateDAG() { + pow.cacheMutex.Lock() + pow.dagMutex.Lock() + + seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64()) + if pow.dag == nil || pow.dag.SeedBlockNum != seedNum { + pow.dag = nil + log.Println("Making Dag") + start := time.Now() + pow.dag = makeDAG(pow.paramsAndCache) + log.Println("Took:", time.Since(start)) + } + + pow.dagMutex.Unlock() + pow.cacheMutex.Unlock() +} + +func New(chainManager pow.ChainManager) *Ethash { + return &Ethash{ + turbo: false, + paramsAndCache: makeParamsAndCache(chainManager, chainManager.CurrentBlock().NumberU64()), + chainManager: chainManager, + dag: nil, + ret: new(C.ethash_return_value), + cacheMutex: new(sync.Mutex), + dagMutex: new(sync.Mutex), + } +} + +func (pow *Ethash) DAGSize() uint64 { + return uint64(pow.paramsAndCache.params.full_size) +} + +func (pow *Ethash) CacheSize() uint64 { + return uint64(pow.paramsAndCache.params.cache_size) +} + +func (pow *Ethash) GetSeedHash(blockNum uint64) []byte { + return pow.chainManager.GetBlockByNumber(getSeedBlockNum(blockNum)).Header().Hash() +} + +func (pow *Ethash) Stop() { + pow.cacheMutex.Lock() + pow.dagMutex.Lock() + if pow.paramsAndCache.cache != nil { + C.free(pow.paramsAndCache.cache.mem) + } + if pow.dag != nil { + C.free(pow.dag.dag) + } + pow.dagMutex.Unlock() + pow.cacheMutex.Unlock() +} + +func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) { + pow.updateDAG() + + // Not very elegant, multiple mining instances are not supported + pow.dagMutex.Lock() + pow.cacheMutex.Lock() + defer pow.cacheMutex.Unlock() + defer pow.dagMutex.Unlock() + + r := rand.New(rand.NewSource(time.Now().UnixNano())) + miningHash := block.HashNoNonce() + diff := block.Difficulty() + log.Println("difficulty", diff) + i := int64(0) + start := time.Now().UnixNano() + t := time.Now() + + nonce := uint64(r.Int63()) + + for { + select { + case <-stop: + powlogger.Infoln("Breaking from mining") + pow.HashRate = 0 + pow.dagMutex.Unlock() + return nil, nil, nil + default: + i++ + + if time.Since(t) > (1 * time.Second) { + elapsed := time.Now().UnixNano() - start + hashes := ((float64(1e9) / float64(elapsed)) * float64(i)) / 1000 + pow.HashRate = int64(hashes) + powlogger.Infoln("Hashing @", pow.HashRate, "khash") + + t = time.Now() + } + + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) + cnonce := C.uint64_t(nonce) + log.Printf("seed hash, nonce: %x %x\n", miningHash, nonce) + // pow.hash is the output/return of ethash_full + C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce) + res := C.ethash_check_difficulty((*C.uint8_t)(&pow.ret.result[0]), (*C.uint8_t)(unsafe.Pointer(&diff.Bytes()[0]))) + if res == 1 { + mixDigest := C.GoBytes(unsafe.Pointer(&pow.ret.mix_hash[0]), 32) + // We don't really nead 32 bytes here + buf := make([]byte, 32) + binary.PutUvarint(buf, nonce) + return buf, mixDigest, pow.GetSeedHash(block.NumberU64()) + } + nonce += 1 + } + + if !pow.turbo { + time.Sleep(20 * time.Microsecond) + } + } +} + +func (pow *Ethash) Verify(block pow.Block) bool { + // Make sure the SeedHash is set correctly + if bytes.Compare(block.SeedHash(), pow.GetSeedHash(block.NumberU64())) != 0 { + log.Println("Block had wrong SeedHash") + log.Println("Expected: ", pow.GetSeedHash(block.NumberU64())) + log.Println("Actual: ", block.SeedHash()) + return false + } + + nonceInt, err := parseNonce(block.Nonce()) + if err != nil { + log.Println("nonce to int err:", err) + return false + } + return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), nonceInt) +} + +func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, blockNum uint64, nonce uint64) bool { + // First check: make sure header, mixDigest, nonce are correct without hitting the DAG + // This is to prevent DOS attacks + chash := (*C.uint8_t)(unsafe.Pointer(&hash)) + cnonce := C.uint64_t(nonce) + cmixDigest := (*C.uint8_t)(unsafe.Pointer(&mixDigest)) + cdifficulty := (*C.uint8_t)(unsafe.Pointer(&difficulty.Bytes()[0])) + if C.ethash_quick_check_difficulty(chash, cnonce, cmixDigest, cdifficulty) != 1 { + log.Println("Failed to pass quick check. Are you sure that the mix digest is correct?") + return false + } + + var pAc *ParamsAndCache + // If its an old block (doesn't use the current cache) + // get the cache for it but don't update (so we don't need the mutex) + // Otherwise, it's the current block or a future. + // If current, updateCache will do nothing. + if getSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum { + pAc = makeParamsAndCache(pow.chainManager, blockNum) + } else { + pow.updateCache() + pow.cacheMutex.Lock() + defer pow.cacheMutex.Unlock() + pAc = pow.paramsAndCache + } + + C.ethash_light(pow.ret, pAc.cache, pAc.params, chash, cnonce) + res := C.ethash_check_difficulty((*C.uint8_t)(unsafe.Pointer(&pow.ret.result[0])), cdifficulty) + return res == 1 +} + +func (pow *Ethash) GetHashrate() int64 { + return pow.HashRate +} + +func (pow *Ethash) Turbo(on bool) { + pow.turbo = on +} + +func (pow *Ethash) FullHash(nonce uint64, miningHash []byte) []byte { + pow.updateDAG() + pow.dagMutex.Lock() + defer pow.dagMutex.Unlock() + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) + cnonce := C.uint64_t(nonce) + log.Println("seed hash, nonce:", miningHash, nonce) + // pow.hash is the output/return of ethash_full + C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce) + ghash_full := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32) + return ghash_full +} + +func (pow *Ethash) LightHash(nonce uint64, miningHash []byte) []byte { + cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash)) + cnonce := C.uint64_t(nonce) + C.ethash_light(pow.ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce) + ghash_light := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32) + return ghash_light +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt new file mode 100644 index 000000000..19d2fecbf --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LIBRARY ethash-cl) +set(CMAKE_BUILD_TYPE Release) + +if (NOT OPENCL_FOUND) + find_package(OpenCL) +endif() +if (OPENCL_FOUND) + include_directories(${OPENCL_INCLUDE_DIRS}) + include_directories(..) + add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h) + TARGET_LINK_LIBRARIES(${LIBRARY} ${OPENCL_LIBRARIES} ethash) +endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp new file mode 100644 index 000000000..38fac1962 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp @@ -0,0 +1,12452 @@ +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and + * OpenCL 1.2 (rev 15) + * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes + * + * Additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. + * Tom Deakin and Simon McIntosh-Smith, July 2013 + * + * \version 1.2.6 + * \date August 2013 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained with a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * For detail documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.2 (revision 09) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. + * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include + * #else + * #include + * #endif + * #include + * #include + * #include + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector devices = context.getInfo(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +#ifdef _WIN32 + +#include +#include +#include +#include + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(__CL_ENABLE_EXCEPTIONS) + +#pragma push_macro("max") +#undef max +#if defined(USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +// +#if defined(USE_CL_DEVICE_FISSION) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#include +#include +#else +#include +#include +#endif // !__APPLE__ + +// To avoid accidentally taking ownership of core OpenCL types +// such as cl_kernel constructors are made explicit +// under OpenCL 1.2 +#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS explicit +#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include + +#if !defined(__NO_STD_VECTOR) +#include +#endif + +#if !defined(__NO_STD_STRING) +#include +#endif + +#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) +#include + +#include +#include +#endif // linux + +#include + + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +class Memory; + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +#if defined(CL_VERSION_1_2) +#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; +class Buffer; + +#if defined(__CL_ENABLE_EXCEPTIONS) +/*! \brief Exception class + * + * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. + */ +class Error : public std::exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + + +namespace detail +{ +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS +} + + + +//! \cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#if defined(CL_VERSION_1_2) +#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) +#endif // #if defined(CL_VERSION_1_2) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __COPY_ERR __ERR_STR(cl::copy) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#if defined(CL_VERSION_1_2) +#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#if defined(CL_VERSION_1_2) +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) +#endif // #if defined(CL_VERSION_1_2) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#if defined(CL_VERSION_1_2) +#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) + +#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) +#endif // #if defined(CL_VERSION_1_2) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + + +#define __RETAIN_ERR __ERR_STR(Retain Object) +#define __RELEASE_ERR __ERR_STR(Release Object) +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) +#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) + +/** + * CL 1.2 version that uses device fission. + */ +#if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#else +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // #if defined(CL_VERSION_1_2) + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#endif // #if defined(CL_VERSION_1_1) + +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/** + * CL 1.2 marker and barrier commands + */ +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) +#endif // #if defined(CL_VERSION_1_2) + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. + + * \note Deprecated. Please use std::string as default or + * re-define the string class to match the std::string + * interface by defining STRING_CLASS + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + ::size_t size_; + char * str_; +public: + //! \brief Constructs an empty string, allocating no memory. + string(void) : size_(0), str_(NULL) + { + } + + /*! \brief Constructs a string populated from an arbitrary value of + * specified size. + * + * An extra '\0' is added, in case none was contained in str. + * + * \param str the initial value of the string instance. Note that '\0' + * characters receive no special treatment. If NULL, + * the string is left empty, with a size of 0. + * + * \param size the number of characters to copy from str. + */ + string(const char * str, ::size_t size) : + size_(size), + str_(NULL) + { + if( size > 0 ) { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + } + + /*! \brief Constructs a string populated from a null-terminated value. + * + * \param str the null-terminated initial value of the string instance. + * If NULL, the string is left empty, with a size of 0. + */ + string(const char * str) : + size_(0), + str_(NULL) + { + if( str ) { + size_= ::strlen(str); + } + if( size_ > 0 ) { + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + } + } + + void resize( ::size_t n ) + { + if( size_ == n ) { + return; + } + if (n == 0) { + if( str_ ) { + delete [] str_; + } + str_ = NULL; + size_ = 0; + } + else { + char *newString = new char[n + 1]; + int copySize = n; + if( size_ < n ) { + copySize = size_; + } + size_ = n; + + if(str_) { + memcpy(newString, str_, (copySize + 1) * sizeof(char)); + } + if( copySize < size_ ) { + memset(newString + copySize, 0, size_ - copySize); + } + newString[size_] = '\0'; + + delete [] str_; + str_ = newString; + } + } + + const char& operator[] ( ::size_t pos ) const + { + return str_[pos]; + } + + char& operator[] ( ::size_t pos ) + { + return str_[pos]; + } + + /*! \brief Copies the value of another string to this one. + * + * \param rhs the string to copy. + * + * \returns a reference to the modified instance. + */ + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + if( str_ != NULL ) { + delete [] str_; + str_ = NULL; + size_ = 0; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + str_ = NULL; + size_ = 0; + } + else { + str_ = new char[rhs.size_ + 1]; + size_ = rhs.size_; + + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + /*! \brief Constructs a string by copying the value of another instance. + * + * \param rhs the string to copy. + */ + string(const string& rhs) : + size_(0), + str_(NULL) + { + *this = rhs; + } + + //! \brief Destructor - frees memory used to hold the current value. + ~string() + { + delete[] str_; + str_ = NULL; + } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t size(void) const { return size_; } + + //! \brief Queries the length of the string, excluding any added '\0's. + ::size_t length(void) const { return size(); } + + /*! \brief Returns a pointer to the private copy held by this instance, + * or "" if empty/unset. + */ + const char * c_str(void) const { return (str_) ? str_ : "";} +}; +typedef cl::string STRING_CLASS; +#endif // #elif !defined(__USE_DEV_STRING) + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * + * \note Deprecated. Please use std::vector as default or + * re-define the vector class to match the std::vector + * interface by defining VECTOR_CLASS + + * \note Not recommended for use with custom objects as + * current implementation will construct N elements + * + * std::vector functionality. + * \brief Fixed sized vector compatible with std::vector. + * + * \note + * This differs from std::vector<> not just in memory allocation, + * but also in terms of when members are constructed, destroyed, + * and assigned instead of being copy constructed. + * + * \param T type of element contained in the vector. + * + * \param N maximum size of the vector. + */ +template +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +{ +private: + T data_[N]; + unsigned int size_; + +public: + //! \brief Constructs an empty vector with no memory allocated. + vector() : + size_(static_cast(0)) + {} + + //! \brief Deallocates the vector's memory and destroys all of its elements. + ~vector() + { + clear(); + } + + //! \brief Returns the number of elements currently contained. + unsigned int size(void) const + { + return size_; + } + + /*! \brief Empties the vector of all elements. + * \note + * This does not deallocate memory but will invoke destructors + * on contained elements. + */ + void clear() + { + while(!empty()) { + pop_back(); + } + } + + /*! \brief Appends an element after the last valid element. + * Calling this on a vector that has reached capacity will throw an + * exception if exceptions are enabled. + */ + void push_back (const T& x) + { + if (size() < N) { + new (&data_[size_]) T(x); + size_++; + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Removes the last valid element from the vector. + * Calling this on an empty vector will throw an exception + * if exceptions are enabled. + */ + void pop_back(void) + { + if (size_ != 0) { + --size_; + data_[size_].~T(); + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Constructs with a value copied from another. + * + * \param vec the vector to copy. + */ + vector(const vector& vec) : + size_(vec.size_) + { + if (size_ != 0) { + assign(vec.begin(), vec.end()); + } + } + + /*! \brief Constructs with a specified number of initial elements. + * + * \param size number of initial elements. + * + * \param val value of initial elements. + */ + vector(unsigned int size, const T& val = T()) : + size_(0) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + /*! \brief Overwrites the current content with that copied from another + * instance. + * + * \param rhs vector to copy. + * + * \returns a reference to this. + */ + vector& operator=(const vector& rhs) + { + if (this == &rhs) { + return *this; + } + + if (rhs.size_ != 0) { + assign(rhs.begin(), rhs.end()); + } else { + clear(); + } + + return *this; + } + + /*! \brief Tests equality against another instance. + * + * \param vec the vector against which to compare. + */ + bool operator==(vector &vec) + { + if (size() != vec.size()) { + return false; + } + + for( unsigned int i = 0; i < size(); ++i ) { + if( operator[](i) != vec[i] ) { + return false; + } + } + return true; + } + + //! \brief Conversion operator to T*. + operator T* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const T* () const { return data_; } + + //! \brief Tests whether this instance has any elements. + bool empty (void) const + { + return size_==0; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int max_size (void) const + { + return N; + } + + //! \brief Returns the maximum number of elements this instance can hold. + unsigned int capacity () const + { + return N; + } + + /*! \brief Returns a reference to a given element. + * + * \param index which element to access. * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + T& operator[](int index) + { + return data_[index]; + } + + /*! \brief Returns a const reference to a given element. + * + * \param index which element to access. + * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + const T& operator[](int index) const + { + return data_[index]; + } + + /*! \brief Assigns elements of the vector based on a source iterator range. + * + * \param start Beginning iterator of source range + * \param end Enditerator of source range + * + * \note + * Will throw an exception if exceptions are enabled and size exceeded. + */ + template + void assign(I start, I end) + { + clear(); + while(start != end) { + push_back(*start); + start++; + } + } + + /*! \class iterator + * \brief Const iterator class for vectors + */ + class iterator + { + private: + const vector *vec_; + int index_; + + /** + * Internal iterator constructor to capture reference + * to the vector it iterates over rather than taking + * the vector by copy. + */ + iterator (const vector &vec, int index) : + vec_(&vec) + { + if( !vec.empty() ) { + index_ = index; + } else { + index_ = -1; + } + } + + public: + iterator(void) : + index_(-1), + vec_(NULL) + { + } + + iterator(const iterator& rhs) : + vec_(rhs.vec_), + index_(rhs.index_) + { + } + + ~iterator(void) {} + + static iterator begin(const cl::vector &vec) + { + iterator i(vec, 0); + + return i; + } + + static iterator end(const cl::vector &vec) + { + iterator i(vec, vec.size()); + + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + iterator& operator++() + { + ++index_; + return *this; + } + + iterator operator++(int) + { + iterator retVal(*this); + ++index_; + return retVal; + } + + iterator& operator--() + { + --index_; + return *this; + } + + iterator operator--(int) + { + iterator retVal(*this); + --index_; + return retVal; + } + + const T& operator *() const + { + return (*vec_)[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator begin(void) const + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + iterator end(void) const + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_-1]; + } +}; +#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) + + + + + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + inline int compare_exchange(volatile int * dest, int exchange, int comparand) + { +#ifdef _WIN32 + return (int)(InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#elif defined(__APPLE__) || defined(__MACOSX) + return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); +#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) + return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !_WIN32 + } + + inline void fence() { _mm_mfence(); } +}; // namespace detail + + +/*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ +template +class size_t +{ +private: + ::size_t data_[N]; + +public: + //! \brief Initialize size_t to all 0s + size_t() + { + for( int i = 0; i < N; ++i ) { + data_[i] = 0; + } + } + + ::size_t& operator[](int index) + { + return data_[index]; + } + + const ::size_t& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator ::size_t* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const ::size_t* () const { return data_; } +}; + +namespace detail { + +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) +{ + return f(name, sizeof(T), param, NULL); +} + +// Specialized getInfoHelper for VECTOR_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + typename T::cl_type * value = (typename T::cl_type *) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t elements = required / sizeof(typename T::cl_type); + param->assign(&value[0], &value[elements]); + for (::size_t i = 0; i < elements; i++) + { + if (value[i] != NULL) + { + err = (*param)[i].retain(); + if (err != CL_SUCCESS) { + return err; + } + } + } + return CL_SUCCESS; +} + +// Specialized for getInfo +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) +{ + cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + + if (err != CL_SUCCESS) { + return err; + } + + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for STRING_CLASS params +template +inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for cl::size_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + for(int i = 0; i < N; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} + +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + + +#if defined(CL_VERSION_1_2) +#define __PARAM_NAME_INFO_1_2(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ + \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) +#endif // #if defined(CL_VERSION_1_2) + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) +#endif // USE_CL_DEVICE_FISSION + +template +struct param_traits {}; + +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 +#if defined(CL_VERSION_1_2) +__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + +template +struct ReferenceHandler +{ }; + +#if defined(CL_VERSION_1_2) +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // #if defined(CL_VERSION_1_2) +/** + * OpenCL 1.1 devices do not have retain/release. + */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // #if defined(CL_VERSION_1_2) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const char *versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' ) { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + ::size_t size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + char *versionInfo = (char *) alloca(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + ::size_t size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + cl_device_id *devices = (cl_device_id *) alloca(size); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj) : object_(obj) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + return ReferenceHandler::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler::release(object_); + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + + cl_int retain() const + { + if( referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if( referenceCountable_ ) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Assignment operator. + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const Device& device) : detail::Wrapper(device) { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const cl_device_id &device) : detail::Wrapper(device) { } + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int * err = NULL); + + /*! \brief Assignment operator from Device. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const Device& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * CL 1.2 version + */ +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clCreateSubDevicesEXT(). + cl_int createSubDevices( + const cl_device_partition_property * properties, + VECTOR_CLASS* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = clCreateSubDevices(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(CL_VERSION_1_2) + +/** + * CL 1.1 version that uses device fission. + */ +#if defined(CL_VERSION_1_1) +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(USE_CL_DEVICE_FISSION) +#endif // #if defined(CL_VERSION_1_1) +}; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const Platform& platform) : detail::Wrapper(platform) { } + + /*! \brief Constructor from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + + /*! \brief Assignment operator from Platform. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const Platform& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPlatformInfo(). + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). + */ + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). + */ + static cl_int get( + VECTOR_CLASS* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + + if( platform == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (errResult != NULL) { + *errResult = err; + } + + return ids[0]; + } + + static Platform getDefault( + cl_int *errResult = NULL ) + { + return get(errResult); + } + + +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // #if defined(CL_VERSION_1_2) +}; // class Platform + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_VERSION_1_1) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static Context default_; + static volatile cl_int default_error_; +public: + /*! \brief Destructor. + * + * This calls clReleaseContext() on the value held by this instance. + */ + ~Context() { } + + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ + Context( + const VECTOR_CLASS& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). + */ + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) || !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + VECTOR_CLASS platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + VECTOR_CLASS devices; + +#if defined(__CL_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(__CL_ENABLE_EXCEPTIONS) + } catch (Error) {} + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainContext() on the parameter's cl_context. + */ + Context(const Context& context) : detail::Wrapper(context) { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + + /*! \brief Assignment operator from Context. + * + * This calls clRetainContext() on the parameter and clReleaseContext() on + * the previous value held by this instance. + */ + Context& operator = (const Context& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS* formats) const + { + cl_uint numEntries; + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*) value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + return CL_SUCCESS; + } +}; + +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo()[0]; + if (err != NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + + +#ifdef _WIN32 +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseEvent() on the value held by this instance. + */ + ~Event() { } + + //! \brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Copy constructor. + * + * This calls clRetainEvent() on the parameter's cl_event. + */ + Event(const Event& event) : detail::Wrapper(event) { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + Event(const cl_event& event) : detail::Wrapper(event) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const Event& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_event. + * + * This calls clRetainEvent() on the parameter and clReleaseEvent() on + * the previous value held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const VECTOR_CLASS& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if defined(CL_VERSION_1_1) +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + UserEvent() : Event() { } + + //! \brief Copy constructor - performs shallow copy. + UserEvent(const UserEvent& event) : Event(event) { } + + //! \brief Assignment Operator - performs shallow copy. + UserEvent& operator = (const UserEvent& rhs) + { + if (this != &rhs) { + Event::operator=(rhs); + } + return *this; + } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const VECTOR_CLASS& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + + /*! \brief Destructor. + * + * This calls clReleaseMemObject() on the value held by this instance. + */ + ~Memory() {} + + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainMemObject() on the parameter's cl_mem. + */ + Memory(const Memory& memory) : detail::Wrapper(memory) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_mem + * into the new Memory object. + */ + __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + + /*! \brief Assignment operator from Memory. + * + * This calls clRetainMemObject() on the parameter and clReleaseMemObject() + * on the previous value held by this instance. + */ + Memory& operator = (const Memory& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. + */ + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif + +}; + +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Buffer(const Buffer& buffer) : Memory(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + + /*! \brief Assignment from Buffer - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const Buffer& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +#if defined(CL_VERSION_1_1) + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + +#if defined(CL_VERSION_1_2) + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferD3D10 - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const BufferD3D10& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const BufferGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferRenderGL : public Buffer +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const BufferRenderGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image(const Image& image) : Memory(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } + + /*! \brief Assignment from Image - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const Image& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if defined(CL_VERSION_1_2) +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image1D(const Image1D& image1D) : Image(image1D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + + /*! \brief Assignment from Image1D - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const Image1D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + + Image1DBuffer& operator = (const Image1DBuffer& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image1DArray& operator = (const Image1DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2D(const Image2D& image2D) : Image(image2D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + + /*! \brief Assignment from Image2D - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const Image2D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL(const Image2DGL& image) : Image2D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + + /*! \brief Assignment from Image2DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const Image2DGL& rhs) + { + if (this != &rhs) { + Image2D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t height, + ::size_t rowPitch, + ::size_t slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image2DArray& operator = (const Image2DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + } + + //! \brief Default constructor - initializes to NULL. + Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3D(const Image3D& image3D) : Image(image3D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + + /*! \brief Assignment from Image3D - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const Image3D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL(const Image3DGL& image) : Image3D(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + + /*! \brief Assignment from Image3DGL - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const Image3DGL& rhs) + { + if (this != &rhs) { + Image3D::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + ImageGL(const ImageGL& image) : Image(image) { } + + __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + ImageGL& operator = (const ImageGL& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + /*! \brief Destructor. + * + * This calls clReleaseSampler() on the value held by this instance. + */ + ~Sampler() { } + + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainSampler() on the parameter's cl_sampler. + */ + Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Assignment operator from Sampler. + * + * This calls clRetainSampler() on the parameter and clReleaseSampler() + * on the previous value held by this instance. + */ + Sampler& operator = (const Sampler& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { } + + //! \brief Constructs one-dimensional range. + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_[0] = size0; + } + + //! \brief Constructs two-dimensional range. + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + } + + //! \brief Constructs three-dimensional range. + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const ::size_t *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const ::size_t*() const { + return (const ::size_t*) sizes_; + } + + //! \brief Queries the number of dimensions in the range. + ::size_t dimensions() const { return dimensions_; } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! \brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static T* ptr(T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static void* ptr(LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! __local + * \brief Helper function for generating LocalSpaceArg objects. + * Deprecated. Replaced with Local. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg +__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +//class KernelFunctor; + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + /*! \brief Destructor. + * + * This calls clReleaseKernel() on the value held by this instance. + */ + ~Kernel() { } + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainKernel() on the parameter's cl_kernel. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Assignment operator from Kernel. + * + * This calls clRetainKernel() on the parameter and clReleaseKernel() + * on the previous value held by this instance. + */ + Kernel& operator = (const Kernel& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_2) + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_VERSION_1_2) + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int setArg(cl_uint index, T value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: + typedef VECTOR_CLASS > Binaries; + typedef VECTOR_CLASS > Sources; + + Program( + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const Binaries& binaries, + VECTOR_CLASS* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); + + for (::size_t i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } + + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs, + lengths, images, binaryStatus != NULL + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if defined(CL_VERSION_1_2) + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const STRING_CLASS& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs, + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) + + Program() { } + + Program(const Program& program) : detail::Wrapper(program) { } + + __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + + Program& operator = (const Program& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + cl_int build( + const VECTOR_CLASS& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + +#if defined(CL_VERSION_1_2) + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data), + __COMPILE_PROGRAM_ERR); + } +#endif + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +#if defined(CL_VERSION_1_2) +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(); + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} + +inline Program linkProgram( + VECTOR_CLASS inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); + + if (programs != NULL) { + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + } + + cl_program prog = ::clLinkProgram( + Context::getDefault()(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} +#endif + +template<> +inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo(); + VECTOR_CLASS binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: + static volatile int default_initialized_; + static CommandQueue default_; + static volatile cl_int default_error_; +public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + VECTOR_CLASS devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (err != NULL) { + *err = error; + } + + } + + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + static CommandQueue getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + CommandQueue() { } + + CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + + CommandQueue& operator = (const CommandQueue& rhs) + { + if (this != &rhs) { + detail::Wrapper::operator=(rhs); + } + return *this; + } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified a as vector. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_2) + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const VECTOR_CLASS &memObjects, + cl_mem_migration_flags flags, + const VECTOR_CLASS* events = NULL, + Event* event = NULL + ) + { + cl_event tmp; + + cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + static_cast(localMemObjects), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? (const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* mem_locs = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueMarker(object_, (cl_event*) event), + __ENQUEUE_MARKER_ERR); + } + + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + (const cl_event*) &events.front()), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS* mem_objects = NULL, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_1) + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // #if defined(CL_VERSION_1_1) + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +#ifdef _WIN32 +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +struct EnqueueArgs +{ + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + VECTOR_CLASS events_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + +namespace detail { + +class NullType {}; + +template +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + +template +struct SetArg +{ + static void set (Kernel, NullType) + { + } +}; + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31 +> +class KernelFunctorGlobal +{ +private: + Kernel kernel_; + +public: + KernelFunctorGlobal( + Kernel kernel) : + kernel_(kernel) + {} + + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType() + ) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +}; + +//------------------------------------------------------------------------------------------------------ + + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_ +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_ +< T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_ +< T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + +}; + +template< + typename T0> +struct functionImplementation_ +< T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + +}; + + + + + +} // namespace detail + +//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType +> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(kernel)) + {} +}; + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef __ERR_STR +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __SET_PRINTF_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __CL_EXPLICIT_CONSTRUCTORS + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __CL_FUNCTION_TYPE + +// Extensions +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_VERSION_1_1) +#undef __INIT_CL_EXT_FCN_PTR +#endif // #if defined(CL_VERSION_1_1) +#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + +} // namespace cl + +#ifdef _WIN32 +#pragma pop_macro("max") +#endif // _WIN32 + +#endif // CL_HPP_ diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp new file mode 100644 index 000000000..11b29333c --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp @@ -0,0 +1,754 @@ +/* + This file is part of c-ethash. + + c-ethash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + c-ethash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file ethash_cl_miner.cpp +* @author Tim Hughes +* @date 2015 +*/ + + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include "ethash_cl_miner.h" +#include + +#undef min +#undef max + +#define HASH_BYTES 32 + +static char const ethash_inner_code[] = R"( + +// author Tim Hughes +// Tested on Radeon HD 7850 +// Hashrate: 15940347 hashes/s +// Bandwidth: 124533 MB/s +// search kernel should fit in <= 84 VGPRS (3 wavefronts) + +#define THREADS_PER_HASH (128 / 16) +#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) + +#define FNV_PRIME 0x01000193 + +__constant uint2 const Keccak_f1600_RC[24] = { + (uint2)(0x00000001, 0x00000000), + (uint2)(0x00008082, 0x00000000), + (uint2)(0x0000808a, 0x80000000), + (uint2)(0x80008000, 0x80000000), + (uint2)(0x0000808b, 0x00000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008009, 0x80000000), + (uint2)(0x0000008a, 0x00000000), + (uint2)(0x00000088, 0x00000000), + (uint2)(0x80008009, 0x00000000), + (uint2)(0x8000000a, 0x00000000), + (uint2)(0x8000808b, 0x00000000), + (uint2)(0x0000008b, 0x80000000), + (uint2)(0x00008089, 0x80000000), + (uint2)(0x00008003, 0x80000000), + (uint2)(0x00008002, 0x80000000), + (uint2)(0x00000080, 0x80000000), + (uint2)(0x0000800a, 0x00000000), + (uint2)(0x8000000a, 0x80000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008080, 0x80000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008008, 0x80000000), +}; + +void keccak_f1600_round(uint2* a, uint r, uint out_size) +{ + #if !__ENDIAN_LITTLE__ + for (uint i = 0; i != 25; ++i) + a[i] = a[i].yx; + #endif + + uint2 b[25]; + uint2 t; + + // Theta + b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; + b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; + b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; + b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; + b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; + t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); + a[0] ^= t; + a[5] ^= t; + a[10] ^= t; + a[15] ^= t; + a[20] ^= t; + t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); + a[1] ^= t; + a[6] ^= t; + a[11] ^= t; + a[16] ^= t; + a[21] ^= t; + t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); + a[2] ^= t; + a[7] ^= t; + a[12] ^= t; + a[17] ^= t; + a[22] ^= t; + t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); + a[3] ^= t; + a[8] ^= t; + a[13] ^= t; + a[18] ^= t; + a[23] ^= t; + t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); + a[4] ^= t; + a[9] ^= t; + a[14] ^= t; + a[19] ^= t; + a[24] ^= t; + + // Rho Pi + b[0] = a[0]; + b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); + b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); + b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); + b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); + b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); + b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); + b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); + b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); + b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); + b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); + b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); + b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); + b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); + b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); + b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); + b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); + b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); + b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); + b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); + b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); + b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); + b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); + b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); + b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); + + // Chi + a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); + a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); + a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); + a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); + if (out_size >= 4) + { + a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); + a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); + a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); + a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); + a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); + if (out_size >= 8) + { + a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); + a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); + a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); + a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); + a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); + a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); + a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); + a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); + a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); + a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); + a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); + a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); + a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); + a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); + a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); + a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); + } + } + + // Iota + a[0] ^= Keccak_f1600_RC[r]; + + #if !__ENDIAN_LITTLE__ + for (uint i = 0; i != 25; ++i) + a[i] = a[i].yx; + #endif +} + +void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) +{ + for (uint i = in_size; i != 25; ++i) + { + a[i] = 0; + } +#if __ENDIAN_LITTLE__ + a[in_size] ^= 0x0000000000000001; + a[24-out_size*2] ^= 0x8000000000000000; +#else + a[in_size] ^= 0x0100000000000000; + a[24-out_size*2] ^= 0x0000000000000080; +#endif + + // Originally I unrolled the first and last rounds to interface + // better with surrounding code, however I haven't done this + // without causing the AMD compiler to blow up the VGPR usage. + uint r = 0; + do + { + // This dynamic branch stops the AMD compiler unrolling the loop + // and additionally saves about 33% of the VGPRs, enough to gain another + // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can + // massage out of the compiler. It doesn't really seem to matter how + // much we try and help the compiler save VGPRs because it seems to throw + // that information away, hence the implementation of keccak here + // doesn't bother. + if (isolate) + { + keccak_f1600_round((uint2*)a, r++, 25); + } + } + while (r < 23); + + // final round optimised for digest size + keccak_f1600_round((uint2*)a, r++, out_size); +} + +#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } + +#define countof(x) (sizeof(x) / sizeof(x[0])) + +uint fnv(uint x, uint y) +{ + return x * FNV_PRIME ^ y; +} + +uint4 fnv4(uint4 x, uint4 y) +{ + return x * FNV_PRIME ^ y; +} + +uint fnv_reduce(uint4 v) +{ + return fnv(fnv(fnv(v.x, v.y), v.z), v.w); +} + +typedef union +{ + ulong ulongs[32 / sizeof(ulong)]; + uint uints[32 / sizeof(uint)]; +} hash32_t; + +typedef union +{ + ulong ulongs[64 / sizeof(ulong)]; + uint4 uint4s[64 / sizeof(uint4)]; +} hash64_t; + +typedef union +{ + uint uints[128 / sizeof(uint)]; + uint4 uint4s[128 / sizeof(uint4)]; +} hash128_t; + +hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) +{ + hash64_t init; + uint const init_size = countof(init.ulongs); + uint const hash_size = countof(header->ulongs); + + // sha3_512(header .. nonce) + ulong state[25]; + copy(state, header->ulongs, hash_size); + state[hash_size] = nonce; + keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); + + copy(init.ulongs, state, init_size); + return init; +} + +uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) +{ + uint4 mix = init; + + // share init0 + if (thread_id == 0) + *share = mix.x; + barrier(CLK_LOCAL_MEM_FENCE); + uint init0 = *share; + + uint a = 0; + do + { + bool update_share = thread_id == (a/4) % THREADS_PER_HASH; + + #pragma unroll + for (uint i = 0; i != 4; ++i) + { + if (update_share) + { + uint m[4] = { mix.x, mix.y, mix.z, mix.w }; + *share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; + } + barrier(CLK_LOCAL_MEM_FENCE); + + mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); + } + } + while ((a += 4) != (ACCESSES & isolate)); + + return fnv_reduce(mix); +} + +hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) +{ + ulong state[25]; + + hash32_t hash; + uint const hash_size = countof(hash.ulongs); + uint const init_size = countof(init->ulongs); + uint const mix_size = countof(mix->ulongs); + + // keccak_256(keccak_512(header..nonce) .. mix); + copy(state, init->ulongs, init_size); + copy(state + init_size, mix->ulongs, mix_size); + keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); + + // copy out + copy(hash.ulongs, state, hash_size); + return hash; +} + +hash32_t compute_hash_simple( + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong nonce, + uint isolate + ) +{ + hash64_t init = init_hash(g_header, nonce, isolate); + + hash128_t mix; + for (uint i = 0; i != countof(mix.uint4s); ++i) + { + mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; + } + + uint mix_val = mix.uints[0]; + uint init0 = mix.uints[0]; + uint a = 0; + do + { + uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; + uint n = (a+1) % countof(mix.uints); + + #pragma unroll + for (uint i = 0; i != countof(mix.uints); ++i) + { + mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); + mix_val = i == n ? mix.uints[i] : mix_val; + } + } + while (++a != (ACCESSES & isolate)); + + // reduce to output + hash32_t fnv_mix; + for (uint i = 0; i != countof(fnv_mix.uints); ++i) + { + fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); + } + + return final_hash(&init, &fnv_mix, isolate); +} + +typedef union +{ + struct + { + hash64_t init; + uint pad; // avoid lds bank conflicts + }; + hash32_t mix; +} compute_hash_share; + +hash32_t compute_hash( + __local compute_hash_share* share, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong nonce, + uint isolate + ) +{ + uint const gid = get_global_id(0); + + // Compute one init hash per work item. + hash64_t init = init_hash(g_header, nonce, isolate); + + // Threads work together in this phase in groups of 8. + uint const thread_id = gid % THREADS_PER_HASH; + uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; + + hash32_t mix; + uint i = 0; + do + { + // share init with other threads + if (i == thread_id) + share[hash_id].init = init; + barrier(CLK_LOCAL_MEM_FENCE); + + uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; + barrier(CLK_LOCAL_MEM_FENCE); + + uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); + + share[hash_id].mix.uints[thread_id] = thread_mix; + barrier(CLK_LOCAL_MEM_FENCE); + + if (i == thread_id) + mix = share[hash_id].mix; + barrier(CLK_LOCAL_MEM_FENCE); + } + while (++i != (THREADS_PER_HASH & isolate)); + + return final_hash(&init, &mix, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_hash_simple( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) +{ + uint const gid = get_global_id(0); + g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_search_simple( + __global volatile uint* restrict g_output, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + ulong target, + uint isolate + ) +{ + uint const gid = get_global_id(0); + hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); + + if (hash.ulongs[countof(hash.ulongs)-1] < target) + { + uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); + g_output[slot] = gid; + } +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_hash( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) +{ + __local compute_hash_share share[HASHES_PER_LOOP]; + + uint const gid = get_global_id(0); + g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); +} + +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +__kernel void ethash_search( + __global volatile uint* restrict g_output, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + ulong target, + uint isolate + ) +{ + __local compute_hash_share share[HASHES_PER_LOOP]; + + uint const gid = get_global_id(0); + hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); + + if (hash.ulongs[countof(hash.ulongs)-1] < target) + { + uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); + g_output[slot] = gid; + } +} + +)"; + +static void add_definition(std::string& source, char const* id, unsigned value) +{ + char buf[256]; + sprintf(buf, "#define %s %uu\n", id, value); + source.insert(source.begin(), buf, buf + strlen(buf)); +} + +ethash_cl_miner::ethash_cl_miner() +{ +} + +bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size) +{ + // store params + m_params = params; + + // get all platforms + std::vector platforms; + cl::Platform::get(&platforms); + if (platforms.empty()) + { + debugf("No OpenCL platforms found.\n"); + return false; + } + + // use default platform + debugf("Using platform: %s\n", platforms[0].getInfo().c_str()); + + // get GPU device of the default platform + std::vector devices; + platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); + if (devices.empty()) + { + debugf("No OpenCL devices found.\n"); + return false; + } + + // use default device + cl::Device& device = devices[0]; + debugf("Using device: %s\n", device.getInfo().c_str()); + + // create context + m_context = cl::Context({device}); + m_queue = cl::CommandQueue(m_context, device); + + // use requested workgroup size, but we require multiple of 8 + m_workgroup_size = ((workgroup_size + 7) / 8) * 8; + + // patch source code + std::string code = ethash_inner_code; + add_definition(code, "GROUP_SIZE", m_workgroup_size); + add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES)); + add_definition(code, "ACCESSES", ACCESSES); + add_definition(code, "MAX_OUTPUTS", c_max_search_results); + //debugf("%s", code.c_str()); + + // create miner OpenCL program + cl::Program::Sources sources; + sources.push_back({code.c_str(), code.size()}); + + cl::Program program(m_context, sources); + try + { + program.build({device}); + } + catch (cl::Error err) + { + debugf("%s\n", program.getBuildInfo(device).c_str()); + return false; + } + m_hash_kernel = cl::Kernel(program, "ethash_hash"); + m_search_kernel = cl::Kernel(program, "ethash_search"); + + // create buffer for dag + m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size); + + // create buffer for header + m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); + + // compute dag on CPU + { + void* cache_mem = malloc(params.cache_size + 63); + ethash_cache cache; + cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63); + ethash_mkcache(&cache, ¶ms, seed); + + // if this throws then it's because we probably need to subdivide the dag uploads for compatibility + void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size); + ethash_compute_full_data(dag_ptr, ¶ms, &cache); + m_queue.enqueueUnmapMemObject(m_dag, dag_ptr); + + free(cache_mem); + } + + // create mining buffers + for (unsigned i = 0; i != c_num_buffers; ++i) + { + m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size); + m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); + } + return true; +} + +void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count) +{ + struct pending_batch + { + unsigned base; + unsigned count; + unsigned buf; + }; + std::queue pending; + + // update header constant buffer + m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header); + + /* + __kernel void ethash_combined_hash( + __global hash32_t* g_hashes, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + uint isolate + ) + */ + m_hash_kernel.setArg(1, m_header); + m_hash_kernel.setArg(2, m_dag); + m_hash_kernel.setArg(3, nonce); + m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop + + unsigned buf = 0; + for (unsigned i = 0; i < count || !pending.empty(); ) + { + // how many this batch + if (i < count) + { + unsigned const this_count = std::min(count - i, c_hash_batch_size); + unsigned const batch_count = std::max(this_count, m_workgroup_size); + + // supply output hash buffer to kernel + m_hash_kernel.setArg(0, m_hash_buf[buf]); + + // execute it! + clock_t start_time = clock(); + m_queue.enqueueNDRangeKernel( + m_hash_kernel, + cl::NullRange, + cl::NDRange(batch_count), + cl::NDRange(m_workgroup_size) + ); + m_queue.flush(); + + pending.push({i, this_count, buf}); + i += this_count; + buf = (buf + 1) % c_num_buffers; + } + + // read results + if (i == count || pending.size() == c_num_buffers) + { + pending_batch const& batch = pending.front(); + + // could use pinned host pointer instead, but this path isn't that important. + uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES); + memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES); + m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes); + + pending.pop(); + } + } +} + + +void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) +{ + struct pending_batch + { + uint64_t start_nonce; + unsigned buf; + }; + std::queue pending; + + static uint32_t const c_zero = 0; + + // update header constant buffer + m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); + for (unsigned i = 0; i != c_num_buffers; ++i) + { + m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); + } + cl::Event pre_return_event; + m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); + + /* + __kernel void ethash_combined_search( + __global hash32_t* g_hashes, // 0 + __constant hash32_t const* g_header, // 1 + __global hash128_t const* g_dag, // 2 + ulong start_nonce, // 3 + ulong target, // 4 + uint isolate // 5 + ) + */ + m_search_kernel.setArg(1, m_header); + m_search_kernel.setArg(2, m_dag); + + // pass these to stop the compiler unrolling the loops + m_search_kernel.setArg(4, target); + m_search_kernel.setArg(5, ~0u); + + + unsigned buf = 0; + for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size) + { + // supply output buffer to kernel + m_search_kernel.setArg(0, m_search_buf[buf]); + m_search_kernel.setArg(3, start_nonce); + + // execute it! + m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); + + pending.push({start_nonce, buf}); + buf = (buf + 1) % c_num_buffers; + + // read results + if (pending.size() == c_num_buffers) + { + pending_batch const& batch = pending.front(); + + // could use pinned host pointer instead + uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t)); + unsigned num_found = std::min(results[0], c_max_search_results); + + uint64_t nonces[c_max_search_results]; + for (unsigned i = 0; i != num_found; ++i) + { + nonces[i] = batch.start_nonce + results[i+1]; + } + + m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); + + bool exit = num_found && hook.found(nonces, num_found); + exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit + if (exit) + break; + + pending.pop(); + } + } + + // not safe to return until this is ready + pre_return_event.wait(); +} + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h new file mode 100644 index 000000000..f37100d91 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h @@ -0,0 +1,43 @@ +#pragma once + +#define __CL_ENABLE_EXCEPTIONS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#include "cl.hpp" +#include +#include + +class ethash_cl_miner +{ +public: + struct search_hook + { + // reports progress, return true to abort + virtual bool found(uint64_t const* nonces, uint32_t count) = 0; + virtual bool searched(uint64_t start_nonce, uint32_t count) = 0; + }; + +public: + ethash_cl_miner(); + + bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64); + + void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count); + void search(uint8_t const* header, uint64_t target, search_hook& hook); + +private: + static unsigned const c_max_search_results = 63; + static unsigned const c_num_buffers = 2; + static unsigned const c_hash_batch_size = 1024; + static unsigned const c_search_batch_size = 1024*256; + + ethash_params m_params; + cl::Context m_context; + cl::CommandQueue m_queue; + cl::Kernel m_hash_kernel; + cl::Kernel m_search_kernel; + cl::Buffer m_dag; + cl::Buffer m_header; + cl::Buffer m_hash_buf[c_num_buffers]; + cl::Buffer m_search_buf[c_num_buffers]; + unsigned m_workgroup_size; +}; \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt new file mode 100644 index 000000000..b30ed3e2d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt @@ -0,0 +1,15 @@ +find_package(CUDA) + +# Pass options to NVCC + + +if (CUDA_FOUND) +set(CUDA_NVCC_FLAGS " -gencode;arch=compute_30,code=sm_30; + -gencode;arch=compute_20,code=sm_20; + -gencode;arch=compute_11,code=sm_11; + -gencode;arch=compute_12,code=sm_12; + -gencode;arch=compute_13,code=sm_13;") +cuda_add_executable( + ethash-cuda + libethash.cu) +endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu new file mode 100644 index 000000000..f06653f2d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu @@ -0,0 +1,879 @@ +/* + Copyright 2009 NVIDIA Corporation. All rights reserved. + + NOTICE TO LICENSEE: + + This source code and/or documentation ("Licensed Deliverables") are subject + to NVIDIA intellectual property rights under U.S. and international Copyright + laws. + + These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL + to NVIDIA and is being provided under the terms and conditions of a form of + NVIDIA software license agreement by and between NVIDIA and Licensee ("License + Agreement") or electronically accepted by Licensee. Notwithstanding any terms + or conditions to the contrary in the License Agreement, reproduction or + disclosure of the Licensed Deliverables to any third party without the express + written consent of NVIDIA is prohibited. + + NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, + NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED + DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED + WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE + LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY + TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL + NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, + OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES. + + U.S. Government End Users. These Licensed Deliverables are a "commercial item" + as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + "commercial computer software" and "commercial computer software documentation" + as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the + U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212 + and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government + End Users acquire the Licensed Deliverables with only those rights set forth + herein. + + Any use of the Licensed Deliverables in individual and commercial software must + include, in the user documentation and internal comments to the code, the above + Disclaimer and U.S. Government End Users Notice. + */ + +/* + * cuPrintf.cu + * + * This is a printf command callable from within a kernel. It is set + * up so that output is sent to a memory buffer, which is emptied from + * the host side - but only after a cudaThreadSynchronize() on the host. + * + * Currently, there is a limitation of around 200 characters of output + * and no more than 10 arguments to a single cuPrintf() call. Issue + * multiple calls if longer format strings are required. + * + * It requires minimal setup, and is *NOT* optimised for performance. + * For example, writes are not coalesced - this is because there is an + * assumption that people will not want to printf from every single one + * of thousands of threads, but only from individual threads at a time. + * + * Using this is simple - it requires one host-side call to initialise + * everything, and then kernels can call cuPrintf at will. Sample code + * is the easiest way to demonstrate: + * + #include "cuPrintf.cu" + + __global__ void testKernel(int val) + { + cuPrintf("Value is: %d\n", val); + } + + int main() + { + cudaPrintfInit(); + testKernel<<< 2, 3 >>>(10); + cudaPrintfDisplay(stdout, true); + cudaPrintfEnd(); + return 0; + } + * + * See the header file, "cuPrintf.cuh" for more info, especially + * arguments to cudaPrintfInit() and cudaPrintfDisplay(); + */ + +#ifndef CUPRINTF_CU +#define CUPRINTF_CU + +#include "cuPrintf.cuh" +#if __CUDA_ARCH__ > 100 // Atomics only used with > sm_10 architecture +#include +#endif + +// This is the smallest amount of memory, per-thread, which is allowed. +// It is also the largest amount of space a single printf() can take up +const static int CUPRINTF_MAX_LEN = 256; + +// This structure is used internally to track block/thread output restrictions. +typedef struct __align__(8) { + int threadid; // CUPRINTF_UNRESTRICTED for unrestricted + int blockid; // CUPRINTF_UNRESTRICTED for unrestricted +} cuPrintfRestriction; + +// The main storage is in a global print buffer, which has a known +// start/end/length. These are atomically updated so it works as a +// circular buffer. +// Since the only control primitive that can be used is atomicAdd(), +// we cannot wrap the pointer as such. The actual address must be +// calculated from printfBufferPtr by mod-ing with printfBufferLength. +// For sm_10 architecture, we must subdivide the buffer per-thread +// since we do not even have an atomic primitive. +__constant__ static char *globalPrintfBuffer = NULL; // Start of circular buffer (set up by host) +__constant__ static int printfBufferLength = 0; // Size of circular buffer (set up by host) +__device__ static cuPrintfRestriction restrictRules; // Output restrictions +__device__ volatile static char *printfBufferPtr = NULL; // Current atomically-incremented non-wrapped offset + +// This is the header preceeding all printf entries. +// NOTE: It *must* be size-aligned to the maximum entity size (size_t) +typedef struct __align__(8) { + unsigned short magic; // Magic number says we're valid + unsigned short fmtoffset; // Offset of fmt string into buffer + unsigned short blockid; // Block ID of author + unsigned short threadid; // Thread ID of author +} cuPrintfHeader; + +// Special header for sm_10 architecture +#define CUPRINTF_SM10_MAGIC 0xC810 // Not a valid ascii character +typedef struct __align__(16) { + unsigned short magic; // sm_10 specific magic number + unsigned short unused; + unsigned int thread_index; // thread ID for this buffer + unsigned int thread_buf_len; // per-thread buffer length + unsigned int offset; // most recent printf's offset +} cuPrintfHeaderSM10; + + +// Because we can't write an element which is not aligned to its bit-size, +// we have to align all sizes and variables on maximum-size boundaries. +// That means sizeof(double) in this case, but we'll use (long long) for +// better arch<1.3 support +#define CUPRINTF_ALIGN_SIZE sizeof(long long) + +// All our headers are prefixed with a magic number so we know they're ready +#define CUPRINTF_SM11_MAGIC (unsigned short)0xC811 // Not a valid ascii character + + +// +// getNextPrintfBufPtr +// +// Grabs a block of space in the general circular buffer, using an +// atomic function to ensure that it's ours. We handle wrapping +// around the circular buffer and return a pointer to a place which +// can be written to. +// +// Important notes: +// 1. We always grab CUPRINTF_MAX_LEN bytes +// 2. Because of 1, we never worry about wrapping around the end +// 3. Because of 1, printfBufferLength *must* be a factor of CUPRINTF_MAX_LEN +// +// This returns a pointer to the place where we own. +// +__device__ static char *getNextPrintfBufPtr() +{ + // Initialisation check + if(!printfBufferPtr) + return NULL; + + // Thread/block restriction check + if((restrictRules.blockid != CUPRINTF_UNRESTRICTED) && (restrictRules.blockid != (blockIdx.x + gridDim.x*blockIdx.y))) + return NULL; + if((restrictRules.threadid != CUPRINTF_UNRESTRICTED) && (restrictRules.threadid != (threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z))) + return NULL; + + // Conditional section, dependent on architecture +#if __CUDA_ARCH__ == 100 + // For sm_10 architectures, we have no atomic add - this means we must split the + // entire available buffer into per-thread blocks. Inefficient, but what can you do. + int thread_count = (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z); + int thread_index = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z + + (blockIdx.x + gridDim.x*blockIdx.y) * (blockDim.x * blockDim.y * blockDim.z); + + // Find our own block of data and go to it. Make sure the per-thread length + // is a precise multiple of CUPRINTF_MAX_LEN, otherwise we risk size and + // alignment issues! We must round down, of course. + unsigned int thread_buf_len = printfBufferLength / thread_count; + thread_buf_len &= ~(CUPRINTF_MAX_LEN-1); + + // We *must* have a thread buffer length able to fit at least two printfs (one header, one real) + if(thread_buf_len < (CUPRINTF_MAX_LEN * 2)) + return NULL; + + // Now address our section of the buffer. The first item is a header. + char *myPrintfBuffer = globalPrintfBuffer + (thread_buf_len * thread_index); + cuPrintfHeaderSM10 hdr = *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer; + if(hdr.magic != CUPRINTF_SM10_MAGIC) + { + // If our header is not set up, initialise it + hdr.magic = CUPRINTF_SM10_MAGIC; + hdr.thread_index = thread_index; + hdr.thread_buf_len = thread_buf_len; + hdr.offset = 0; // Note we start at 0! We pre-increment below. + *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer = hdr; // Write back the header + + // For initial setup purposes, we might need to init thread0's header too + // (so that cudaPrintfDisplay() below will work). This is only run once. + cuPrintfHeaderSM10 *tophdr = (cuPrintfHeaderSM10 *)(void *)globalPrintfBuffer; + tophdr->thread_buf_len = thread_buf_len; + } + + // Adjust the offset by the right amount, and wrap it if need be + unsigned int offset = hdr.offset + CUPRINTF_MAX_LEN; + if(offset >= hdr.thread_buf_len) + offset = CUPRINTF_MAX_LEN; + + // Write back the new offset for next time and return a pointer to it + ((cuPrintfHeaderSM10 *)(void *)myPrintfBuffer)->offset = offset; + return myPrintfBuffer + offset; +#else + // Much easier with an atomic operation! + size_t offset = atomicAdd((unsigned int *)&printfBufferPtr, CUPRINTF_MAX_LEN) - (size_t)globalPrintfBuffer; + offset %= printfBufferLength; + return globalPrintfBuffer + offset; +#endif +} + + +// +// writePrintfHeader +// +// Inserts the header for containing our UID, fmt position and +// block/thread number. We generate it dynamically to avoid +// issues arising from requiring pre-initialisation. +// +__device__ static void writePrintfHeader(char *ptr, char *fmtptr) +{ + if(ptr) + { + cuPrintfHeader header; + header.magic = CUPRINTF_SM11_MAGIC; + header.fmtoffset = (unsigned short)(fmtptr - ptr); + header.blockid = blockIdx.x + gridDim.x*blockIdx.y; + header.threadid = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z; + *(cuPrintfHeader *)(void *)ptr = header; + } +} + + +// +// cuPrintfStrncpy +// +// This special strncpy outputs an aligned length value, followed by the +// string. It then zero-pads the rest of the string until a 64-aligned +// boundary. The length *includes* the padding. A pointer to the byte +// just after the \0 is returned. +// +// This function could overflow CUPRINTF_MAX_LEN characters in our buffer. +// To avoid it, we must count as we output and truncate where necessary. +// +__device__ static char *cuPrintfStrncpy(char *dest, const char *src, int n, char *end) +{ + // Initialisation and overflow check + if(!dest || !src || (dest >= end)) + return NULL; + + // Prepare to write the length specifier. We're guaranteed to have + // at least "CUPRINTF_ALIGN_SIZE" bytes left because we only write out in + // chunks that size, and CUPRINTF_MAX_LEN is aligned with CUPRINTF_ALIGN_SIZE. + int *lenptr = (int *)(void *)dest; + int len = 0; + dest += CUPRINTF_ALIGN_SIZE; + + // Now copy the string + while(n--) + { + if(dest >= end) // Overflow check + break; + + len++; + *dest++ = *src; + if(*src++ == '\0') + break; + } + + // Now write out the padding bytes, and we have our length. + while((dest < end) && (((long)dest & (CUPRINTF_ALIGN_SIZE-1)) != 0)) + { + len++; + *dest++ = 0; + } + *lenptr = len; + return (dest < end) ? dest : NULL; // Overflow means return NULL +} + + +// +// copyArg +// +// This copies a length specifier and then the argument out to the +// data buffer. Templates let the compiler figure all this out at +// compile-time, making life much simpler from the programming +// point of view. I'm assuimg all (const char *) is a string, and +// everything else is the variable it points at. I'd love to see +// a better way of doing it, but aside from parsing the format +// string I can't think of one. +// +// The length of the data type is inserted at the beginning (so that +// the display can distinguish between float and double), and the +// pointer to the end of the entry is returned. +// +__device__ static char *copyArg(char *ptr, const char *arg, char *end) +{ + // Initialisation check + if(!ptr || !arg) + return NULL; + + // strncpy does all our work. We just terminate. + if((ptr = cuPrintfStrncpy(ptr, arg, CUPRINTF_MAX_LEN, end)) != NULL) + *ptr = 0; + + return ptr; +} + +template +__device__ static char *copyArg(char *ptr, T &arg, char *end) +{ + // Initisalisation and overflow check. Alignment rules mean that + // we're at least CUPRINTF_ALIGN_SIZE away from "end", so we only need + // to check that one offset. + if(!ptr || ((ptr+CUPRINTF_ALIGN_SIZE) >= end)) + return NULL; + + // Write the length and argument + *(int *)(void *)ptr = sizeof(arg); + ptr += CUPRINTF_ALIGN_SIZE; + *(T *)(void *)ptr = arg; + ptr += CUPRINTF_ALIGN_SIZE; + *ptr = 0; + + return ptr; +} + + +// +// cuPrintf +// +// Templated printf functions to handle multiple arguments. +// Note we return the total amount of data copied, not the number +// of characters output. But then again, who ever looks at the +// return from printf() anyway? +// +// The format is to grab a block of circular buffer space, the +// start of which will hold a header and a pointer to the format +// string. We then write in all the arguments, and finally the +// format string itself. This is to make it easy to prevent +// overflow of our buffer (we support up to 10 arguments, each of +// which can be 12 bytes in length - that means that only the +// format string (or a %s) can actually overflow; so the overflow +// check need only be in the strcpy function. +// +// The header is written at the very last because that's what +// makes it look like we're done. +// +// Errors, which are basically lack-of-initialisation, are ignored +// in the called functions because NULL pointers are passed around +// + +// All printf variants basically do the same thing, setting up the +// buffer, writing all arguments, then finalising the header. For +// clarity, we'll pack the code into some big macros. +#define CUPRINTF_PREAMBLE \ + char *start, *end, *bufptr, *fmtstart; \ + if((start = getNextPrintfBufPtr()) == NULL) return 0; \ + end = start + CUPRINTF_MAX_LEN; \ + bufptr = start + sizeof(cuPrintfHeader); + +// Posting an argument is easy +#define CUPRINTF_ARG(argname) \ + bufptr = copyArg(bufptr, argname, end); + +// After args are done, record start-of-fmt and write the fmt and header +#define CUPRINTF_POSTAMBLE \ + fmtstart = bufptr; \ + end = cuPrintfStrncpy(bufptr, fmt, CUPRINTF_MAX_LEN, end); \ + writePrintfHeader(start, end ? fmtstart : NULL); \ + return end ? (int)(end - start) : 0; + +__device__ int cuPrintf(const char *fmt) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + CUPRINTF_ARG(arg6); + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + CUPRINTF_ARG(arg6); + CUPRINTF_ARG(arg7); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + CUPRINTF_ARG(arg6); + CUPRINTF_ARG(arg7); + CUPRINTF_ARG(arg8); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + CUPRINTF_ARG(arg6); + CUPRINTF_ARG(arg7); + CUPRINTF_ARG(arg8); + CUPRINTF_ARG(arg9); + + CUPRINTF_POSTAMBLE; +} +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10) +{ + CUPRINTF_PREAMBLE; + + CUPRINTF_ARG(arg1); + CUPRINTF_ARG(arg2); + CUPRINTF_ARG(arg3); + CUPRINTF_ARG(arg4); + CUPRINTF_ARG(arg5); + CUPRINTF_ARG(arg6); + CUPRINTF_ARG(arg7); + CUPRINTF_ARG(arg8); + CUPRINTF_ARG(arg9); + CUPRINTF_ARG(arg10); + + CUPRINTF_POSTAMBLE; +} +#undef CUPRINTF_PREAMBLE +#undef CUPRINTF_ARG +#undef CUPRINTF_POSTAMBLE + + +// +// cuPrintfRestrict +// +// Called to restrict output to a given thread/block. +// We store the info in "restrictRules", which is set up at +// init time by the host. It's not the cleanest way to do this +// because it means restrictions will last between +// invocations, but given the output-pointer continuity, +// I feel this is reasonable. +// +__device__ void cuPrintfRestrict(int threadid, int blockid) +{ + int thread_count = blockDim.x * blockDim.y * blockDim.z; + if(((threadid < thread_count) && (threadid >= 0)) || (threadid == CUPRINTF_UNRESTRICTED)) + restrictRules.threadid = threadid; + + int block_count = gridDim.x * gridDim.y; + if(((blockid < block_count) && (blockid >= 0)) || (blockid == CUPRINTF_UNRESTRICTED)) + restrictRules.blockid = blockid; +} + + +/////////////////////////////////////////////////////////////////////////////// +// HOST SIDE + +#include +static FILE *printf_fp; + +static char *printfbuf_start=NULL; +static char *printfbuf_device=NULL; +static int printfbuf_len=0; + + +// +// outputPrintfData +// +// Our own internal function, which takes a pointer to a data buffer +// and passes it through libc's printf for output. +// +// We receive the formate string and a pointer to where the data is +// held. We then run through and print it out. +// +// Returns 0 on failure, 1 on success +// +static int outputPrintfData(char *fmt, char *data) +{ + // Format string is prefixed by a length that we don't need + fmt += CUPRINTF_ALIGN_SIZE; + + // Now run through it, printing everything we can. We must + // run to every % character, extract only that, and use printf + // to format it. + char *p = strchr(fmt, '%'); + while(p != NULL) + { + // Print up to the % character + *p = '\0'; + fputs(fmt, printf_fp); + *p = '%'; // Put back the % + + // Now handle the format specifier + char *format = p++; // Points to the '%' + p += strcspn(p, "%cdiouxXeEfgGaAnps"); + if(*p == '\0') // If no format specifier, print the whole thing + { + fmt = format; + break; + } + + // Cut out the format bit and use printf to print it. It's prefixed + // by its length. + int arglen = *(int *)data; + if(arglen > CUPRINTF_MAX_LEN) + { + fputs("Corrupt printf buffer data - aborting\n", printf_fp); + return 0; + } + + data += CUPRINTF_ALIGN_SIZE; + + char specifier = *p++; + char c = *p; // Store for later + *p = '\0'; + switch(specifier) + { + // These all take integer arguments + case 'c': + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + case 'p': + fprintf(printf_fp, format, *((int *)data)); + break; + + // These all take double arguments + case 'e': + case 'E': + case 'f': + case 'g': + case 'G': + case 'a': + case 'A': + if(arglen == 4) // Float vs. Double thing + fprintf(printf_fp, format, *((float *)data)); + else + fprintf(printf_fp, format, *((double *)data)); + break; + + // Strings are handled in a special way + case 's': + fprintf(printf_fp, format, (char *)data); + break; + + // % is special + case '%': + fprintf(printf_fp, "%%"); + break; + + // Everything else is just printed out as-is + default: + fprintf(printf_fp, format); + break; + } + data += CUPRINTF_ALIGN_SIZE; // Move on to next argument + *p = c; // Restore what we removed + fmt = p; // Adjust fmt string to be past the specifier + p = strchr(fmt, '%'); // and get the next specifier + } + + // Print out the last of the string + fputs(fmt, printf_fp); + return 1; +} + + +// +// doPrintfDisplay +// +// This runs through the blocks of CUPRINTF_MAX_LEN-sized data, calling the +// print function above to display them. We've got this separate from +// cudaPrintfDisplay() below so we can handle the SM_10 architecture +// partitioning. +// +static int doPrintfDisplay(int headings, int clear, char *bufstart, char *bufend, char *bufptr, char *endptr) +{ + // Grab, piece-by-piece, each output element until we catch + // up with the circular buffer end pointer + int printf_count=0; + char printfbuf_local[CUPRINTF_MAX_LEN+1]; + printfbuf_local[CUPRINTF_MAX_LEN] = '\0'; + + while(bufptr != endptr) + { + // Wrap ourselves at the end-of-buffer + if(bufptr == bufend) + bufptr = bufstart; + + // Adjust our start pointer to within the circular buffer and copy a block. + cudaMemcpy(printfbuf_local, bufptr, CUPRINTF_MAX_LEN, cudaMemcpyDeviceToHost); + + // If the magic number isn't valid, then this write hasn't gone through + // yet and we'll wait until it does (or we're past the end for non-async printfs). + cuPrintfHeader *hdr = (cuPrintfHeader *)printfbuf_local; + if((hdr->magic != CUPRINTF_SM11_MAGIC) || (hdr->fmtoffset >= CUPRINTF_MAX_LEN)) + { + //fprintf(printf_fp, "Bad magic number in printf header\n"); + break; + } + + // Extract all the info and get this printf done + if(headings) + fprintf(printf_fp, "[%d, %d]: ", hdr->blockid, hdr->threadid); + if(hdr->fmtoffset == 0) + fprintf(printf_fp, "printf buffer overflow\n"); + else if(!outputPrintfData(printfbuf_local+hdr->fmtoffset, printfbuf_local+sizeof(cuPrintfHeader))) + break; + printf_count++; + + // Clear if asked + if(clear) + cudaMemset(bufptr, 0, CUPRINTF_MAX_LEN); + + // Now advance our start location, because we're done, and keep copying + bufptr += CUPRINTF_MAX_LEN; + } + + return printf_count; +} + + +// +// cudaPrintfInit +// +// Takes a buffer length to allocate, creates the memory on the device and +// returns a pointer to it for when a kernel is called. It's up to the caller +// to free it. +// +extern "C" cudaError_t cudaPrintfInit(size_t bufferLen) +{ + // Fix up bufferlen to be a multiple of CUPRINTF_MAX_LEN + bufferLen = (bufferLen < CUPRINTF_MAX_LEN) ? CUPRINTF_MAX_LEN : bufferLen; + if((bufferLen % CUPRINTF_MAX_LEN) > 0) + bufferLen += (CUPRINTF_MAX_LEN - (bufferLen % CUPRINTF_MAX_LEN)); + printfbuf_len = (int)bufferLen; + + // Allocate a print buffer on the device and zero it + if(cudaMalloc((void **)&printfbuf_device, printfbuf_len) != cudaSuccess) + return cudaErrorInitializationError; + cudaMemset(printfbuf_device, 0, printfbuf_len); + printfbuf_start = printfbuf_device; // Where we start reading from + + // No restrictions to begin with + cuPrintfRestriction restrict; + restrict.threadid = restrict.blockid = CUPRINTF_UNRESTRICTED; + cudaMemcpyToSymbol(restrictRules, &restrict, sizeof(restrict)); + + // Initialise the buffer and the respective lengths/pointers. + cudaMemcpyToSymbol(globalPrintfBuffer, &printfbuf_device, sizeof(char *)); + cudaMemcpyToSymbol(printfBufferPtr, &printfbuf_device, sizeof(char *)); + cudaMemcpyToSymbol(printfBufferLength, &printfbuf_len, sizeof(printfbuf_len)); + + return cudaSuccess; +} + + +// +// cudaPrintfEnd +// +// Frees up the memory which we allocated +// +extern "C" void cudaPrintfEnd() +{ + if(!printfbuf_start || !printfbuf_device) + return; + + cudaFree(printfbuf_device); + printfbuf_start = printfbuf_device = NULL; +} + + +// +// cudaPrintfDisplay +// +// Each call to this function dumps the entire current contents +// of the printf buffer to the pre-specified FILE pointer. The +// circular "start" pointer is advanced so that subsequent calls +// dumps only new stuff. +// +// In the case of async memory access (via streams), call this +// repeatedly to keep trying to empty the buffer. If it's a sync +// access, then the whole buffer should empty in one go. +// +// Arguments: +// outputFP - File descriptor to output to (NULL => stdout) +// showThreadID - If true, prints [block,thread] before each line +// +extern "C" cudaError_t cudaPrintfDisplay(void *outputFP, bool showThreadID) +{ + printf_fp = (FILE *)((outputFP == NULL) ? stdout : outputFP); + + // For now, we force "synchronous" mode which means we're not concurrent + // with kernel execution. This also means we don't need clearOnPrint. + // If you're patching it for async operation, here's where you want it. + bool sync_printfs = true; + bool clearOnPrint = false; + + // Initialisation check + if(!printfbuf_start || !printfbuf_device || !printf_fp) + return cudaErrorMissingConfiguration; + + // To determine which architecture we're using, we read the + // first short from the buffer - it'll be the magic number + // relating to the version. + unsigned short magic; + cudaMemcpy(&magic, printfbuf_device, sizeof(unsigned short), cudaMemcpyDeviceToHost); + + // For SM_10 architecture, we've split our buffer into one-per-thread. + // That means we must do each thread block separately. It'll require + // extra reading. We also, for now, don't support async printfs because + // that requires tracking one start pointer per thread. + if(magic == CUPRINTF_SM10_MAGIC) + { + sync_printfs = true; + clearOnPrint = false; + int blocklen = 0; + char *blockptr = printfbuf_device; + while(blockptr < (printfbuf_device + printfbuf_len)) + { + cuPrintfHeaderSM10 hdr; + cudaMemcpy(&hdr, blockptr, sizeof(hdr), cudaMemcpyDeviceToHost); + + // We get our block-size-step from the very first header + if(hdr.thread_buf_len != 0) + blocklen = hdr.thread_buf_len; + + // No magic number means no printfs from this thread + if(hdr.magic != CUPRINTF_SM10_MAGIC) + { + if(blocklen == 0) + { + fprintf(printf_fp, "No printf headers found at all!\n"); + break; // No valid headers! + } + blockptr += blocklen; + continue; + } + + // "offset" is non-zero then we can print the block contents + if(hdr.offset > 0) + { + // For synchronous printfs, we must print from endptr->bufend, then from start->end + if(sync_printfs) + doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+hdr.offset+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len); + doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.offset+CUPRINTF_MAX_LEN); + } + + // Move on to the next block and loop again + blockptr += hdr.thread_buf_len; + } + } + // For SM_11 and up, everything is a single buffer and it's simple + else if(magic == CUPRINTF_SM11_MAGIC) + { + // Grab the current "end of circular buffer" pointer. + char *printfbuf_end = NULL; + cudaMemcpyFromSymbol(&printfbuf_end, printfBufferPtr, sizeof(char *)); + + // Adjust our starting and ending pointers to within the block + char *bufptr = ((printfbuf_start - printfbuf_device) % printfbuf_len) + printfbuf_device; + char *endptr = ((printfbuf_end - printfbuf_device) % printfbuf_len) + printfbuf_device; + + // For synchronous (i.e. after-kernel-exit) printf display, we have to handle circular + // buffer wrap carefully because we could miss those past "end". + if(sync_printfs) + doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, endptr, printfbuf_device+printfbuf_len); + doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, bufptr, endptr); + + printfbuf_start = printfbuf_end; + } + else + ;//printf("Bad magic number in cuPrintf buffer header\n"); + + // If we were synchronous, then we must ensure that the memory is cleared on exit + // otherwise another kernel launch with a different grid size could conflict. + if(sync_printfs) + cudaMemset(printfbuf_device, 0, printfbuf_len); + + return cudaSuccess; +} + +// Cleanup +#undef CUPRINTF_MAX_LEN +#undef CUPRINTF_ALIGN_SIZE +#undef CUPRINTF_SM10_MAGIC +#undef CUPRINTF_SM11_MAGIC + +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh new file mode 100644 index 000000000..cf3fe4868 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh @@ -0,0 +1,162 @@ +/* + Copyright 2009 NVIDIA Corporation. All rights reserved. + + NOTICE TO LICENSEE: + + This source code and/or documentation ("Licensed Deliverables") are subject + to NVIDIA intellectual property rights under U.S. and international Copyright + laws. + + These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL + to NVIDIA and is being provided under the terms and conditions of a form of + NVIDIA software license agreement by and between NVIDIA and Licensee ("License + Agreement") or electronically accepted by Licensee. Notwithstanding any terms + or conditions to the contrary in the License Agreement, reproduction or + disclosure of the Licensed Deliverables to any third party without the express + written consent of NVIDIA is prohibited. + + NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, + NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED + DELIVERABLES FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED + WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE + LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. NOTWITHSTANDING ANY + TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL + NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, + OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES. + + U.S. Government End Users. These Licensed Deliverables are a "commercial item" + as that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + "commercial computer software" and "commercial computer software documentation" + as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the + U.S. Government only as a commercial end item. Consistent with 48 C.F.R.12.212 + and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government + End Users acquire the Licensed Deliverables with only those rights set forth + herein. + + Any use of the Licensed Deliverables in individual and commercial software must + include, in the user documentation and internal comments to the code, the above + Disclaimer and U.S. Government End Users Notice. + */ + +#ifndef CUPRINTF_H +#define CUPRINTF_H + +/* + * This is the header file supporting cuPrintf.cu and defining both + * the host and device-side interfaces. See that file for some more + * explanation and sample use code. See also below for details of the + * host-side interfaces. + * + * Quick sample code: + * + #include "cuPrintf.cu" + + __global__ void testKernel(int val) + { + cuPrintf("Value is: %d\n", val); + } + + int main() + { + cudaPrintfInit(); + testKernel<<< 2, 3 >>>(10); + cudaPrintfDisplay(stdout, true); + cudaPrintfEnd(); + return 0; + } + */ + +/////////////////////////////////////////////////////////////////////////////// +// DEVICE SIDE +// External function definitions for device-side code + +// Abuse of templates to simulate varargs +__device__ int cuPrintf(const char *fmt); +template __device__ int cuPrintf(const char *fmt, T1 arg1); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9); +template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10); + + +// +// cuPrintfRestrict +// +// Called to restrict output to a given thread/block. Pass +// the constant CUPRINTF_UNRESTRICTED to unrestrict output +// for thread/block IDs. Note you can therefore allow +// "all printfs from block 3" or "printfs from thread 2 +// on all blocks", or "printfs only from block 1, thread 5". +// +// Arguments: +// threadid - Thread ID to allow printfs from +// blockid - Block ID to allow printfs from +// +// NOTE: Restrictions last between invocations of +// kernels unless cudaPrintfInit() is called again. +// +#define CUPRINTF_UNRESTRICTED -1 +__device__ void cuPrintfRestrict(int threadid, int blockid); + + + +/////////////////////////////////////////////////////////////////////////////// +// HOST SIDE +// External function definitions for host-side code + +// +// cudaPrintfInit +// +// Call this once to initialise the printf system. If the output +// file or buffer size needs to be changed, call cudaPrintfEnd() +// before re-calling cudaPrintfInit(). +// +// The default size for the buffer is 1 megabyte. For CUDA +// architecture 1.1 and above, the buffer is filled linearly and +// is completely used; however for architecture 1.0, the buffer +// is divided into as many segments are there are threads, even +// if some threads do not call cuPrintf(). +// +// Arguments: +// bufferLen - Length, in bytes, of total space to reserve +// (in device global memory) for output. +// +// Returns: +// cudaSuccess if all is well. +// +extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together + +// +// cudaPrintfEnd +// +// Cleans up all memories allocated by cudaPrintfInit(). +// Call this at exit, or before calling cudaPrintfInit() again. +// +extern "C" void cudaPrintfEnd(); + +// +// cudaPrintfDisplay +// +// Dumps the contents of the output buffer to the specified +// file pointer. If the output pointer is not specified, +// the default "stdout" is used. +// +// Arguments: +// outputFP - A file pointer to an output stream. +// showThreadID - If "true", output strings are prefixed +// by "[blockid, threadid] " at output. +// +// Returns: +// cudaSuccess if all is well. +// +extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false); + +#endif // CUPRINTF_H diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu new file mode 100644 index 000000000..3e53c8853 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu @@ -0,0 +1,27 @@ +#include "cuPrintf.cu" +#include + +__global__ void device_greetings(void) +{ + cuPrintf("Hello, world from the device!\n"); +} + +int main(void) +{ + // greet from the host + printf("Hello, world from the host!\n"); + + // initialize cuPrintf + cudaPrintfInit(); + + // launch a kernel with a single thread to greet from the device + device_greetings<<<1,1>>>(); + + // display the device's greeting + cudaPrintfDisplay(); + + // clean up after cuPrintf + cudaPrintfEnd(); + + return 0; +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt new file mode 100644 index 000000000..bef63ef0a --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt @@ -0,0 +1,33 @@ +set(LIBRARY ethash) +set(CMAKE_BUILD_TYPE Release) + +if (NOT MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") +endif() + +set(FILES util.c + util.h + internal.c + ethash.h + endian.h + compiler.h + fnv.h + data_sizes.h) + +if (NOT CRYPTOPP_FOUND) + find_package(CryptoPP 5.6.2) +endif() + +if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) + include_directories( ${CRYPTOPP_INCLUDE_DIRS} ) + list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h) +else() + list(APPEND FILES sha3.c sha3.h) +endif() + +add_library(${LIBRARY} ${FILES}) + +if (CRYPTOPP_FOUND) + TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES}) +endif() \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h new file mode 100644 index 000000000..9695871cd --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h @@ -0,0 +1,33 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file compiler.h + * @date 2014 + */ +#pragma once + +// Visual Studio doesn't support the inline keyword in C mode +#if defined(_MSC_VER) && !defined(__cplusplus) +#define inline __inline +#endif + +// pretend restrict is a standard keyword +#if defined(_MSC_VER) +#define restrict __restrict +#else +#define restrict __restrict__ +#endif + diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h new file mode 100644 index 000000000..40417cb71 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h @@ -0,0 +1,248 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software FoundationUUU,either version 3 of the LicenseUUU,or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be usefulU, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If notUUU,see . +*/ + +/** @file nth_prime.h +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +// TODO: Update this after ~7 years + +#pragma once + +#include +//#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +// 500 Epochs worth of tabulated DAG sizes (~3.5 Years) + +// Generated with the following Mathematica Code: +// GetDataSizes[n_] := Module[{ +// DAGSizeBytesInit = 2^30, +// MixBytes = 128, +// DAGGrowth = 113000000, +// j = 0}, +// Reap[ +// While[j < n, +// Module[{i = +// Floor[(DAGSizeBytesInit + DAGGrowth * j) / MixBytes]}, +// While[! PrimeQ[i], i--]; +// Sow[i*MixBytes]; j++]]]][[2]][[1]] + +static const size_t dag_sizes[] = { + 1073739904U, 1186739584U, 1299741568U, 1412741248U, 1525741696U, + 1638736768U, 1751741312U, 1864740736U, 1977740672U, 2090740864U, + 2203740544U, 2316741248U, 2429739392U, 2542740352U, 2655741824U, + 2768739712U, 2881740416U, 2994741632U, 3107740544U, 3220741504U, + 3333738112U, 3446741632U, 3559741312U, 3672740224U, 3785740928U, + 3898738304U, 4011741824U, 4124739712U, 4237735808U, 4350740864U, + 4463741824U, 4576741504U, 4689741184U, 4802739328U, 4915741568U, + 5028740224U, 5141740672U, 5254738304U, 5367741824U, 5480737664U, + 5593738112U, 5706741632U, 5819740544U, 5932734592U, 6045739904U, + 6158740096U, 6271740032U, 6384731776U, 6497732992U, 6610740352U, + 6723741056U, 6836741504U, 6949740416U, 7062740096U, 7175741824U, + 7288740224U, 7401741184U, 7514741632U, 7627741568U, 7740739712U, + 7853739136U, 7966740352U, 8079741568U, 8192739712U, 8305738624U, + 8418740864U, 8531740288U, 8644740736U, 8757735808U, 8870738816U, + 8983739264U, 9096740992U, 9209740928U, 9322739584U, 9435741824U, + 9548741504U, 9661739392U, 9774738304U, 9887741312U, 10000738688U, + 10113739136U, 10226741632U, 10339739776U, 10452741248U, 10565740928U, + 10678736512U, 10791734656U, 10904741248U, 11017738112U, 11130741632U, + 11243741312U, 11356739456U, 11469740416U, 11582734976U, 11695739008U, + 11808741248U, 11921734784U, 12034739072U, 12147741568U, 12260737408U, + 12373741696U, 12486738304U, 12599740544U, 12712740224U, 12825741184U, + 12938736256U, 13051741312U, 13164737408U, 13277738368U, 13390738048U, + 13503741824U, 13616741504U, 13729737088U, 13842740096U, 13955741312U, + 14068741504U, 14181740416U, 14294741632U, 14407739776U, 14520740224U, + 14633740928U, 14746736512U, 14859741824U, 14972740736U, 15085740928U, + 15198738304U, 15311732096U, 15424740736U, 15537739904U, 15650741632U, + 15763741568U, 15876737152U, 15989741696U, 16102740608U, 16215741056U, + 16328741248U, 16441740416U, 16554737792U, 16667740288U, 16780740992U, + 16893738112U, 17006741632U, 17119739008U, 17232735616U, 17345739392U, + 17458740352U, 17571736192U, 17684739712U, 17797739392U, 17910740096U, + 18023741312U, 18136740736U, 18249738112U, 18362738816U, 18475735424U, + 18588740224U, 18701738368U, 18814736768U, 18927737216U, 19040739968U, + 19153739648U, 19266736768U, 19379737984U, 19492739456U, 19605738368U, + 19718740352U, 19831741312U, 19944736384U, 20057741696U, 20170741376U, + 20283741824U, 20396737408U, 20509741696U, 20622741376U, 20735739008U, + 20848741504U, 20961740672U, 21074739328U, 21187740032U, 21300739456U, + 21413741696U, 21526740608U, 21639741824U, 21752737408U, 21865741696U, + 21978741376U, 22091741824U, 22204738432U, 22317740672U, 22430740096U, + 22543736704U, 22656741248U, 22769739904U, 22882739584U, 22995740288U, + 23108740736U, 23221740928U, 23334741376U, 23447737216U, 23560740992U, + 23673741184U, 23786740864U, 23899737728U, 24012741248U, 24125734784U, + 24238736512U, 24351741824U, 24464740736U, 24577737088U, 24690741632U, + 24803739776U, 24916740736U, 25029740416U, 25142740864U, 25255741568U, + 25368741248U, 25481740672U, 25594741376U, 25707741568U, 25820741504U, + 25933730432U, 26046739072U, 26159741824U, 26272741504U, 26385740672U, + 26498740096U, 26611741568U, 26724740992U, 26837739904U, 26950735232U, + 27063738496U, 27176741248U, 27289741184U, 27402740864U, 27515740544U, + 27628737152U, 27741740672U, 27854741632U, 27967740544U, 28080739712U, + 28193738368U, 28306741376U, 28419737728U, 28532739968U, 28645739648U, + 28758740096U, 28871741312U, 28984739456U, 29097740416U, 29210740864U, + 29323741312U, 29436740224U, 29549741696U, 29662738304U, 29775741568U, + 29888741504U, 30001740928U, 30114737024U, 30227735168U, 30340737664U, + 30453738368U, 30566737024U, 30679733632U, 30792740224U, 30905740928U, + 31018740352U, 31131740032U, 31244738944U, 31357737344U, 31470741376U, + 31583740544U, 31696740224U, 31809738112U, 31922739328U, 32035737472U, + 32148740992U, 32261741696U, 32374740352U, 32487741824U, 32600740736U, + 32713739648U, 32826740608U, 32939729792U, 33052740992U, 33165740672U, + 33278739584U, 33391741312U, 33504739712U, 33617740928U, 33730740608U, + 33843738496U, 33956739968U, 34069741696U, 34182739328U, 34295741824U, + 34408739968U, 34521740672U, 34634736512U, 34747741568U, 34860741248U, + 34973739392U, 35086738304U, 35199741056U, 35312736896U, 35425741184U, + 35538741376U, 35651740288U, 35764737152U, 35877741184U, 35990739584U, + 36103740544U, 36216740992U, 36329739392U, 36442737536U, 36555741568U, + 36668740736U, 36781741184U, 36894737024U, 37007741312U, 37120739456U, + 37233741184U, 37346736256U, 37459736192U, 37572734336U, 37685739904U, + 37798740352U, 37911737728U, 38024741504U, 38137739648U, 38250740608U, + 38363741824U, 38476740992U, 38589741184U, 38702740096U, 38815741312U, + 38928741248U, 39041738368U, 39154739584U, 39267741824U, 39380739712U, + 39493735808U, 39606741632U, 39719741312U, 39832741504U, 39945739648U, + 40058740352U, 40171740032U, 40284740992U, 40397740672U, 40510740352U, + 40623740288U, 40736738176U, 40849737856U, 40962741376U, 41075739776U, + 41188737664U, 41301735808U, 41414738048U, 41527741312U, 41640740992U, + 41753739904U, 41866739072U, 41979738496U, 42092740736U, 42205739648U, + 42318740608U, 42431741312U, 42544738688U, 42657741184U, 42770738048U, + 42883741568U, 42996741248U, 43109740928U, 43222736512U, 43335741056U, + 43448730496U, 43561740416U, 43674741632U, 43787740544U, 43900741504U, + 44013739648U, 44126740864U, 44239740544U, 44352741248U, 44465738368U, + 44578735232U, 44691739264U, 44804741504U, 44917741696U, 45030741376U, + 45143741824U, 45256740992U, 45369739136U, 45482740096U, 45595739776U, + 45708739712U, 45821740672U, 45934741376U, 46047741056U, 46160741248U, + 46273737088U, 46386740864U, 46499739008U, 46612739968U, 46725735296U, + 46838740864U, 46951741568U, 47064737152U, 47177741696U, 47290741376U, + 47403738752U, 47516741248U, 47629739648U, 47742741632U, 47855737984U, + 47968740224U, 48081738368U, 48194741632U, 48307739264U, 48420739712U, + 48533739136U, 48646738304U, 48759741824U, 48872741504U, 48985739392U, + 49098741376U, 49211741056U, 49324740992U, 49437738368U, 49550740864U, + 49663735424U, 49776737408U, 49889740672U, 50002738816U, 50115738752U, + 50228739712U, 50341741696U, 50454736768U, 50567738752U, 50680739968U, + 50793736832U, 50906734976U, 51019741568U, 51132739456U, 51245741696U, + 51358741376U, 51471741056U, 51584738944U, 51697734272U, 51810739072U, + 51923736448U, 52036740736U, 52149741184U, 52262737024U, 52375738496U, + 52488740992U, 52601739136U, 52714740352U, 52827736448U, 52940738176U, + 53053741696U, 53166740864U, 53279741824U, 53392741504U, 53505739136U, + 53618739584U, 53731741312U, 53844741248U, 53957741696U, 54070741376U, + 54183740288U, 54296741504U, 54409741696U, 54522739072U, 54635737472U, + 54748741504U, 54861736064U, 54974740096U, 55087741568U, 55200733568U, + 55313741696U, 55426734464U, 55539741056U, 55652741504U, 55765741184U, + 55878741376U, 55991730304U, 56104740992U, 56217740672U, 56330731648U, + 56443737472U, 56556724352U, 56669740672U, 56782739072U, 56895740032U, + 57008741248U, 57121741696U, 57234740096U, 57347741312U, 57460741504U +}; + +// 500 Epochs worth of tabulated DAG sizes (~3.5 Years) + +// Generated with the following Mathematica Code: +// GetCacheSizes[n_] := Module[{ +// DAGSizeBytesInit = 2^30, +// MixBytes = 128, +// DAGGrowth = 113000000, +// HashBytes = 64, +// DAGParents = 1024, +// j = 0}, +// Reap[ +// While[j < n, +// Module[{i = Floor[(DAGSizeBytesInit + DAGGrowth * j) / (DAGParents * HashBytes)]}, +// While[! PrimeQ[i], i--]; +// Sow[i*HashBytes]; j++]]]][[2]][[1]] + +const size_t cache_sizes[] = { + 1048384U, 1158208U, 1268416U, 1377856U, 1489856U, 1599296U, 1710656U, + 1820608U, 1930816U, 2041024U, 2151872U, 2261696U, 2371904U, 2482624U, + 2593216U, 2703296U, 2814016U, 2924224U, 3034816U, 3144896U, 3255488U, + 3365312U, 3475904U, 3586624U, 3696064U, 3806272U, 3917504U, 4027456U, + 4138304U, 4248512U, 4359104U, 4469312U, 4579264U, 4689728U, 4797376U, + 4909888U, 5020096U, 5131328U, 5241664U, 5351744U, 5461312U, 5572544U, + 5683264U, 5793472U, 5903552U, 6014144U, 6121664U, 6235072U, 6344896U, + 6454592U, 6565952U, 6675904U, 6786112U, 6896704U, 7006784U, 7117888U, + 7228096U, 7338304U, 7448768U, 7557952U, 7669184U, 7779776U, 7889216U, + 8000192U, 8110912U, 8220736U, 8331712U, 8441536U, 8552384U, 8662592U, + 8772928U, 8883136U, 8993728U, 9103168U, 9214528U, 9323968U, 9434816U, + 9545152U, 9655616U, 9766336U, 9876544U, 9986624U, 10097344U, 10207424U, + 10316864U, 10427968U, 10538432U, 10649152U, 10758976U, 10869568U, 10979776U, + 11089472U, 11200832U, 11309632U, 11420608U, 11531584U, 11641792U, 11751104U, + 11862976U, 11973184U, 12083264U, 12193856U, 12304064U, 12414656U, 12524608U, + 12635072U, 12745792U, 12855616U, 12965824U, 13076416U, 13187008U, 13297216U, + 13407808U, 13518016U, 13627072U, 13738688U, 13848256U, 13959488U, 14069696U, + 14180288U, 14290624U, 14399552U, 14511424U, 14621504U, 14732096U, 14841664U, + 14951744U, 15062336U, 15172672U, 15283264U, 15393088U, 15504448U, 15614272U, + 15723712U, 15834944U, 15945152U, 16055744U, 16165696U, 16277056U, 16387136U, + 16494784U, 16607936U, 16718272U, 16828736U, 16938176U, 17048384U, 17159872U, + 17266624U, 17380544U, 17490496U, 17600192U, 17711296U, 17821376U, 17931968U, + 18041152U, 18152896U, 18261952U, 18373568U, 18483392U, 18594112U, 18703936U, + 18814912U, 18924992U, 19034944U, 19145408U, 19256128U, 19366208U, 19477184U, + 19587136U, 19696576U, 19808192U, 19916992U, 20028352U, 20137664U, 20249024U, + 20358848U, 20470336U, 20580544U, 20689472U, 20801344U, 20911424U, 21020096U, + 21130688U, 21242176U, 21352384U, 21462208U, 21573824U, 21683392U, 21794624U, + 21904448U, 22013632U, 22125248U, 22235968U, 22344512U, 22456768U, 22566848U, + 22677056U, 22786496U, 22897984U, 23008064U, 23118272U, 23228992U, 23338816U, + 23449408U, 23560256U, 23670464U, 23780672U, 23891264U, 24001216U, 24110656U, + 24221888U, 24332608U, 24442688U, 24552512U, 24662464U, 24773696U, 24884032U, + 24994496U, 25105216U, 25215296U, 25324864U, 25435712U, 25546432U, 25655744U, + 25767232U, 25876672U, 25986368U, 26098112U, 26207936U, 26318912U, 26428736U, + 26539712U, 26650048U, 26760256U, 26869184U, 26979776U, 27091136U, 27201728U, + 27311552U, 27422272U, 27532352U, 27642304U, 27752896U, 27863744U, 27973952U, + 28082752U, 28194752U, 28305344U, 28415168U, 28524992U, 28636352U, 28746304U, + 28857152U, 28967104U, 29077184U, 29187904U, 29298496U, 29408576U, 29518912U, + 29628992U, 29739968U, 29850176U, 29960512U, 30070336U, 30180544U, 30290752U, + 30398912U, 30512192U, 30622784U, 30732992U, 30842176U, 30953536U, 31063744U, + 31174336U, 31284544U, 31395136U, 31504448U, 31615552U, 31725632U, 31835072U, + 31946176U, 32057024U, 32167232U, 32277568U, 32387008U, 32497984U, 32608832U, + 32719168U, 32829376U, 32939584U, 33050048U, 33160768U, 33271232U, 33381184U, + 33491648U, 33601856U, 33712576U, 33822016U, 33932992U, 34042816U, 34153024U, + 34263104U, 34373824U, 34485056U, 34594624U, 34704832U, 34816064U, 34926272U, + 35036224U, 35146816U, 35255104U, 35367104U, 35478208U, 35588416U, 35698496U, + 35808832U, 35918656U, 36029888U, 36139456U, 36250688U, 36360512U, 36471104U, + 36581696U, 36691136U, 36802112U, 36912448U, 37022912U, 37132864U, 37242944U, + 37354048U, 37464512U, 37574848U, 37684928U, 37794752U, 37904704U, 38015552U, + 38125888U, 38236864U, 38345792U, 38457152U, 38567744U, 38678336U, 38787776U, + 38897216U, 39009088U, 39117632U, 39230144U, 39340352U, 39450304U, 39560384U, + 39671488U, 39781312U, 39891392U, 40002112U, 40112704U, 40223168U, 40332608U, + 40443968U, 40553792U, 40664768U, 40774208U, 40884416U, 40993984U, 41105984U, + 41215424U, 41326528U, 41436992U, 41546048U, 41655872U, 41768128U, 41878336U, + 41988928U, 42098752U, 42209344U, 42319168U, 42429248U, 42540352U, 42649792U, + 42761024U, 42871616U, 42981824U, 43092032U, 43201856U, 43312832U, 43423552U, + 43533632U, 43643584U, 43753792U, 43864384U, 43974976U, 44084032U, 44195392U, + 44306368U, 44415296U, 44526016U, 44637248U, 44746816U, 44858048U, 44967872U, + 45078848U, 45188288U, 45299264U, 45409216U, 45518272U, 45630272U, 45740224U, + 45850432U, 45960896U, 46069696U, 46182208U, 46292416U, 46402624U, 46512064U, + 46623296U, 46733888U, 46843712U, 46953664U, 47065024U, 47175104U, 47285696U, + 47395904U, 47506496U, 47615296U, 47726912U, 47837632U, 47947712U, 48055232U, + 48168128U, 48277952U, 48387392U, 48499648U, 48609472U, 48720064U, 48830272U, + 48940096U, 49050944U, 49160896U, 49271744U, 49381568U, 49492288U, 49602752U, + 49712576U, 49822016U, 49934272U, 50042816U, 50154304U, 50264128U, 50374336U, + 50484416U, 50596288U, 50706752U, 50816704U, 50927168U, 51035456U, 51146944U, + 51258176U, 51366976U, 51477824U, 51589568U, 51699776U, 51809728U, 51920576U, + 52030016U, 52140736U, 52251328U, 52361152U, 52470592U, 52582592U, 52691776U, + 52803136U, 52912576U, 53020736U, 53132224U, 53242688U, 53354816U, 53465536U, + 53575232U, 53685568U, 53796544U, 53906752U, 54016832U, 54126656U, 54236992U, + 54347456U, 54457408U, 54569024U, 54679232U, 54789184U, 54899776U, 55008832U, + 55119296U, 55231168U, 55341248U, 55451584U, 55562048U, 55672256U, 55782208U, + 55893184U, 56002112U, 56113216U +}; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h new file mode 100644 index 000000000..9ca842e47 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include "compiler.h" + +static const uint8_t BitReverseTable256[] = + { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF + }; + +static inline uint32_t bitfn_swap32(uint32_t a) { + return (BitReverseTable256[a & 0xff] << 24) | + (BitReverseTable256[(a >> 8) & 0xff] << 16) | + (BitReverseTable256[(a >> 16) & 0xff] << 8) | + (BitReverseTable256[(a >> 24) & 0xff]); +} + +static inline uint64_t bitfn_swap64(uint64_t a) { + return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) | + (((uint64_t) bitfn_swap32((uint32_t) a)) << 32); +} + +#if defined(__MINGW32__) || defined(_WIN32) + # define LITTLE_ENDIAN 1234 + # define BYTE_ORDER LITTLE_ENDIAN +#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) + # include +#elif defined(__OpenBSD__) || defined(__SVR4) + # include +#elif defined(__APPLE__) +# include +#elif defined( BSD ) && (BSD >= 199103) + # include +#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ ) + # define LITTLE_ENDIAN 1234 + # define BYTE_ORDER LITTLE_ENDIAN +#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ ) + # define BIG_ENDIAN 1234 + # define BYTE_ORDER BIG_ENDIAN +#else + +# include + +#endif + + +#if LITTLE_ENDIAN == BYTE_ORDER + +#define fix_endian32(x) (x) +#define fix_endian64(x) (x) + +#elif BIG_ENDIAN == BYTE_ORDER + +#define fix_endian32(x) bitfn_swap32(x) +#define fix_endian64(x) bitfn_swap64(x) + +#else +# error "endian not supported" +#endif // BYTE_ORDER \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h new file mode 100644 index 000000000..62edd0082 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h @@ -0,0 +1,88 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file ethash.h +* @date 2015 +*/ +#pragma once + +#include +#include +#include +#include +#include "compiler.h" + +#define REVISION 18 +#define DAGSIZE_BYTES_INIT 1073741824U +#define DAG_GROWTH 113000000U +#define EPOCH_LENGTH 30000U +#define MIX_BYTES 128 +#define DAG_PARENTS 256 +#define CACHE_ROUNDS 3 +#define ACCESSES 64 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct ethash_params { + size_t full_size; // Size of full data set (in bytes, multiple of mix size (128)). + size_t cache_size; // Size of compute cache (in bytes, multiple of node size (64)). +} ethash_params; + +typedef struct ethash_return_value { + uint8_t result[32]; + uint8_t mix_hash[32]; +} ethash_return_value; + +size_t const ethash_get_datasize(const uint32_t block_number); +size_t const ethash_get_cachesize(const uint32_t block_number); + +// initialize the parameters +static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) { + params->full_size = ethash_get_datasize(block_number); + params->cache_size = ethash_get_cachesize(block_number); +} + +typedef struct ethash_cache { + void *mem; +} ethash_cache; + +void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]); +void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache); +void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); +void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce); + +static inline int ethash_check_difficulty( + const uint8_t hash[32], + const uint8_t difficulty[32]) { + // Difficulty is big endian + for (int i = 0; i < 32; i++) { + if (hash[i] == difficulty[i]) continue; + return hash[i] < difficulty[i]; + } + return 0; +} + +int ethash_quick_check_difficulty( + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32], + const uint8_t difficulty[32]); + +#ifdef __cplusplus +} +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h new file mode 100644 index 000000000..edabeaae2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file fnv.h +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +#pragma once +#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FNV_PRIME 0x01000193 + +static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) { + return x*FNV_PRIME ^ y; +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c new file mode 100644 index 000000000..cc48717d0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c @@ -0,0 +1,297 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file dash.cpp +* @author Tim Hughes +* @author Matthew Wampler-Doty +* @date 2015 +*/ + +#include +#include +#include +#include "ethash.h" +#include "fnv.h" +#include "endian.h" +#include "internal.h" +#include "data_sizes.h" + +#ifdef WITH_CRYPTOPP + +#include "SHA3_cryptopp.h" + +#else +#include "sha3.h" +#endif // WITH_CRYPTOPP + +size_t const ethash_get_datasize(const uint32_t block_number) { + assert(block_number / EPOCH_LENGTH < 500); + return dag_sizes[block_number / EPOCH_LENGTH]; +} + +size_t const ethash_get_cachesize(const uint32_t block_number) { + assert(block_number / EPOCH_LENGTH < 500); + return cache_sizes[block_number / EPOCH_LENGTH]; +} + +// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014) +// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf +// SeqMemoHash(s, R, N) +void static ethash_compute_cache_nodes( + node *const nodes, + ethash_params const *params, + const uint8_t seed[32]) { + assert((params->cache_size % sizeof(node)) == 0); + uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node)); + + SHA3_512(nodes[0].bytes, seed, 32); + + for (unsigned i = 1; i != num_nodes; ++i) { + SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64); + } + + for (unsigned j = 0; j != CACHE_ROUNDS; j++) { + for (unsigned i = 0; i != num_nodes; i++) { + uint32_t const idx = nodes[i].words[0] % num_nodes; + node data; + data = nodes[(num_nodes - 1 + i) % num_nodes]; + for (unsigned w = 0; w != NODE_WORDS; ++w) + { + data.words[w] ^= nodes[idx].words[w]; + } + SHA3_512(nodes[i].bytes, data.bytes, sizeof(data)); + } + } + + // now perform endian conversion +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w) + { + nodes->words[w] = fix_endian32(nodes->words[w]); + } +#endif +} + +void ethash_mkcache( + ethash_cache *cache, + ethash_params const *params, + const uint8_t seed[32]) { + node *nodes = (node *) cache->mem; + ethash_compute_cache_nodes(nodes, params, seed); +} + +void ethash_calculate_dag_item( + node *const ret, + const unsigned node_index, + const struct ethash_params *params, + const struct ethash_cache *cache) { + + uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node)); + node const *cache_nodes = (node const *) cache->mem; + node const *init = &cache_nodes[node_index % num_parent_nodes]; + + memcpy(ret, init, sizeof(node)); + ret->words[0] ^= node_index; + SHA3_512(ret->bytes, ret->bytes, sizeof(node)); + +#if defined(_M_X64) && ENABLE_SSE + __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME); + __m128i xmm0 = ret->xmm[0]; + __m128i xmm1 = ret->xmm[1]; + __m128i xmm2 = ret->xmm[2]; + __m128i xmm3 = ret->xmm[3]; +#endif + + for (unsigned i = 0; i != DAG_PARENTS; ++i) + { + uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes; + node const *parent = &cache_nodes[parent_index]; + + #if defined(_M_X64) && ENABLE_SSE + { + xmm0 = _mm_mullo_epi32(xmm0, fnv_prime); + xmm1 = _mm_mullo_epi32(xmm1, fnv_prime); + xmm2 = _mm_mullo_epi32(xmm2, fnv_prime); + xmm3 = _mm_mullo_epi32(xmm3, fnv_prime); + xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]); + xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]); + xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]); + xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]); + + // have to write to ret as values are used to compute index + ret->xmm[0] = xmm0; + ret->xmm[1] = xmm1; + ret->xmm[2] = xmm2; + ret->xmm[3] = xmm3; + } + #else + { + for (unsigned w = 0; w != NODE_WORDS; ++w) { + ret->words[w] = fnv_hash(ret->words[w], parent->words[w]); + } + } + #endif + } + + SHA3_512(ret->bytes, ret->bytes, sizeof(node)); +} + +void ethash_compute_full_data( + void *mem, + ethash_params const *params, + ethash_cache const *cache) { + assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0); + assert((params->full_size % sizeof(node)) == 0); + node *full_nodes = mem; + + // now compute full nodes + for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) { + ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache); + } +} + +static void ethash_hash( + ethash_return_value * ret, + node const *full_nodes, + ethash_cache const *cache, + ethash_params const *params, + const uint8_t header_hash[32], + const uint64_t nonce) { + + assert((params->full_size % MIX_WORDS) == 0); + + // pack hash and nonce together into first 40 bytes of s_mix + assert(sizeof(node)*8 == 512); + node s_mix[MIX_NODES + 1]; + memcpy(s_mix[0].bytes, header_hash, 32); + +#if BYTE_ORDER != LITTLE_ENDIAN + s_mix[0].double_words[4] = fix_endian64(nonce); +#else + s_mix[0].double_words[4] = nonce; +#endif + + // compute sha3-512 hash and replicate across mix + SHA3_512(s_mix->bytes, s_mix->bytes, 40); + +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != 16; ++w) { + s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]); + } +#endif + + node* const mix = s_mix + 1; + for (unsigned w = 0; w != MIX_WORDS; ++w) { + mix->words[w] = s_mix[0].words[w % NODE_WORDS]; + } + + unsigned const + page_size = sizeof(uint32_t) * MIX_WORDS, + num_full_pages = (unsigned)(params->full_size / page_size); + + + for (unsigned i = 0; i != ACCESSES; ++i) + { + uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages; + + for (unsigned n = 0; n != MIX_NODES; ++n) + { + const node * dag_node = &full_nodes[MIX_NODES * index + n]; + + if (!full_nodes) { + node tmp_node; + ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache); + dag_node = &tmp_node; + } + + #if defined(_M_X64) && ENABLE_SSE + { + __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME); + __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]); + __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]); + __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]); + __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]); + mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]); + mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]); + mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]); + mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]); + } + #else + { + for (unsigned w = 0; w != NODE_WORDS; ++w) { + mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]); + } + } + #endif + } + + } + + // compress mix + for (unsigned w = 0; w != MIX_WORDS; w += 4) + { + uint32_t reduction = mix->words[w+0]; + reduction = reduction*FNV_PRIME ^ mix->words[w+1]; + reduction = reduction*FNV_PRIME ^ mix->words[w+2]; + reduction = reduction*FNV_PRIME ^ mix->words[w+3]; + mix->words[w/4] = reduction; + } + +#if BYTE_ORDER != LITTLE_ENDIAN + for (unsigned w = 0; w != MIX_WORDS/4; ++w) { + mix->words[w] = fix_endian32(mix->words[w]); + } +#endif + + memcpy(ret->mix_hash, mix->bytes, 32); + // final Keccak hash + SHA3_256(ret->result, s_mix->bytes, 64+32); // Keccak-256(s + compressed_mix) +} + +void ethash_quick_hash( + uint8_t return_hash[32], + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32]) { + + uint8_t buf[64+32]; + memcpy(buf, header_hash, 32); +#if BYTE_ORDER != LITTLE_ENDIAN + nonce = fix_endian64(nonce); +#endif + memcpy(&(buf[32]), &nonce, 8); + SHA3_512(buf, buf, 40); + memcpy(&(buf[64]), mix_hash, 32); + SHA3_256(return_hash, buf, 64+32); +} + +int ethash_quick_check_difficulty( + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32], + const uint8_t difficulty[32]) { + uint8_t return_hash[32]; + ethash_quick_hash(return_hash, header_hash, nonce, mix_hash); + return ethash_check_difficulty(return_hash, difficulty); +} + +void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { + ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce); +} + +void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) { + ethash_hash(ret, NULL, cache, params, previous_hash, nonce); +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h new file mode 100644 index 000000000..bcbacdaa4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h @@ -0,0 +1,48 @@ +#pragma once +#include "compiler.h" +#include "endian.h" +#include "ethash.h" + +#define ENABLE_SSE 1 + +#if defined(_M_X64) && ENABLE_SSE +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// compile time settings +#define NODE_WORDS (64/4) +#define MIX_WORDS (MIX_BYTES/4) +#define MIX_NODES (MIX_WORDS / NODE_WORDS) +#include + +typedef union node { + uint8_t bytes[NODE_WORDS * 4]; + uint32_t words[NODE_WORDS]; + uint64_t double_words[NODE_WORDS / 2]; + +#if defined(_M_X64) && ENABLE_SSE + __m128i xmm[NODE_WORDS/4]; +#endif + +} node; + +void ethash_calculate_dag_item( + node *const ret, + const unsigned node_index, + ethash_params const *params, + ethash_cache const *cache +); + +void ethash_quick_hash( + uint8_t return_hash[32], + const uint8_t header_hash[32], + const uint64_t nonce, + const uint8_t mix_hash[32]); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c new file mode 100644 index 000000000..0c28230b8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c @@ -0,0 +1,151 @@ +/** libkeccak-tiny +* +* A single-file implementation of SHA-3 and SHAKE. +* +* Implementor: David Leon Gil +* License: CC0, attribution kindly requested. Blame taken too, +* but not liability. +*/ +#include "sha3.h" + +#include +#include +#include +#include + +/******** The Keccak-f[1600] permutation ********/ + +/*** Constants. ***/ +static const uint8_t rho[24] = \ + { 1, 3, 6, 10, 15, 21, + 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, + 62, 18, 39, 61, 20, 44}; +static const uint8_t pi[24] = \ + {10, 7, 11, 17, 18, 3, + 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, + 20, 14, 22, 9, 6, 1}; +static const uint64_t RC[24] = \ + {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, + 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; + +/*** Helper macros to unroll the permutation. ***/ +#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) +#define REPEAT6(e) e e e e e e +#define REPEAT24(e) REPEAT6(e e e e) +#define REPEAT5(e) e e e e e +#define FOR5(v, s, e) \ + v = 0; \ + REPEAT5(e; v += s;) + +/*** Keccak-f[1600] ***/ +static inline void keccakf(void* state) { + uint64_t* a = (uint64_t*)state; + uint64_t b[5] = {0}; + uint64_t t = 0; + uint8_t x, y; + + for (int i = 0; i < 24; i++) { + // Theta + FOR5(x, 1, + b[x] = 0; + FOR5(y, 5, + b[x] ^= a[x + y]; )) + FOR5(x, 1, + FOR5(y, 5, + a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) + // Rho and pi + t = a[1]; + x = 0; + REPEAT24(b[0] = a[pi[x]]; + a[pi[x]] = rol(t, rho[x]); + t = b[0]; + x++; ) + // Chi + FOR5(y, + 5, + FOR5(x, 1, + b[x] = a[y + x];) + FOR5(x, 1, + a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) + // Iota + a[0] ^= RC[i]; + } +} + +/******** The FIPS202-defined functions. ********/ + +/*** Some helper macros. ***/ + +#define _(S) do { S } while (0) +#define FOR(i, ST, L, S) \ + _(for (size_t i = 0; i < L; i += ST) { S; }) +#define mkapply_ds(NAME, S) \ + static inline void NAME(uint8_t* dst, \ + const uint8_t* src, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } +#define mkapply_sd(NAME, S) \ + static inline void NAME(const uint8_t* src, \ + uint8_t* dst, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } + +mkapply_ds(xorin, dst[i] ^= src[i]) // xorin +mkapply_sd(setout, dst[i] = src[i]) // setout + +#define P keccakf +#define Plen 200 + +// Fold P*F over the full blocks of an input. +#define foldP(I, L, F) \ + while (L >= rate) { \ + F(a, I, rate); \ + P(a); \ + I += rate; \ + L -= rate; \ + } + +/** The sponge-based hash construction. **/ +static inline int hash(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen, + size_t rate, uint8_t delim) { + if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) { + return -1; + } + uint8_t a[Plen] = {0}; + // Absorb input. + foldP(in, inlen, xorin); + // Xor in the DS and pad frame. + a[inlen] ^= delim; + a[rate - 1] ^= 0x80; + // Xor in the last block. + xorin(a, in, inlen); + // Apply P + P(a); + // Squeeze output. + foldP(out, outlen, setout); + setout(a, out, outlen); + memset(a, 0, 200); + return 0; +} + +#define defsha3(bits) \ + int sha3_##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + if (outlen > (bits/8)) { \ + return -1; \ + } \ + return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01); \ + } + +/*** FIPS202 SHA3 FOFs ***/ +defsha3(256) +defsha3(512) \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h new file mode 100644 index 000000000..36a0a5301 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h @@ -0,0 +1,27 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "compiler.h" +#include +#include + +#define decsha3(bits) \ + int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t); + +decsha3(256) +decsha3(512) + +static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) { + sha3_256(ret, 32, data, size); +} + +static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) { + sha3_512(ret, 64, data, size); +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp new file mode 100644 index 000000000..9454ce04a --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp @@ -0,0 +1,34 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ + +/** @file sha3.cpp +* @author Tim Hughes +* @date 2015 +*/ + +#include +#include + +extern "C" { +void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) { + CryptoPP::SHA3_256().CalculateDigest(ret, data, size); +} + +void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) { + CryptoPP::SHA3_512().CalculateDigest(ret, data, size); +} +} \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h new file mode 100644 index 000000000..f910960e1 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h @@ -0,0 +1,15 @@ +#pragma once + +#include "compiler.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size); +void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c new file mode 100644 index 000000000..fbf268b7d --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c @@ -0,0 +1,41 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file util.c + * @author Tim Hughes + * @date 2015 + */ +#include +#include +#include "util.h" + +#ifdef _MSC_VER + +// foward declare without all of Windows.h +__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString); + +void debugf(const char *str, ...) +{ + va_list args; + va_start(args, str); + + char buf[1<<16]; + _vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args); + buf[sizeof(buf)-1] = '\0'; + OutputDebugStringA(buf); +} + +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h new file mode 100644 index 000000000..2f59076f6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h @@ -0,0 +1,47 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see . +*/ +/** @file util.h + * @author Tim Hughes + * @date 2015 + */ +#pragma once +#include +#include "compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +void debugf(const char *str, ...); +#else +#define debugf printf +#endif + +static inline uint32_t min_u32(uint32_t a, uint32_t b) +{ + return a < b ? a : b; +} + +static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_) +{ + return x < min_ ? min_ : (x > max_ ? max_ : x); +} + +#ifdef __cplusplus +} +#endif diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp new file mode 100644 index 000000000..642c33cb3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp @@ -0,0 +1,29 @@ +{ + "targets": + [{ + "target_name": "ethash", + "sources": [ + './ethash.cc', + '../libethash/ethash.h', + '../libethash/util.c', + '../libethash/util.h', + '../libethash/blum_blum_shub.h', + '../libethash/blum_blum_shub.c', + '../libethash/sha3.h', + '../libethash/sha3.c', + '../libethash/internal.h', + '../libethash/internal.c' + ], + "include_dirs": [ + "../", + " +#include +#include +#include +#include +#include "../libethash/ethash.h" + +using namespace v8; + +class EthashValidator : public NanAsyncWorker { + public: + // Constructor + EthashValidator(NanCallback *callback, const unsigned blocknumber, const unsigned char * seed) + : NanAsyncWorker(callback), blocknumber(blocknumber), seed(seed) {} + // Destructor + ~EthashValidator() { + free(this->cache); + free(this->params); + } + + // Executed inside the worker-thread. + // It is not safe to access V8, or V8 data structures + // here, so everything we need for input and output + // should go on `this`. + void Execute () { + + /* this->result = secp256k1_ecdsa_sign(this->msg, this->sig , &this->sig_len, this->pk, NULL, NULL); */ + } + + // Executed when the async work is complete + // this function will be run inside the main event loop + // so it is safe to use V8 again + void HandleOKCallback () { + NanScope(); + Handle argv[] = { + NanNew(this->result) + }; + callback->Call(2, argv); + } + + protected: + const unsigned blocknumber; + const unsigned char * seed; + ethash_params * params; + ethash_cache * cache; + bool result; + bool ready = 0; +}; + +/* class CompactSignWorker : public SignWorker { */ +/* public: */ +/* CompactSignWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *pk ) */ +/* : SignWorker(callback, msg, pk){} */ + +/* void Execute () { */ +/* this->result = secp256k1_ecdsa_sign_compact(this->msg, this->sig , this->pk, NULL, NULL, &this->sig_len); */ +/* } */ + +/* void HandleOKCallback () { */ +/* NanScope(); */ +/* Handle argv[] = { */ +/* NanNew(this->result), */ +/* NanNewBufferHandle((char *)this->sig, 64), */ +/* NanNew(this->sig_len) */ +/* }; */ +/* callback->Call(3, argv); */ +/* } */ +/* }; */ + +/* class RecoverWorker : public NanAsyncWorker { */ +/* public: */ +/* // Constructor */ +/* RecoverWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int compressed, int rec_id) */ +/* : NanAsyncWorker(callback), msg(msg), sig(sig), compressed(compressed), rec_id(rec_id) {} */ +/* // Destructor */ +/* ~RecoverWorker() {} */ + +/* void Execute () { */ +/* if(this->compressed == 1){ */ +/* this->pubkey = new unsigned char[33]; */ +/* }else{ */ +/* this->pubkey = new unsigned char[65]; */ +/* } */ + +/* this->result = secp256k1_ecdsa_recover_compact(this->msg, this->sig, this->pubkey, &this->pubkey_len, this->compressed, this->rec_id); */ +/* } */ + +/* void HandleOKCallback () { */ +/* NanScope(); */ +/* Handle argv[] = { */ +/* NanNew(this->result), */ +/* NanNewBufferHandle((char *)this->pubkey, this->pubkey_len) */ +/* }; */ +/* callback->Call(2, argv); */ +/* } */ + +/* protected: */ +/* const unsigned char * msg; */ +/* const unsigned char * sig; */ +/* int compressed; */ +/* int rec_id; */ +/* int result; */ +/* unsigned char * pubkey; */ +/* int pubkey_len; */ +/* }; */ + +/* class VerifyWorker : public NanAsyncWorker { */ +/* public: */ +/* // Constructor */ +/* VerifyWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int sig_len, const unsigned char *pub_key, int pub_key_len) */ +/* : NanAsyncWorker(callback), msg(msg), sig(sig), sig_len(sig_len), pub_key(pub_key), pub_key_len(pub_key_len) {} */ +/* // Destructor */ +/* ~VerifyWorker() {} */ + +/* void Execute () { */ +/* this->result = secp256k1_ecdsa_verify(this->msg, this->sig, this->sig_len, this->pub_key, this->pub_key_len); */ +/* } */ + +/* void HandleOKCallback () { */ +/* NanScope(); */ +/* Handle argv[] = { */ +/* NanNew(this->result), */ +/* }; */ +/* callback->Call(1, argv); */ +/* } */ + +/* protected: */ +/* int result; */ +/* const unsigned char * msg; */ +/* const unsigned char * sig; */ +/* int sig_len; */ +/* const unsigned char * pub_key; */ +/* int pub_key_len; */ +/* }; */ + +/* NAN_METHOD(Verify){ */ +/* NanScope(); */ + +/* Local pub_buf = args[0].As(); */ +/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */ +/* int pub_len = node::Buffer::Length(args[0]); */ + +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ + +/* Local sig_buf = args[2].As(); */ +/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */ +/* int sig_len = node::Buffer::Length(args[2]); */ + +/* int result = secp256k1_ecdsa_verify(msg_data, sig_data, sig_len, pub_data, pub_len ); */ + +/* NanReturnValue(NanNew(result)); */ +/* } */ + +/* NAN_METHOD(Verify_Async){ */ +/* NanScope(); */ + +/* Local pub_buf = args[0].As(); */ +/* const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */ +/* int pub_len = node::Buffer::Length(args[0]); */ + +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ + +/* Local sig_buf = args[2].As(); */ +/* const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */ +/* int sig_len = node::Buffer::Length(args[2]); */ + +/* Local callback = args[3].As(); */ +/* NanCallback* nanCallback = new NanCallback(callback); */ + +/* VerifyWorker* worker = new VerifyWorker(nanCallback, msg_data, sig_data, sig_len, pub_data, pub_len); */ +/* NanAsyncQueueWorker(worker); */ + +/* NanReturnUndefined(); */ +/* } */ + +/* NAN_METHOD(Sign){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Local pk_buf = args[0].As(); */ +/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ +/* int sec_len = node::Buffer::Length(args[0]); */ +/* //the second argument is the message that we are signing */ +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ + +/* unsigned char sig[72]; */ +/* int sig_len = 72; */ +/* int msg_len = node::Buffer::Length(args[1]); */ + +/* if(sec_len != 32){ */ +/* return NanThrowError("the secret key needs tobe 32 bytes"); */ +/* } */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* int result = secp256k1_ecdsa_sign(msg_data, sig , &sig_len, pk_data, NULL, NULL); */ + +/* if(result == 1){ */ +/* NanReturnValue(NanNewBufferHandle((char *)sig, sig_len)); */ +/* }else{ */ +/* return NanThrowError("nonce invalid, try another one"); */ +/* } */ +/* } */ + +/* NAN_METHOD(Sign_Async){ */ + +/* NanScope(); */ +/* //the first argument should be the private key as a buffer */ +/* Local sec_buf = args[0].As(); */ +/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */ +/* int sec_len = node::Buffer::Length(args[0]); */ +/* //the second argument is the message that we are signing */ +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ + +/* Local callback = args[2].As(); */ +/* NanCallback* nanCallback = new NanCallback(callback); */ + +/* int msg_len = node::Buffer::Length(args[1]); */ + +/* if(sec_len != 32){ */ +/* return NanThrowError("the secret key needs tobe 32 bytes"); */ +/* } */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* SignWorker* worker = new SignWorker(nanCallback, msg_data, sec_data); */ +/* NanAsyncQueueWorker(worker); */ + +/* NanReturnUndefined(); */ +/* } */ + +/* NAN_METHOD(Sign_Compact){ */ + +/* NanScope(); */ + +/* Local seckey_buf = args[0].As(); */ +/* const unsigned char *seckey_data = (unsigned char *) node::Buffer::Data(seckey_buf); */ +/* int sec_len = node::Buffer::Length(args[0]); */ + +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ +/* int msg_len = node::Buffer::Length(args[1]); */ + +/* if(sec_len != 32){ */ +/* return NanThrowError("the secret key needs tobe 32 bytes"); */ +/* } */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* unsigned char sig[64]; */ +/* int rec_id; */ + +/* //TODO: change the nonce */ +/* int valid_nonce = secp256k1_ecdsa_sign_compact(msg_data, sig, seckey_data, NULL, NULL, &rec_id ); */ + +/* Local array = NanNew(3); */ +/* array->Set(0, NanNew(valid_nonce)); */ +/* array->Set(1, NanNew(rec_id)); */ +/* array->Set(2, NanNewBufferHandle((char *)sig, 64)); */ + +/* NanReturnValue(array); */ +/* } */ + +/* NAN_METHOD(Sign_Compact_Async){ */ +/* NanScope(); */ +/* //the first argument should be the private key as a buffer */ +/* Local sec_buf = args[0].As(); */ +/* const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */ +/* int sec_len = node::Buffer::Length(args[0]); */ + +/* //the second argument is the message that we are signing */ +/* Local msg_buf = args[1].As(); */ +/* const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */ + + +/* Local callback = args[2].As(); */ +/* NanCallback* nanCallback = new NanCallback(callback); */ + +/* int msg_len = node::Buffer::Length(args[1]); */ + +/* if(sec_len != 32){ */ +/* return NanThrowError("the secret key needs tobe 32 bytes"); */ +/* } */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* CompactSignWorker* worker = new CompactSignWorker(nanCallback, msg_data, sec_data); */ +/* NanAsyncQueueWorker(worker); */ + +/* NanReturnUndefined(); */ +/* } */ + +/* NAN_METHOD(Recover_Compact){ */ + +/* NanScope(); */ + +/* Local msg_buf = args[0].As(); */ +/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */ +/* int msg_len = node::Buffer::Length(args[0]); */ + +/* Local sig_buf = args[1].As(); */ +/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */ + +/* Local compressed = args[2].As(); */ +/* int int_compressed = compressed->IntegerValue(); */ + +/* Local rec_id = args[3].As(); */ +/* int int_rec_id = rec_id->IntegerValue(); */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* unsigned char pubKey[65]; */ + +/* int pubKeyLen; */ + +/* int result = secp256k1_ecdsa_recover_compact(msg, sig, pubKey, &pubKeyLen, int_compressed, int_rec_id); */ +/* if(result == 1){ */ +/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */ +/* }else{ */ + +/* NanReturnValue(NanFalse()); */ +/* } */ +/* } */ + +/* NAN_METHOD(Recover_Compact_Async){ */ + +/* NanScope(); */ + +/* //the message */ +/* Local msg_buf = args[0].As(); */ +/* const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */ +/* int msg_len = node::Buffer::Length(args[0]); */ + +/* //the signature length */ +/* Local sig_buf = args[1].As(); */ +/* const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */ +/* //todo sig len needs tobe 64 */ +/* int sig_len = node::Buffer::Length(args[1]); */ + +/* //to compress or not? */ +/* Local compressed = args[2].As(); */ +/* int int_compressed = compressed->IntegerValue(); */ + +/* //the rec_id */ +/* Local rec_id = args[3].As(); */ +/* int int_rec_id = rec_id->IntegerValue(); */ + +/* //the callback */ +/* Local callback = args[4].As(); */ +/* NanCallback* nanCallback = new NanCallback(callback); */ + +/* if(sig_len != 64){ */ +/* return NanThrowError("the signature needs to be 64 bytes"); */ +/* } */ + +/* if(msg_len == 0){ */ +/* return NanThrowError("messgae cannot be null"); */ +/* } */ + +/* RecoverWorker* worker = new RecoverWorker(nanCallback, msg, sig, int_compressed, int_rec_id); */ +/* NanAsyncQueueWorker(worker); */ + +/* NanReturnUndefined(); */ +/* } */ + +/* NAN_METHOD(Seckey_Verify){ */ +/* NanScope(); */ + +/* const unsigned char *data = (const unsigned char*) node::Buffer::Data(args[0]); */ +/* int result = secp256k1_ec_seckey_verify(data); */ +/* NanReturnValue(NanNew(result)); */ +/* } */ + +/* NAN_METHOD(Pubkey_Verify){ */ + +/* NanScope(); */ + +/* Local pub_buf = args[0].As(); */ +/* const unsigned char *pub_key = (unsigned char *) node::Buffer::Data(pub_buf); */ +/* int pub_key_len = node::Buffer::Length(args[0]); */ + +/* int result = secp256k1_ec_pubkey_verify(pub_key, pub_key_len); */ + +/* NanReturnValue(NanNew(result)); */ +/* } */ + +/* NAN_METHOD(Pubkey_Create){ */ +/* NanScope(); */ + +/* Handle pk_buf = args[0].As(); */ +/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ +/* int pk_len = node::Buffer::Length(args[0]); */ + +/* Local l_compact = args[1].As(); */ +/* int compact = l_compact->IntegerValue(); */ +/* int pubKeyLen; */ + +/* if(pk_len != 32){ */ +/* return NanThrowError("the secert key need to be 32 bytes"); */ +/* } */ + +/* unsigned char *pubKey; */ +/* if(compact == 1){ */ +/* pubKey = new unsigned char[33]; */ +/* }else{ */ +/* pubKey = new unsigned char[65]; */ +/* } */ + +/* int results = secp256k1_ec_pubkey_create(pubKey,&pubKeyLen, pk_data, compact ); */ +/* if(results == 0){ */ +/* return NanThrowError("secret was invalid, try again."); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Pubkey_Decompress){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Local pk_buf = args[0].As(); */ +/* unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ + +/* int pk_len = node::Buffer::Length(args[0]); */ + +/* int results = secp256k1_ec_pubkey_decompress(pk_data, &pk_len); */ + +/* if(results == 0){ */ +/* return NanThrowError("invalid public key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)pk_data, pk_len)); */ +/* } */ +/* } */ + + +/* NAN_METHOD(Privkey_Import){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle pk_buf = args[0].As(); */ +/* const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */ + +/* int pk_len = node::Buffer::Length(args[0]); */ + +/* unsigned char sec_key[32]; */ +/* int results = secp256k1_ec_privkey_import(sec_key, pk_data, pk_len); */ + +/* if(results == 0){ */ +/* return NanThrowError("invalid private key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)sec_key, 32)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Privkey_Export){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle sk_buf = args[0].As(); */ +/* const unsigned char *sk_data = (unsigned char *) node::Buffer::Data(sk_buf); */ + +/* Local l_compressed = args[1].As(); */ +/* int compressed = l_compressed->IntegerValue(); */ + +/* unsigned char *privKey; */ +/* int pk_len; */ +/* int results = secp256k1_ec_privkey_export(sk_data, privKey, &pk_len, compressed); */ +/* if(results == 0){ */ +/* return NanThrowError("invalid private key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)privKey, pk_len)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Privkey_Tweak_Add){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle sk_buf = args[0].As(); */ +/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */ + +/* Handle tweak_buf = args[1].As(); */ +/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ + +/* int results = secp256k1_ec_privkey_tweak_add(sk, tweak); */ +/* if(results == 0){ */ +/* return NanThrowError("invalid key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Privkey_Tweak_Mul){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle sk_buf = args[0].As(); */ +/* unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */ + +/* Handle tweak_buf = args[1].As(); */ +/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ + +/* int results = secp256k1_ec_privkey_tweak_mul(sk, tweak); */ +/* if(results == 0){ */ +/* return NanThrowError("invalid key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Pubkey_Tweak_Add){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle pk_buf = args[0].As(); */ +/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */ +/* int pk_len = node::Buffer::Length(args[0]); */ + +/* Handle tweak_buf = args[1].As(); */ +/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ + +/* int results = secp256k1_ec_pubkey_tweak_add(pk, pk_len, tweak); */ +/* if(results == 0){ */ +/* return NanThrowError("invalid key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */ +/* } */ +/* } */ + +/* NAN_METHOD(Pubkey_Tweak_Mul){ */ +/* NanScope(); */ + +/* //the first argument should be the private key as a buffer */ +/* Handle pk_buf = args[0].As(); */ +/* unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */ +/* int pk_len = node::Buffer::Length(args[0]); */ + +/* Handle tweak_buf = args[1].As(); */ +/* const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */ + +/* int results = secp256k1_ec_pubkey_tweak_mul(pk, pk_len, tweak); */ +/* if(results == 0){ */ +/* return NanThrowError("invalid key"); */ +/* }else{ */ +/* NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */ +/* } */ +/* } */ + +void Init(Handle exports) { + + /* secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY); */ + /* exports->Set(NanNew("seckeyVerify"), NanNew(Seckey_Verify)->GetFunction()); */ + /* exports->Set(NanNew("sign"), NanNew(Sign)->GetFunction()); */ + /* exports->Set(NanNew("signAsync"), NanNew(Sign_Async)->GetFunction()); */ + /* exports->Set(NanNew("signCompact"), NanNew(Sign_Compact)->GetFunction()); */ + /* exports->Set(NanNew("signCompactAsync"), NanNew(Sign_Compact_Async)->GetFunction()); */ + /* exports->Set(NanNew("recoverCompact"), NanNew(Recover_Compact)->GetFunction()); */ + /* exports->Set(NanNew("recoverCompactAsync"), NanNew(Recover_Compact_Async)->GetFunction()); */ + /* exports->Set(NanNew("verify"), NanNew(Verify)->GetFunction()); */ + /* exports->Set(NanNew("verifyAsync"), NanNew(Verify_Async)->GetFunction()); */ + /* exports->Set(NanNew("secKeyVerify"), NanNew(Seckey_Verify)->GetFunction()); */ + /* exports->Set(NanNew("pubKeyVerify"), NanNew(Pubkey_Verify)->GetFunction()); */ + /* exports->Set(NanNew("pubKeyCreate"), NanNew(Pubkey_Create)->GetFunction()); */ + /* exports->Set(NanNew("pubKeyDecompress"), NanNew(Pubkey_Decompress)->GetFunction()); */ + /* exports->Set(NanNew("privKeyExport"), NanNew(Privkey_Export)->GetFunction()); */ + /* exports->Set(NanNew("privKeyImport"), NanNew(Privkey_Import)->GetFunction()); */ + /* exports->Set(NanNew("privKeyTweakAdd"), NanNew(Privkey_Tweak_Add)->GetFunction()); */ + /* exports->Set(NanNew("privKeyTweakMul"), NanNew(Privkey_Tweak_Mul)->GetFunction()); */ + /* exports->Set(NanNew("pubKeyTweakAdd"), NanNew(Privkey_Tweak_Add)->GetFunction()); */ + /* exports->Set(NanNew("pubKeyTweakMul"), NanNew(Privkey_Tweak_Mul)->GetFunction()); */ +} + +NODE_MODULE(secp256k1, Init) diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json new file mode 100644 index 000000000..690ea3263 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json @@ -0,0 +1,13 @@ +{ + "name": "node-ethash", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "install": "node-gyp rebuild" + }, + "author": "", + "license": "ISC", + "gypfile": true +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md new file mode 100644 index 000000000..bb46cdd6e --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md @@ -0,0 +1,12 @@ +# To Develop +`npm install -g node-gyp` +`npm install .` + + +# To rebuild +`node-gyp rebuild` + + +# notes + +nan is good https://github.com/rvagg/nan diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt new file mode 100644 index 000000000..431af6a99 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt @@ -0,0 +1,30 @@ +IF( NOT Boost_FOUND ) + find_package(Boost COMPONENTS unit_test_framework) +ENDIF() + +IF( Boost_FOUND ) + include_directories( ${Boost_INCLUDE_DIR} ) + include_directories(..) + + link_directories ( ${Boost_LIBRARY_DIRS} ) + file(GLOB HEADERS "*.h") + ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK) + + if (NOT CRYPTOPP_FOUND) + find_package (CryptoPP) + endif() + + if (CRYPTOPP_FOUND) + add_definitions(-DWITH_CRYPTOPP) + endif() + + add_executable (Test test.cpp ${HEADERS}) + target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS}) + + if (CRYPTOPP_FOUND) + TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES}) + endif() + + enable_testing () + add_test(NAME ethash COMMAND Test) +ENDIF() diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go new file mode 100644 index 000000000..d734954e1 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go @@ -0,0 +1,54 @@ +package ethashTest + +import ( + "bytes" + "crypto/rand" + "log" + "math/big" + "testing" + + "github.com/ethereum/ethash" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/ethdb" +) + +func TestEthash(t *testing.T) { + seedHash := make([]byte, 32) + _, err := rand.Read(seedHash) + if err != nil { + panic(err) + } + + db, err := ethdb.NewMemDatabase() + if err != nil { + panic(err) + } + + blockProcessor, err := core.NewCanonical(5, db) + if err != nil { + panic(err) + } + + log.Println("Block Number: ", blockProcessor.ChainManager().CurrentBlock().Number()) + + e := ethash.New(blockProcessor.ChainManager()) + + miningHash := make([]byte, 32) + if _, err := rand.Read(miningHash); err != nil { + panic(err) + } + diff := big.NewInt(10000) + log.Println("difficulty", diff) + + nonce := uint64(0) + + ghash_full := e.FullHash(nonce, miningHash) + log.Printf("ethash full (on nonce): %x %x\n", ghash_full, nonce) + + ghash_light := e.LightHash(nonce, miningHash) + log.Printf("ethash light (on nonce): %x %x\n", ghash_light, nonce) + + if bytes.Compare(ghash_full, ghash_light) != 0 { + t.Errorf("full: %x, light: %x", ghash_full, ghash_light) + } +} diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp new file mode 100644 index 000000000..bec44e138 --- /dev/null +++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp @@ -0,0 +1,233 @@ +#include +#include +#include +#include + +#ifdef WITH_CRYPTOPP +#include +#else +#include +#endif // WITH_CRYPTOPP + +#define BOOST_TEST_MODULE Daggerhashimoto +#define BOOST_TEST_MAIN + +#include +#include +#include + +std::string bytesToHexString(const uint8_t *str, const size_t s) { + std::ostringstream ret; + + for (int i = 0; i < s; ++i) + ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i]; + + return ret.str(); +} + +BOOST_AUTO_TEST_CASE(fnv_hash_check) { + uint32_t x = 1235U; + const uint32_t + y = 9999999U, + expected = (FNV_PRIME * x) ^ y; + + x = fnv_hash(x, y); + + BOOST_REQUIRE_MESSAGE(x == expected, + "\nexpected: " << expected << "\n" + << "actual: " << x << "\n"); + +} + +BOOST_AUTO_TEST_CASE(SHA256_check) { + uint8_t input[32], out[32]; + memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + SHA3_256(out, input, 32); + const std::string + expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7", + actual = bytesToHexString(out, 32); + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); +} + +BOOST_AUTO_TEST_CASE(SHA512_check) { + uint8_t input[64], out[64]; + memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64); + SHA3_512(out, input, 64); + const std::string + expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405", + actual = bytesToHexString(out, 64); + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); +} + +BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) { + ethash_params params; + ethash_params_init(¶ms, 0); + BOOST_REQUIRE_MESSAGE(params.full_size < DAGSIZE_BYTES_INIT, + "\nfull size: " << params.full_size << "\n" + << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); + BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT, + "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n" + << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32, + "\ncache size: " << params.cache_size << "\n" + << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n"); +} + +BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) { + ethash_params params; + ethash_params_init(¶ms, 0); + const uint32_t expected_full_size = 1073739904; + const uint32_t expected_cache_size = 1048384; + BOOST_REQUIRE_MESSAGE(params.full_size == expected_full_size, + "\nexpected: " << expected_cache_size << "\n" + << "actual: " << params.full_size << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size == expected_cache_size, + "\nexpected: " << expected_cache_size << "\n" + << "actual: " << params.cache_size << "\n"); +} + +BOOST_AUTO_TEST_CASE(ethash_params_init_check) { + ethash_params params; + ethash_params_init(¶ms, 1971000); + const uint64_t nine_month_size = (uint64_t) 8*DAGSIZE_BYTES_INIT; + BOOST_REQUIRE_MESSAGE(params.full_size < nine_month_size, + "\nfull size: " << params.full_size << "\n" + << "should be less than or equal to: " << nine_month_size << "\n"); + BOOST_REQUIRE_MESSAGE(params.full_size + DAGSIZE_BYTES_INIT / 4 > nine_month_size, + "\nfull size + DAGSIZE_BYTES_INIT / 4: " << params.full_size + DAGSIZE_BYTES_INIT / 4 << "\n" + << "should be greater than or equal to: " << nine_month_size << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size < nine_month_size / 1024, + "\nactual cache size: " << params.cache_size << "\n" + << "expected: " << nine_month_size / 1024 << "\n"); + BOOST_REQUIRE_MESSAGE(params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 > nine_month_size / 1024 , + "\ncache size + DAGSIZE_BYTES_INIT / 4 / 1024: " << params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 << "\n" + << "actual: " << nine_month_size / 32 << "\n"); +} + +BOOST_AUTO_TEST_CASE(light_and_full_client_checks) { + ethash_params params; + uint8_t seed[32], hash[32]; + ethash_return_value light_out, full_out; + memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32); + ethash_params_init(¶ms, 0); + params.cache_size = 1024; + params.full_size = 1024 * 32; + ethash_cache cache; + cache.mem = alloca(params.cache_size); + ethash_mkcache(&cache, ¶ms, seed); + node * full_mem = (node *) alloca(params.full_size); + ethash_compute_full_data(full_mem, ¶ms, &cache); + + { + const std::string + expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b", + actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size); + + BOOST_REQUIRE_MESSAGE(expected == actual, + "\nexpected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + + + + { + node node; + ethash_calculate_dag_item(&node, 0, ¶ms, &cache); + const std::string + actual = bytesToHexString((uint8_t const *) &node, sizeof(node)), + expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597"; + BOOST_REQUIRE_MESSAGE(actual == expected, + "\n" << "expected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + + { + for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) { + for (uint32_t j = 0; j < 32; ++j) { + node expected_node; + ethash_calculate_dag_item(&expected_node, j, ¶ms, &cache); + const std::string + actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)), + expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node)); + BOOST_REQUIRE_MESSAGE(actual == expected, + "\ni: " << j << "\n" + << "expected: " << expected.c_str() << "\n" + << "actual: " << actual.c_str() << "\n"); + } + } + } + + { + uint64_t nonce = 0x7c7c597c; + ethash_full(&full_out, full_mem, ¶ms, hash, nonce); + ethash_light(&light_out, &cache, ¶ms, hash, nonce); + const std::string + light_result_string = bytesToHexString(light_out.result, 32), + full_result_string = bytesToHexString(full_out.result, 32); + BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, + "\nlight result: " << light_result_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + const std::string + light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), + full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); + BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, + "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" + << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); + uint8_t check_hash[32]; + ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash); + const std::string check_hash_string = bytesToHexString(check_hash, 32); + BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string, + "\ncheck hash string: " << check_hash_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + } + { + ethash_full(&full_out, full_mem, ¶ms, hash, 5); + std::string + light_result_string = bytesToHexString(light_out.result, 32), + full_result_string = bytesToHexString(full_out.result, 32); + + BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string, + "\nlight result and full result should differ: " << light_result_string.c_str() << "\n"); + + ethash_light(&light_out, &cache, ¶ms, hash, 5); + light_result_string = bytesToHexString(light_out.result, 32); + BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string, + "\nlight result and full result should be the same\n" + << "light result: " << light_result_string.c_str() << "\n" + << "full result: " << full_result_string.c_str() << "\n"); + std::string + light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32), + full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32); + BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string, + "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n" + << "full mix hash: " << full_mix_hash_string.c_str() << "\n"); + } +} + +BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) { + uint8_t hash[32], target[32]; + memset(hash, 0, 32); + memset(target, 0, 32); + + memcpy(hash, "11111111111111111111111111111111", 32); + memcpy(target, "22222222222222222222222222222222", 32); + BOOST_REQUIRE_MESSAGE( + ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); + BOOST_REQUIRE_MESSAGE( + !ethash_check_difficulty(hash, hash), + "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n"); + memcpy(target, "11111111111111111111111111111112", 32); + BOOST_REQUIRE_MESSAGE( + ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n"); + memcpy(target, "11111111111111111111111111111110", 32); + BOOST_REQUIRE_MESSAGE( + !ethash_check_difficulty(hash, target), + "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n"); +} \ No newline at end of file -- cgit v1.2.3