1 files changed, 0 insertions, 628 deletions
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
deleted file mode 100644
index 451049eae..000000000
--- a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
+++ /dev/null
@@ -1,628 +0,0 @@
-// Copyright 2014 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-
-// +build opencl
-
-package ethash
-
-//#cgo LDFLAGS: -w
-//#include <stdint.h>
-//#include <string.h>
-//#include "src/libethash/internal.h"
-import "C"
-
-import (
-	crand "crypto/rand"
-	"encoding/binary"
-	"fmt"
-	"math"
-	"math/big"
-	mrand "math/rand"
-	"strconv"
-	"strings"
-	"sync"
-	"sync/atomic"
-	"time"
-	"unsafe"
-
-	"github.com/Gustav-Simonsson/go-opencl/cl"
-	"github.com/ethereum/go-ethereum/common"
-	"github.com/ethereum/go-ethereum/pow"
-)
-
-/*
-
-  This code have two main entry points:
-
-  1. The initCL(...)  function configures one or more OpenCL device
-     (for now only GPU) and loads the Ethash DAG onto device memory
-
-  2. The Search(...) function loads a Ethash nonce into device(s) memory and
-     executes the Ethash OpenCL kernel.
-
-  Throughout the code, we refer to "host memory" and "device memory".
-  For most systems (e.g. regular PC GPU miner) the host memory is RAM and
-  device memory is the GPU global memory (e.g. GDDR5).
-
-  References mentioned in code comments:
-
-  1. https://github.com/ethereum/wiki/wiki/Ethash
-  2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
-  3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
-  4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
-
-*/
-
-type OpenCLDevice struct {
-	deviceId int
-	device   *cl.Device
-	openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
-	openCL12 bool
-
-	dagBuf        *cl.MemObject // Ethash full DAG in device mem
-	headerBuf     *cl.MemObject // Hash of block-to-mine in device mem
-	searchBuffers []*cl.MemObject
-
-	searchKernel *cl.Kernel
-	hashKernel   *cl.Kernel
-
-	queue         *cl.CommandQueue
-	ctx           *cl.Context
-	workGroupSize int
-
-	nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
-	result    common.Hash
-}
-
-type OpenCLMiner struct {
-	mu sync.Mutex
-
-	ethash *Ethash // Ethash full DAG & cache in host mem
-
-	deviceIds []int
-	devices   []*OpenCLDevice
-
-	dagSize uint64
-
-	hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
-}
-
-type pendingSearch struct {
-	bufIndex   uint32
-	startNonce uint64
-}
-
-const (
-	SIZEOF_UINT32 = 4
-
-	// See [1]
-	ethashMixBytesLen = 128
-	ethashAccesses    = 64
-
-	// See [4]
-	workGroupSize    = 32 // must be multiple of 8
-	maxSearchResults = 63
-	searchBufSize    = 2
-	globalWorkSize   = 1024 * 256
-)
-
-func NewCL(deviceIds []int) *OpenCLMiner {
-	ids := make([]int, len(deviceIds))
-	copy(ids, deviceIds)
-	return &OpenCLMiner{
-		ethash:    New(),
-		dagSize:   0, // to see if we need to update DAG.
-		deviceIds: ids,
-	}
-}
-
-func PrintDevices() {
-	fmt.Println("=============================================")
-	fmt.Println("============ OpenCL Device Info =============")
-	fmt.Println("=============================================")
-
-	var found []*cl.Device
-
-	platforms, err := cl.GetPlatforms()
-	if err != nil {
-		fmt.Println("Plaform error (check your OpenCL installation):", err)
-		return
-	}
-
-	for i, p := range platforms {
-		fmt.Println("Platform id             ", i)
-		fmt.Println("Platform Name           ", p.Name())
-		fmt.Println("Platform Vendor         ", p.Vendor())
-		fmt.Println("Platform Version        ", p.Version())
-		fmt.Println("Platform Extensions     ", p.Extensions())
-		fmt.Println("Platform Profile        ", p.Profile())
-		fmt.Println("")
-
-		devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
-		if err != nil {
-			fmt.Println("Device error (check your GPU drivers) :", err)
-			return
-		}
-
-		for _, d := range devices {
-			fmt.Println("Device OpenCL id        ", i)
-			fmt.Println("Device id for mining    ", len(found))
-			fmt.Println("Device Name             ", d.Name())
-			fmt.Println("Vendor                  ", d.Vendor())
-			fmt.Println("Version                 ", d.Version())
-			fmt.Println("Driver version          ", d.DriverVersion())
-			fmt.Println("Address bits            ", d.AddressBits())
-			fmt.Println("Max clock freq          ", d.MaxClockFrequency())
-			fmt.Println("Global mem size         ", d.GlobalMemSize())
-			fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
-			fmt.Println("Max mem alloc size      ", d.MaxMemAllocSize())
-			fmt.Println("Max compute units       ", d.MaxComputeUnits())
-			fmt.Println("Max work group size     ", d.MaxWorkGroupSize())
-			fmt.Println("Max work item sizes     ", d.MaxWorkItemSizes())
-			fmt.Println("=============================================")
-
-			found = append(found, d)
-		}
-	}
-	if len(found) == 0 {
-		fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
-	} else {
-		var idsFormat string
-		for i := 0; i < len(found); i++ {
-			idsFormat += strconv.Itoa(i)
-			if i != len(found)-1 {
-				idsFormat += ","
-			}
-		}
-		fmt.Printf("Found %v devices. Benchmark first GPU:       geth gpubench 0\n", len(found))
-		fmt.Printf("Mine using all GPUs:                        geth --minegpu %v\n", idsFormat)
-	}
-}
-
-// See [2]. We basically do the same here, but the Go OpenCL bindings
-// are at a slightly higher abtraction level.
-func InitCL(blockNum uint64, c *OpenCLMiner) error {
-	platforms, err := cl.GetPlatforms()
-	if err != nil {
-		return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
-	}
-
-	var devices []*cl.Device
-	for _, p := range platforms {
-		ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
-		if err != nil {
-			return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
-		}
-		for _, d := range ds {
-			devices = append(devices, d)
-		}
-	}
-
-	pow := New()
-	_ = pow.getDAG(blockNum)     // generates DAG if we don't have it
-	pow.Light.getCache(blockNum) // and cache
-
-	c.ethash = pow
-	dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
-	c.dagSize = dagSize
-
-	for _, id := range c.deviceIds {
-		if id > len(devices)-1 {
-			return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
-		} else {
-			err := initCLDevice(id, devices[id], c)
-			if err != nil {
-				return err
-			}
-		}
-	}
-	if len(c.devices) == 0 {
-		return fmt.Errorf("No GPU devices found")
-	}
-	return nil
-}
-
-func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
-	devMaxAlloc := uint64(device.MaxMemAllocSize())
-	devGlobalMem := uint64(device.GlobalMemSize())
-
-	// TODO: more fine grained version logic
-	if device.Version() == "OpenCL 1.0" {
-		fmt.Println("Device OpenCL version not supported: ", device.Version())
-		return fmt.Errorf("opencl version not supported")
-	}
-
-	var cl11, cl12 bool
-	if device.Version() == "OpenCL 1.1" {
-		cl11 = true
-	}
-	if device.Version() == "OpenCL 1.2" {
-		cl12 = true
-	}
-
-	// log warnings but carry on; some device drivers report inaccurate values
-	if c.dagSize > devGlobalMem {
-		fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
-	}
-
-	if c.dagSize > devMaxAlloc {
-		fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
-		fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
-	}
-
-	fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
-
-	context, err := cl.CreateContext([]*cl.Device{device})
-	if err != nil {
-		return fmt.Errorf("failed creating context: %v", err)
-	}
-
-	// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
-	queue, err := context.CreateCommandQueue(device, 0)
-	if err != nil {
-		return fmt.Errorf("command queue err: %v", err)
-	}
-
-	// See [4] section 3.2 and [3] "clBuildProgram".
-	// The OpenCL kernel code is compiled at run-time.
-	kvs := make(map[string]string, 4)
-	kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
-	kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
-	kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
-	kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
-	kernelCode := replaceWords(kernel, kvs)
-
-	program, err := context.CreateProgramWithSource([]string{kernelCode})
-	if err != nil {
-		return fmt.Errorf("program err: %v", err)
-	}
-
-	/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
-	   see AMD OpenCL programming guide section 4.2
-
-	   export in shell before running:
-	   export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
-	   export CPU_MAX_COMPUTE_UNITS=1
-
-	buildOpts := "-g -cl-opt-disable"
-
-	*/
-	buildOpts := ""
-	err = program.BuildProgram([]*cl.Device{device}, buildOpts)
-	if err != nil {
-		return fmt.Errorf("program build err: %v", err)
-	}
-
-	var searchKernelName, hashKernelName string
-	searchKernelName = "ethash_search"
-	hashKernelName = "ethash_hash"
-
-	searchKernel, err := program.CreateKernel(searchKernelName)
-	hashKernel, err := program.CreateKernel(hashKernelName)
-	if err != nil {
-		return fmt.Errorf("kernel err: %v", err)
-	}
-
-	// TODO: when this DAG size appears, patch the Go bindings
-	// (context.go) to work with uint64 as size_t
-	if c.dagSize > math.MaxInt32 {
-		fmt.Println("DAG too large for allocation.")
-		return fmt.Errorf("DAG too large for alloc")
-	}
-
-	// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
-	// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
-	dagBuf := *(new(*cl.MemObject))
-	dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
-	if err != nil {
-		return fmt.Errorf("allocating dag buf failed: %v", err)
-	}
-
-	// write DAG to device mem
-	dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
-	_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
-	if err != nil {
-		return fmt.Errorf("writing to dag buf failed: %v", err)
-	}
-
-	searchBuffers := make([]*cl.MemObject, searchBufSize)
-	for i := 0; i < searchBufSize; i++ {
-		searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
-		if err != nil {
-			return fmt.Errorf("search buffer err: %v", err)
-		}
-		searchBuffers[i] = searchBuff
-	}
-
-	headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
-	if err != nil {
-		return fmt.Errorf("header buffer err: %v", err)
-	}
-
-	// Unique, random nonces are crucial for mining efficieny.
-	// While we do not need cryptographically secure PRNG for nonces,
-	// we want to have uniform distribution and minimal repetition of nonces.
-	// We could guarantee strict uniqueness of nonces by generating unique ranges,
-	// but a int64 seed from crypto/rand should be good enough.
-	// we then use math/rand for speed and to avoid draining OS entropy pool
-	seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
-	if err != nil {
-		return err
-	}
-	nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
-
-	deviceStruct := &OpenCLDevice{
-		deviceId: deviceId,
-		device:   device,
-		openCL11: cl11,
-		openCL12: cl12,
-
-		dagBuf:        dagBuf,
-		headerBuf:     headerBuf,
-		searchBuffers: searchBuffers,
-
-		searchKernel: searchKernel,
-		hashKernel:   hashKernel,
-
-		queue: queue,
-		ctx:   context,
-
-		workGroupSize: workGroupSize,
-
-		nonceRand: nonceRand,
-	}
-	c.devices = append(c.devices, deviceStruct)
-
-	return nil
-}
-
-func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
-	c.mu.Lock()
-	newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
-	if newDagSize > c.dagSize {
-		// TODO: clean up buffers from previous DAG?
-		err := InitCL(block.NumberU64(), c)
-		if err != nil {
-			fmt.Println("OpenCL init error: ", err)
-			return 0, []byte{0}
-		}
-	}
-	defer c.mu.Unlock()
-
-	// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
-	select {
-	case <-stop:
-		return 0, []byte{0}
-	default:
-	}
-
-	headerHash := block.HashNoNonce()
-	diff := block.Difficulty()
-	target256 := new(big.Int).Div(maxUint256, diff)
-	target64 := new(big.Int).Rsh(target256, 192).Uint64()
-	var zero uint32 = 0
-
-	d := c.devices[index]
-
-	_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
-	if err != nil {
-		fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
-		return 0, []byte{0}
-	}
-
-	for i := 0; i < searchBufSize; i++ {
-		_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
-		if err != nil {
-			fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
-			return 0, []byte{0}
-		}
-	}
-
-	// wait for all search buffers to complete
-	err = d.queue.Finish()
-	if err != nil {
-		fmt.Println("Error in Search clFinish : ", err)
-		return 0, []byte{0}
-	}
-
-	err = d.searchKernel.SetArg(1, d.headerBuf)
-	if err != nil {
-		fmt.Println("Error in Search clSetKernelArg : ", err)
-		return 0, []byte{0}
-	}
-
-	err = d.searchKernel.SetArg(2, d.dagBuf)
-	if err != nil {
-		fmt.Println("Error in Search clSetKernelArg : ", err)
-		return 0, []byte{0}
-	}
-
-	err = d.searchKernel.SetArg(4, target64)
-	if err != nil {
-		fmt.Println("Error in Search clSetKernelArg : ", err)
-		return 0, []byte{0}
-	}
-	err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
-	if err != nil {
-		fmt.Println("Error in Search clSetKernelArg : ", err)
-		return 0, []byte{0}
-	}
-
-	// wait on this before returning
-	var preReturnEvent *cl.Event
-	if d.openCL12 {
-		preReturnEvent, err = d.ctx.CreateUserEvent()
-		if err != nil {
-			fmt.Println("Error in Search create CL user event : ", err)
-			return 0, []byte{0}
-		}
-	}
-
-	pending := make([]pendingSearch, 0, searchBufSize)
-	var p *pendingSearch
-	searchBufIndex := uint32(0)
-	var checkNonce uint64
-	loops := int64(0)
-	prevHashRate := int32(0)
-	start := time.Now().UnixNano()
-	// we grab a single random nonce and sets this as argument to the kernel search function
-	// the device will then add each local threads gid to the nonce, creating a unique nonce
-	// for each device computing unit executing in parallel
-	initNonce := uint64(d.nonceRand.Int63())
-	for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
-		select {
-		case <-stop:
-
-			/*
-				if d.openCL12 {
-					err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
-					if err != nil {
-						fmt.Println("Error in Search WaitForEvents: ", err)
-					}
-				}
-			*/
-
-			atomic.AddInt32(&c.hashRate, -prevHashRate)
-			return 0, []byte{0}
-		default:
-		}
-
-		if (loops % (1 << 7)) == 0 {
-			elapsed := time.Now().UnixNano() - start
-			// TODO: verify if this is correct hash rate calculation
-			hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
-			hashrateDiff := int32(hashes) - prevHashRate
-			prevHashRate = int32(hashes)
-			atomic.AddInt32(&c.hashRate, hashrateDiff)
-		}
-		loops++
-
-		err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
-		if err != nil {
-			fmt.Println("Error in Search clSetKernelArg : ", err)
-			return 0, []byte{0}
-		}
-		err = d.searchKernel.SetArg(3, nonce)
-		if err != nil {
-			fmt.Println("Error in Search clSetKernelArg : ", err)
-			return 0, []byte{0}
-		}
-
-		// execute kernel
-		_, err := d.queue.EnqueueNDRangeKernel(
-			d.searchKernel,
-			[]int{0},
-			[]int{globalWorkSize},
-			[]int{d.workGroupSize},
-			nil)
-		if err != nil {
-			fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
-			return 0, []byte{0}
-		}
-
-		pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
-		searchBufIndex = (searchBufIndex + 1) % searchBufSize
-
-		if len(pending) == searchBufSize {
-			p = &(pending[searchBufIndex])
-			cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
-				cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
-				nil)
-			if err != nil {
-				fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
-				return 0, []byte{0}
-			}
-
-			results := cres.ByteSlice()
-			nfound := binary.LittleEndian.Uint32(results)
-			nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
-			// OpenCL returns the offsets from the start nonce
-			for i := uint32(0); i < nfound; i++ {
-				lo := (i + 1) * SIZEOF_UINT32
-				hi := (i + 2) * SIZEOF_UINT32
-				upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
-				checkNonce = p.startNonce + upperNonce
-				if checkNonce != 0 {
-					// We verify that the nonce is indeed a solution by
-					// executing the Ethash verification function (on the CPU).
-					cache := c.ethash.Light.getCache(block.NumberU64())
-					ok, mixDigest, result := cache.compute(c.dagSize, headerHash, checkNonce)
-
-					// TODO: return result first
-					if ok && result.Big().Cmp(target256) <= 0 {
-						_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
-						if err != nil {
-							fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
-						}
-						if d.openCL12 {
-							err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
-							if err != nil {
-								fmt.Println("Error in Search WaitForEvents: ", err)
-							}
-						}
-						return checkNonce, mixDigest.Bytes()
-					}
-					_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
-					if err != nil {
-						fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
-						return 0, []byte{0}
-					}
-				}
-			}
-			_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
-			if err != nil {
-				fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
-				return 0, []byte{0}
-			}
-			pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
-		}
-	}
-	if d.openCL12 {
-		err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
-		if err != nil {
-			fmt.Println("Error in Search clWaitForEvents: ", err)
-			return 0, []byte{0}
-		}
-	}
-	return 0, []byte{0}
-}
-
-func (c *OpenCLMiner) Verify(block pow.Block) bool {
-	return c.ethash.Light.Verify(block)
-}
-func (c *OpenCLMiner) GetHashrate() int64 {
-	return int64(atomic.LoadInt32(&c.hashRate))
-}
-func (c *OpenCLMiner) Turbo(on bool) {
-	// This is GPU mining. Always be turbo.
-}
-
-func replaceWords(text string, kvs map[string]string) string {
-	for k, v := range kvs {
-		text = strings.Replace(text, k, v, -1)
-	}
-	return text
-}
-
-func logErr(err error) {
-	if err != nil {
-		fmt.Println("Error in OpenCL call:", err)
-	}
-}
-
-func argErr(err error) error {
-	return fmt.Errorf("arg err: %v", err)
-}