Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

/*
    Copyright 2009 NVIDIA Corporation.  All rights reserved.

    NOTICE TO LICENSEE:   

    This source code and/or documentation ("Licensed Deliverables") are subject 
    to NVIDIA intellectual property rights under U.S. and international Copyright 
    laws.  

    These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL 
    to NVIDIA and is being provided under the terms and conditions of a form of 
    NVIDIA software license agreement by and between NVIDIA and Licensee ("License 
    Agreement") or electronically accepted by Licensee.  Notwithstanding any terms 
    or conditions to the contrary in the License Agreement, reproduction or 
    disclosure of the Licensed Deliverables to any third party without the express 
    written consent of NVIDIA is prohibited.     

    NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, 
    NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED 
    DELIVERABLES FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED 
    WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE 
    LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
    NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   NOTWITHSTANDING ANY 
    TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL 
    NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, 
    OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,  WHETHER 
    IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF 
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.  

    U.S. Government End Users. These Licensed Deliverables are a "commercial item" 
    as that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
    "commercial computer  software"  and "commercial computer software documentation" 
    as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the 
    U.S. Government only as a commercial end item.  Consistent with 48 C.F.R.12.212 
    and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government 
    End Users acquire the Licensed Deliverables with only those rights set forth 
    herein. 

    Any use of the Licensed Deliverables in individual and commercial software must 
    include, in the user documentation and internal comments to the code, the above 
    Disclaimer and U.S. Government End Users Notice.
 */

#ifndef CUPRINTF_H
#define CUPRINTF_H

/*
 *  This is the header file supporting cuPrintf.cu and defining both
 *  the host and device-side interfaces. See that file for some more
 *  explanation and sample use code. See also below for details of the
 *  host-side interfaces.
 *
 *  Quick sample code:
 *
    #include "cuPrintf.cu"
    
    __global__ void testKernel(int val)
    {
        cuPrintf("Value is: %d\n", val);
    }

    int main()
    {
        cudaPrintfInit();
        testKernel<<< 2, 3 >>>(10);
        cudaPrintfDisplay(stdout, true);
        cudaPrintfEnd();
        return 0;
    }
 */

///////////////////////////////////////////////////////////////////////////////
// DEVICE SIDE
// External function definitions for device-side code

// Abuse of templates to simulate varargs
__device__ int cuPrintf(const char *fmt);
template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);


//
//  cuPrintfRestrict
//
//  Called to restrict output to a given thread/block. Pass
//  the constant CUPRINTF_UNRESTRICTED to unrestrict output
//  for thread/block IDs. Note you can therefore allow
//  "all printfs from block 3" or "printfs from thread 2
//  on all blocks", or "printfs only from block 1, thread 5".
//
//  Arguments:
//      threadid - Thread ID to allow printfs from
//      blockid - Block ID to allow printfs from
//
//  NOTE: Restrictions last between invocations of
//  kernels unless cudaPrintfInit() is called again.
//
#define CUPRINTF_UNRESTRICTED   -1
__device__ void cuPrintfRestrict(int threadid, int blockid);


///////////////////////////////////////////////////////////////////////////////
// HOST SIDE
// External function definitions for host-side code

//
//  cudaPrintfInit
//
//  Call this once to initialise the printf system. If the output
//  file or buffer size needs to be changed, call cudaPrintfEnd()
//  before re-calling cudaPrintfInit().
//
//  The default size for the buffer is 1 megabyte. For CUDA
//  architecture 1.1 and above, the buffer is filled linearly and
//  is completely used; however for architecture 1.0, the buffer
//  is divided into as many segments are there are threads, even
//  if some threads do not call cuPrintf().
//
//  Arguments:
//      bufferLen - Length, in bytes, of total space to reserve
//                  (in device global memory) for output.
//
//  Returns:
//      cudaSuccess if all is well.
//
extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576);   // 1-meg - that's enough for 4096 printfs by all threads put together

//
//  cudaPrintfEnd
//
//  Cleans up all memories allocated by cudaPrintfInit().
//  Call this at exit, or before calling cudaPrintfInit() again.
//
extern "C" void cudaPrintfEnd();

//
//  cudaPrintfDisplay
//
//  Dumps the contents of the output buffer to the specified
//  file pointer. If the output pointer is not specified,
//  the default "stdout" is used.
//
//  Arguments:
//      outputFP     - A file pointer to an output stream.
//      showThreadID - If "true", output strings are prefixed
//                     by "[blockid, threadid] " at output.
//
//  Returns:
//      cudaSuccess if all is well.
//
extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);

#endif  // CUPRINTF_H