|
- /*
- * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
- *
- * NOTICE TO LICENSEE:
- *
- * This source code and/or documentation ("Licensed Deliverables") are
- * subject to NVIDIA intellectual property rights under U.S. and
- * international Copyright laws.
- *
- * These Licensed Deliverables contained herein is PROPRIETARY and
- * CONFIDENTIAL to NVIDIA and is being provided under the terms and
- * conditions of a form of NVIDIA software license agreement by and
- * between NVIDIA and Licensee ("License Agreement") or electronically
- * accepted by Licensee. Notwithstanding any terms or conditions to
- * the contrary in the License Agreement, reproduction or disclosure
- * of the Licensed Deliverables to any third party without the express
- * written consent of NVIDIA is prohibited.
- *
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
- * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
- * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
- * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
- * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
- * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
- * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
- * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
- * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
- * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THESE LICENSED DELIVERABLES.
- *
- * U.S. Government End Users. These Licensed Deliverables are a
- * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
- * 1995), consisting of "commercial computer software" and "commercial
- * computer software documentation" as such terms are used in 48
- * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
- * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
- * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
- * U.S. Government End Users acquire the Licensed Deliverables with
- * only those rights set forth herein.
- *
- * Any use of the Licensed Deliverables in individual and commercial
- * software must include, in the user documentation and internal
- * comments to the code, the above Disclaimer and U.S. Government End
- * Users Notice.
- */
-
- /*
- * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
- */
-
- #if !defined(CUDNN_OPS_INFER_H_)
- #define CUDNN_OPS_INFER_H_
-
- #include <cuda_runtime.h>
- #include <stdint.h>
-
- #include "cudnn_version.h"
-
- /* These version numbers are autogenerated, do not edit manually. */
- #define CUDNN_OPS_INFER_MAJOR 8
- #define CUDNN_OPS_INFER_MINOR 0
- #define CUDNN_OPS_INFER_PATCH 4
-
- /*
- * Compile-time consistency check: stop the build with an #error if the version of this
- * header differs from CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL (presumably supplied
- * by the cudnn_version.h include -- TODO confirm).
- */
- #if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
- (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
- #error Version mismatch in cuDNN OPS INFER!!!
- #endif
-
- /*
- * CUDNNWINAPI decorates the function declarations below: it expands to __stdcall
- * when _WIN32 is defined and to nothing otherwise. A definition of CUDNNWINAPI made
- * before this point is left untouched.
- */
- #ifndef CUDNNWINAPI
- #ifdef _WIN32
- #define CUDNNWINAPI __stdcall
- #else
- #define CUDNNWINAPI
- #endif
- #endif
-
- /*
- * Warnings for deprecated APIs are enabled by defining the CUDNN_WARN_DEPRECATED macro.
- * When it is defined, CUDNN_DEPRECATED expands to the deprecation attribute of the
- * detected compiler; when it is not defined, or no supported compiler is detected,
- * CUDNN_DEPRECATED expands to nothing.
- */
- #if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
- /* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
- #define CUDNN_DEPRECATED __attribute__((deprecated))
- #elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
- /* Microsoft Visual C++ */
- #define CUDNN_DEPRECATED __declspec(deprecated)
- #elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
- /* C++14 compilers */
- #define CUDNN_DEPRECATED [[deprecated]]
- #else
- /* No support for the deprecated attribute */
- #define CUDNN_DEPRECATED
- #endif
-
- #if defined(__cplusplus)
- extern "C" {
- #endif
-
- struct cudnnContext;
- typedef struct cudnnContext *cudnnHandle_t;
-
- size_t CUDNNWINAPI
- cudnnGetVersion(void);
-
- /* Returns CUDA Runtime version statically linked against cudnn */
- size_t CUDNNWINAPI
- cudnnGetCudartVersion(void);
-
- /*
- * CUDNN return codes
- */
- typedef enum {
- CUDNN_STATUS_SUCCESS = 0,
- CUDNN_STATUS_NOT_INITIALIZED = 1,
- CUDNN_STATUS_ALLOC_FAILED = 2,
- CUDNN_STATUS_BAD_PARAM = 3,
- CUDNN_STATUS_INTERNAL_ERROR = 4,
- CUDNN_STATUS_INVALID_VALUE = 5,
- CUDNN_STATUS_ARCH_MISMATCH = 6,
- CUDNN_STATUS_MAPPING_ERROR = 7,
- CUDNN_STATUS_EXECUTION_FAILED = 8,
- CUDNN_STATUS_NOT_SUPPORTED = 9,
- CUDNN_STATUS_LICENSE_ERROR = 10,
- CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
- CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
- CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
- CUDNN_STATUS_VERSION_MISMATCH = 14,
- } cudnnStatus_t;
-
- /* human-readable error messages */
- const char *CUDNNWINAPI
- cudnnGetErrorString(cudnnStatus_t status);
-
- /* Forward definition in this version only */
- typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
-
- typedef enum {
- CUDNN_ERRQUERY_RAWCODE = 0,
- CUDNN_ERRQUERY_NONBLOCKING = 1,
- CUDNN_ERRQUERY_BLOCKING = 2,
- } cudnnErrQueryMode_t;
-
- cudnnStatus_t CUDNNWINAPI
- cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
-
- /*
- * Local definition of libraryPropertyType (the selector passed to cudnnGetProperty).
- * It is compiled only when __LIBRARY_TYPES_H__ is not already defined -- presumably the
- * include guard of the CUDA header that normally provides this type; TODO confirm.
- */
- #ifndef __LIBRARY_TYPES_H__
-
- typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;
-
- #endif
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetProperty(libraryPropertyType type, int *value);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnCreate(cudnnHandle_t *handle);
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroy(cudnnHandle_t handle);
- cudnnStatus_t CUDNNWINAPI
- cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
- cudnnStatus_t CUDNNWINAPI
- cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
-
- /* Data structures to represent Image/Filter and the Neural Network Layer */
- typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
- typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
- typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
- typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
- typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
- typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
- typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
- typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
- typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
- typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;
- /*
- * CUDNN data type
- */
- typedef enum {
- CUDNN_DATA_FLOAT = 0,
- CUDNN_DATA_DOUBLE = 1,
- CUDNN_DATA_HALF = 2,
- CUDNN_DATA_INT8 = 3,
- CUDNN_DATA_INT32 = 4,
- CUDNN_DATA_INT8x4 = 5,
- CUDNN_DATA_UINT8 = 6,
- CUDNN_DATA_UINT8x4 = 7,
- CUDNN_DATA_INT8x32 = 8,
- } cudnnDataType_t;
-
- /*
- * CUDNN math type
- */
- typedef enum {
- CUDNN_DEFAULT_MATH = 0,
- CUDNN_TENSOR_OP_MATH = 1,
- CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
- CUDNN_FMA_MATH = 3,
- } cudnnMathType_t;
-
- /*
- * CUDNN propagate Nan
- */
- typedef enum {
- CUDNN_NOT_PROPAGATE_NAN = 0,
- CUDNN_PROPAGATE_NAN = 1,
- } cudnnNanPropagation_t;
-
- /*
- * CUDNN Determinism
- */
- typedef enum {
- CUDNN_NON_DETERMINISTIC = 0,
- CUDNN_DETERMINISTIC = 1,
- } cudnnDeterminism_t;
-
- /* Maximum supported number of tensor dimensions */
- #define CUDNN_DIM_MAX 8
-
- /* Create an instance of a generic Tensor descriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
-
- /* Tensor layout selector taken by the tensor, filter and tensor-transform descriptor setters */
- typedef enum {
- CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
- CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved (cStride = 1) */
- /* each image point is a vector of elements of C; the vector length is given by the data type */
- CUDNN_TENSOR_NCHW_VECT_C = 2,
- } cudnnTensorFormat_t;
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
- cudnnTensorFormat_t format,
- cudnnDataType_t dataType, /* image data type */
- int n, /* number of inputs (batch size) */
- int c, /* number of input feature maps */
- int h, /* height of input section */
- int w); /* width of input section */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
- cudnnDataType_t dataType, /* image data type */
- int n, /* number of inputs (batch size) */
- int c, /* number of input feature maps */
- int h, /* height of input section */
- int w, /* width of input section */
- int nStride,
- int cStride,
- int hStride,
- int wStride);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
- cudnnDataType_t *dataType, /* image data type */
- int *n, /* number of inputs (batch size) */
- int *c, /* number of input feature maps */
- int *h, /* height of input section */
- int *w, /* width of input section */
- int *nStride,
- int *cStride,
- int *hStride,
- int *wStride);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
- cudnnDataType_t dataType,
- int nbDims,
- const int dimA[],
- const int strideA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
- cudnnTensorFormat_t format,
- cudnnDataType_t dataType,
- int nbDims,
- const int dimA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
- int nbDimsRequested,
- cudnnDataType_t *dataType,
- int *nbDims,
- int dimA[],
- int strideA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);
-
- /* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride
-
- 1)Example of all images in row major order one batch of features after the other (with an optional padding on row)
- input_stride : c x h x h_stride
- feature_stride : h x h_stride
- h_stride : >= w ( h_stride = w if no padding)
- w_stride : 1
-
-
- 2)Example of all images in row major with features maps interleaved
- input_stride : c x h x h_stride
- feature_stride : 1
- h_stride : w x c
- w_stride : c
-
- 3)Example of all images in column major order one batch of features after the other (with optional padding on column)
- input_stride : c x w x w_stride
- feature_stride : w x w_stride
- h_stride : 1
- w_stride : >= h
-
- */
-
- /* Destroy an instance of a generic Tensor descriptor (the type created by cudnnCreateTensorDescriptor) */
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
-
- /* Fold/unfold transforms */
- typedef enum {
- CUDNN_TRANSFORM_FOLD = 0U,
- CUDNN_TRANSFORM_UNFOLD = 1U,
- } cudnnFoldingDirection_t;
-
- /** Create a destination descriptor for cudnnTransformTensor */
- cudnnStatus_t CUDNNWINAPI
- cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
- const cudnnTensorDescriptor_t srcDesc,
- cudnnTensorDescriptor_t destDesc,
- size_t *destSizeInBytes);
-
- /** Create an empty tensor transform descriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
-
- /** Initialize a previously created tensor transform descriptor. */
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
- const uint32_t nbDims,
- const cudnnTensorFormat_t destFormat,
- const int32_t padBeforeA[],
- const int32_t padAfterA[],
- const uint32_t foldA[],
- const cudnnFoldingDirection_t direction);
-
- /**
- * Retrieves the values stored in a previously initialized tensor transform
- * descriptor.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
- uint32_t nbDimsRequested,
- cudnnTensorFormat_t *destFormat,
- int32_t padBeforeA[],
- int32_t padAfterA[],
- uint32_t foldA[],
- cudnnFoldingDirection_t *direction);
-
- /**
- * Destroys a previously created tensor transform descriptor.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
-
- /* Tensor layout conversion helper (y = alpha * x + beta * y) */
- cudnnStatus_t CUDNNWINAPI
- cudnnTransformTensor(cudnnHandle_t handle,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc,
- const void *x,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnTransformTensorEx(cudnnHandle_t handle,
- const cudnnTensorTransformDescriptor_t transDesc,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData);
-
- /* Tensor Bias addition : C = alpha * A + beta * C */
- cudnnStatus_t CUDNNWINAPI
- cudnnAddTensor(cudnnHandle_t handle,
- const void *alpha,
- const cudnnTensorDescriptor_t aDesc,
- const void *A,
- const void *beta,
- const cudnnTensorDescriptor_t cDesc,
- void *C);
-
- /*
- * CUDNN OpTensor op type
- */
- typedef enum {
- CUDNN_OP_TENSOR_ADD = 0,
- CUDNN_OP_TENSOR_MUL = 1,
- CUDNN_OP_TENSOR_MIN = 2,
- CUDNN_OP_TENSOR_MAX = 3,
- CUDNN_OP_TENSOR_SQRT = 4,
- CUDNN_OP_TENSOR_NOT = 5,
- } cudnnOpTensorOp_t;
-
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
- cudnnOpTensorOp_t opTensorOp,
- cudnnDataType_t opTensorCompType,
- cudnnNanPropagation_t opTensorNanOpt);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
- cudnnOpTensorOp_t *opTensorOp,
- cudnnDataType_t *opTensorCompType,
- cudnnNanPropagation_t *opTensorNanOpt);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
-
- /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
- /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
- cudnnStatus_t CUDNNWINAPI
- cudnnOpTensor(cudnnHandle_t handle,
- const cudnnOpTensorDescriptor_t opTensorDesc,
- const void *alpha1,
- const cudnnTensorDescriptor_t aDesc,
- const void *A,
- const void *alpha2,
- const cudnnTensorDescriptor_t bDesc,
- const void *B,
- const void *beta,
- const cudnnTensorDescriptor_t cDesc,
- void *C);
-
- /*
- * CUDNN ReduceTensor op type
- */
- typedef enum {
- CUDNN_REDUCE_TENSOR_ADD = 0,
- CUDNN_REDUCE_TENSOR_MUL = 1,
- CUDNN_REDUCE_TENSOR_MIN = 2,
- CUDNN_REDUCE_TENSOR_MAX = 3,
- CUDNN_REDUCE_TENSOR_AMAX = 4,
- CUDNN_REDUCE_TENSOR_AVG = 5,
- CUDNN_REDUCE_TENSOR_NORM1 = 6,
- CUDNN_REDUCE_TENSOR_NORM2 = 7,
- CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
- } cudnnReduceTensorOp_t;
-
- /*
- * CUDNN ReduceTensor indices type
- */
- typedef enum {
- CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
- CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
- } cudnnReduceTensorIndices_t;
-
- /*
- * CUDNN tensor indices type size (all unsigned)
- * Currently not supported, default is 32 bit unsigned.
- */
- typedef enum {
- CUDNN_32BIT_INDICES = 0,
- CUDNN_64BIT_INDICES = 1,
- CUDNN_16BIT_INDICES = 2,
- CUDNN_8BIT_INDICES = 3,
- } cudnnIndicesType_t;
-
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
- cudnnReduceTensorOp_t reduceTensorOp,
- cudnnDataType_t reduceTensorCompType,
- cudnnNanPropagation_t reduceTensorNanOpt,
- cudnnReduceTensorIndices_t reduceTensorIndices,
- cudnnIndicesType_t reduceTensorIndicesType);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
- cudnnReduceTensorOp_t *reduceTensorOp,
- cudnnDataType_t *reduceTensorCompType,
- cudnnNanPropagation_t *reduceTensorNanOpt,
- cudnnReduceTensorIndices_t *reduceTensorIndices,
- cudnnIndicesType_t *reduceTensorIndicesType);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
-
- /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
- * output tensors */
- cudnnStatus_t CUDNNWINAPI
- cudnnGetReductionIndicesSize(cudnnHandle_t handle,
- const cudnnReduceTensorDescriptor_t reduceTensorDesc,
- const cudnnTensorDescriptor_t aDesc,
- const cudnnTensorDescriptor_t cDesc,
- size_t *sizeInBytes);
-
- /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
- * tensors */
- cudnnStatus_t CUDNNWINAPI
- cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
- const cudnnReduceTensorDescriptor_t reduceTensorDesc,
- const cudnnTensorDescriptor_t aDesc,
- const cudnnTensorDescriptor_t cDesc,
- size_t *sizeInBytes);
-
- /* Tensor operation : C = reduce op( alpha * A ) + beta * C */
- /* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
- /* The indices space is ignored for reduce ops other than min or max. */
- cudnnStatus_t CUDNNWINAPI
- cudnnReduceTensor(cudnnHandle_t handle,
- const cudnnReduceTensorDescriptor_t reduceTensorDesc,
- void *indices,
- size_t indicesSizeInBytes,
- void *workspace,
- size_t workspaceSizeInBytes,
- const void *alpha,
- const cudnnTensorDescriptor_t aDesc,
- const void *A,
- const void *beta,
- const cudnnTensorDescriptor_t cDesc,
- void *C);
-
- /* Set all values of a tensor to a given value : y[i] = valuePtr[0] */
- cudnnStatus_t CUDNNWINAPI
- cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
-
- /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
- cudnnStatus_t CUDNNWINAPI
- cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
-
- /* Create an instance of FilterStruct */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
- cudnnDataType_t dataType, /* image data type */
- cudnnTensorFormat_t format,
- int k, /* number of output feature maps */
- int c, /* number of input feature maps */
- int h, /* height of each input filter */
- int w); /* width of each input filter */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
- cudnnDataType_t *dataType, /* image data type */
- cudnnTensorFormat_t *format,
- int *k, /* number of output feature maps */
- int *c, /* number of input feature maps */
- int *h, /* height of each input filter */
- int *w); /* width of each input filter */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
- cudnnDataType_t dataType, /* image data type */
- cudnnTensorFormat_t format,
- int nbDims,
- const int filterDimA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
- int nbDimsRequested,
- cudnnDataType_t *dataType, /* image data type */
- cudnnTensorFormat_t *format,
- int *nbDims,
- int filterDimA[]);
- cudnnStatus_t CUDNNWINAPI
- cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnTransformFilter(cudnnHandle_t handle,
- const cudnnTensorTransformDescriptor_t transDesc,
- const void *alpha,
- const cudnnFilterDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnFilterDescriptor_t destDesc,
- void *destData);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
-
- /*
- * softmax algorithm
- */
- typedef enum {
- CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
- CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
- CUDNN_SOFTMAX_LOG = 2
- } cudnnSoftmaxAlgorithm_t;
-
- typedef enum {
- CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
- CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
- } cudnnSoftmaxMode_t;
-
- /* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
-
- /* Function to perform forward softmax */
- cudnnStatus_t CUDNNWINAPI
- cudnnSoftmaxForward(cudnnHandle_t handle,
- cudnnSoftmaxAlgorithm_t algo,
- cudnnSoftmaxMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc,
- const void *x,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- /*
- * pooling mode
- */
- typedef enum {
- CUDNN_POOLING_MAX = 0,
- CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
- CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
- CUDNN_POOLING_MAX_DETERMINISTIC = 3
- } cudnnPoolingMode_t;
-
- /* Create an instance of pooling descriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
- cudnnPoolingMode_t mode,
- cudnnNanPropagation_t maxpoolingNanOpt,
- int windowHeight,
- int windowWidth,
- int verticalPadding,
- int horizontalPadding,
- int verticalStride,
- int horizontalStride);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
- cudnnPoolingMode_t *mode,
- cudnnNanPropagation_t *maxpoolingNanOpt,
- int *windowHeight,
- int *windowWidth,
- int *verticalPadding,
- int *horizontalPadding,
- int *verticalStride,
- int *horizontalStride);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
- const cudnnPoolingMode_t mode,
- const cudnnNanPropagation_t maxpoolingNanOpt,
- int nbDims,
- const int windowDimA[],
- const int paddingA[],
- const int strideA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
- int nbDimsRequested,
- cudnnPoolingMode_t *mode,
- cudnnNanPropagation_t *maxpoolingNanOpt,
- int *nbDims,
- int windowDimA[],
- int paddingA[],
- int strideA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
- const cudnnTensorDescriptor_t inputTensorDesc,
- int nbDims,
- int outputTensorDimA[]);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
- const cudnnTensorDescriptor_t inputTensorDesc,
- int *n,
- int *c,
- int *h,
- int *w);
-
- /* Destroy an instance of pooling descriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
-
- /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
-
- /* Function to perform forward pooling */
- cudnnStatus_t CUDNNWINAPI
- cudnnPoolingForward(cudnnHandle_t handle,
- const cudnnPoolingDescriptor_t poolingDesc,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc,
- const void *x,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- /*
- * activation mode
- */
- typedef enum {
- CUDNN_ACTIVATION_SIGMOID = 0,
- CUDNN_ACTIVATION_RELU = 1,
- CUDNN_ACTIVATION_TANH = 2,
- CUDNN_ACTIVATION_CLIPPED_RELU = 3,
- CUDNN_ACTIVATION_ELU = 4,
- CUDNN_ACTIVATION_IDENTITY = 5
- } cudnnActivationMode_t;
-
- /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
- cudnnActivationMode_t mode,
- cudnnNanPropagation_t reluNanOpt,
- double coef); /* ceiling for clipped RELU, alpha for ELU */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
- cudnnActivationMode_t *mode,
- cudnnNanPropagation_t *reluNanOpt,
- double *coef); /* ceiling for clipped RELU, alpha for ELU */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
-
- /* Function to perform forward activation */
- cudnnStatus_t CUDNNWINAPI
- cudnnActivationForward(cudnnHandle_t handle,
- cudnnActivationDescriptor_t activationDesc,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc,
- const void *x,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- /*
- * Create an instance of LRN (Local Response Normalization) descriptor
- * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);
-
- #define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
- #define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
- #define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
- #define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */
-
- /* LRN layer mode */
- typedef enum {
- CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
- } cudnnLRNMode_t;
-
- /*
- * Uses a window [center-lookBehind, center+lookAhead], where
- * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
- * Values of double parameters cast to tensor data type.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
- /*
- * Retrieve the settings currently stored in an LRN layer descriptor
- * Any of the provided pointers can be NULL (no corresponding value will be returned)
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);
-
- /* Destroy an instance of LRN descriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
-
- /* LRN functions: output = alpha * normalize(x) + beta * old_y */
-
- /* LRN cross-channel forward computation. Double parameters cast to tensor data type */
- cudnnStatus_t CUDNNWINAPI
- cudnnLRNCrossChannelForward(cudnnHandle_t handle,
- cudnnLRNDescriptor_t normDesc,
- cudnnLRNMode_t lrnMode,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc,
- const void *x,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- typedef enum {
- CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
- } cudnnDivNormMode_t;
-
- /* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
- cudnnStatus_t CUDNNWINAPI
- cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
- cudnnLRNDescriptor_t normDesc,
- cudnnDivNormMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
- const void *x,
- const void *means, /* if NULL, means are assumed to be zero */
- void *temp,
- void *temp2,
- const void *beta,
- const cudnnTensorDescriptor_t yDesc,
- void *y);
-
- typedef enum {
- /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
- CUDNN_BATCHNORM_PER_ACTIVATION = 0,
-
- /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
- CUDNN_BATCHNORM_SPATIAL = 1,
-
- /*
- * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
- * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
- */
- CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
- } cudnnBatchNormMode_t;
-
- #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
-
- /*
- * Derives a tensor descriptor from layer data descriptor for BatchNormalization
- * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
- * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
- const cudnnTensorDescriptor_t xDesc,
- cudnnBatchNormMode_t mode);
-
- typedef enum {
- CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
- CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
- CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
- } cudnnBatchNormOps_t;
-
- /*
- * Performs Batch Normalization during Inference:
- * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
- * with bnScale, bnBias, estimatedMean, estimatedVariance tensors indexed
- * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
- * (not declared above in this header) for notes on function arguments.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
- cudnnBatchNormMode_t mode,
- const void *alpha, /* alpha[0] = result blend factor */
- const void *beta, /* beta[0] = dest layer blend factor */
- const cudnnTensorDescriptor_t xDesc,
- const void *x, /* NxCxHxW */
- const cudnnTensorDescriptor_t yDesc,
- void *y, /* NxCxHxW */
- const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, /* see cudnnDeriveBNTensorDescriptor */
- const void *bnScale,
- const void *bnBias,
- const void *estimatedMean,
- const void *estimatedVariance,
- double epsilon); /* added to estimatedVariance under the sqrt, see formula */
-
- /* Normalization mode taken by cudnnDeriveNormTensorDescriptor and cudnnNormalizationForwardInference */
- typedef enum {
- /* normScale, normBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
- CUDNN_NORM_PER_ACTIVATION = 0,
-
- /* normScale, normBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
- CUDNN_NORM_PER_CHANNEL = 1,
- } cudnnNormMode_t;
-
- typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;
-
- /*
- * Derives a tensor descriptor from layer data descriptor for Normalization
- * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
- * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
- *
- * NOTE(review): the signature takes two descriptors, derivedNormScaleBiasDesc and
- * derivedNormMeanVarDesc -- presumably for the scale/bias tensors and the mean/variance
- * tensors respectively (cf. normScaleBiasDesc / normMeanVarDesc in
- * cudnnNormalizationForwardInference); confirm against the API reference.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
- cudnnTensorDescriptor_t derivedNormMeanVarDesc,
- const cudnnTensorDescriptor_t xDesc,
- cudnnNormMode_t mode,
- int groupCnt); /* Placeholder for future work; should be set to 1 for now */
-
- typedef enum {
- CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
- CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
- CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
- } cudnnNormOps_t;
-
- /*
- * Performs Normalization during Inference:
- * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
- * with normScale, normBias, estimatedMean, estimatedVariance tensors indexed
- * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
- * (not declared above in this header) for notes on function arguments.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnNormalizationForwardInference(cudnnHandle_t handle,
- cudnnNormMode_t mode,
- cudnnNormOps_t normOps,
- cudnnNormAlgo_t algo,
- const void *alpha, /* alpha[0] = result blend factor */
- const void *beta, /* beta[0] = dest layer blend factor */
- const cudnnTensorDescriptor_t xDesc,
- const void *x, /* NxCxHxW */
- const cudnnTensorDescriptor_t normScaleBiasDesc,
- const void *normScale,
- const void *normBias,
- const cudnnTensorDescriptor_t normMeanVarDesc,
- const void *estimatedMean,
- const void *estimatedVariance,
- const cudnnTensorDescriptor_t zDesc, /* presumably the elemWiseAdd operand of CUDNN_NORM_OPS_NORM_ADD_ACTIVATION -- TODO confirm */
- const void *z,
- cudnnActivationDescriptor_t activationDesc,
- const cudnnTensorDescriptor_t yDesc,
- void *y, /* NxCxHxW */
- double epsilon, /* added to estimatedVariance under the sqrt, see formula */
- int groupCnt); /* Placeholder for future work */
-
- /* APIs for spatial transformer network*/
- typedef enum {
- CUDNN_SAMPLER_BILINEAR = 0,
- } cudnnSamplerType_t;
-
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc, /* descriptor to configure */
- cudnnSamplerType_t samplerType, /* sampler type; CUDNN_SAMPLER_BILINEAR is the only value in cudnnSamplerType_t */
- cudnnDataType_t dataType, /* data type setting for the descriptor */
- const int nbDims, /* presumably the number of entries in dimA - confirm */
- const int dimA[]); /* array of dimensions (read-only); meaning of each entry is not shown here */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc); /* stDesc is passed by value; presumably the counterpart of cudnnCreateSpatialTransformerDescriptor - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle, /* cuDNN handle */
- const cudnnSpatialTransformerDescriptor_t stDesc, /* spatial transformer descriptor */
- const void *theta, /* read-only input; presumably the transformation parameters - confirm */
- void *grid); /* non-const buffer; presumably the generated grid is written here - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSpatialTfSamplerForward(cudnnHandle_t handle, /* cuDNN handle */
- cudnnSpatialTransformerDescriptor_t stDesc, /* spatial transformer descriptor */
- const void *alpha, /* presumably the result blend factor, as in the normalization API - confirm */
- const cudnnTensorDescriptor_t xDesc, /* descriptor for x */
- const void *x, /* read-only input tensor data */
- const void *grid, /* read-only; presumably a grid such as cudnnSpatialTfGridGeneratorForward writes - confirm */
- const void *beta, /* presumably the destination blend factor, as in the normalization API - confirm */
- cudnnTensorDescriptor_t yDesc, /* descriptor for y */
- void *y); /* non-const buffer; presumably receives the sampled output - confirm */
-
- typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t; /* dropout descriptor handle: pointer to struct cudnnDropoutStruct, whose definition is not visible here */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc); /* dropoutDesc is passed by pointer; presumably it receives the newly created descriptor - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc); /* dropoutDesc is passed by value; presumably the counterpart of cudnnCreateDropoutDescriptor - confirm */
-
- /* Helper function to determine the size of the states buffer to be passed to cudnnSetDropoutDescriptor */
- cudnnStatus_t CUDNNWINAPI
- cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes); /* sizeInBytes: presumably the out-parameter that receives the size - confirm */
-
- /* Helper function to determine the size of the reserve space to be passed to dropout forward/backward calls */
- cudnnStatus_t CUDNNWINAPI
- cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes); /* xdesc: tensor descriptor the size is computed for; sizeInBytes: presumably the out-parameter that receives the size - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, /* descriptor to configure */
- cudnnHandle_t handle, /* cuDNN handle */
- float dropout, /* dropout value; presumably the drop probability - confirm */
- void *states, /* states buffer; its size can be queried with cudnnDropoutGetStatesSize */
- size_t stateSizeInBytes, /* size of the states buffer in bytes */
- unsigned long long seed); /* seed value; presumably for random number generation - confirm */
-
- /* Restores the dropout descriptor to a previously saved-off state.
- * Takes the same parameter list as cudnnSetDropoutDescriptor. */
- cudnnStatus_t CUDNNWINAPI
- cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, /* descriptor to restore */
- cudnnHandle_t handle, /* cuDNN handle */
- float dropout, /* dropout value; presumably the drop probability - confirm */
- void *states, /* states buffer; presumably holds the previously saved-off state - confirm */
- size_t stateSizeInBytes, /* size of the states buffer in bytes */
- unsigned long long seed); /* seed value; presumably the one used when the state was saved - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, /* descriptor to query */
- cudnnHandle_t handle, /* cuDNN handle */
- float *dropout, /* passed by pointer; presumably receives the dropout value - confirm */
- void **states, /* passed by pointer; presumably receives the states buffer pointer - confirm */
- unsigned long long *seed); /* passed by pointer; presumably receives the seed - confirm */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnDropoutForward(cudnnHandle_t handle, /* cuDNN handle */
- const cudnnDropoutDescriptor_t dropoutDesc, /* dropout descriptor */
- const cudnnTensorDescriptor_t xdesc, /* descriptor for x */
- const void *x, /* read-only input tensor data */
- const cudnnTensorDescriptor_t ydesc, /* descriptor for y */
- void *y, /* non-const buffer; presumably receives the output - confirm */
- void *reserveSpace, /* reserve space; its size can be queried with cudnnDropoutGetReserveSpaceSize */
- size_t reserveSpaceSizeInBytes); /* size of reserveSpace in bytes */
-
- /* TODO: remove */
-
- typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t; /* algorithm descriptor handle: pointer to a struct whose definition is not visible here */
- typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t; /* algorithm performance handle: pointer to a struct whose definition is not visible here */
-
- /* TODO: move these enums out to the appropriate submodule */
- typedef enum { /* Identifiers for convolution forward algorithms */
- CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
- CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
- CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
- CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
- CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
- CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
- CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
- CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
- CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8 /* equals the number of values listed above (0..7) */
- } cudnnConvolutionFwdAlgo_t;
-
- typedef enum { /* Identifiers for convolution backward-filter algorithms */
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7 /* equals the number of values listed above (0..6) */
- } cudnnConvolutionBwdFilterAlgo_t;
-
- typedef enum { /* Identifiers for convolution backward-data algorithms */
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6 /* equals the number of values listed above (0..5) */
- } cudnnConvolutionBwdDataAlgo_t;
-
- typedef enum { /* Identifiers for RNN algorithms */
- CUDNN_RNN_ALGO_STANDARD = 0,
- CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
- CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
- CUDNN_RNN_ALGO_COUNT = 3, /* equals the number of values listed above (0..2) */
- } cudnnRNNAlgo_t;
-
- typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t; /* CTC loss algorithm choice: deterministic (0) or non-deterministic (1) */
-
- /* TODO: remove
- * Struct whose only member, algo, is a union of the algorithm enums declared above,
- * so a single value holds one algorithm identifier of any of those kinds. The struct
- * has no field recording which union member is in use. */
- typedef struct {
- union Algorithm {
- cudnnConvolutionFwdAlgo_t convFwdAlgo; /* convolution forward algorithm */
- cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo; /* convolution backward-filter algorithm */
- cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo; /* convolution backward-data algorithm */
- cudnnRNNAlgo_t RNNAlgo; /* RNN algorithm */
- cudnnCTCLossAlgo_t CTCLossAlgo; /* CTC loss algorithm */
- } algo;
- } cudnnAlgorithm_t;
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc); /* deprecated; algoDesc is passed by pointer and presumably receives the new descriptor - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm); /* deprecated; algorithm is passed by value, presumably to be stored in algoDesc - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm); /* deprecated; algorithm is passed by pointer and presumably receives the value held by algoDesc - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest); /* deprecated; src is the const-qualified source, dest the destination descriptor */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc); /* deprecated; presumably the counterpart of cudnnCreateAlgorithmDescriptor - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate); /* deprecated; algoPerf presumably points to an array of numberToCreate handles to fill in - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf, /* deprecated API; performance object to fill in */
- cudnnAlgorithmDescriptor_t algoDesc, /* algorithm descriptor to associate with the record */
- cudnnStatus_t status, /* status value to record */
- float time, /* time value to record; units are not shown here */
- size_t memory); /* memory value to record; presumably a size in bytes - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf, /* deprecated API; performance object to query */
- cudnnAlgorithmDescriptor_t *algoDesc, /* passed by pointer; presumably receives the algorithm descriptor - confirm */
- cudnnStatus_t *status, /* passed by pointer; presumably receives the recorded status - confirm */
- float *time, /* passed by pointer; presumably receives the recorded time - confirm */
- size_t *memory); /* passed by pointer; presumably receives the recorded memory value - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy); /* deprecated; algoPerf presumably points to an array of numberToDestroy handles - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes); /* deprecated; algoSpaceSizeInBytes is passed by pointer and presumably receives the size for the algoSpace buffer of cudnnSaveAlgorithm/cudnnRestoreAlgorithm - confirm */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnSaveAlgorithm(cudnnHandle_t handle, /* deprecated API; cuDNN handle */
- cudnnAlgorithmDescriptor_t algoDesc, /* algorithm descriptor; presumably the one being saved - confirm */
- void *algoSpace, /* non-const buffer; presumably the saved data is written here - confirm */
- size_t algoSpaceSizeInBytes); /* size of algoSpace in bytes */
-
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
- cudnnRestoreAlgorithm(cudnnHandle_t handle, /* deprecated API; cuDNN handle */
- void *algoSpace, /* buffer; presumably holds data written by cudnnSaveAlgorithm - confirm */
- size_t algoSpaceSizeInBytes, /* size of algoSpace in bytes */
- cudnnAlgorithmDescriptor_t algoDesc); /* algorithm descriptor; presumably the one being restored into - confirm */
-
- typedef enum { /* Message severity levels; this is the type of the sev argument of cudnnCallback_t */
- CUDNN_SEV_FATAL = 0, /* no matching CUDNN_SEV_*_EN mask macro is defined for this level */
- CUDNN_SEV_ERROR = 1, /* mask: CUDNN_SEV_ERROR_EN */
- CUDNN_SEV_WARNING = 2, /* mask: CUDNN_SEV_WARNING_EN */
- CUDNN_SEV_INFO = 3, /* mask: CUDNN_SEV_INFO_EN */
- } cudnnSeverity_t;
-
- /* Message masks to be used with cudnnSetCallback().
- * Each mask is the single bit (1U << severity) for the matching cudnnSeverity_t value.
- * No mask is defined here for CUDNN_SEV_FATAL. */
- #define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
- #define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
- #define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
-
- /* Struct containing useful information for each API call; passed (by const pointer) to cudnnCallback_t callbacks */
- typedef struct {
- unsigned cudnn_version; /* cuDNN version number; encoding is not shown here */
- cudnnStatus_t cudnnStatus; /* cuDNN status value for this record */
- unsigned time_sec; /* epoch time in seconds */
- unsigned time_usec; /* microseconds part of epoch time */
- unsigned time_delta; /* time since start in seconds */
- cudnnHandle_t handle; /* cudnn handle */
- cudaStream_t stream; /* cuda stream ID */
- unsigned long long pid; /* process ID */
- unsigned long long tid; /* thread ID */
- int cudaDeviceId; /* CUDA device ID */
- int reserved[15]; /* reserved for future use */
- } cudnnDebug_t;
-
- typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg); /* callback type: sev = message severity, udata = user pointer (presumably the one given to cudnnSetCallback - confirm), dbg = per-call debug record, msg = message text */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr); /* mask: built from the CUDNN_SEV_*_EN message masks; udata: user pointer, presumably handed back to fptr - confirm; fptr: callback function */
-
- cudnnStatus_t CUDNNWINAPI
- cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr); /* all three are passed by pointer; presumably they receive the values currently set via cudnnSetCallback - confirm */
-
- /*
- * \brief Cross-library version checker.
- * This function is implemented differently in each sub-library. Each sub-library
- * checks whether its own version matches that of its dependencies.
- * Takes no arguments.
- * \returns CUDNN_STATUS_SUCCESS if the version check passes,
- * CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
- */
- cudnnStatus_t CUDNNWINAPI
- cudnnOpsInferVersionCheck(void);
-
- #if defined(__cplusplus)
- }
- #endif
-
- #endif /* CUDNN_OPS_INFER_H_ */
|