Skip to content
Snippets Groups Projects
Commit 727e7152 authored by Eric Wait's avatar Eric Wait
Browse files

Code cleaning

parent c45b77e6
No related branches found
No related tags found
No related merge requests found
......@@ -15,8 +15,6 @@
// Defines cudaConstKernel, a __constant__ float array sized for
// MAX_KERNEL_DIM^3 elements. The CUDA_CONST_KERNEL guard makes sure the
// definition is emitted only the first time this block is preprocessed in a
// translation unit; later occurrences fall into the #else branch.
#ifndef CUDA_CONST_KERNEL
#define CUDA_CONST_KERNEL
__constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#else
// The extern re-declaration is disabled, so this branch contributes nothing.
//__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#endif
template <class PixelType>
......
......@@ -16,8 +16,6 @@
// Defines cudaConstKernel, a __constant__ float array sized for
// MAX_KERNEL_DIM^3 elements. The CUDA_CONST_KERNEL guard makes sure the
// definition is emitted only the first time this block is preprocessed in a
// translation unit; later occurrences fall into the #else branch.
#ifndef CUDA_CONST_KERNEL
#define CUDA_CONST_KERNEL
__constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#else
// The extern re-declaration is disabled, so this branch contributes nothing.
//__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#endif
template <class PixelType>
......
......@@ -9,8 +9,6 @@
// Defines cudaConstKernel, a __constant__ float array sized for
// MAX_KERNEL_DIM^3 elements, the first time this block is preprocessed in a
// translation unit (CUDA_CONST_KERNEL is the guard).
#ifndef CUDA_CONST_KERNEL
#define CUDA_CONST_KERNEL
__constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#else
// Guard already defined: only declare the symbol so it is not defined twice.
__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
#endif
template <class PixelType>
......
......@@ -20,57 +20,11 @@ static void HandleError( cudaError_t err, const char *file, int line )
if (err != cudaSuccess)
{
sprintf_s(errorMessage, 255, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
throw(errorMessage);
throw std::runtime_error(errorMessage);
}
}
// Convenience wrapper: forwards an error code to HandleError together with the
// source file name and line number of the place where the macro is used.
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
// Declared here, defined elsewhere. totalOut is optional (defaults to NULL).
// NOTE(review): presumably returns the free memory of `device` and writes the
// total through totalOut when it is non-NULL -- confirm against the definition.
size_t memoryAvailable(int device, size_t* totalOut=NULL);
// Declared here, defined elsewhere. `throws` defaults to false.
// NOTE(review): presumably reports whether `needed` bytes are free on `device`
// and raises instead of returning false when `throws` is set -- confirm.
bool checkFreeMemory(size_t needed, int device, bool throws=false);
// Beginning of GPU Architecture definitions
// Maps a CUDA compute capability (SM major.minor) to the number of cores per
// streaming multiprocessor listed in the table below.
//
// major, minor: compute-capability version, e.g. 3 and 5 for SM 3.5.
// Returns the core count of the matching table row. When the version is not
// listed, a notice is printed to stdout and the core count of the last real
// table row is returned so callers can keep running.
inline int _ConvertSMVer2Cores(int major, int minor)
{
	// One row per known architecture (SM version -> cores per SM).
	typedef struct
	{
		int SM; // 0xMm (hexadecimal notation), M = SM major version, and m = SM minor version
		int Cores;
	} sSMtoCores;

	// Read-only lookup data: static const so it is built once rather than on
	// every call.
	static const sSMtoCores nGpuArchCoresPerSM[] =
	{
		{ 0x10,   8 }, // Tesla Generation (SM 1.0) G80 class
		{ 0x11,   8 }, // Tesla Generation (SM 1.1) G8x class
		{ 0x12,   8 }, // Tesla Generation (SM 1.2) G9x class
		{ 0x13,   8 }, // Tesla Generation (SM 1.3) GT200 class
		{ 0x20,  32 }, // Fermi Generation (SM 2.0) GF100 class
		{ 0x21,  48 }, // Fermi Generation (SM 2.1) GF10x class
		{ 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
		{ 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
		{   -1,  -1 }  // sentinel: marks the end of the table
	};

	// Encode the requested version the same way the table does (0xMm).
	const int smVersion = (major << 4) + minor;

	int index = 0;
	for (; nGpuArchCoresPerSM[index].SM != -1; ++index)
	{
		if (nGpuArchCoresPerSM[index].SM == smVersion)
		{
			return nGpuArchCoresPerSM[index].Cores;
		}
	}

	// Not found: `index` now sits on the sentinel, so index - 1 is the last
	// real row. Deriving it this way (instead of a hard-coded row number)
	// keeps the fallback correct when rows are added to the table.
	const int fallbackCores = nGpuArchCoresPerSM[index - 1].Cores;
	printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, fallbackCores);
	return fallbackCores;
}
// end of GPU Architecture definitions
// Checks the result of a CUDA call: passes it to gpuAssert together with the
// file name and line number of the call site.
// The expansion is a braced block, so `if (c) gpuErrchk(x); else ...` without
// braces around the call will not compile (the trailing `;` ends the `if`).
// NOTE(review): gpuAssert takes `char *file` while __FILE__ is a string
// literal; a `const char *` parameter would avoid the deprecated conversion.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
......@@ -80,8 +34,7 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
throw std::runtime_error(buff);
}
}
// Declared here, defined elsewhere. `blocks` and `threads` are non-const
// references, so they act as outputs; `dims` and `prop` are read-only inputs.
// NOTE(review): presumably picks a launch configuration that covers `dims`
// within the limits reported in `prop` -- confirm against the definition.
void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads);
// Checks the result of a CUDA call: passes it to gpuAssert together with the
// file name and line number of the call site. The expansion is a braced block,
// so wrap the call in braces when using it in an unbraced if/else.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
struct Lock
{
......@@ -118,10 +71,6 @@ struct Lock
}
};
// Two overloads, declared here and defined elsewhere. They differ only in the
// type of the `iterations` output: a single int versus a Vec<int>.
// NOTE(review): presumably `*kernel` receives the generated kernel buffer and
// the returned Vec holds its dimensions -- confirm ownership of the buffer
// against the definition.
Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, int& iterations);
Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, Vec<int>& iterations);
template <class PixelType>
PixelType* setUpOutIm(Vec<size_t> dims, PixelType** imageOut)
{
......@@ -133,3 +82,9 @@ PixelType* setUpOutIm(Vec<size_t> dims, PixelType** imageOut)
return imOut;
}
// Forward declarations of helpers that are defined elsewhere.

// totalOut is optional (defaults to NULL).
// NOTE(review): presumably returns the free memory of `device` and writes the
// total through totalOut when it is non-NULL -- confirm against the definition.
size_t memoryAvailable(int device, size_t* totalOut=NULL);
// `throws` defaults to false.
// NOTE(review): presumably reports whether `needed` bytes are free on `device`
// and raises instead of returning false when `throws` is set -- confirm.
bool checkFreeMemory(size_t needed, int device, bool throws=false);
// `blocks` and `threads` are non-const references, so they act as outputs;
// `dims` and `prop` are read-only inputs.
void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads);
// Two overloads that differ only in the type of the `iterations` output:
// a single int versus a Vec<int>.
// NOTE(review): presumably `*kernel` receives the generated kernel buffer and
// the returned Vec holds its dimensions -- confirm against the definition.
Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, int& iterations);
Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, Vec<int>& iterations);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment