Code cleaning

727e7152 · Eric Wait · c45b77e6 · 727e7152 · 727e7152 · 727e7152
Commit 727e7152 authored 10 years ago by Eric Wait
--- a/src/c/Cuda/CudaMaxFilter.cuh
+++ b/src/c/Cuda/CudaMaxFilter.cuh
@@ -15,8 +15,6 @@
 #ifndef CUDA_CONST_KERNEL
 #define CUDA_CONST_KERNEL
 __constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
-#else
-//__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
 #endif

 template <class PixelType>

--- a/src/c/Cuda/CudaMinFilter.cuh
+++ b/src/c/Cuda/CudaMinFilter.cuh
@@ -16,8 +16,6 @@
 #ifndef CUDA_CONST_KERNEL
 #define CUDA_CONST_KERNEL
 __constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
-#else
-//__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
 #endif

 template <class PixelType>

--- a/src/c/Cuda/CudaMultAddFilter.cuh
+++ b/src/c/Cuda/CudaMultAddFilter.cuh
@@ -9,8 +9,6 @@
 #ifndef CUDA_CONST_KERNEL
 #define CUDA_CONST_KERNEL
 __constant__ float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
-#else
-__constant__ extern float cudaConstKernel[MAX_KERNEL_DIM*MAX_KERNEL_DIM*MAX_KERNEL_DIM];
 #endif

 template <class PixelType>

--- a/src/c/Cuda/CudaUtilities.cuh
+++ b/src/c/Cuda/CudaUtilities.cuh
@@ -20,57 +20,11 @@ static void HandleError( cudaError_t err, const char *file, int line )
 	if (err != cudaSuccess) 
 	{
 		sprintf_s(errorMessage, 255, "%s in %s at line %d\n", cudaGetErrorString( err ),	file, line );
-		throw(errorMessage);
+		throw std::runtime_error(errorMessage);
 	}
 }
 #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))

-size_t memoryAvailable(int device, size_t* totalOut=NULL);
-
-bool checkFreeMemory(size_t needed, int device, bool throws=false);
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
-	// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-	typedef struct
-	{
-		int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-		int Cores;
-	} sSMtoCores;
-
-	sSMtoCores nGpuArchCoresPerSM[] =
-	{
-		{ 0x10,  8 }, // Tesla Generation (SM 1.0) G80 class
-		{ 0x11,  8 }, // Tesla Generation (SM 1.1) G8x class
-		{ 0x12,  8 }, // Tesla Generation (SM 1.2) G9x class
-		{ 0x13,  8 }, // Tesla Generation (SM 1.3) GT200 class
-		{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-		{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-		{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-		{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-		{   -1, -1 }
-	};
-
-	int index = 0;
-
-	while (nGpuArchCoresPerSM[index].SM != -1)
-	{
-		if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
-		{
-			return nGpuArchCoresPerSM[index].Cores;
-		}
-
-		index++;
-	}
-
-	// If we don't find the values, we default use the previous one to run properly
-	printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[7].Cores);
-	return nGpuArchCoresPerSM[7].Cores;
-}
-// end of GPU Architecture definitions
-
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
 inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
 {
 	if (code != cudaSuccess) 
@@ -80,8 +34,7 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
 		throw std::runtime_error(buff);
 	}
 }
-
-void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads);
+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

 struct Lock 
 {
@@ -118,10 +71,6 @@ struct Lock
 	}
 };

-Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, int& iterations);
-
-Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, Vec<int>& iterations);
-
 template <class PixelType>
 PixelType* setUpOutIm(Vec<size_t> dims, PixelType** imageOut)
 {
@@ -133,3 +82,9 @@ PixelType* setUpOutIm(Vec<size_t> dims, PixelType** imageOut)

 	return imOut;
 }
+
+size_t memoryAvailable(int device, size_t* totalOut=NULL);
+bool checkFreeMemory(size_t needed, int device, bool throws=false);
+void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads);
+Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, int& iterations);
+Vec<size_t> createGaussianKernel(Vec<float> sigma, float** kernel, Vec<int>& iterations);