Skip to content
Snippets Groups Projects
Commit edf1e398 authored by Eric Wait's avatar Eric Wait
Browse files

Added free memory checking

parent a55a677d
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,7 @@ public:
imageDims = dims;
maxImageDims = dims;
this->device = device;
checkFreeMemory(sizeof(DevicePixelType)*dims.product(),device,true);
HANDLE_ERROR(cudaSetDevice(device));
HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(DevicePixelType)*dims.product()));
}
......@@ -82,6 +83,7 @@ public:
{
HANDLE_ERROR(cudaFree(image));
}
checkFreeMemory(sizeof(DevicePixelType)*dims.product(),device,true);
HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(DevicePixelType)*dims.product()));
imageDims = dims;
}
......
......@@ -513,7 +513,9 @@ DevicePixelType* CudaProcessBuffer::maxFilter(const DevicePixelType* imageIn, Ve
if (kernel==NULL)
{
	// No kernel was supplied: build a default kernel in which every weight is 1.0f.
	// The kernel extent is first clamped to lie between (1,1,1) and dims.
	kernalDims = kernalDims.clamp(Vec<size_t>(1,1,1),dims);
	const size_t kernelCount = kernalDims.product();

	float* ones = new float[kernelCount];
	// memset() fills bytes, not floats: memset(ones,1,n) produced floats with the
	// bit pattern 0x01010101 (a tiny denormal-scale value, not 1.0f) and only
	// touched the first n bytes of an n*sizeof(float) buffer. Fill element-wise.
	for (size_t i=0; i<kernelCount; ++i)
		ones[i] = 1.0f;

	// Upload the host-side weights into the cudaConstKernel symbol.
	// NOTE(review): assumes cudaConstKernel can hold kernelCount floats — confirm against its declaration.
	HANDLE_ERROR(cudaMemcpyToSymbol(cudaConstKernel, ones, sizeof(float)*kernelCount));
	delete[] ones;
}
......@@ -610,7 +612,9 @@ DevicePixelType* CudaProcessBuffer::minFilter(const DevicePixelType* imageIn, Ve
if (kernel==NULL)
{
	// No kernel was supplied: build a default kernel in which every weight is 1.0f.
	// The kernel extent is first clamped to lie between (1,1,1) and dims.
	kernalDims = kernalDims.clamp(Vec<size_t>(1,1,1),dims);
	const size_t kernelCount = kernalDims.product();

	float* ones = new float[kernelCount];
	// memset() fills bytes, not floats: memset(ones,1,n) produced floats with the
	// bit pattern 0x01010101 (a tiny denormal-scale value, not 1.0f) and only
	// touched the first n bytes of an n*sizeof(float) buffer. Fill element-wise.
	for (size_t i=0; i<kernelCount; ++i)
		ones[i] = 1.0f;

	// Upload the host-side weights into the cudaConstKernel symbol.
	// NOTE(review): assumes cudaConstKernel can hold kernelCount floats — confirm against its declaration.
	HANDLE_ERROR(cudaMemcpyToSymbol(cudaConstKernel, ones, sizeof(float)*kernelCount));
	delete[] ones;
}
......@@ -740,6 +744,9 @@ double* CudaProcessBuffer::normalizeHistogram(const DevicePixelType* imageIn, Ve
size_t* deviceHist;
double* deviceHistNorm;
checkFreeMemory(sizeof(size_t)*arraySize+sizeof(double)*arraySize,device,true);
HANDLE_ERROR(cudaMalloc((void**)&deviceHist,sizeof(size_t)*arraySize));
HANDLE_ERROR(cudaMalloc((void**)&deviceHistNorm,sizeof(double)*arraySize));
HANDLE_ERROR(cudaMemset(deviceHist,0,sizeof(size_t)*arraySize));
......@@ -775,7 +782,6 @@ DevicePixelType* CudaProcessBuffer::otsuThresholdFilter(const DevicePixelType* i
return thresholdFilter(imageIn,dims,(DevicePixelType)thresh,imageOut);
}
double CudaProcessBuffer::otsuThresholdValue(const DevicePixelType* imageIn, Vec<size_t> dims)
{
int arraySize;
......@@ -833,8 +839,8 @@ double CudaProcessBuffer::sumArray(const DevicePixelType* imageIn, size_t n)
maxDeviceDims.x = (n < (double)(deviceProp.totalGlobalMem*MAX_MEM_AVAIL)/sizeof(DevicePixelType)) ? (n) :
((size_t)(deviceProp.totalGlobalMem*MAX_MEM_AVAIL/sizeof(DevicePixelType)));
checkFreeMemory(sizeof(DevicePixelType)*maxDeviceDims.x+sizeof(double)*blocks,device,true);
HANDLE_ERROR(cudaMalloc((void**)&deviceImage,sizeof(DevicePixelType)*maxDeviceDims.x));
HANDLE_ERROR(cudaMalloc((void**)&deviceSum,sizeof(double)*blocks));
hostSum = new double[blocks];
......
......@@ -187,3 +187,31 @@ Vec<size_t> createGaussianKernel(Vec<float> sigma, float* kernel, Vec<int>& iter
return kernelDims;
}
// Query the free memory of a CUDA device.
//
// device   - device ordinal; it is made the current device via cudaSetDevice
//            and is left selected when the function returns.
// totalOut - optional; when non-NULL it receives the total value reported by
//            cudaMemGetInfo.
//
// Returns the free value reported by cudaMemGetInfo.
size_t memoryAvailable(int device, size_t* totalOut/*=NULL*/)
{
	size_t freeBytes;
	size_t totalBytes;

	HANDLE_ERROR(cudaSetDevice(device));
	HANDLE_ERROR(cudaMemGetInfo(&freeBytes,&totalBytes));

	// The total is only handed back when the caller asked for it.
	if (totalOut)
	{
		*totalOut = totalBytes;
	}

	return freeBytes;
}
// Check whether `device` currently has at least `needed` bytes of free memory.
//
// needed - size of the allocation(s) the caller is about to make.
// device - device ordinal, forwarded to memoryAvailable().
// throws - when true, a shortfall raises std::runtime_error instead of
//          returning false.
//
// Returns true when needed <= free memory; false otherwise (only reachable
// when throws is false).
bool checkFreeMemory(size_t needed, int device, bool throws/*=false*/)
{
	size_t free = memoryAvailable(device);
	if (needed<=free)
		return true;

	if (throws)
	{
		char buff[255];
		// size_t does not match "%d" (and is wider than int on 64-bit builds),
		// which made the reported sizes wrong. Widen explicitly and use "%llu".
		sprintf_s(buff,"Out of CUDA Memory!\nNeed: %llu\nHave: %llu\n",
			(unsigned long long)needed,(unsigned long long)free);
		throw std::runtime_error(buff);
	}

	return false;
}
......@@ -26,6 +26,10 @@ static void HandleError( cudaError_t err, const char *file, int line )
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
// Returns the free memory that cudaMemGetInfo reports for `device`; if
// totalOut is non-NULL it also receives the reported total.
size_t memoryAvailable(int device, size_t* totalOut=NULL);
// Returns true when `device` has at least `needed` free memory. On a shortfall
// it returns false, or throws std::runtime_error when `throws` is true.
bool checkFreeMemory(size_t needed, int device, bool throws=false);
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2Cores(int major, int minor)
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment