Skip to content
Snippets Groups Projects
Commit d0f551ff authored by Eric Wait's avatar Eric Wait
Browse files

Fixed Min/Max the same way as sum array

parent f749c920
No related branches found
No related tags found
No related merge requests found
......@@ -3,27 +3,28 @@
#include "Vec.h"
template <class PixelType>
__global__ void cudaGetMinMax(PixelType* arrayIn, double* minsOut, double* maxsOut, size_t n, PixelType minVal, PixelType maxVal)
__global__ void cudaGetMinMax(PixelType* arrayIn, PixelType* minsOut, PixelType* maxsOut, size_t n, PixelType minVal, PixelType maxVal)
{
extern __shared__ double mins[];
double* maxs = mins+blockDim.x;
extern __shared__ unsigned char sharedMem[];
PixelType* mins = (PixelType*)sharedMem;
PixelType* maxs = mins+blockDim.x;
size_t i = threadIdx.x + blockIdx.x*blockDim.x;
size_t imStride = blockDim.x*gridDim.x;
if (i<n)
{
mins[threadIdx.x] = (double)(arrayIn[i]);
maxs[threadIdx.x] = (double)(arrayIn[i]);
mins[threadIdx.x] = arrayIn[i];
maxs[threadIdx.x] = arrayIn[i];
while (i+imStride<n)
{
if (mins[threadIdx.x] > (double)(arrayIn[i]))
mins[threadIdx.x] = (double)(arrayIn[i]);
if (maxs[threadIdx.x] < (double)(arrayIn[i]))
maxs[threadIdx.x] = (double)(arrayIn[i]);
i += imStride;
if (mins[threadIdx.x] > arrayIn[i])
mins[threadIdx.x] = arrayIn[i];
if (maxs[threadIdx.x] < arrayIn[i])
maxs[threadIdx.x] = arrayIn[i];
}
}
else
......@@ -43,8 +44,8 @@ __global__ void cudaGetMinMax(PixelType* arrayIn, double* minsOut, double* maxsO
mins[threadIdx.x] = mins[threadIdx.x+localStride];
if (maxs[threadIdx.x] < maxs[threadIdx.x+localStride])
maxs[threadIdx.x] = maxs[threadIdx.x+localStride];
}else
break;
}
__syncthreads();
}
......@@ -62,10 +63,12 @@ void getMinMax(const PixelType* imageIn, Vec<size_t> dims, PixelType& minVal, Pi
size_t n = dims.product();
minVal = std::numeric_limits<PixelType>::max();
maxVal = std::numeric_limits<PixelType>::lowest();
double* deviceMins;
double* deviceMaxs;
double* hostMins;
double* hostMaxs;
PixelType initMin= std::numeric_limits<PixelType>::lowest();
PixelType initMax = std::numeric_limits<PixelType>::max();
PixelType* deviceMins;
PixelType* deviceMaxs;
PixelType* hostMins;
PixelType* hostMaxs;
PixelType* deviceBuffer;
cudaDeviceProp props;
......@@ -76,15 +79,15 @@ void getMinMax(const PixelType* imageIn, Vec<size_t> dims, PixelType& minVal, Pi
size_t numValsPerChunk = MIN(n,(size_t)((availMem*MAX_MEM_AVAIL)/sizeof(PixelType)));
int maxBlocks = (int)ceil((double)numValsPerChunk/(2*props.maxThreadsPerBlock));
int threads = props.maxThreadsPerBlock;
int maxBlocks = (int)ceil((double)numValsPerChunk/(threads*2));
HANDLE_ERROR(cudaMalloc((void**)&deviceBuffer,sizeof(PixelType)*numValsPerChunk));
HANDLE_ERROR(cudaMalloc((void**)&deviceMins,sizeof(double)*props.multiProcessorCount));
HANDLE_ERROR(cudaMalloc((void**)&deviceMaxs,sizeof(double)*props.multiProcessorCount));
HANDLE_ERROR(cudaMalloc((void**)&deviceMins,sizeof(PixelType)*maxBlocks));
HANDLE_ERROR(cudaMalloc((void**)&deviceMaxs,sizeof(PixelType)*maxBlocks));
hostMins = new double[maxBlocks];
hostMaxs = new double[maxBlocks];
hostMins = new PixelType[maxBlocks];
hostMaxs = new PixelType[maxBlocks];
for (size_t startIdx=0; startIdx<n; startIdx += numValsPerChunk)
{
......@@ -92,21 +95,21 @@ void getMinMax(const PixelType* imageIn, Vec<size_t> dims, PixelType& minVal, Pi
HANDLE_ERROR(cudaMemcpy(deviceBuffer,imageIn+startIdx,sizeof(PixelType)*curNumVals,cudaMemcpyHostToDevice));
int blocks = (int)ceil((double)curNumVals/(2*props.maxThreadsPerBlock));
size_t sharedMemSize = sizeof(double)*props.maxThreadsPerBlock*2;
int blocks = (int)((double)curNumVals/(threads*2));
size_t sharedMemSize = sizeof(PixelType)*threads*2;
cudaGetMinMax<<<blocks,threads,sharedMemSize>>>(deviceBuffer,deviceMins,deviceMaxs,curNumVals,minVal,maxVal);
cudaGetMinMax<<<blocks,threads,sharedMemSize>>>(deviceBuffer,deviceMins,deviceMaxs,curNumVals,initMin,initMax);
DEBUG_KERNEL_CHECK();
HANDLE_ERROR(cudaMemcpy(hostMins,deviceMins,sizeof(double)*blocks,cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaMemcpy(hostMaxs,deviceMaxs,sizeof(double)*blocks,cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaMemcpy(hostMins,deviceMins,sizeof(PixelType)*blocks,cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaMemcpy(hostMaxs,deviceMaxs,sizeof(PixelType)*blocks,cudaMemcpyDeviceToHost));
for (int i=0; i<blocks; ++i)
{
if (minVal > (PixelType)hostMins[i])
minVal = (PixelType)hostMins[i];
if (maxVal < (PixelType)hostMaxs[i])
maxVal = (PixelType)hostMaxs[i];
if (minVal > hostMins[i])
minVal = hostMins[i];
if (maxVal < hostMaxs[i])
maxVal = hostMaxs[i];
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment