Skip to content
Snippets Groups Projects
Commit c9f4f6dc authored by actb's avatar actb
Browse files

fix NL means double dipping output buffer

parent d4370802
No related branches found
No related tags found
No related merge requests found
...@@ -72,6 +72,14 @@ public: ...@@ -72,6 +72,14 @@ public:
return deviceImages[trd]; return deviceImages[trd];
} }
CudaImageContainer<PixelType>* getBuffer(int idx)
{
if (numBuffers <= idx)
return NULL;
return deviceImages[idx];
}
void incrementBuffer() void incrementBuffer()
{ {
DEBUG_KERNEL_CHECK(); DEBUG_KERNEL_CHECK();
......
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
// this is an approximate nl means. uses fisher discriminant as a distance, with mean, variance of patches computed by previous cuda filter // this is an approximate nl means. uses fisher discriminant as a distance, with mean, variance of patches computed by previous cuda filter
template <class PixelTypeIn, class PixelTypeOut> template <class PixelTypeIn, class PixelTypeOut>
// params needed: a,h, search window size, comparison nhood // params needed: a,h, search window size, comparison nhood
__global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageOutMean, CudaImageContainer<PixelTypeOut> imageVariance, __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageMean, CudaImageContainer<PixelTypeOut> imageVariance,
double h, int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue) CudaImageContainer<PixelTypeOut> imageOut, double h, int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue)
{ {
Vec<std::size_t> threadCoordinate; Vec<std::size_t> threadCoordinate;
GetThreadBlockCoordinate(threadCoordinate); GetThreadBlockCoordinate(threadCoordinate);
...@@ -30,7 +30,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag ...@@ -30,7 +30,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag
// //
float inputVal = (float)imageIn(threadCoordinate); float inputVal = (float)imageIn(threadCoordinate);
float inputMeanVal = (float)imageOutMean(threadCoordinate); float inputMeanVal = (float)imageMean(threadCoordinate);
float inputVarVal = (float)imageVariance(threadCoordinate); float inputVarVal = (float)imageVariance(threadCoordinate);
// //
double wMax = 0.; double wMax = 0.;
...@@ -43,7 +43,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag ...@@ -43,7 +43,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag
if (kernelPos == threadCoordinate) if (kernelPos == threadCoordinate)
continue; continue;
float kernelMeanVal = (float)imageOutMean(kernelPos); float kernelMeanVal = (float)imageMean(kernelPos);
float kernelVarVal = (float)imageVariance(kernelPos); float kernelVarVal = (float)imageVariance(kernelPos);
float kernelVal = (float)imageIn(kernelPos); float kernelVal = (float)imageIn(kernelPos);
...@@ -61,7 +61,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag ...@@ -61,7 +61,7 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag
// now normalize // now normalize
double outVal = outputAccumulator / wAccumulator; double outVal = outputAccumulator / wAccumulator;
imageOutMean(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue); imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue);
} }
} // cudaNLMeans_mv } // cudaNLMeans_mv
...@@ -146,7 +146,7 @@ void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, ...@@ -146,7 +146,7 @@ void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut,
{ {
const PixelTypeOut MIN_VAL = std::numeric_limits<PixelTypeOut>::lowest(); const PixelTypeOut MIN_VAL = std::numeric_limits<PixelTypeOut>::lowest();
const PixelTypeOut MAX_VAL = std::numeric_limits<PixelTypeOut>::max(); const PixelTypeOut MAX_VAL = std::numeric_limits<PixelTypeOut>::max();
const int NUM_BUFF_NEEDED = 3; const int NUM_BUFF_NEEDED = 4;
CudaDevices cudaDevs(cudaNLMeans_mv<PixelTypeIn, PixelTypeOut>, device); CudaDevices cudaDevs(cudaNLMeans_mv<PixelTypeIn, PixelTypeOut>, device);
...@@ -179,13 +179,12 @@ void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, ...@@ -179,13 +179,12 @@ void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut,
deviceImages.setAllDims(chunks[i].getFullChunkSize()); deviceImages.setAllDims(chunks[i].getFullChunkSize());
cudaMeanAndVariance << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), *(deviceImages.getThirdBuffer()), constKernelMem, MIN_VAL, MAX_VAL); cudaMeanAndVariance << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getBuffer(1)), *(deviceImages.getBuffer(2)), constKernelMem, MIN_VAL, MAX_VAL);
cudaNLMeans_mv <<<chunks[i].blocks, chunks[i].threads>>>(*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), *(deviceImages.getThirdBuffer()), cudaNLMeans_mv <<<chunks[i].blocks, chunks[i].threads>>>(*(deviceImages.getCurBuffer()), *(deviceImages.getBuffer(1)), *(deviceImages.getBuffer(2)), *(deviceImages.getBuffer(3)),
h, searchWindowRadius, nhoodRadius, MIN_VAL, MAX_VAL); h, searchWindowRadius, nhoodRadius, MIN_VAL, MAX_VAL);
deviceImages.incrementBuffer(); chunks[i].retriveROI(imageOut, deviceImages.getBuffer(3));
chunks[i].retriveROI(imageOut, deviceImages.getCurBuffer());
} }
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment