From 7a8ae0fdf9a1612b0717483abc03ee17bad3b938 Mon Sep 17 00:00:00 2001 From: actb <andrew.r.cohen@drexel.edu> Date: Tue, 5 May 2020 09:48:52 -0400 Subject: [PATCH] NLM searchWindow = xy.z for anisotropy --- src/MATLAB/+HIP/@Cuda/Cuda.m | 4 +- src/MATLAB/+HIP/@Cuda/HIP.mexw64 | 4 +- src/Python/HIP.pyd | 4 +- src/c/Cuda/CudaNLMeans.cuh | 88 ++++---------------------------- 4 files changed, 15 insertions(+), 85 deletions(-) diff --git a/src/MATLAB/+HIP/@Cuda/Cuda.m b/src/MATLAB/+HIP/@Cuda/Cuda.m index ff4d59c9..ec6a4c5b 100644 --- a/src/MATLAB/+HIP/@Cuda/Cuda.m +++ b/src/MATLAB/+HIP/@Cuda/Cuda.m @@ -7,13 +7,13 @@ methods (Static) [hydraConfig] = CheckConfig() [imageOut] = IdentityFilter(imageIn,device) [cmdInfo] = Info() - [imageOut] = Gaussian(imageIn,sigmas,numIterations,device) - [imageOut] = MeanFilter(imageIn,kernel,numIterations,device) [deviceStatsArray] = DeviceStats() [imageOut] = ElementWiseDifference(image1In,image2In,device) [imageOut] = MultiplySum(imageIn,kernel,numIterations,device) [imageOut] = EntropyFilter(imageIn,kernel,device) [imageOut] = StdFilter(imageIn,kernel,numIterations,device) + [imageOut] = Gaussian(imageIn,sigmas,numIterations,device) + [imageOut] = MeanFilter(imageIn,kernel,numIterations,device) [minVal,maxVal] = GetMinMax(imageIn,device) [imageOut] = LoG(imageIn,sigmas,device) [imageOut] = MedianFilter(imageIn,kernel,numIterations,device) diff --git a/src/MATLAB/+HIP/@Cuda/HIP.mexw64 b/src/MATLAB/+HIP/@Cuda/HIP.mexw64 index 60002070..3a9a7134 100644 --- a/src/MATLAB/+HIP/@Cuda/HIP.mexw64 +++ b/src/MATLAB/+HIP/@Cuda/HIP.mexw64 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d951ea0ed90fcb48ace55b524102091e8cf4a2164f508db1031f62c3525e9aaf -size 11287552 +oid sha256:21bd878fafa7ef29974da8f20b5b7bb14c3ef1eaa15dfc4be856daacaf35d3b9 +size 11302400 diff --git a/src/Python/HIP.pyd b/src/Python/HIP.pyd index 7d7afd78..f56ed453 100644 --- a/src/Python/HIP.pyd +++ b/src/Python/HIP.pyd @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:491df6734338f7ec649b83fa5d4db6948d5fd6c255747dde4e92e871f3ccc325 -size 259 +oid sha256:de2b9eb362a63bdb2f7ad65854c73fddd14d8f26f9b5d27b85f107a2bfde1fd6 +size 11320832 diff --git a/src/c/Cuda/CudaNLMeans.cuh b/src/c/Cuda/CudaNLMeans.cuh index 96d1de1d..63cf95d6 100644 --- a/src/c/Cuda/CudaNLMeans.cuh +++ b/src/c/Cuda/CudaNLMeans.cuh @@ -19,29 +19,32 @@ template <class PixelTypeIn, class PixelTypeOut> // params needed: a,h, search window size, comparison nhood __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageMean, CudaImageContainer<PixelTypeOut> imageVariance, - CudaImageContainer<PixelTypeOut> imageOut, double h, int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue) + CudaImageContainer<PixelTypeOut> imageOut, double h, double searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue) { Vec<std::size_t> threadCoordinate; GetThreadBlockCoordinate(threadCoordinate); if (threadCoordinate < imageIn.getDims()) { - Vec<int> searchVec = Vec<int>::min(Vec<int>(imageIn.getDims()) - 1, Vec<int>(searchWindowRadius)); + int xySearch = (int)std::floor(searchWindowRadius); + int zSearch = (int)((searchWindowRadius - std::floor(searchWindowRadius)) * (double)xySearch); + if ( (searchWindowRadius - std::floor(searchWindowRadius)) < 1e-6 ) + zSearch = xySearch; + Vec<int> nSearchWindow = Vec<int>(xySearch, xySearch, zSearch); + Vec<int> searchVec = Vec<int>::min(Vec<int>(imageIn.getDims()) - 1, nSearchWindow); // float inputVal = (float)imageIn(threadCoordinate); float inputMeanVal = (float)imageMean(threadCoordinate); float inputVarVal = (float)imageVariance(threadCoordinate); // - double wMax = 0.; + double wMax = 0.; double wAccumulator = 0.; double outputAccumulator = 0.; KernelIterator kIt(threadCoordinate, imageIn.getDims(), searchVec * 2 + 1); for (; !kIt.end(); ++kIt) { Vec<float> kernelPos = kIt.getImageCoordinate(); - if (kernelPos == threadCoordinate) - continue; float kernelMeanVal = (float)imageMean(kernelPos); float kernelVarVal = (float)imageVariance(kernelPos); @@ -65,84 +68,11 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag } } // cudaNLMeans_mv -template <class PixelTypeIn, class PixelTypeOut> -// params needed: a,h, search window size, comparison nhood -__global__ void cudaNLMeans(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageOut, double h, - int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue) -{ - Vec<std::size_t> threadCoordinate; - GetThreadBlockCoordinate(threadCoordinate); - - if (threadCoordinate < imageIn.getDims()) - { - // for loop over whole image, except for searchWindowSize - Vec<int> itc = Vec<int>(threadCoordinate); - - Vec<int> nhoodVec = Vec<int>::min(Vec<int>(imageIn.getDims())-1, Vec<int>(nhoodRadius)); - - Vec<int> searchMin = Vec<int>::max(itc - searchWindowRadius, nhoodVec); - Vec<int> searchMax = Vec<int>::min(itc + searchWindowRadius+1, Vec<int>(imageIn.getDims()) - nhoodVec); - - // outValAccumulator is unnormalized output value -- we normalize at the end - double outValAccumulator= 0.0; // - // wAccumulator is for the normalizing constant - double wAccumulator = 1e-7; // - double wMax = 0; // for application when x==y==z==0 - for (int z = searchMin.z; z < searchMax.z; z++) { - for (int y = searchMin.y; y < searchMax.y; y++) { - for (int x = searchMin.x; x < searchMax.x; x++) { - - if ((0 == x) && (0 == y) && (0 == z)) - continue; - - // center the kernel on threadCoordinate - // iterator over nhoodSize - KernelIterator kIt(threadCoordinate, imageIn.getDims(), nhoodVec*2+1); - // image value at the x,y,z neighborhood window center - float searchVal = (float)imageIn( Vec<float>(x,y,z)); - float wi=0.; - for (; !kIt.end(); ++kIt) - { - Vec<float> imInPos = kIt.getImageCoordinate(); - float inVal = (float)imageIn(imInPos); - - // now get the corresponding pixel value in the search window space - Vec<float> sCoord = Vec<float>(x,y,z)-nhoodVec+ kIt.getKernelCoordinate(); - float sVal = (float)imageIn(sCoord); - - double diff = SQR(inVal - sVal); - // weight by gaussian (Ga from paper) - double ga = 1; // exp(-1 * (Vec<int>(kIt.getKernelCoordinate()) - nhoodVec).lengthSqr() / SQR(a)); - // w is gaussian weighted euclidean distance - wi += ga*diff; - } - double w = exp(-wi / SQR(h)); - - if (w > wMax) - wMax = w; - - outValAccumulator += w*searchVal; - wAccumulator += w; - } - } - } - // add in for x==y==z==0 - outValAccumulator += wMax*(float)imageIn(threadCoordinate); - wAccumulator += wMax; - - - // now normalize - double outVal = outValAccumulator / wAccumulator; - - imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue); - } -} // cudaNLMeans - // this part is the templated function that gets called by the front end. // here be cpu // todo - if we chunk, make sure search window doesn't go off chunk template <class PixelTypeIn, class PixelTypeOut> -void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, double h, int searchWindowRadius, int nhoodRadius, int device = -1) +void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, double h, double searchWindowRadius, int nhoodRadius, int device = -1) { const PixelTypeOut MIN_VAL = std::numeric_limits<PixelTypeOut>::lowest(); const PixelTypeOut MAX_VAL = std::numeric_limits<PixelTypeOut>::max(); -- GitLab