From 7a8ae0fdf9a1612b0717483abc03ee17bad3b938 Mon Sep 17 00:00:00 2001
From: actb <andrew.r.cohen@drexel.edu>
Date: Tue, 5 May 2020 09:48:52 -0400
Subject: [PATCH] NLM searchWindow = xy.z for anisotropy

---
 src/MATLAB/+HIP/@Cuda/Cuda.m     |  4 +-
 src/MATLAB/+HIP/@Cuda/HIP.mexw64 |  4 +-
 src/Python/HIP.pyd               |  4 +-
 src/c/Cuda/CudaNLMeans.cuh       | 88 ++++----------------------------
 4 files changed, 15 insertions(+), 85 deletions(-)

diff --git a/src/MATLAB/+HIP/@Cuda/Cuda.m b/src/MATLAB/+HIP/@Cuda/Cuda.m
index ff4d59c9..ec6a4c5b 100644
--- a/src/MATLAB/+HIP/@Cuda/Cuda.m
+++ b/src/MATLAB/+HIP/@Cuda/Cuda.m
@@ -7,13 +7,13 @@ methods (Static)
     [hydraConfig] = CheckConfig()
     [imageOut] = IdentityFilter(imageIn,device)
     [cmdInfo] = Info()
-    [imageOut] = Gaussian(imageIn,sigmas,numIterations,device)
-    [imageOut] = MeanFilter(imageIn,kernel,numIterations,device)
     [deviceStatsArray] = DeviceStats()
     [imageOut] = ElementWiseDifference(image1In,image2In,device)
     [imageOut] = MultiplySum(imageIn,kernel,numIterations,device)
     [imageOut] = EntropyFilter(imageIn,kernel,device)
     [imageOut] = StdFilter(imageIn,kernel,numIterations,device)
+    [imageOut] = Gaussian(imageIn,sigmas,numIterations,device)
+    [imageOut] = MeanFilter(imageIn,kernel,numIterations,device)
     [minVal,maxVal] = GetMinMax(imageIn,device)
     [imageOut] = LoG(imageIn,sigmas,device)
     [imageOut] = MedianFilter(imageIn,kernel,numIterations,device)
diff --git a/src/MATLAB/+HIP/@Cuda/HIP.mexw64 b/src/MATLAB/+HIP/@Cuda/HIP.mexw64
index 60002070..3a9a7134 100644
--- a/src/MATLAB/+HIP/@Cuda/HIP.mexw64
+++ b/src/MATLAB/+HIP/@Cuda/HIP.mexw64
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d951ea0ed90fcb48ace55b524102091e8cf4a2164f508db1031f62c3525e9aaf
-size 11287552
+oid sha256:21bd878fafa7ef29974da8f20b5b7bb14c3ef1eaa15dfc4be856daacaf35d3b9
+size 11302400
diff --git a/src/Python/HIP.pyd b/src/Python/HIP.pyd
index 7d7afd78..f56ed453 100644
--- a/src/Python/HIP.pyd
+++ b/src/Python/HIP.pyd
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:491df6734338f7ec649b83fa5d4db6948d5fd6c255747dde4e92e871f3ccc325
-size 259
+oid sha256:de2b9eb362a63bdb2f7ad65854c73fddd14d8f26f9b5d27b85f107a2bfde1fd6
+size 11320832
diff --git a/src/c/Cuda/CudaNLMeans.cuh b/src/c/Cuda/CudaNLMeans.cuh
index 96d1de1d..63cf95d6 100644
--- a/src/c/Cuda/CudaNLMeans.cuh
+++ b/src/c/Cuda/CudaNLMeans.cuh
@@ -19,29 +19,32 @@
 template <class PixelTypeIn, class PixelTypeOut>
 // params needed: a,h, search window size, comparison nhood
 __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageMean, CudaImageContainer<PixelTypeOut> imageVariance,
-	CudaImageContainer<PixelTypeOut> imageOut, double h, int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue)
+	CudaImageContainer<PixelTypeOut> imageOut, double h, double searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue)
 {
 	Vec<std::size_t> threadCoordinate;
 	GetThreadBlockCoordinate(threadCoordinate);
 
 	if (threadCoordinate < imageIn.getDims())
 	{
-		Vec<int> searchVec = Vec<int>::min(Vec<int>(imageIn.getDims()) - 1, Vec<int>(searchWindowRadius));
+		int xySearch = (int)std::floor(searchWindowRadius);
+		int zSearch = (int)((searchWindowRadius - std::floor(searchWindowRadius)) * (double)xySearch);
+		if ( (searchWindowRadius - std::floor(searchWindowRadius)) < 1e-6 )
+			zSearch = xySearch;
+		Vec<int> nSearchWindow = Vec<int>(xySearch, xySearch, zSearch);
+		Vec<int> searchVec = Vec<int>::min(Vec<int>(imageIn.getDims()) - 1, nSearchWindow);
 
 		// 
 		float inputVal = (float)imageIn(threadCoordinate);
 		float inputMeanVal = (float)imageMean(threadCoordinate);
 		float inputVarVal = (float)imageVariance(threadCoordinate);
 		// 
-		double wMax = 0.;
+		double wMax = 0.; 
 		double wAccumulator = 0.;
 		double outputAccumulator = 0.;
 		KernelIterator kIt(threadCoordinate, imageIn.getDims(), searchVec * 2 + 1);
 		for (; !kIt.end(); ++kIt)
 		{
 			Vec<float> kernelPos = kIt.getImageCoordinate();
-			if (kernelPos == threadCoordinate)
-				continue;
 
 			float kernelMeanVal = (float)imageMean(kernelPos);
 			float kernelVarVal = (float)imageVariance(kernelPos);
@@ -65,84 +68,11 @@ __global__ void cudaNLMeans_mv(CudaImageContainer<PixelTypeIn> imageIn, CudaImag
 	}
 } // cudaNLMeans_mv
 
-template <class PixelTypeIn, class PixelTypeOut>
-// params needed: a,h, search window size, comparison nhood
-__global__ void cudaNLMeans(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> imageOut, double h, 
-	int searchWindowRadius, int nhoodRadius, PixelTypeOut minValue, PixelTypeOut maxValue)
-{
-	Vec<std::size_t> threadCoordinate;
-	GetThreadBlockCoordinate(threadCoordinate);
-
-	if (threadCoordinate < imageIn.getDims())
-	{
-		// for loop over whole image, except for searchWindowSize
-		Vec<int> itc = Vec<int>(threadCoordinate);
-
-		Vec<int> nhoodVec = Vec<int>::min(Vec<int>(imageIn.getDims())-1, Vec<int>(nhoodRadius));
-
-		Vec<int> searchMin = Vec<int>::max(itc - searchWindowRadius, nhoodVec);
-		Vec<int> searchMax = Vec<int>::min(itc + searchWindowRadius+1, Vec<int>(imageIn.getDims()) - nhoodVec);
-
-		// outValAccumulator is unnormalized output value -- we normalize at the end
-		double outValAccumulator= 0.0; // 
-		// wAccumulator is for the normalizing constant
-		double wAccumulator = 1e-7; // 
-		double wMax = 0; // for application when x==y==z==0
-		for (int z = searchMin.z; z < searchMax.z; z++) {
-			for (int y = searchMin.y; y < searchMax.y; y++) {
-				for (int x = searchMin.x; x < searchMax.x; x++) {
-										
-					if ((0 == x) && (0 == y) && (0 == z))
-						continue;
-
-					// center the kernel on threadCoordinate
-					// iterator over nhoodSize
-					KernelIterator kIt(threadCoordinate, imageIn.getDims(), nhoodVec*2+1);
-					// image value at the x,y,z neighborhood window center
-					float searchVal = (float)imageIn( Vec<float>(x,y,z));
-					float wi=0.;
-					for (; !kIt.end(); ++kIt)
-					{
-						Vec<float> imInPos = kIt.getImageCoordinate();
-						float inVal = (float)imageIn(imInPos);
-						 
-						// now get the corresponding pixel value in the search window space
-						Vec<float> sCoord = Vec<float>(x,y,z)-nhoodVec+ kIt.getKernelCoordinate();
-						float sVal = (float)imageIn(sCoord);
-
-						double diff = SQR(inVal - sVal);
-						// weight by gaussian (Ga from paper)
-						double ga = 1; // exp(-1 * (Vec<int>(kIt.getKernelCoordinate()) - nhoodVec).lengthSqr() / SQR(a));
-						// w is gaussian weighted euclidean distance
-						wi += ga*diff;
-					}
-					double w = exp(-wi / SQR(h));
-
-					if (w > wMax)
-						wMax = w;
-
-					outValAccumulator += w*searchVal;
-					wAccumulator += w;
-				}
-			}
-		}
-		// add in for x==y==z==0
-		outValAccumulator += wMax*(float)imageIn(threadCoordinate);
-		wAccumulator += wMax;
-
-
-		// now normalize
-		double outVal = outValAccumulator / wAccumulator;
-				
-		imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue);
-	}
-} // cudaNLMeans 
-
 // this part is the templated function that gets called by the front end.
 // here be cpu
 // todo - if we chunk, make sure search window doesn't go off chunk
 template <class PixelTypeIn, class PixelTypeOut>
-void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, double h, int searchWindowRadius, int nhoodRadius, int device = -1)
+void cNLMeans(ImageView<PixelTypeIn> imageIn, ImageView<PixelTypeOut> imageOut, double h, double searchWindowRadius, int nhoodRadius, int device = -1)
 {
 	const PixelTypeOut MIN_VAL = std::numeric_limits<PixelTypeOut>::lowest();
 	const PixelTypeOut MAX_VAL = std::numeric_limits<PixelTypeOut>::max();
-- 
GitLab