diff --git a/src/MATLAB/+ImProc/BuildScript.m b/src/MATLAB/+ImProc/BuildScript.m
index 003f2793360c5946c641d52479b9c0ae172b328f..bdc7bd44dad29f04318575471f8a351a7f8a1fb4 100644
--- a/src/MATLAB/+ImProc/BuildScript.m
+++ b/src/MATLAB/+ImProc/BuildScript.m
@@ -15,7 +15,7 @@ cudaPath = fileparts(which('ImProc.BuildMexObject'));
 cd(cudaPath)
 
 % create the m files that correspond to the commands in the mex interface
-ImProc.BuildMexObject('..\..\c\Mex.mexw64','Cuda','ImProc');
+ImProc.BuildMexObject(fullfile('..','..','c','Mex.mexa64'),'Cuda','ImProc');
 
 packagePath = cudaPath;
 cudaPath = fullfile(cudaPath,'@Cuda');
diff --git a/src/c/Cuda/CWrappers.cu b/src/c/Cuda/CWrappers.cu
index 0c655cc5971f9e93537a21ac036223e1c8d44228..b56d0c96772c5a089b8aa5e0c5515ac05cc70759 100644
--- a/src/c/Cuda/CWrappers.cu
+++ b/src/c/Cuda/CWrappers.cu
@@ -188,6 +188,7 @@ void clearDevice()
 	 return cDeviceStats(stats);
  }
 
+
  double* entropyFilter(const unsigned char* imageIn, Vec<size_t> dims, Vec<size_t> kernelDims, float* kernel /*= NULL*/, double** imageOut /*= NULL*/, int device /*= 0*/)
  {
 	 return cEntropyFilter(imageIn, dims, kernelDims, kernel, imageOut, device);
@@ -223,37 +224,38 @@ void clearDevice()
 	 return cEntropyFilter(imageIn, dims, kernelDims, kernel, imageOut, device);
  }
 
- size_t* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal/*=std::numeric_limits<unsigned char>::lowest()*/, unsigned char maxVal/*=std::numeric_limits<unsigned char>::max()*/, int device/*=0*/)
+
+ unsigned int* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal/*=std::numeric_limits<unsigned char>::lowest()*/, unsigned char maxVal/*=std::numeric_limits<unsigned char>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned short maxVal/*=std::numeric_limits<unsigned short>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned short maxVal/*=std::numeric_limits<unsigned short>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal/*=std::numeric_limits<short>::lowest()*/,short maxVal/*=std::numeric_limits<short>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal/*=std::numeric_limits<short>::lowest()*/,short maxVal/*=std::numeric_limits<short>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned int maxVal/*=std::numeric_limits<unsigned int>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned int maxVal/*=std::numeric_limits<unsigned int>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal/*=std::numeric_limits<int>::lowest()*/, int maxVal/*=std::numeric_limits<int>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal/*=std::numeric_limits<int>::lowest()*/, int maxVal/*=std::numeric_limits<int>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal/*=std::numeric_limits<float>::lowest()*/, float maxVal/*=std::numeric_limits<float>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal/*=std::numeric_limits<float>::lowest()*/, float maxVal/*=std::numeric_limits<float>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
 
- size_t* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal/*=std::numeric_limits<double>::lowest()*/, double maxVal/*=std::numeric_limits<double>::max()*/, int device/*=0*/)
+ unsigned int* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal/*=std::numeric_limits<double>::lowest()*/, double maxVal/*=std::numeric_limits<double>::max()*/, int device/*=0*/)
 {
 	return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 }
diff --git a/src/c/Cuda/CWrappers.h b/src/c/Cuda/CWrappers.h
index 335fd9831bc1efb4c6cd52e1c8aeaf1adc17de29..a5c45eddc39080e4dccd9e17bcd8d57a50007bba 100644
--- a/src/c/Cuda/CWrappers.h
+++ b/src/c/Cuda/CWrappers.h
@@ -59,13 +59,13 @@ IMAGE_PROCESSOR_API double* entropyFilter(const float* imageIn, Vec<size_t> dims
 IMAGE_PROCESSOR_API double* entropyFilter(const double* imageIn, Vec<size_t> dims, Vec<size_t> kernelDims, float* kernel = NULL, double** imageOut = NULL, int device = 0);
 
 
-IMAGE_PROCESSOR_API size_t* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal = std::numeric_limits<unsigned char>::lowest(), unsigned char maxVal = std::numeric_limits<unsigned char>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal = std::numeric_limits<unsigned short>::lowest(), unsigned short maxVal = std::numeric_limits<unsigned short>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal = std::numeric_limits<short>::lowest(), short maxVal = std::numeric_limits<short>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal = std::numeric_limits<unsigned int>::lowest(), unsigned int maxVal = std::numeric_limits<unsigned int>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal = std::numeric_limits<int>::lowest(), int maxVal = std::numeric_limits<int>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal = std::numeric_limits<float>::lowest(), float maxVal = std::numeric_limits<float>::max(), int device = 0);
-IMAGE_PROCESSOR_API size_t* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal = std::numeric_limits<double>::lowest(), double maxVal = std::numeric_limits<double>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal = std::numeric_limits<unsigned char>::lowest(), unsigned char maxVal = std::numeric_limits<unsigned char>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal = std::numeric_limits<unsigned short>::lowest(), unsigned short maxVal = std::numeric_limits<unsigned short>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal = std::numeric_limits<short>::lowest(), short maxVal = std::numeric_limits<short>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal = std::numeric_limits<unsigned int>::lowest(), unsigned int maxVal = std::numeric_limits<unsigned int>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal = std::numeric_limits<int>::lowest(), int maxVal = std::numeric_limits<int>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal = std::numeric_limits<float>::lowest(), float maxVal = std::numeric_limits<float>::max(), int device = 0);
+IMAGE_PROCESSOR_API unsigned int* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal = std::numeric_limits<double>::lowest(), double maxVal = std::numeric_limits<double>::max(), int device = 0);
 
 IMAGE_PROCESSOR_API unsigned char* gaussianFilter(const unsigned char* imageIn, Vec<size_t> dims, Vec<float> sigmas, unsigned char** imageOut = NULL, int device = 0);
 IMAGE_PROCESSOR_API unsigned short* gaussianFilter(const unsigned short* imageIn, Vec<size_t> dims, Vec<float> sigmas, unsigned short** imageOut = NULL, int device = 0);
diff --git a/src/c/Cuda/CudaHistogram.cuh b/src/c/Cuda/CudaHistogram.cuh
index 6a355fe50334e57fe90283df6f1e1607deaef3f8..350a392e380ac64c0fc7dc9eb00868c933754d45 100644
--- a/src/c/Cuda/CudaHistogram.cuh
+++ b/src/c/Cuda/CudaHistogram.cuh
@@ -9,11 +9,11 @@
 #include "Defines.h"
 
 template <class PixelType>
-__global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t* histogram, PixelType minVal, double binSize,
+__global__ void cudaHistogramCreate( PixelType* values, size_t numValues, unsigned int* histogram, PixelType minVal, double binSize,
 									unsigned int numBins)
 {
 	//This code is modified from that of Sanders - Cuda by Example
-	extern __shared__ size_t tempHisto[];
+	extern __shared__ unsigned int tempHisto[];
 
 	if (threadIdx.x<numBins)
 		tempHisto[threadIdx.x] = 0;
@@ -27,7 +27,7 @@ __global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t
 	{
 		size_t binNum = (size_t)MAX( 0.0, ( (values[i])-minVal) / binSize );
 		binNum = MIN(binNum, (size_t)numBins-1);
-		atomicAdd(&(tempHisto[binNum]), (size_t)1);
+		atomicAdd(&(tempHisto[binNum]), (unsigned int)1);
 		i += stride;
 	}
 
@@ -38,7 +38,7 @@ __global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t
 	__syncthreads();
 }
 
-__global__ void cudaNormalizeHistogram(size_t* histogram, double* normHistogram, unsigned int numBins, double divisor)
+__global__ void cudaNormalizeHistogram(unsigned int* histogram, double* normHistogram, unsigned int numBins, double divisor)
 {
 	int i = threadIdx.x + blockIdx.x*blockDim.x;
 	int stride = blockDim.x * gridDim.x;
@@ -50,18 +50,18 @@ __global__ void cudaNormalizeHistogram(size_t* histogram, double* normHistogram,
 }
 
 template <class PixelType>
-size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, PixelType maxVal, PixelType minVal, const PixelType* imageIn)
+unsigned int* createHistogram(int device, size_t arraySize, Vec<size_t> dims, PixelType maxVal, PixelType minVal, const PixelType* imageIn)
 {
 	cudaSetDevice(device);
 	cudaDeviceProp props;
 	cudaGetDeviceProperties(&props,device);
 
-	if ((size_t)props.sharedMemPerBlock<sizeof(size_t)*arraySize)
+	if ((size_t)props.sharedMemPerBlock<sizeof(unsigned int)*arraySize)
 		throw std::runtime_error("Too many bins to calculate on GPU with current shared memory constraints!");
 
-	size_t* deviceHist;
-	HANDLE_ERROR(cudaMalloc((void**)&deviceHist,sizeof(size_t)*arraySize));
-	HANDLE_ERROR(cudaMemset(deviceHist,0,sizeof(size_t)*arraySize));
+	unsigned int* deviceHist;
+	HANDLE_ERROR(cudaMalloc((void**)&deviceHist,sizeof(unsigned int)*arraySize));
+	HANDLE_ERROR(cudaMemset(deviceHist,0,sizeof(unsigned int)*arraySize));
 	DEBUG_KERNEL_CHECK();
 
 	size_t availMem, total;
@@ -86,7 +86,7 @@ size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, Pi
 		int threads = (int)MIN(numValues,maxThreads);
 		int blocks = (int)MIN(numValues/threads,(size_t)props.multiProcessorCount);
 
-		cudaHistogramCreate<<<blocks,threads,sizeof(size_t)*arraySize>>>(deviceBuffer, numValues, deviceHist, minVal, binSize, arraySize);
+		cudaHistogramCreate<<<blocks,threads,sizeof(unsigned int)*arraySize>>>(deviceBuffer, numValues, deviceHist, minVal, binSize, arraySize);
 		DEBUG_KERNEL_CHECK();
 		cudaThreadSynchronize();
 	}
@@ -97,26 +97,26 @@ size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, Pi
 }
 
 template <class PixelType>
-size_t* cCalculateHistogram(const PixelType* imageIn, Vec<size_t> dims, unsigned int arraySize,
+unsigned int* cCalculateHistogram(const PixelType* imageIn, Vec<size_t> dims, size_t arraySize,
 						   PixelType minVal=std::numeric_limits<PixelType>::lowest(), PixelType maxVal=std::numeric_limits<PixelType>::max(),
 						   int device=0)
 {
-	size_t* hostHist = new size_t[arraySize];
+	unsigned int* hostHist = new unsigned int[arraySize];
 
-	size_t* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn);
+	unsigned int* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn);
 
-	HANDLE_ERROR(cudaMemcpy(hostHist,deviceHist,sizeof(size_t)*arraySize,cudaMemcpyDeviceToHost));
+	HANDLE_ERROR(cudaMemcpy(hostHist,deviceHist,sizeof(unsigned int)*arraySize,cudaMemcpyDeviceToHost));
 	HANDLE_ERROR(cudaFree(deviceHist));
 
 	return hostHist;
 }
 
 template <class PixelType>
-double* cNormalizeHistogram(const PixelType* imageIn, Vec<size_t> dims, unsigned int arraySize,
+double* cNormalizeHistogram(const PixelType* imageIn, Vec<size_t> dims, size_t arraySize,
 						   PixelType minVal=std::numeric_limits<PixelType>::lowest(), PixelType maxVal=std::numeric_limits<PixelType>::max(),
 						   int device=0)
 {
-	size_t* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn);
+	unsigned int* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn);
 
 	cudaDeviceProp props;
 	cudaGetDeviceProperties(&props,device);
@@ -144,7 +144,7 @@ PixelType cOtsuThresholdValue(const PixelType* imageIn, Vec<size_t> dims, int de
 {
 	PixelType minVal, maxVal;
 	cGetMinMax(imageIn,dims,minVal,maxVal,device);
-	unsigned int arraySize = NUM_BINS;
+	size_t arraySize = NUM_BINS;
 
 	double* hist = cNormalizeHistogram(imageIn,dims,arraySize,minVal,maxVal,device);
 
diff --git a/src/c/Cuda/CudaImageContainerClean.cuh b/src/c/Cuda/CudaImageContainerClean.cuh
index 913a7a3ac43f16024b2734c2e865ca73ca1fdc53..8fd4d25c2b3e018bc37cc3a3cdab46afbe66cce6 100644
--- a/src/c/Cuda/CudaImageContainerClean.cuh
+++ b/src/c/Cuda/CudaImageContainerClean.cuh
@@ -8,60 +8,62 @@ class CudaImageContainerClean : public CudaImageContainer<PixelType>
 public:
 	CudaImageContainerClean(const PixelType* imageIn, Vec<size_t> dims, int device=0)
 	{
-		defaults();
-		image = NULL;
-		maxImageDims = dims;
-		roiSizes = dims;
+		this->defaults();
+		this->image = NULL;
+		this->maxImageDims = dims;
+		this->image = NULL;
+		this->maxImageDims = dims;
+		this->roiSizes = dims;
 		this->device = device;
 		loadImage(imageIn,dims);
 	};
 
 	CudaImageContainerClean(Vec<size_t> dims, int device=0) 
 	{
-		defaults();
-		image = NULL;
-		maxImageDims = dims;
-		imageDims = dims;
-		roiSizes = dims;
+		this->defaults();
+		this->image = NULL;
+		this->maxImageDims = dims;
+		this->imageDims = dims;
+		this->roiSizes = dims;
 		this->device = device;
 		HANDLE_ERROR(cudaSetDevice(device));
-		HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(PixelType)*dims.product()));
-		HANDLE_ERROR(cudaMemset(image,0,sizeof(PixelType)*dims.product()));
+		HANDLE_ERROR(cudaMalloc((void**)&this->image,sizeof(PixelType)*dims.product()));
+		HANDLE_ERROR(cudaMemset(this->image,0,sizeof(PixelType)*dims.product()));
 	};
 
 	~CudaImageContainerClean()
 	{
-		if (image!=NULL)
+		if (this->image!=NULL)
 		{
-			HANDLE_ERROR(cudaSetDevice(device));
+			HANDLE_ERROR(cudaSetDevice(this->device));
 			try
 			{
-				HANDLE_ERROR(cudaFree(image));
+				HANDLE_ERROR(cudaFree(this->image));
 			}
 			catch (char* err)
 			{
 				if (err!=NULL)
 					err[0] = 'e';
 			}
-			image = NULL;
+			this->image = NULL;
 		}
 	}
 
 	CudaImageContainerClean(const CudaImageContainerClean& other)
 	{
-		device = other.getDeviceNumber();
-		imageDims = other.getDims();
-		image = NULL;
+		this->device = other.getDeviceNumber();
+		this->imageDims = other.getDims();
+		this->image = NULL;
 
-		HANDLE_ERROR(cudaSetDevice(device));
+		HANDLE_ERROR(cudaSetDevice(this->device));
 
-		if (imageDims>Vec<size_t>(0,0,0))
+		if (this->imageDims>Vec<size_t>(0,0,0))
 		{
-			HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(PixelType)*imageDims.product()));
-			HANDLE_ERROR(cudaMemcpy(image,other.getConstImagePointer(),sizeof(PixelType)*imageDims.product(),cudaMemcpyDeviceToDevice));
+			HANDLE_ERROR(cudaMalloc((void**)&this->image,sizeof(PixelType)*this->imageDims.product()));
+			HANDLE_ERROR(cudaMemcpy(this->image,other.getConstImagePointer(),sizeof(PixelType)*this->imageDims.product(),cudaMemcpyDeviceToDevice));
 		}
 	}
 
 protected:
-	CudaImageContainerClean() : CudaImageContainer(){};
+	CudaImageContainerClean() : CudaImageContainer<PixelType>(){};
 };
\ No newline at end of file
diff --git a/src/c/Cuda/CudaUtilities.cu b/src/c/Cuda/CudaUtilities.cu
index 475f5e2fe4ebe4d83b40986d0c4b9b76b1f962a5..ef66d9ddcd3103bdc923c23b64a4c5b27aaf1c09 100644
--- a/src/c/Cuda/CudaUtilities.cu
+++ b/src/c/Cuda/CudaUtilities.cu
@@ -1,5 +1,16 @@
 #include "CudaUtilities.cuh"
 
+int bitscanReverse(size_t mask)
+{
+	for ( int i=63; i>=0; --i )
+	{
+		if ( (mask >> i) && 0x01 )
+			return i;
+	}
+ 
+	return -1;
+}
+
 void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads,
 					 size_t maxThreads/*=std::numeric_limits<size_t>::max()*/)
 {
@@ -49,8 +60,7 @@ void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &
 		}
 		else
 		{
-			unsigned long index;
-			_BitScanReverse(&index,unsigned long(mxThreads));
+			int index = bitscanReverse(mxThreads);
 
 			int dim = index/3;
 			threads.x = 1 << MAX(dim,(int)index - 2*dim);
@@ -208,7 +218,7 @@ bool checkFreeMemory(size_t needed, int device, bool throws/*=false*/)
 		if (throws)
 		{
 			char buff[255];
-			sprintf_s(buff,"Out of CUDA Memory!\nNeed: %zu\nHave: %zu\n",needed,free);
+			sprintf(buff,"Out of CUDA Memory!\nNeed: %zu\nHave: %zu\n",needed,free);
 			throw std::runtime_error(buff);
 		}
 		return false;
diff --git a/src/c/Cuda/CudaUtilities.cuh b/src/c/Cuda/CudaUtilities.cuh
index 534cf0d4e8c2fbad638ed931c1e55bfc89c2f01d..6658f5e426006766cd1da67cf7089f1a3cb98261 100644
--- a/src/c/Cuda/CudaUtilities.cuh
+++ b/src/c/Cuda/CudaUtilities.cuh
@@ -9,6 +9,7 @@
 
 #include <cuda_occupancy.h>
 #include <functional>
+#include <limits>
 
 template <typename T, typename U>
 int getKernelMaxThreadsSharedMem(T func, U f, int threadLimit = 0)
@@ -39,23 +40,23 @@ int getKernelMaxThreads(T func, int threadLimit=0)
 #define DEBUG_KERNEL_CHECK() {}
 #endif // _DEBUG
 
-static void HandleError( cudaError_t err, const char *file, int line ) 
+static void HandleError( cudaError_t err, const char* file, int line ) 
 {
 	if (err != cudaSuccess) 
 	{
 		char* errorMessage = new char[255];
-		sprintf_s(errorMessage, 255, "%s in %s at line %d\n", cudaGetErrorString( err ),	file, line );
+		sprintf(errorMessage, "%s in %s at line %d\n", cudaGetErrorString( err ),	file, line );
 		throw std::runtime_error(errorMessage);
 	}
 }
 #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
 
-inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
+inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort=true)
 {
 	if (code != cudaSuccess) 
 	{
 		char buff[255];
-		sprintf_s(buff, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+		sprintf(buff, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
 		throw std::runtime_error(buff);
 	}
 }
diff --git a/src/c/Cuda/Vec.h b/src/c/Cuda/Vec.h
index ebbacfd286cc616585b44bf4fe9941a67d746b74..a639696c549742eca160b6c63366fd972d6c2b64 100644
--- a/src/c/Cuda/Vec.h
+++ b/src/c/Cuda/Vec.h
@@ -19,6 +19,8 @@
 #include "Defines.h"
 
 #include <type_traits>
+#include <stdlib.h>
+#include <cmath>
 
 #undef min
 #undef max
@@ -192,8 +194,6 @@ public:
 	MIXED_PREFIX Vec<size_t> coordAddressOf(U idx)const
 	{
 		Vec<size_t> vecOut = Vec<size_t>(0,0,0);
-		if(x==0 && y==0 && z==0)
-			throw runtime_error("Not a valid vector to index into!");
 
 		if(x==0)
 		{
@@ -329,7 +329,7 @@ public:
 
 	MIXED_PREFIX double EuclideanDistanceTo(const Vec<T>& other)
 	{
-		return sqrt((double)(SQR(x-other.x)+SQR(y-other.y)+SQR(z-other.z)));
+		return ::sqrt((double)(SQR(x-other.x)+SQR(y-other.y)+SQR(z-other.z)));
 	}
 
 	MIXED_PREFIX double lengthSqr()
diff --git a/src/c/Mex/MexCommand.cpp b/src/c/Mex/MexCommand.cpp
index daee3a5d805fa3021b25655a6515996d3c2a9718..d1b45c7df0184991ffce3ec60109384753b59c1f 100644
--- a/src/c/Mex/MexCommand.cpp
+++ b/src/c/Mex/MexCommand.cpp
@@ -9,16 +9,16 @@
 #undef BUILD_COMMANDS
 
 // Module name info
-HMODULE ModuleInfo::hModule;
+//HMODULE ModuleInfo::hModule;
 std::string ModuleInfo::name;
 
-BOOL WINAPI DllMain(HINSTANCE hInstDLL,DWORD fdwReason,LPVOID lpReserved)
-{
-	if(fdwReason == DLL_PROCESS_ATTACH)
-		ModuleInfo::setModuleHandle(hInstDLL);
+// BOOL WINAPI DllMain(HINSTANCE hInstDLL,DWORD fdwReason,LPVOID lpReserved)
+// {
+// 	if(fdwReason == DLL_PROCESS_ATTACH)
+// 		ModuleInfo::setModuleHandle(hInstDLL);
 
-	return TRUE;
-}
+// 	return TRUE;
+// }
 
 
 // MexCommandInfo - This command can be used to provide an easy to parse matlab command info structure for all MEX commands.
@@ -264,4 +264,11 @@ Vec<size_t> MexCommand::FillKernel(const mxArray* matKernelIn, float** kernel )
     }
 
     return kernDims;
-}
\ No newline at end of file
+}
+
+// Logical array creation specialziation
+template <>
+mxArray* MexCommand::createArray<bool>(mwSize ndim, const mwSize* dims)
+{
+	return mxCreateLogicalArray(ndim, dims);
+}
diff --git a/src/c/Mex/MexCommand.h b/src/c/Mex/MexCommand.h
index 8b05448d873be709332a8f90ad491b81715dc896..0754d3ad0f34a3345ce3607bad7556aecde260c1 100644
--- a/src/c/Mex/MexCommand.h
+++ b/src/c/Mex/MexCommand.h
@@ -1,25 +1,26 @@
 #pragma once
-#include "Vec.h"
-#include "ScopedProcessMutex.h"
+#include "../Cuda/Vec.h"
+//#include "ScopedProcessMutex.h"
 
 #include "mex.h"
-#include "windows.h"
+//#include "windows.h"
 #undef min
 #undef max
 
 #include <vector>
-#include <string>
+#include <string.h>
 #include <algorithm>
 #include <exception>
+#include <stdexcept>
 
 // Static class for holding some module information
 class ModuleInfo
 {
 public:
-	static void setModuleHandle(HMODULE handle)
-	{
-		hModule = handle;
-	}
+	// static void setModuleHandle(HMODULE handle)
+	// {
+	// 	hModule = handle;
+	// }
 
 	static void initModuleInfo()
 	{
@@ -35,27 +36,28 @@ public:
 private:
 	static std::string initName()
 	{
-		if(hModule == NULL)
-			return "";
+		return "Mex";
+		// if(hModule == NULL)
+		// 	return "";
 
-		char pathBuffer[1024];
-		DWORD result = GetModuleFileName((HMODULE)hModule,pathBuffer,1024);
-		if(FAILED(result))
-			return "";
+		// char pathBuffer[1024];
+		// DWORD result = GetModuleFileName((HMODULE)hModule,pathBuffer,1024);
+		// if(FAILED(result))
+		// 	return "";
 
-		std::string path(pathBuffer);
+		// std::string path(pathBuffer);
 
-		size_t startOffset = path.find_last_of('\\') + 1;
-		path = path.substr(startOffset);
+		// size_t startOffset = path.find_last_of('\\') + 1;
+		// path = path.substr(startOffset);
 
-		size_t endOffset = path.find_last_of('.');
+		// size_t endOffset = path.find_last_of('.');
 
-		return path.substr(0,endOffset);
+		// return path.substr(0,endOffset);
 	}
 
 private:
 	static std::string name;
-	static HMODULE hModule;
+	//static HMODULE hModule;
 };
 
 // Abstract base class for mex commands
@@ -101,7 +103,7 @@ public:
 
 		try
 		{
-			ScopedProcessMutex("CudaMutex");
+			//ScopedProcessMutex("CudaMutex");
 			mexCmd->execute(nlhs,plhs,cmdNRHS,cmdPRHS);
 		} catch(const std::runtime_error& err)
 		{
@@ -221,14 +223,6 @@ protected:
 
 	// Simple template-specialization map for C++ to mex types
 	template <typename T> struct TypeMap		{static const mxClassID mxType;};
-	template <> struct TypeMap<char>			{static const mxClassID mxType = mxINT8_CLASS;};
-	template <> struct TypeMap<short>			{static const mxClassID mxType = mxINT16_CLASS;};
-	template <> struct TypeMap<int>				{static const mxClassID mxType = mxINT32_CLASS;};
-	template <> struct TypeMap<unsigned char>	{static const mxClassID mxType = mxUINT8_CLASS;};
-	template <> struct TypeMap<unsigned short>	{static const mxClassID mxType = mxUINT16_CLASS;};
-	template <> struct TypeMap<unsigned int>	{static const mxClassID mxType = mxUINT32_CLASS;};
-	template <> struct TypeMap<float>			{static const mxClassID mxType = mxSINGLE_CLASS;};
-	template <> struct TypeMap<double>			{static const mxClassID mxType = mxDOUBLE_CLASS;};
 
 	// General array creation method
 	template <typename T>
@@ -237,13 +231,6 @@ protected:
 		return mxCreateNumericArray(ndim, dims, TypeMap<T>::mxType, mxREAL);
 	}
 
-	// Logical array creation specialziation
-	template <>
-	static mxArray* createArray<bool>(mwSize ndim, const mwSize* dims)
-	{
-		return mxCreateLogicalArray(ndim, dims);
-	}
-
 	template <typename T>
 	static void setupImagePointers(const mxArray* imageIn, T** image, Vec<size_t>* dims, mxArray** argOut = NULL, T** imageOut = NULL)
 	{
@@ -271,7 +258,7 @@ protected:
 	}
 
 
-    static Vec<size_t> MexCommand::FillKernel(const mxArray* matKernelIn, float** kernel);
+    static Vec<size_t> FillKernel(const mxArray* matKernelIn, float** kernel);
 
 private:
 	static const size_t m_numCommands;
@@ -280,5 +267,16 @@ private:
 	std::string m_cmdString;
 };
 
+template <> struct MexCommand::TypeMap<char>			{static const mxClassID mxType = mxINT8_CLASS;};
+template <> struct MexCommand::TypeMap<short>			{static const mxClassID mxType = mxINT16_CLASS;};
+template <> struct MexCommand::TypeMap<int>				{static const mxClassID mxType = mxINT32_CLASS;};
+template <> struct MexCommand::TypeMap<unsigned char>	{static const mxClassID mxType = mxUINT8_CLASS;};
+template <> struct MexCommand::TypeMap<unsigned short>	{static const mxClassID mxType = mxUINT16_CLASS;};
+template <> struct MexCommand::TypeMap<unsigned int>	{static const mxClassID mxType = mxUINT32_CLASS;};
+template <> struct MexCommand::TypeMap<float>			{static const mxClassID mxType = mxSINGLE_CLASS;};
+template <> struct MexCommand::TypeMap<double>			{static const mxClassID mxType = mxDOUBLE_CLASS;};
+
+template <>
+mxArray* MexCommand::createArray<bool>(mwSize ndim, const mwSize* dims);
 
 #include "CommandList.h"
diff --git a/src/c/Mex/MexDeviceCount.cpp b/src/c/Mex/MexDeviceCount.cpp
index 94184bd78e7931dc8bb937a5e529eb1c0d064ffb..eaa313725050176c959cdb4a4762e56eba06a560 100644
--- a/src/c/Mex/MexDeviceCount.cpp
+++ b/src/c/Mex/MexDeviceCount.cpp
@@ -12,7 +12,7 @@ void MexDeviceCount::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray*
 		int numDevices = memoryStats(&memStats);
 		plhs[0] = mxCreateDoubleScalar(numDevices);
 		const char* fieldNames[] = {"total","available"};
-		mwSize dims[2] ={1,numDevices};
+		mwSize dims[2] ={1,(mwSize)numDevices};
 		plhs[1] = mxCreateStructArray(2,dims,2,fieldNames);
 
 		int total_field = mxGetFieldNumber(plhs[1],"total");
diff --git a/src/c/Mex/MexDeviceStats.cpp b/src/c/Mex/MexDeviceStats.cpp
index cf197b88f22d17b7310bae72347d32bdd7884c64..b0d662207e41a8f9286fa1dd7feaadca7cbac6a3 100644
--- a/src/c/Mex/MexDeviceStats.cpp
+++ b/src/c/Mex/MexDeviceStats.cpp
@@ -7,7 +7,7 @@ void MexDeviceStats::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray*
     DevStats* devStats;
     int numDevices = deviceStats(&devStats);
 
-    mwSize dims[2] = {numDevices,1};
+    mwSize dims[2] = {(mwSize)numDevices,1};
     const char* fieldNames[] = {"name", "major", "minor", "constMem", "sharedMem", "totalMem", "tccDriver", "mpCount", "threadsPerMP", "warpSize", "maxThreads"};
     plhs[0] = mxCreateStructArray(2, dims, 11, fieldNames);
     int name_field = mxGetFieldNumber(plhs[0], "name");
diff --git a/src/c/Mex/MexHistogram.cpp b/src/c/Mex/MexHistogram.cpp
index b81c9da5219723d7477fb78055d78fe04d7d4acf..d1876a99560465c2966ab40b0197b3302a3ea1ed 100644
--- a/src/c/Mex/MexHistogram.cpp
+++ b/src/c/Mex/MexHistogram.cpp
@@ -10,7 +10,7 @@
 		 device = mat_to_c((int)mxGetScalar(prhs[4]));
 
  	unsigned int arraySize = (unsigned int)mxGetScalar(prhs[1]);
-	size_t* hist;
+	unsigned int* hist;
 
 	Vec<size_t> imageDims;
 	if (mxIsUint8(prhs[0]))
diff --git a/src/c/Mex/MexResize.cpp b/src/c/Mex/MexResize.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f879e95d0b01d854bb44aadb60a18641e7be9bc9
--- /dev/null
+++ b/src/c/Mex/MexResize.cpp
@@ -0,0 +1,183 @@
+#include "MexCommand.h"
+#include "Vec.h"
+#include "CWrappers.h"
+#include "Defines.h"
+
+void MexResize::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) const
+{
+    Vec<size_t> imageInDims(0, 0, 0);
+    setupDims(prhs[0], &imageInDims);
+    Vec<double> reductionFactors(0.0, 0.0, 0.0);
+    if(!mxIsEmpty(prhs[1]))
+    {
+        double* reductionD = (double*)mxGetData(prhs[1]);
+        reductionFactors = Vec<double>(reductionD[0], reductionD[1], reductionD[2]);
+    }
+
+    Vec<size_t> imageOutDims(0, 0, 0);
+    if(!mxIsEmpty(prhs[2]))
+    {
+        double* outDim = (double*)mxGetData(prhs[2]);
+        imageOutDims = Vec<size_t>(outDim[0], outDim[1], outDim[2]);
+
+        reductionFactors = Vec<double>(imageOutDims)/Vec<double>(imageInDims);
+    }
+
+    char method[36];
+    ReductionMethods mthd = REDUC_MEAN;
+    mxGetString(prhs[3], method, 255);
+    if(strcmp(method, "mean")==0)
+        mthd = REDUC_MEAN;
+    else if(strcmp(method, "median")==0)
+        mthd = REDUC_MEDIAN;
+    else if(strcmp(method, "min")==0)
+        mthd = REDUC_MIN;
+    else if(strcmp(method, "max")==0)
+        mthd = REDUC_MAX;
+    else if(strcmp(method, "gaussian")==0)
+        mthd = REDUC_GAUS;
+    else
+        mexErrMsgTxt("Method of resize not supported!");
+
+    int device = 0;
+    if(nrhs>4)
+        device = mat_to_c((int)mxGetScalar(prhs[4]));
+
+    imageOutDims = Vec<size_t>((Vec<double>(imageInDims)*reductionFactors).floor());
+
+    if(mxIsLogical(prhs[0]))
+    {
+        bool* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsUint8(prhs[0]))
+    {
+        unsigned char* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsUint16(prhs[0]))
+    {
+        unsigned short* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsInt16(prhs[0]))
+    {
+        short* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsUint32(prhs[0]))
+    {
+        unsigned int* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsInt32(prhs[0]))
+    {
+        int* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsSingle(prhs[0]))
+    {
+        float* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else if(mxIsDouble(prhs[0]))
+    {
+        double* imageIn, *imageOut;
+        setupInputPointers(prhs[0], &imageInDims, &imageIn);
+        setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut);
+        
+        resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device);
+    } else
+    {
+        mexErrMsgTxt("Image type not supported!");
+    }
+}
+
+std::string MexResize::check(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) const
+{
+    if (nrhs<3 || nrhs>5)
+        return "Incorrect number of inputs!";
+
+    size_t numDims = mxGetNumberOfDimensions(prhs[0]);
+    if(numDims>3||numDims<2)
+        return "Image can only be either 2D or 3D!";
+
+    size_t numElFac = mxGetNumberOfElements(prhs[1]);
+    size_t numElSize = mxGetNumberOfElements(prhs[2]);
+
+    if(numElFac==0&&numElSize==0)
+        return "Either ResizeFactor of ExplicitSize must be set.";
+
+    if(!mxIsEmpty(prhs[1]) && (numElFac!=3||!mxIsDouble(prhs[1])))
+        return "Resize amounts have to be an array of three doubles!";
+
+    if(!mxIsEmpty(prhs[2]) &&(numElSize!=3||!mxIsDouble(prhs[2])))
+        return "ExplicitSize amounts have to be an array of three doubles!";
+
+    Vec<double> reductionFactors(0.0, 0.0, 0.0);
+    if(!mxIsEmpty(prhs[1]))
+    {
+        double* reductionD = (double*)mxGetData(prhs[1]);
+        reductionFactors = Vec<double>(reductionD[0], reductionD[1], reductionD[2]);
+
+        if(reductionFactors.maxValue()>1)
+            return "Enlarging any dimension is currently not implemented!";
+    }
+
+    if(!mxIsEmpty(prhs[2]))
+    {
+        Vec<size_t> imageOutDims(0, 0, 0);
+        double* outDim = (double*)mxGetData(prhs[2]);
+        imageOutDims = Vec<size_t>(outDim[0], outDim[1], outDim[2]);
+
+        Vec<size_t> imageInDims(0, 0, 0);
+        double* inDim = (double*)mxGetData(prhs[0]);
+        imageInDims = Vec<size_t>(inDim[0], inDim[1], inDim[2]);
+
+        if (imageInDims>imageOutDims)
+            return "Enlarging any dimension is currently not implemented!";
+    }
+
+    return "";
+}
+
+void MexResize::usage(std::vector<std::string>& outArgs, std::vector<std::string>& inArgs) const
+{
+    inArgs.push_back("imageIn");
+    inArgs.push_back("resizeFactor");
+    inArgs.push_back("explicitSize");
+    inArgs.push_back("method");
+    inArgs.push_back("device");
+
+    outArgs.push_back("imageOut");
+}
+
+void MexResize::help(std::vector<std::string>& helpLines) const
+{
+    helpLines.push_back("Resizes image using various methods.");
+
+    helpLines.push_back("\tImageIn -- can be an image up to three dimensions and of type (logical,uint8,int8,uint16,int16,uint32,int32,single,double).");
+    helpLines.push_back("\tResizeFactor_rcz -- This represents the output size relative to input (r,c,z). Values less than one but greater than zero will reduce the image.");
+    helpLines.push_back("\t\tValues greater than one will enlarge the image. If this is an empty array, it will be calculated from the explicit resize.");
+    helpLines.push_back("\t\t\tIf both resizeFactor and explicitSize are both set, the explicitSize will be used.");
+    helpLines.push_back("\tExplicitSize_rcz -- This is the size that the output should be (r,c,z). If this is an empty array, then the resize factor is used.");
+    helpLines.push_back("\t\t\tIf both resizeFactor and explicitSize are both set, the explicitSize will be used.");
+    helpLines.push_back("\tMethod -- This is the neighborhood operation to apply when resizing (mean, median, min, max, gaussian).");
+    helpLines.push_back("\tDevice -- this is an optional parameter that indicates which Cuda capable device to use.");
+
+    helpLines.push_back("\tImageOut -- This will be a resize image the same type as the input image.");
+}
diff --git a/src/c/Mex/ScopedProcessMutex.cpp b/src/c/Mex/ScopedProcessMutex.cpp
index 3fb5ddca4b7e8d28f4c2555881f26612e7a6cdd7..db9ae12421064cb5b701891a77202fadaa9c5ed5 100644
--- a/src/c/Mex/ScopedProcessMutex.cpp
+++ b/src/c/Mex/ScopedProcessMutex.cpp
@@ -1,34 +1,34 @@
 #include "ScopedProcessMutex.h"
 
-HANDLE ScopedProcessMutex::mutexHandle = NULL;
+//HANDLE ScopedProcessMutex::mutexHandle = NULL;
 
 ScopedProcessMutex::ScopedProcessMutex(const std::string& name)
 {
-	if ( !mutexHandle )
-	{
-		mutexHandle = CreateMutex(NULL, false, name.c_str());
-		if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED )
-			mutexHandle = OpenMutex(SYNCHRONIZE, false, name.c_str());
+	// if ( !mutexHandle )
+	// {
+	// 	mutexHandle = CreateMutex(NULL, false, name.c_str());
+	// 	if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED )
+	// 		mutexHandle = OpenMutex(SYNCHRONIZE, false, name.c_str());
 
-		if ( !mutexHandle )
-			throw std::runtime_error("Error creating mutex handle!");
-	}
+	// 	if ( !mutexHandle )
+	// 		throw std::runtime_error("Error creating mutex handle!");
+	// }
 
-	DWORD waitResult = WaitForSingleObject(mutexHandle, INFINITE);
-	if ( waitResult == WAIT_FAILED )
-	{
-		mutexHandle = NULL;
-		throw std::runtime_error("Error unable to acquire mutex!");
-	}
-	else if ( waitResult == WAIT_ABANDONED )
-	{
-		mutexHandle = NULL;
-		throw std::runtime_error("Previous thread terminated without releasing mutex!");
-	}
+	// DWORD waitResult = WaitForSingleObject(mutexHandle, INFINITE);
+	// if ( waitResult == WAIT_FAILED )
+	// {
+	// 	mutexHandle = NULL;
+	// 	throw std::runtime_error("Error unable to acquire mutex!");
+	// }
+	// else if ( waitResult == WAIT_ABANDONED )
+	// {
+	// 	mutexHandle = NULL;
+	// 	throw std::runtime_error("Previous thread terminated without releasing mutex!");
+	// }
 }
 
 ScopedProcessMutex::~ScopedProcessMutex()
 {
-	if ( mutexHandle )
-		ReleaseMutex(mutexHandle);
+	// if ( mutexHandle )
+	// 	ReleaseMutex(mutexHandle);
 }
diff --git a/src/c/Mex/ScopedProcessMutex.h b/src/c/Mex/ScopedProcessMutex.h
index 7974ff3f5521f02acc0d6b053316d50358d80ebc..7b88d947d7adb46283abd7ddfef7215a529b3ee3 100644
--- a/src/c/Mex/ScopedProcessMutex.h
+++ b/src/c/Mex/ScopedProcessMutex.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <string>
-#include <windows.h>
+//#include <windows.h>
 
 class ScopedProcessMutex
 {
@@ -13,5 +13,5 @@ private:
 	ScopedProcessMutex(){}
 	ScopedProcessMutex(const ScopedProcessMutex& other){}
 
-	static HANDLE mutexHandle;
+	//static HANDLE mutexHandle;
 };
diff --git a/src/c/makefile b/src/c/makefile
new file mode 100644
index 0000000000000000000000000000000000000000..7e3519cf9913e3572862f2a5ee44612d3f0ff729
--- /dev/null
+++ b/src/c/makefile
@@ -0,0 +1,63 @@
+CU_FILES := $(wildcard Cuda/*.cu)
+CUPP_FILES := $(wildcard Cuda/*.cpp)
+CPP_FILES := $(wildcard Mex/*.cpp)
+CUDA_OBJ = $(notdir $(CU_FILES:.cu=.o))
+CUDA_CPP_OBJ = $(notdir $(CUPP_FILES:.cpp=.o))
+CPP_OBJ = $(notdir $(CPP_FILES:.cpp=.o))
+
+#########################################
+# Nvidia nvcc parameters
+#########################################
+NVCC_PATH = nvcc
+NVCC_FLAGS = -std=c++11 
+SMODEL = -arch=sm_30
+NVCC_INC = -I/usr/include
+C_COMPILER = clang++
+
+#########################################
+# gcc/g++ parameters
+#########################################
+GCC_FLAGS = -std=c++11
+GCC_LIB = -L/usr/local/cuda/lib64 /usr/local/MATLAB/R2017a/bin/glnxa64/libmx.so /usr/local/MATLAB/R2017a/bin/glnxa64/libmex.so
+#-lcudart
+GCC_INC = -I/usr/local/MATLAB/R2017a/extern/include -I./Cuda
+
+#########################################
+# List all of the projects to build
+#########################################
+all: Mex.mexa64
+	@echo Finished compiling
+
+#########################################
+# link it all together
+#########################################
+Mex.mexa64: $(CUDA_OBJ) $(CPP_OBJ) $(CUDA_CPP_OBJ)
+	$(NVCC_PATH) $(GCC_LIB) -shared $^ -o Mex.mexa64 $(SMODEL) --linker-options '--no-undefined'
+
+#########################################
+# compile the cuda tools
+#########################################
+%.o: Cuda/%.cu
+	$(NVCC_PATH) $(NVCC_FLAGS) $(SMODEL) --compiler-options '-fPIC' -c $< -o $@ $(NVCC_INC) -dc
+
+%.o: Cuda/%.cpp
+	$(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $< -o $@
+
+# CWrappers.o: Cuda/CWrappers.cu
+# 	$(NVCC_PATH) $(NVCC_FLAGS) --compiler-options '-fPIC' -c $^ -o $@ $(NVCC_INC) -dc
+
+#########################################
+# compile the mex tools
+#########################################
+%.o: Mex/%.cpp
+	$(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $< -o $@
+
+# cudaMex.o: Mex/CudaMex.cpp
+# 	$(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $^ -o $@
+
+#########################################
+# File to clean up
+#########################################
+clean:
+	@echo Cleaning
+	rm -f *.o Mex.mexa64