From c552fa86686b9f84ed80a6e3edb83ff614f4760c Mon Sep 17 00:00:00 2001 From: Eric Wait <eric@waitphoto.com> Date: Wed, 29 Mar 2017 23:23:32 -0400 Subject: [PATCH] First attempt at linux compile --- src/MATLAB/+ImProc/BuildScript.m | 2 +- src/c/Cuda/CWrappers.cu | 16 ++- src/c/Cuda/CWrappers.h | 14 +- src/c/Cuda/CudaHistogram.cuh | 34 ++--- src/c/Cuda/CudaImageContainerClean.cuh | 48 +++---- src/c/Cuda/CudaUtilities.cu | 16 ++- src/c/Cuda/CudaUtilities.cuh | 9 +- src/c/Cuda/Vec.h | 6 +- src/c/Mex/MexCommand.cpp | 23 ++-- src/c/Mex/MexCommand.h | 72 +++++----- src/c/Mex/MexDeviceCount.cpp | 2 +- src/c/Mex/MexDeviceStats.cpp | 2 +- src/c/Mex/MexHistogram.cpp | 2 +- src/c/Mex/MexResize.cpp | 183 +++++++++++++++++++++++++ src/c/Mex/ScopedProcessMutex.cpp | 44 +++--- src/c/Mex/ScopedProcessMutex.h | 4 +- src/c/makefile | 63 +++++++++ 17 files changed, 403 insertions(+), 137 deletions(-) create mode 100644 src/c/Mex/MexResize.cpp create mode 100644 src/c/makefile diff --git a/src/MATLAB/+ImProc/BuildScript.m b/src/MATLAB/+ImProc/BuildScript.m index 003f2793..bdc7bd44 100644 --- a/src/MATLAB/+ImProc/BuildScript.m +++ b/src/MATLAB/+ImProc/BuildScript.m @@ -15,7 +15,7 @@ cudaPath = fileparts(which('ImProc.BuildMexObject')); cd(cudaPath) % create the m files that correspond to the commands in the mex interface -ImProc.BuildMexObject('..\..\c\Mex.mexw64','Cuda','ImProc'); +ImProc.BuildMexObject(fullfile('..','..','c','Mex.mexa64'),'Cuda','ImProc'); packagePath = cudaPath; cudaPath = fullfile(cudaPath,'@Cuda'); diff --git a/src/c/Cuda/CWrappers.cu b/src/c/Cuda/CWrappers.cu index 0c655cc5..b56d0c96 100644 --- a/src/c/Cuda/CWrappers.cu +++ b/src/c/Cuda/CWrappers.cu @@ -188,6 +188,7 @@ void clearDevice() return cDeviceStats(stats); } + double* entropyFilter(const unsigned char* imageIn, Vec<size_t> dims, Vec<size_t> kernelDims, float* kernel /*= NULL*/, double** imageOut /*= NULL*/, int device /*= 0*/) { return cEntropyFilter(imageIn, dims, kernelDims, kernel, imageOut, device); @@ -223,37 +224,38 @@ void clearDevice() return cEntropyFilter(imageIn, dims, kernelDims, kernel, imageOut, device); } - size_t* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal/*=std::numeric_limits<unsigned char>::lowest()*/, unsigned char maxVal/*=std::numeric_limits<unsigned char>::max()*/, int device/*=0*/) + + unsigned int* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal/*=std::numeric_limits<unsigned char>::lowest()*/, unsigned char maxVal/*=std::numeric_limits<unsigned char>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned short maxVal/*=std::numeric_limits<unsigned short>::max()*/, int device/*=0*/) + unsigned int* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned short maxVal/*=std::numeric_limits<unsigned short>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal/*=std::numeric_limits<short>::lowest()*/,short maxVal/*=std::numeric_limits<short>::max()*/, int device/*=0*/) + unsigned int* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal/*=std::numeric_limits<short>::lowest()*/,short maxVal/*=std::numeric_limits<short>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned int maxVal/*=std::numeric_limits<unsigned int>::max()*/, int device/*=0*/) + unsigned int* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal/*=std::numeric_limits<unsigned short>::lowest()*/, unsigned int maxVal/*=std::numeric_limits<unsigned int>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal/*=std::numeric_limits<int>::lowest()*/, int maxVal/*=std::numeric_limits<int>::max()*/, int device/*=0*/) + unsigned int* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal/*=std::numeric_limits<int>::lowest()*/, int maxVal/*=std::numeric_limits<int>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal/*=std::numeric_limits<float>::lowest()*/, float maxVal/*=std::numeric_limits<float>::max()*/, int device/*=0*/) + unsigned int* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal/*=std::numeric_limits<float>::lowest()*/, float maxVal/*=std::numeric_limits<float>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } - size_t* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal/*=std::numeric_limits<double>::lowest()*/, double maxVal/*=std::numeric_limits<double>::max()*/, int device/*=0*/) + unsigned int* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal/*=std::numeric_limits<double>::lowest()*/, double maxVal/*=std::numeric_limits<double>::max()*/, int device/*=0*/) { return cCalculateHistogram(imageIn,dims,arraySize,minVal,maxVal,device); } diff --git a/src/c/Cuda/CWrappers.h b/src/c/Cuda/CWrappers.h index 335fd983..a5c45edd 100644 --- a/src/c/Cuda/CWrappers.h +++ b/src/c/Cuda/CWrappers.h @@ -59,13 +59,13 @@ IMAGE_PROCESSOR_API double* entropyFilter(const float* imageIn, Vec<size_t> dims IMAGE_PROCESSOR_API double* entropyFilter(const double* imageIn, Vec<size_t> dims, Vec<size_t> kernelDims, float* kernel = NULL, double** imageOut = NULL, int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal = std::numeric_limits<unsigned char>::lowest(), unsigned char maxVal = std::numeric_limits<unsigned char>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal = std::numeric_limits<unsigned short>::lowest(), unsigned short maxVal = std::numeric_limits<unsigned short>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal = std::numeric_limits<short>::lowest(), short maxVal = std::numeric_limits<short>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal = std::numeric_limits<unsigned int>::lowest(), unsigned int maxVal = std::numeric_limits<unsigned int>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal = std::numeric_limits<int>::lowest(), int maxVal = std::numeric_limits<int>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal = std::numeric_limits<float>::lowest(), float maxVal = std::numeric_limits<float>::max(), int device = 0); -IMAGE_PROCESSOR_API size_t* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal = std::numeric_limits<double>::lowest(), double maxVal = std::numeric_limits<double>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned char* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned char minVal = std::numeric_limits<unsigned char>::lowest(), unsigned char maxVal = std::numeric_limits<unsigned char>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned short* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned short minVal = std::numeric_limits<unsigned short>::lowest(), unsigned short maxVal = std::numeric_limits<unsigned short>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const short* imageIn, Vec<size_t> dims, unsigned int arraySize, short minVal = std::numeric_limits<short>::lowest(), short maxVal = std::numeric_limits<short>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const unsigned int* imageIn, Vec<size_t> dims, unsigned int arraySize, unsigned int minVal = std::numeric_limits<unsigned int>::lowest(), unsigned int maxVal = std::numeric_limits<unsigned int>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const int* imageIn, Vec<size_t> dims, unsigned int arraySize, int minVal = std::numeric_limits<int>::lowest(), int maxVal = std::numeric_limits<int>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const float* imageIn, Vec<size_t> dims, unsigned int arraySize, float minVal = std::numeric_limits<float>::lowest(), float maxVal = std::numeric_limits<float>::max(), int device = 0); +IMAGE_PROCESSOR_API unsigned int* histogram(const double* imageIn, Vec<size_t> dims, unsigned int arraySize, double minVal = std::numeric_limits<double>::lowest(), double maxVal = std::numeric_limits<double>::max(), int device = 0); IMAGE_PROCESSOR_API unsigned char* gaussianFilter(const unsigned char* imageIn, Vec<size_t> dims, Vec<float> sigmas, unsigned char** imageOut = NULL, int device = 0); IMAGE_PROCESSOR_API unsigned short* gaussianFilter(const unsigned short* imageIn, Vec<size_t> dims, Vec<float> sigmas, unsigned short** imageOut = NULL, int device = 0); diff --git a/src/c/Cuda/CudaHistogram.cuh b/src/c/Cuda/CudaHistogram.cuh index 6a355fe5..350a392e 100644 --- a/src/c/Cuda/CudaHistogram.cuh +++ b/src/c/Cuda/CudaHistogram.cuh @@ -9,11 +9,11 @@ #include "Defines.h" template <class PixelType> -__global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t* histogram, PixelType minVal, double binSize, +__global__ void cudaHistogramCreate( PixelType* values, size_t numValues, unsigned int* histogram, PixelType minVal, double binSize, unsigned int numBins) { //This code is modified from that of Sanders - Cuda by Example - extern __shared__ size_t tempHisto[]; + extern __shared__ unsigned int tempHisto[]; if (threadIdx.x<numBins) tempHisto[threadIdx.x] = 0; @@ -27,7 +27,7 @@ __global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t { size_t binNum = (size_t)MAX( 0.0, ( (values[i])-minVal) / binSize ); binNum = MIN(binNum, (size_t)numBins-1); - atomicAdd(&(tempHisto[binNum]), (size_t)1); + atomicAdd(&(tempHisto[binNum]), (unsigned int)1); i += stride; } @@ -38,7 +38,7 @@ __global__ void cudaHistogramCreate( PixelType* values, size_t numValues, size_t __syncthreads(); } -__global__ void cudaNormalizeHistogram(size_t* histogram, double* normHistogram, unsigned int numBins, double divisor) +__global__ void cudaNormalizeHistogram(unsigned int* histogram, double* normHistogram, unsigned int numBins, double divisor) { int i = threadIdx.x + blockIdx.x*blockDim.x; int stride = blockDim.x * gridDim.x; @@ -50,18 +50,18 @@ __global__ void cudaNormalizeHistogram(size_t* histogram, double* normHistogram, } template <class PixelType> -size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, PixelType maxVal, PixelType minVal, const PixelType* imageIn) +unsigned int* createHistogram(int device, size_t arraySize, Vec<size_t> dims, PixelType maxVal, PixelType minVal, const PixelType* imageIn) { cudaSetDevice(device); cudaDeviceProp props; cudaGetDeviceProperties(&props,device); - if ((size_t)props.sharedMemPerBlock<sizeof(size_t)*arraySize) + if ((size_t)props.sharedMemPerBlock<sizeof(unsigned int)*arraySize) throw std::runtime_error("Too many bins to calculate on GPU with current shared memory constraints!"); - size_t* deviceHist; - HANDLE_ERROR(cudaMalloc((void**)&deviceHist,sizeof(size_t)*arraySize)); - HANDLE_ERROR(cudaMemset(deviceHist,0,sizeof(size_t)*arraySize)); + unsigned int* deviceHist; + HANDLE_ERROR(cudaMalloc((void**)&deviceHist,sizeof(unsigned int)*arraySize)); + HANDLE_ERROR(cudaMemset(deviceHist,0,sizeof(unsigned int)*arraySize)); DEBUG_KERNEL_CHECK(); size_t availMem, total; @@ -86,7 +86,7 @@ size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, Pi int threads = (int)MIN(numValues,maxThreads); int blocks = (int)MIN(numValues/threads,(size_t)props.multiProcessorCount); - cudaHistogramCreate<<<blocks,threads,sizeof(size_t)*arraySize>>>(deviceBuffer, numValues, deviceHist, minVal, binSize, arraySize); + cudaHistogramCreate<<<blocks,threads,sizeof(unsigned int)*arraySize>>>(deviceBuffer, numValues, deviceHist, minVal, binSize, arraySize); DEBUG_KERNEL_CHECK(); cudaThreadSynchronize(); } @@ -97,26 +97,26 @@ size_t* createHistogram(int device, unsigned int arraySize, Vec<size_t> dims, Pi } template <class PixelType> -size_t* cCalculateHistogram(const PixelType* imageIn, Vec<size_t> dims, unsigned int arraySize, +unsigned int* cCalculateHistogram(const PixelType* imageIn, Vec<size_t> dims, size_t arraySize, PixelType minVal=std::numeric_limits<PixelType>::lowest(), PixelType maxVal=std::numeric_limits<PixelType>::max(), int device=0) { - size_t* hostHist = new size_t[arraySize]; + unsigned int* hostHist = new unsigned int[arraySize]; - size_t* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn); + unsigned int* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn); - HANDLE_ERROR(cudaMemcpy(hostHist,deviceHist,sizeof(size_t)*arraySize,cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(hostHist,deviceHist,sizeof(unsigned int)*arraySize,cudaMemcpyDeviceToHost)); HANDLE_ERROR(cudaFree(deviceHist)); return hostHist; } template <class PixelType> -double* cNormalizeHistogram(const PixelType* imageIn, Vec<size_t> dims, unsigned int arraySize, +double* cNormalizeHistogram(const PixelType* imageIn, Vec<size_t> dims, size_t arraySize, PixelType minVal=std::numeric_limits<PixelType>::lowest(), PixelType maxVal=std::numeric_limits<PixelType>::max(), int device=0) { - size_t* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn); + unsigned int* deviceHist = createHistogram(device, arraySize, dims, maxVal, minVal, imageIn); cudaDeviceProp props; cudaGetDeviceProperties(&props,device); @@ -144,7 +144,7 @@ PixelType cOtsuThresholdValue(const PixelType* imageIn, Vec<size_t> dims, int de { PixelType minVal, maxVal; cGetMinMax(imageIn,dims,minVal,maxVal,device); - unsigned int arraySize = NUM_BINS; + size_t arraySize = NUM_BINS; double* hist = cNormalizeHistogram(imageIn,dims,arraySize,minVal,maxVal,device); diff --git a/src/c/Cuda/CudaImageContainerClean.cuh b/src/c/Cuda/CudaImageContainerClean.cuh index 913a7a3a..8fd4d25c 100644 --- a/src/c/Cuda/CudaImageContainerClean.cuh +++ b/src/c/Cuda/CudaImageContainerClean.cuh @@ -8,60 +8,62 @@ class CudaImageContainerClean : public CudaImageContainer<PixelType> public: CudaImageContainerClean(const PixelType* imageIn, Vec<size_t> dims, int device=0) { - defaults(); - image = NULL; - maxImageDims = dims; - roiSizes = dims; + this->defaults(); + this->image = NULL; + this->maxImageDims = dims; + this->image = NULL; + this->maxImageDims = dims; + this->roiSizes = dims; this->device = device; loadImage(imageIn,dims); }; CudaImageContainerClean(Vec<size_t> dims, int device=0) { - defaults(); - image = NULL; - maxImageDims = dims; - imageDims = dims; - roiSizes = dims; + this->defaults(); + this->image = NULL; + this->maxImageDims = dims; + this->imageDims = dims; + this->roiSizes = dims; this->device = device; HANDLE_ERROR(cudaSetDevice(device)); - HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(PixelType)*dims.product())); - HANDLE_ERROR(cudaMemset(image,0,sizeof(PixelType)*dims.product())); + HANDLE_ERROR(cudaMalloc((void**)&this->image,sizeof(PixelType)*dims.product())); + HANDLE_ERROR(cudaMemset(this->image,0,sizeof(PixelType)*dims.product())); }; ~CudaImageContainerClean() { - if (image!=NULL) + if (this->image!=NULL) { - HANDLE_ERROR(cudaSetDevice(device)); + HANDLE_ERROR(cudaSetDevice(this->device)); try { - HANDLE_ERROR(cudaFree(image)); + HANDLE_ERROR(cudaFree(this->image)); } catch (char* err) { if (err!=NULL) err[0] = 'e'; } - image = NULL; + this->image = NULL; } } CudaImageContainerClean(const CudaImageContainerClean& other) { - device = other.getDeviceNumber(); - imageDims = other.getDims(); - image = NULL; + this->device = other.getDeviceNumber(); + this->imageDims = other.getDims(); + this->image = NULL; - HANDLE_ERROR(cudaSetDevice(device)); + HANDLE_ERROR(cudaSetDevice(this->device)); - if (imageDims>Vec<size_t>(0,0,0)) + if (this->imageDims>Vec<size_t>(0,0,0)) { - HANDLE_ERROR(cudaMalloc((void**)&image,sizeof(PixelType)*imageDims.product())); - HANDLE_ERROR(cudaMemcpy(image,other.getConstImagePointer(),sizeof(PixelType)*imageDims.product(),cudaMemcpyDeviceToDevice)); + HANDLE_ERROR(cudaMalloc((void**)&this->image,sizeof(PixelType)*this->imageDims.product())); + HANDLE_ERROR(cudaMemcpy(this->image,other.getConstImagePointer(),sizeof(PixelType)*this->imageDims.product(),cudaMemcpyDeviceToDevice)); } } protected: - CudaImageContainerClean() : CudaImageContainer(){}; + CudaImageContainerClean() : CudaImageContainer<PixelType>(){}; }; \ No newline at end of file diff --git a/src/c/Cuda/CudaUtilities.cu b/src/c/Cuda/CudaUtilities.cu index 475f5e2f..ef66d9dd 100644 --- a/src/c/Cuda/CudaUtilities.cu +++ b/src/c/Cuda/CudaUtilities.cu @@ -1,5 +1,16 @@ #include "CudaUtilities.cuh" +int bitscanReverse(size_t mask) +{ + for ( int i=63; i>=0; --i ) + { + if ( (mask >> i) && 0x01 ) + return i; + } + + return -1; +} + void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 &blocks, dim3 &threads, size_t maxThreads/*=std::numeric_limits<size_t>::max()*/) { @@ -49,8 +60,7 @@ void calcBlockThread(const Vec<size_t>& dims, const cudaDeviceProp &prop, dim3 & } else { - unsigned long index; - _BitScanReverse(&index,unsigned long(mxThreads)); + int index = bitscanReverse(mxThreads); int dim = index/3; threads.x = 1 << MAX(dim,(int)index - 2*dim); @@ -208,7 +218,7 @@ bool checkFreeMemory(size_t needed, int device, bool throws/*=false*/) if (throws) { char buff[255]; - sprintf_s(buff,"Out of CUDA Memory!\nNeed: %zu\nHave: %zu\n",needed,free); + sprintf(buff,"Out of CUDA Memory!\nNeed: %zu\nHave: %zu\n",needed,free); throw std::runtime_error(buff); } return false; diff --git a/src/c/Cuda/CudaUtilities.cuh b/src/c/Cuda/CudaUtilities.cuh index 534cf0d4..6658f5e4 100644 --- a/src/c/Cuda/CudaUtilities.cuh +++ b/src/c/Cuda/CudaUtilities.cuh @@ -9,6 +9,7 @@ #include <cuda_occupancy.h> #include <functional> +#include <limits> template <typename T, typename U> int getKernelMaxThreadsSharedMem(T func, U f, int threadLimit = 0) @@ -39,23 +40,23 @@ int getKernelMaxThreads(T func, int threadLimit=0) #define DEBUG_KERNEL_CHECK() {} #endif // _DEBUG -static void HandleError( cudaError_t err, const char *file, int line ) +static void HandleError( cudaError_t err, const char* file, int line ) { if (err != cudaSuccess) { char* errorMessage = new char[255]; - sprintf_s(errorMessage, 255, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); + sprintf(errorMessage, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); throw std::runtime_error(errorMessage); } } #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) -inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) +inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort=true) { if (code != cudaSuccess) { char buff[255]; - sprintf_s(buff, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + sprintf(buff, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); throw std::runtime_error(buff); } } diff --git a/src/c/Cuda/Vec.h b/src/c/Cuda/Vec.h index ebbacfd2..a639696c 100644 --- a/src/c/Cuda/Vec.h +++ b/src/c/Cuda/Vec.h @@ -19,6 +19,8 @@ #include "Defines.h" #include <type_traits> +#include <stdlib.h> +#include <cmath> #undef min #undef max @@ -192,8 +194,6 @@ public: MIXED_PREFIX Vec<size_t> coordAddressOf(U idx)const { Vec<size_t> vecOut = Vec<size_t>(0,0,0); - if(x==0 && y==0 && z==0) - throw runtime_error("Not a valid vector to index into!"); if(x==0) { @@ -329,7 +329,7 @@ public: MIXED_PREFIX double EuclideanDistanceTo(const Vec<T>& other) { - return sqrt((double)(SQR(x-other.x)+SQR(y-other.y)+SQR(z-other.z))); + return ::sqrt((double)(SQR(x-other.x)+SQR(y-other.y)+SQR(z-other.z))); } MIXED_PREFIX double lengthSqr() diff --git a/src/c/Mex/MexCommand.cpp b/src/c/Mex/MexCommand.cpp index daee3a5d..d1b45c7d 100644 --- a/src/c/Mex/MexCommand.cpp +++ b/src/c/Mex/MexCommand.cpp @@ -9,16 +9,16 @@ #undef BUILD_COMMANDS // Module name info -HMODULE ModuleInfo::hModule; +//HMODULE ModuleInfo::hModule; std::string ModuleInfo::name; -BOOL WINAPI DllMain(HINSTANCE hInstDLL,DWORD fdwReason,LPVOID lpReserved) -{ - if(fdwReason == DLL_PROCESS_ATTACH) - ModuleInfo::setModuleHandle(hInstDLL); +// BOOL WINAPI DllMain(HINSTANCE hInstDLL,DWORD fdwReason,LPVOID lpReserved) +// { +// if(fdwReason == DLL_PROCESS_ATTACH) +// ModuleInfo::setModuleHandle(hInstDLL); - return TRUE; -} +// return TRUE; +// } // MexCommandInfo - This command can be used to provide an easy to parse matlab command info structure for all MEX commands. @@ -264,4 +264,11 @@ Vec<size_t> MexCommand::FillKernel(const mxArray* matKernelIn, float** kernel ) } return kernDims; -} \ No newline at end of file +} + +// Logical array creation specialziation +template <> +mxArray* MexCommand::createArray<bool>(mwSize ndim, const mwSize* dims) +{ + return mxCreateLogicalArray(ndim, dims); +} diff --git a/src/c/Mex/MexCommand.h b/src/c/Mex/MexCommand.h index 8b05448d..0754d3ad 100644 --- a/src/c/Mex/MexCommand.h +++ b/src/c/Mex/MexCommand.h @@ -1,25 +1,26 @@ #pragma once -#include "Vec.h" -#include "ScopedProcessMutex.h" +#include "../Cuda/Vec.h" +//#include "ScopedProcessMutex.h" #include "mex.h" -#include "windows.h" +//#include "windows.h" #undef min #undef max #include <vector> -#include <string> +#include <string.h> #include <algorithm> #include <exception> +#include <stdexcept> // Static class for holding some module information class ModuleInfo { public: - static void setModuleHandle(HMODULE handle) - { - hModule = handle; - } + // static void setModuleHandle(HMODULE handle) + // { + // hModule = handle; + // } static void initModuleInfo() { @@ -35,27 +36,28 @@ public: private: static std::string initName() { - if(hModule == NULL) - return ""; + return "Mex"; + // if(hModule == NULL) + // return ""; - char pathBuffer[1024]; - DWORD result = GetModuleFileName((HMODULE)hModule,pathBuffer,1024); - if(FAILED(result)) - return ""; + // char pathBuffer[1024]; + // DWORD result = GetModuleFileName((HMODULE)hModule,pathBuffer,1024); + // if(FAILED(result)) + // return ""; - std::string path(pathBuffer); + // std::string path(pathBuffer); - size_t startOffset = path.find_last_of('\\') + 1; - path = path.substr(startOffset); + // size_t startOffset = path.find_last_of('\\') + 1; + // path = path.substr(startOffset); - size_t endOffset = path.find_last_of('.'); + // size_t endOffset = path.find_last_of('.'); - return path.substr(0,endOffset); + // return path.substr(0,endOffset); } private: static std::string name; - static HMODULE hModule; + //static HMODULE hModule; }; // Abstract base class for mex commands @@ -101,7 +103,7 @@ public: try { - ScopedProcessMutex("CudaMutex"); + //ScopedProcessMutex("CudaMutex"); mexCmd->execute(nlhs,plhs,cmdNRHS,cmdPRHS); } catch(const std::runtime_error& err) { @@ -221,14 +223,6 @@ protected: // Simple template-specialization map for C++ to mex types template <typename T> struct TypeMap {static const mxClassID mxType;}; - template <> struct TypeMap<char> {static const mxClassID mxType = mxINT8_CLASS;}; - template <> struct TypeMap<short> {static const mxClassID mxType = mxINT16_CLASS;}; - template <> struct TypeMap<int> {static const mxClassID mxType = mxINT32_CLASS;}; - template <> struct TypeMap<unsigned char> {static const mxClassID mxType = mxUINT8_CLASS;}; - template <> struct TypeMap<unsigned short> {static const mxClassID mxType = mxUINT16_CLASS;}; - template <> struct TypeMap<unsigned int> {static const mxClassID mxType = mxUINT32_CLASS;}; - template <> struct TypeMap<float> {static const mxClassID mxType = mxSINGLE_CLASS;}; - template <> struct TypeMap<double> {static const mxClassID mxType = mxDOUBLE_CLASS;}; // General array creation method template <typename T> @@ -237,13 +231,6 @@ protected: return mxCreateNumericArray(ndim, dims, TypeMap<T>::mxType, mxREAL); } - // Logical array creation specialziation - template <> - static mxArray* createArray<bool>(mwSize ndim, const mwSize* dims) - { - return mxCreateLogicalArray(ndim, dims); - } - template <typename T> static void setupImagePointers(const mxArray* imageIn, T** image, Vec<size_t>* dims, mxArray** argOut = NULL, T** imageOut = NULL) { @@ -271,7 +258,7 @@ protected: } - static Vec<size_t> MexCommand::FillKernel(const mxArray* matKernelIn, float** kernel); + static Vec<size_t> FillKernel(const mxArray* matKernelIn, float** kernel); private: static const size_t m_numCommands; @@ -280,5 +267,16 @@ private: std::string m_cmdString; }; +template <> struct MexCommand::TypeMap<char> {static const mxClassID mxType = mxINT8_CLASS;}; +template <> struct MexCommand::TypeMap<short> {static const mxClassID mxType = mxINT16_CLASS;}; +template <> struct MexCommand::TypeMap<int> {static const mxClassID mxType = mxINT32_CLASS;}; +template <> struct MexCommand::TypeMap<unsigned char> {static const mxClassID mxType = mxUINT8_CLASS;}; +template <> struct MexCommand::TypeMap<unsigned short> {static const mxClassID mxType = mxUINT16_CLASS;}; +template <> struct MexCommand::TypeMap<unsigned int> {static const mxClassID mxType = mxUINT32_CLASS;}; +template <> struct MexCommand::TypeMap<float> {static const mxClassID mxType = mxSINGLE_CLASS;}; +template <> struct MexCommand::TypeMap<double> {static const mxClassID mxType = mxDOUBLE_CLASS;}; + +template <> +mxArray* MexCommand::createArray<bool>(mwSize ndim, const mwSize* dims); #include "CommandList.h" diff --git a/src/c/Mex/MexDeviceCount.cpp b/src/c/Mex/MexDeviceCount.cpp index 94184bd7..eaa31372 100644 --- a/src/c/Mex/MexDeviceCount.cpp +++ b/src/c/Mex/MexDeviceCount.cpp @@ -12,7 +12,7 @@ void MexDeviceCount::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray* int numDevices = memoryStats(&memStats); plhs[0] = mxCreateDoubleScalar(numDevices); const char* fieldNames[] = {"total","available"}; - mwSize dims[2] ={1,numDevices}; + mwSize dims[2] ={1,(mwSize)numDevices}; plhs[1] = mxCreateStructArray(2,dims,2,fieldNames); int total_field = mxGetFieldNumber(plhs[1],"total"); diff --git a/src/c/Mex/MexDeviceStats.cpp b/src/c/Mex/MexDeviceStats.cpp index cf197b88..b0d66220 100644 --- a/src/c/Mex/MexDeviceStats.cpp +++ b/src/c/Mex/MexDeviceStats.cpp @@ -7,7 +7,7 @@ void MexDeviceStats::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray* DevStats* devStats; int numDevices = deviceStats(&devStats); - mwSize dims[2] = {numDevices,1}; + mwSize dims[2] = {(mwSize)numDevices,1}; const char* fieldNames[] = {"name", "major", "minor", "constMem", "sharedMem", "totalMem", "tccDriver", "mpCount", "threadsPerMP", "warpSize", "maxThreads"}; plhs[0] = mxCreateStructArray(2, dims, 11, fieldNames); int name_field = mxGetFieldNumber(plhs[0], "name"); diff --git a/src/c/Mex/MexHistogram.cpp b/src/c/Mex/MexHistogram.cpp index b81c9da5..d1876a99 100644 --- a/src/c/Mex/MexHistogram.cpp +++ b/src/c/Mex/MexHistogram.cpp @@ -10,7 +10,7 @@ device = mat_to_c((int)mxGetScalar(prhs[4])); unsigned int arraySize = (unsigned int)mxGetScalar(prhs[1]); - size_t* hist; + unsigned int* hist; Vec<size_t> imageDims; if (mxIsUint8(prhs[0])) diff --git a/src/c/Mex/MexResize.cpp b/src/c/Mex/MexResize.cpp new file mode 100644 index 00000000..f879e95d --- /dev/null +++ b/src/c/Mex/MexResize.cpp @@ -0,0 +1,183 @@ +#include "MexCommand.h" +#include "Vec.h" +#include "CWrappers.h" +#include "Defines.h" + +void MexResize::execute(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) const +{ + Vec<size_t> imageInDims(0, 0, 0); + setupDims(prhs[0], &imageInDims); + Vec<double> reductionFactors(0.0, 0.0, 0.0); + if(!mxIsEmpty(prhs[1])) + { + double* reductionD = (double*)mxGetData(prhs[1]); + reductionFactors = Vec<double>(reductionD[0], reductionD[1], reductionD[2]); + } + + Vec<size_t> imageOutDims(0, 0, 0); + if(!mxIsEmpty(prhs[2])) + { + double* outDim = (double*)mxGetData(prhs[2]); + imageOutDims = Vec<size_t>(outDim[0], outDim[1], outDim[2]); + + reductionFactors = Vec<double>(imageOutDims)/Vec<double>(imageInDims); + } + + char method[36]; + ReductionMethods mthd = REDUC_MEAN; + mxGetString(prhs[3], method, 255); + if(strcmp(method, "mean")==0) + mthd = REDUC_MEAN; + else if(strcmp(method, "median")==0) + mthd = REDUC_MEDIAN; + else if(strcmp(method, "min")==0) + mthd = REDUC_MIN; + else if(strcmp(method, "max")==0) + mthd = REDUC_MAX; + else if(strcmp(method, "gaussian")==0) + mthd = REDUC_GAUS; + else + mexErrMsgTxt("Method of resize not supported!"); + + int device = 0; + if(nrhs>4) + device = mat_to_c((int)mxGetScalar(prhs[4])); + + imageOutDims = Vec<size_t>((Vec<double>(imageInDims)*reductionFactors).floor()); + + if(mxIsLogical(prhs[0])) + { + bool* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsUint8(prhs[0])) + { + unsigned char* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsUint16(prhs[0])) + { + unsigned short* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsInt16(prhs[0])) + { + short* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsUint32(prhs[0])) + { + unsigned int* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsInt32(prhs[0])) + { + int* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsSingle(prhs[0])) + { + float* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else if(mxIsDouble(prhs[0])) + { + double* imageIn, *imageOut; + setupInputPointers(prhs[0], &imageInDims, &imageIn); + setupOutputPointers(&(plhs[0]), imageOutDims, &imageOut); + + resize(imageIn, imageInDims, reductionFactors, imageOutDims, mthd, &imageOut, device); + } else + { + mexErrMsgTxt("Image type not supported!"); + } +} + +std::string MexResize::check(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) const +{ + if (nrhs<3 || nrhs>5) + return "Incorrect number of inputs!"; + + size_t numDims = mxGetNumberOfDimensions(prhs[0]); + if(numDims>3||numDims<2) + return "Image can only be either 2D or 3D!"; + + size_t numElFac = mxGetNumberOfElements(prhs[1]); + size_t numElSize = mxGetNumberOfElements(prhs[2]); + + if(numElFac==0&&numElSize==0) + return "Either ResizeFactor of ExplicitSize must be set."; + + if(!mxIsEmpty(prhs[1]) && (numElFac!=3||!mxIsDouble(prhs[1]))) + return "Resize amounts have to be an array of three doubles!"; + + if(!mxIsEmpty(prhs[2]) &&(numElSize!=3||!mxIsDouble(prhs[2]))) + return "ExplicitSize amounts have to be an array of three doubles!"; + + Vec<double> reductionFactors(0.0, 0.0, 0.0); + if(!mxIsEmpty(prhs[1])) + { + double* reductionD = (double*)mxGetData(prhs[1]); + reductionFactors = Vec<double>(reductionD[0], reductionD[1], reductionD[2]); + + if(reductionFactors.maxValue()>1) + return "Enlarging any dimension is currently not implemented!"; + } + + if(!mxIsEmpty(prhs[2])) + { + Vec<size_t> imageOutDims(0, 0, 0); + double* outDim = (double*)mxGetData(prhs[2]); + imageOutDims = Vec<size_t>(outDim[0], outDim[1], outDim[2]); + + Vec<size_t> imageInDims(0, 0, 0); + double* inDim = (double*)mxGetData(prhs[0]); + imageInDims = Vec<size_t>(inDim[0], inDim[1], inDim[2]); + + if (imageInDims>imageOutDims) + return "Enlarging any dimension is currently not implemented!"; + } + + return ""; +} + +void MexResize::usage(std::vector<std::string>& outArgs, std::vector<std::string>& inArgs) const +{ + inArgs.push_back("imageIn"); + inArgs.push_back("resizeFactor"); + inArgs.push_back("explicitSize"); + inArgs.push_back("method"); + inArgs.push_back("device"); + + outArgs.push_back("imageOut"); +} + +void MexResize::help(std::vector<std::string>& helpLines) const +{ + helpLines.push_back("Resizes image using various methods."); + + helpLines.push_back("\tImageIn -- can be an image up to three dimensions and of type (logical,uint8,int8,uint16,int16,uint32,int32,single,double)."); + helpLines.push_back("\tResizeFactor_rcz -- This represents the output size relative to input (r,c,z). Values less than one but greater than zero will reduce the image."); + helpLines.push_back("\t\tValues greater than one will enlarge the image. If this is an empty array, it will be calculated from the explicit resize."); + helpLines.push_back("\t\t\tIf both resizeFactor and explicitSize are both set, the explicitSize will be used."); + helpLines.push_back("\tExplicitSize_rcz -- This is the size that the output should be (r,c,z). If this is an empty array, then the resize factor is used."); + helpLines.push_back("\t\t\tIf both resizeFactor and explicitSize are both set, the explicitSize will be used."); + helpLines.push_back("\tMethod -- This is the neighborhood operation to apply when resizing (mean, median, min, max, gaussian)."); + helpLines.push_back("\tDevice -- this is an optional parameter that indicates which Cuda capable device to use."); + + helpLines.push_back("\tImageOut -- This will be a resize image the same type as the input image."); +} diff --git a/src/c/Mex/ScopedProcessMutex.cpp b/src/c/Mex/ScopedProcessMutex.cpp index 3fb5ddca..db9ae124 100644 --- a/src/c/Mex/ScopedProcessMutex.cpp +++ b/src/c/Mex/ScopedProcessMutex.cpp @@ -1,34 +1,34 @@ #include "ScopedProcessMutex.h" -HANDLE ScopedProcessMutex::mutexHandle = NULL; +//HANDLE ScopedProcessMutex::mutexHandle = NULL; ScopedProcessMutex::ScopedProcessMutex(const std::string& name) { - if ( !mutexHandle ) - { - mutexHandle = CreateMutex(NULL, false, name.c_str()); - if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED ) - mutexHandle = OpenMutex(SYNCHRONIZE, false, name.c_str()); + // if ( !mutexHandle ) + // { + // mutexHandle = CreateMutex(NULL, false, name.c_str()); + // if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED ) + // mutexHandle = OpenMutex(SYNCHRONIZE, false, name.c_str()); - if ( !mutexHandle ) - throw std::runtime_error("Error creating mutex handle!"); - } + // if ( !mutexHandle ) + // throw std::runtime_error("Error creating mutex handle!"); + // } - DWORD waitResult = WaitForSingleObject(mutexHandle, INFINITE); - if ( waitResult == WAIT_FAILED ) - { - mutexHandle = NULL; - throw std::runtime_error("Error unable to acquire mutex!"); - } - else if ( waitResult == WAIT_ABANDONED ) - { - mutexHandle = NULL; - throw std::runtime_error("Previous thread terminated without releasing mutex!"); - } + // DWORD waitResult = WaitForSingleObject(mutexHandle, INFINITE); + // if ( waitResult == WAIT_FAILED ) + // { + // mutexHandle = NULL; + // throw std::runtime_error("Error unable to acquire mutex!"); + // } + // else if ( waitResult == WAIT_ABANDONED ) + // { + // mutexHandle = NULL; + // throw std::runtime_error("Previous thread terminated without releasing mutex!"); + // } } ScopedProcessMutex::~ScopedProcessMutex() { - if ( mutexHandle ) - ReleaseMutex(mutexHandle); + // if ( mutexHandle ) + // ReleaseMutex(mutexHandle); } diff --git a/src/c/Mex/ScopedProcessMutex.h b/src/c/Mex/ScopedProcessMutex.h index 7974ff3f..7b88d947 100644 --- a/src/c/Mex/ScopedProcessMutex.h +++ b/src/c/Mex/ScopedProcessMutex.h @@ -1,7 +1,7 @@ #pragma once #include <string> -#include <windows.h> +//#include <windows.h> class ScopedProcessMutex { @@ -13,5 +13,5 @@ private: ScopedProcessMutex(){} ScopedProcessMutex(const ScopedProcessMutex& other){} - static HANDLE mutexHandle; + //static HANDLE mutexHandle; }; diff --git a/src/c/makefile b/src/c/makefile new file mode 100644 index 00000000..7e3519cf --- /dev/null +++ b/src/c/makefile @@ -0,0 +1,63 @@ +CU_FILES := $(wildcard Cuda/*.cu) +CUPP_FILES := $(wildcard Cuda/*.cpp) +CPP_FILES := $(wildcard Mex/*.cpp) +CUDA_OBJ = $(notdir $(CU_FILES:.cu=.o)) +CUDA_CPP_OBJ = $(notdir $(CUPP_FILES:.cpp=.o)) +CPP_OBJ = $(notdir $(CPP_FILES:.cpp=.o)) + +######################################### +# Nvidia nvcc parameters +######################################### +NVCC_PATH = nvcc +NVCC_FLAGS = -std=c++11 +SMODEL = -arch=sm_30 +NVCC_INC = -I/usr/include +C_COMPILER = clang++ + +######################################### +# gcc/g++ parameters +######################################### +GCC_FLAGS = -std=c++11 +GCC_LIB = -L/usr/local/cuda/lib64 /usr/local/MATLAB/R2017a/bin/glnxa64/libmx.so /usr/local/MATLAB/R2017a/bin/glnxa64/libmex.so +#-lcudart +GCC_INC = -I/usr/local/MATLAB/R2017a/extern/include -I./Cuda + +######################################### +# List all of the projects to build +######################################### +all: Mex.mexa64 + @echo Finished compiling + +######################################### +# link it all together +######################################### +Mex.mexa64: $(CUDA_OBJ) $(CPP_OBJ) $(CUDA_CPP_OBJ) + $(NVCC_PATH) $(GCC_LIB) -shared $^ -o Mex.mexa64 $(SMODEL) --linker-options '--no-undefined' + +######################################### +# compile the cuda tools +######################################### +%.o: Cuda/%.cu + $(NVCC_PATH) $(NVCC_FLAGS) $(SMODEL) --compiler-options '-fPIC' -c $< -o $@ $(NVCC_INC) -dc + +%.o: Cuda/%.cpp + $(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $< -o $@ + +# CWrappers.o: Cuda/CWrappers.cu +# $(NVCC_PATH) $(NVCC_FLAGS) --compiler-options '-fPIC' -c $^ -o $@ $(NVCC_INC) -dc + +######################################### +# compile the mex tools +######################################### +%.o: Mex/%.cpp + $(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $< -o $@ + +# cudaMex.o: Mex/CudaMex.cpp +# $(C_COMPILER) $(GCC_FLAGS) -fPIC $(GCC_INC) -c $^ -o $@ + +######################################### +# File to clean up +######################################### +clean: + @echo Cleaning + rm -f *.o Mex.mexa64 -- GitLab