...
 
Commits (34)

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

......@@ -16,7 +16,7 @@ cudaPath = fileparts(which('HIP.BuildMexObject'));
cd(cudaPath)
% create the m files that correspond to the commands in the mex interface
HIP.BuildMexObject('..\..\c\Mex.mexw64','Cuda','HIP');
HIP.BuildMexObject(fullfile('..','..','c',['Mex.' mexext]),'Cuda','HIP');
packagePath = cudaPath;
cudaPath = fullfile(cudaPath,'@Cuda');
......
#pragma once
/// Same-type overload: no conversion is required, so the destination pointer
/// is simply made to alias the caller's buffer.
///
/// \param dst     Receives `src`; nothing is allocated or copied.
/// \param src     Buffer to expose through `*dst`.
/// \param length  Element count; not used by this overload.
template <typename T>
void toDevice(T** dst, T* src, std::size_t length)
{
    *dst = src;
}
/// Converting overload: builds a freshly allocated array of `T` holding each
/// element of `src` cast to `T`, and stores it in `*dst`.
///
/// \param dst     Receives a `new T[length]` array. Nothing in this function
///                releases it, so the caller must `delete[]` it.
/// \param src     Source elements of type `U`; read only, never modified.
/// \param length  Number of elements to convert.
template <typename T, typename U>
void toDevice(T** dst, U* src, std::size_t length)
{
    T* converted = new T[length];

    // Single pass: one cast per element.
    for (std::size_t i = 0; i < length; ++i)
        converted[i] = (T)(src[i]);

    *dst = converted;
}
/// Same-type overload: the destination pointer is made to alias the source
/// buffer. No memory is allocated and no elements are copied.
///
/// \param dst     Receives the pointer currently held in `*src`.
/// \param src     Address of the source buffer pointer.
/// \param length  Element count; not used by this overload.
template <typename T>
void fromDevice(T** dst, T** src, std::size_t length)
{
    *dst = *src;
}
/// Differing-type overload: allocates an array of `length` elements of `T`
/// and stores it in `*dst`. `src` is not read here and the new array's
/// elements are left default-initialized (indeterminate for built-in types).
///
/// NOTE(review): presumably the array is filled afterwards by a converting
/// copy step elsewhere -- confirm against callers.
///
/// \param dst     Receives a `new T[length]` array; any pointer previously
///                held in `*dst` is overwritten without being freed.
/// \param src     Unused by this overload.
/// \param length  Number of elements to allocate.
template <typename T, typename U>
void fromDevice(T** dst, U* src, std::size_t length)
{
    *dst = new T[length];
}
/// Same-type overload: no element copy is performed; `*dst` is simply set to
/// the pointer held in `*src`, so both refer to the same buffer afterwards.
///
/// \param dst     Receives the pointer currently held in `*src`.
/// \param src     Address of the source buffer pointer.
/// \param length  Element count; not used by this overload.
template <typename T>
void copyBuffer(T** dst, T** src, std::size_t length)
{
    *dst = *src;
}
template <typename T, typename U>
void copyBuffer(T** dst, U** src, size_t length)
void copyBuffer(T** dst, U** src, std::size_t length)
{
for (size_t i = 0; i < length; ++i)
for (std::size_t i = 0; i < length; ++i)
(*dst)[i] = (T)((*src)[i]);
U* toDelete = *src;
......
#include "Defines.h"
#include "CHelpers.h"
#include <cstring>
#include <vector>
#include "Defines.h"
float* createEllipsoidKernel(Vec<size_t> radii, Vec<size_t>& kernelDims)
float* createEllipsoidKernel(Vec<std::size_t> radii, Vec<std::size_t>& kernelDims)
{
kernelDims = radii*2 +1;
float* kernel = new float[kernelDims.product()];
memset(kernel,0,sizeof(float)*kernelDims.product());
std::memset(kernel,0,sizeof(float)*kernelDims.product());
Vec<int> mid((kernelDims-1)/2);
Vec<float> dimScale = Vec<float>(1,1,1) / Vec<float>(radii.pwr(2));
......
......@@ -14,6 +14,6 @@
#define IMAGE_PROCESSOR_API
#endif // IMAGE_PROCESSOR_DLL
IMAGE_PROCESSOR_API float* createEllipsoidKernel(Vec<size_t> radii, Vec<size_t>& kernelDims);
IMAGE_PROCESSOR_API float* createEllipsoidKernel(Vec<std::size_t> radii, Vec<std::size_t>& kernelDims);
IMAGE_PROCESSOR_API int calcOtsuThreshold(const double* normHistogram, int numBins);
......@@ -38,7 +38,7 @@ void clearDevice()
return cDeviceStats(stats);
}
int memoryStats(size_t** stats)
int memoryStats(std::size_t** stats)
{
return cMemoryStats(stats);
}
......@@ -280,47 +280,47 @@ void gaussian(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut
}
void getMinMax(const bool* imageIn, size_t numElements, bool& minVal, bool& maxVal, int device /*= 0*/)
void getMinMax(const bool* imageIn, std::size_t numElements, bool& minVal, bool& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const char* imageIn, size_t numElements, char& minVal, char& maxVal, int device /*= 0*/)
void getMinMax(const char* imageIn, std::size_t numElements, char& minVal, char& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const unsigned char* imageIn, size_t numElements, unsigned char& minVal, unsigned char& maxVal, int device /*= 0*/)
void getMinMax(const unsigned char* imageIn, std::size_t numElements, unsigned char& minVal, unsigned char& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const short* imageIn, size_t numElements, short& minVal, short& maxVal, int device /*= 0*/)
void getMinMax(const short* imageIn, std::size_t numElements, short& minVal, short& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const unsigned short* imageIn, size_t numElements, unsigned short& minVal, unsigned short& maxVal, int device /*= 0*/)
void getMinMax(const unsigned short* imageIn, std::size_t numElements, unsigned short& minVal, unsigned short& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const int* imageIn, size_t numElements, int& minVal, int& maxVal, int device /*= 0*/)
void getMinMax(const int* imageIn, std::size_t numElements, int& minVal, int& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const unsigned int* imageIn, size_t numElements, unsigned int& minVal, unsigned int& maxVal, int device /*= 0*/)
void getMinMax(const unsigned int* imageIn, std::size_t numElements, unsigned int& minVal, unsigned int& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const float* imageIn, size_t numElements, float& minVal, float& maxVal, int device /*= 0*/)
void getMinMax(const float* imageIn, std::size_t numElements, float& minVal, float& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
void getMinMax(const double* imageIn, size_t numElements, double& minVal, double& maxVal, int device /*= 0*/)
void getMinMax(const double* imageIn, std::size_t numElements, double& minVal, double& maxVal, int device /*= 0*/)
{
cGetMinMax(imageIn, numElements, minVal, maxVal, device);
}
......@@ -786,7 +786,7 @@ void gaussian(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut
}
void sum(const ImageContainer<bool> imageIn, size_t& valOut, int device /*= -1*/)
void sum(const ImageContainer<bool> imageIn, std::size_t& valOut, int device /*= -1*/)
{
cSum(imageIn, valOut, device);
}
......@@ -796,7 +796,7 @@ void gaussian(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut
cSum(imageIn, valOut, device);
}
void sum(const ImageContainer<unsigned char> imageIn, size_t& valOut, int device /*= -1*/)
void sum(const ImageContainer<unsigned char> imageIn, std::size_t& valOut, int device /*= -1*/)
{
cSum(imageIn, valOut, device);
}
......@@ -806,7 +806,7 @@ void gaussian(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut
cSum(imageIn, valOut, device);
}
void sum(const ImageContainer<unsigned short> imageIn, size_t& valOut, int device /*= -1*/)
void sum(const ImageContainer<unsigned short> imageIn, std::size_t& valOut, int device /*= -1*/)
{
cSum(imageIn, valOut, device);
}
......@@ -816,7 +816,7 @@ void gaussian(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut
cSum(imageIn, valOut, device);
}
void sum(const ImageContainer<unsigned int> imageIn, size_t& valOut, int device /*= -1*/)
void sum(const ImageContainer<unsigned int> imageIn, std::size_t& valOut, int device /*= -1*/)
{
cSum(imageIn, valOut, device);
}
......
......@@ -18,7 +18,7 @@ IMAGE_PROCESSOR_API void clearDevice();
IMAGE_PROCESSOR_API int deviceCount();
IMAGE_PROCESSOR_API int deviceStats(DevStats** stats);
IMAGE_PROCESSOR_API int memoryStats(size_t** stats);
IMAGE_PROCESSOR_API int memoryStats(std::size_t** stats);
/// Example wrapper header calls
......@@ -73,15 +73,15 @@ IMAGE_PROCESSOR_API void gaussian(const ImageContainer<unsigned int> imageIn, Im
IMAGE_PROCESSOR_API void gaussian(const ImageContainer<float> imageIn, ImageContainer<float>& imageOut, Vec<double> sigmas, int numIterations = 1, int device = -1);
IMAGE_PROCESSOR_API void gaussian(const ImageContainer<double> imageIn, ImageContainer<double>& imageOut, Vec<double> sigmas, int numIterations = 1, int device = -1);
IMAGE_PROCESSOR_API void getMinMax(const bool* imageIn, size_t numElements, bool& minVal, bool& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const char* imageIn, size_t numElements, char& minVal, char& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned char* imageIn, size_t numElements, unsigned char& minVal, unsigned char& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const short* imageIn, size_t numElements, short& minVal, short& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned short* imageIn, size_t numElements, unsigned short& minVal, unsigned short& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const int* imageIn, size_t numElements, int& minVal, int& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned int* imageIn, size_t numElements, unsigned int& minVal, unsigned int& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const float* imageIn, size_t numElements, float& minVal, float& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const double* imageIn, size_t numElements, double& minVal, double& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const bool* imageIn, std::size_t numElements, bool& minVal, bool& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const char* imageIn, std::size_t numElements, char& minVal, char& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned char* imageIn, std::size_t numElements, unsigned char& minVal, unsigned char& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const short* imageIn, std::size_t numElements, short& minVal, short& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned short* imageIn, std::size_t numElements, unsigned short& minVal, unsigned short& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const int* imageIn, std::size_t numElements, int& minVal, int& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const unsigned int* imageIn, std::size_t numElements, unsigned int& minVal, unsigned int& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const float* imageIn, std::size_t numElements, float& minVal, float& maxVal, int device = 0);
IMAGE_PROCESSOR_API void getMinMax(const double* imageIn, std::size_t numElements, double& minVal, double& maxVal, int device = 0);
IMAGE_PROCESSOR_API void highPassFilter(const ImageContainer<bool> imageIn, ImageContainer<bool>& imageOut, Vec<double> sigmas, int device = -1);
IMAGE_PROCESSOR_API void highPassFilter(const ImageContainer<char> imageIn, ImageContainer<char>& imageOut, Vec<double> sigmas, int device = -1);
......@@ -183,13 +183,13 @@ IMAGE_PROCESSOR_API void stdFilter(const ImageContainer<unsigned int> imageIn, I
IMAGE_PROCESSOR_API void stdFilter(const ImageContainer<float> imageIn, ImageContainer<float>& imageOut, ImageContainer<float> kernel, int numIterations = 1, int device = -1);
IMAGE_PROCESSOR_API void stdFilter(const ImageContainer<double> imageIn, ImageContainer<double>& imageOut, ImageContainer<float> kernel, int numIterations = 1, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<bool> imageIn, size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<bool> imageIn, std::size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<char> imageIn, long long& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned char> imageIn, size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned char> imageIn, std::size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<short> imageIn, long long& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned short> imageIn, size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned short> imageIn, std::size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<int> imageIn, long long& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned int> imageIn, size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<unsigned int> imageIn, std::size_t& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<float> imageIn, double& valOut, int device = -1);
IMAGE_PROCESSOR_API void sum(const ImageContainer<double> imageIn, double& valOut, int device = -1);
......
......@@ -5,7 +5,7 @@
#include <string>
#include <iostream>
#include <math.h>
#include <cmath>
#include <stdio.h>
#ifdef _WIN32
......@@ -55,7 +55,7 @@ uint64 GetTimeMs64()
#endif
}
char* PrintTime(size_t timeInMS)
char* PrintTime(std::size_t timeInMS)
{
char* buff = new char[256];
double hr = floor(timeInMS / 3.6e+6);
......
......@@ -17,7 +17,7 @@
template <class PixelTypeIn1, class PixelTypeIn2, class PixelTypeOut>
__global__ void cudaAddTwoImages(CudaImageContainer<PixelTypeIn1> imageIn1, CudaImageContainer<PixelTypeIn2> imageIn2, CudaImageContainer<PixelTypeOut> imageOut, PixelTypeOut minValue, PixelTypeOut maxValue, double image2Factor=1.0)
{
Vec<size_t> threadCoordinate;
Vec<std::size_t> threadCoordinate;
GetThreadBlockCoordinate(threadCoordinate);
if (threadCoordinate < imageIn1.getDims() && threadCoordinate<imageIn2.getDims())
......@@ -41,10 +41,10 @@ void cAddTwoImages(ImageContainer<PixelTypeIn1> imageIn1, ImageContainer<PixelTy
CudaDevices cudaDevs(cudaAddTwoImages<PixelTypeIn1, PixelTypeIn2, PixelTypeOut>, device);
size_t maxTypeSize = MAX(sizeof(PixelTypeIn1), MAX(sizeof(PixelTypeIn1), sizeof(PixelTypeOut)));
std::size_t maxTypeSize = MAX(sizeof(PixelTypeIn1), MAX(sizeof(PixelTypeIn1), sizeof(PixelTypeOut)));
std::vector<ImageChunk> chunks = calculateBuffers(imageIn1.getDims(), NUM_BUFF_NEEDED, cudaDevs, maxTypeSize);
Vec<size_t> maxDeviceDims;
Vec<std::size_t> maxDeviceDims;
setMaxDeviceDims(chunks, maxDeviceDims);
omp_set_num_threads(MIN(chunks.size(), cudaDevs.getNumDevices()));
......
......@@ -27,10 +27,10 @@ void cClosure(ImageContainer<PixelTypeIn> imageIn, ImageContainer<PixelTypeOut>&
CudaDevices cudaDevs(cudaMaxFilter<PixelTypeIn, PixelTypeOut>, device);
size_t maxTypeSize = MAX(sizeof(PixelTypeIn), sizeof(PixelTypeOut));
std::size_t maxTypeSize = MAX(sizeof(PixelTypeIn), sizeof(PixelTypeOut));
std::vector<ImageChunk> chunks = calculateBuffers(imageIn.getDims(), NUM_BUFF_NEEDED, cudaDevs, maxTypeSize, kernel.getSpatialDims());
Vec<size_t> maxDeviceDims;
Vec<std::size_t> maxDeviceDims;
setMaxDeviceDims(chunks, maxDeviceDims);
omp_set_num_threads(MIN(chunks.size(), cudaDevs.getNumDevices()));
......
......@@ -5,7 +5,7 @@ template <typename PixelType>
class CudaDeviceImages
{
public:
CudaDeviceImages(int numBuffers, Vec<size_t> maxDeviceDims, int device)
CudaDeviceImages(int numBuffers, Vec<std::size_t> maxDeviceDims, int device)
{
deviceImages = new CudaImageContainerClean<PixelType>*[numBuffers];
......@@ -80,13 +80,13 @@ public:
curBuff = 0;
}
// Calls setDims(dims) on each of the numBuffers entries of deviceImages, so
// every buffer ends up with the same dimensions.
void setAllDims(Vec<std::size_t> dims)
{
    for (int i = 0; i < numBuffers; ++i)
        deviceImages[i]->setDims(dims);
}
void setNextDims(Vec<size_t> dims)
void setNextDims(Vec<std::size_t> dims)
{
if (numBuffers>1)
deviceImages[getNextBuffNum()]->setDims(dims);
......
......@@ -16,7 +16,7 @@ extern "C" void HandleAborts(int signal_number)
void CudaDevices::getCudaInfo(int device/*=-1*/)
{
// Get device count
int numDevices;
int numDevices = 0;
cudaGetDeviceCount(&numDevices);
if (numDevices==0)
......@@ -46,15 +46,15 @@ void CudaDevices::getCudaInfo(int device/*=-1*/)
{
cudaDeviceProp props;
cudaGetDeviceProperties(&props, deviceIdxList[i]);
size_t mTPB = props.maxThreadsPerBlock;
std::size_t mTPB = props.maxThreadsPerBlock;
if (maxThreadsPerBlock > mTPB)
maxThreadsPerBlock = mTPB;
size_t curAvailMem = memoryAvailable(deviceIdxList[i]);
std::size_t curAvailMem = memoryAvailable(deviceIdxList[i]);
if (availMem > curAvailMem)
availMem = curAvailMem;
size_t curSharedMem = props.sharedMemPerBlock;
std::size_t curSharedMem = props.sharedMemPerBlock;
if (sharedMemPerBlock > curSharedMem)
sharedMemPerBlock = curSharedMem;
}
......
......@@ -25,12 +25,12 @@ public:
{
maxThreadsPerBlock = MIN(maxThreadsPerBlock, getKernelMaxThreads(func));
}
// Overwrites the stored maxThreadsPerBlock with newMax; no range check or
// clamping is applied here.
void setMaxThreadsPerBlock(std::size_t newMax) { maxThreadsPerBlock = newMax; }
size_t getMaxThreadsPerBlock()const { return maxThreadsPerBlock; }
size_t getMinAvailMem()const { return availMem; }
size_t getMinSharedMem()const { return sharedMemPerBlock; }
size_t getNumDevices() const { return deviceIdxList.size(); }
std::size_t getMaxThreadsPerBlock()const { return maxThreadsPerBlock; }
std