Skip to content
Snippets Groups Projects
Commit 8fb85931 authored by Eric Wait's avatar Eric Wait
Browse files

Split out mean and variance operators on the device for reusability.

parent 3db40bda
No related branches found
No related tags found
No related merge requests found
#pragma once
#include "CudaImageContainer.cuh"
#include "CudaDeviceImages.cuh"
#include "CudaUtilities.h"
#include "CudaDeviceInfo.h"
#include "Kernel.cuh"
#include "KernelIterator.cuh"
#include "ImageDimensions.cuh"
#include "ImageChunk.h"
#include "Defines.h"
#include "Vec.h"
#include <cuda_runtime.h>
#include <limits>
#include <omp.h>
template <class PixelTypeIn>
__device__ void deviceMean(Vec<size_t> threadCoordinate, CudaImageContainer<PixelTypeIn> &imageIn, Kernel &constKernelMem, double& mu)
{
KernelIterator kIt(threadCoordinate, imageIn.getDims(), constKernelMem.getDims());
mu = 0;
size_t count = 0;
for (; !kIt.end(); ++kIt)
{
Vec<float> imInPos = kIt.getImageCoordinate();
double inVal = (double)imageIn(imInPos);
Vec<size_t> coord = kIt.getKernelCoordinate();
float kernVal = constKernelMem(coord);
if (kernVal != 0.0f)
{
mu += inVal * kernVal;
++count;
}
}
mu /= (double)count;
}
template <class PixelTypeIn>
__device__ void deviceVariance(Vec<size_t> threadCoordinate, CudaImageContainer<PixelTypeIn> &imageIn, Kernel &constKernelMem, double& varOut, double mu)
{
KernelIterator kIt(threadCoordinate, imageIn.getDims(), constKernelMem.getDims());
varOut = 0;
size_t count = 0;
for (; !kIt.end(); ++kIt)
{
Vec<float> imInPos = kIt.getImageCoordinate();
double inVal = (double)imageIn(imInPos);
Vec<size_t> coord = kIt.getKernelCoordinate();
float kernVal = constKernelMem(coord);
if (kernVal != 0.0f)
{
varOut += SQR(kernVal*inVal) - SQR(mu);
++count;
}
}
varOut /= (double)count;
}
template <class PixelTypeIn>
__device__ void deviceMeanAndVariance(Vec<size_t> threadCoordinate, CudaImageContainer<PixelTypeIn> &imageIn, Kernel &constKernelMem, double& mu, double& var)
{
mu = 0.0;
deviceMean(threadCoordinate, imageIn, constKernelMem, mu);
var = 0.0;
deviceVariance(threadCoordinate, imageIn, constKernelMem, var, mu);
}
template <class PixelTypeIn, class PixelTypeOut>
__global__ void cudaMeanAndVariance(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> muOut, CudaImageContainer<PixelTypeOut> varOut, Kernel constKernelMem, PixelTypeOut minValue, PixelTypeOut maxValue)
{
Vec<size_t> threadCoordinate;
GetThreadBlockCoordinate(threadCoordinate);
if (threadCoordinate < imageIn.getDims())
{
double mu = 0.0, var = 0.0;
deviceMeanAndVariance(threadCoordinate, imageIn, constKernelMem, mu, var);
muOut(threadCoordinate) = (PixelTypeOut)CLAMP(mu, minValue, maxValue);
varOut(threadCoordinate) = (PixelTypeOut)CLAMP(var, minValue, maxValue);
}
}
template <class PixelTypeIn, class PixelTypeOut>
__global__ void cudaMeanAndStd(CudaImageContainer<PixelTypeIn> imageIn, CudaImageContainer<PixelTypeOut> muOut, CudaImageContainer<PixelTypeOut> varOut, Kernel constKernelMem, PixelTypeOut minValue, PixelTypeOut maxValue)
{
Vec<size_t> threadCoordinate;
GetThreadBlockCoordinate(threadCoordinate);
if (threadCoordinate < imageIn.getDims())
{
double mu = 0.0, var = 0.0;
deviceMeanAndVariance(threadCoordinate, imageIn, constKernelMem, mu, var);
muOut(threadCoordinate) = (PixelTypeOut)CLAMP(mu, minValue, maxValue);
varOut(threadCoordinate) = (PixelTypeOut)CLAMP(sqrt(var), minValue, maxValue);
}
}
template <class PixelTypeIn, class PixelTypeOut>
void cMeanAndVariance(ImageContainer<PixelTypeIn> imageIn, ImageContainer<PixelTypeOut>& muOut, ImageContainer<PixelTypeOut>& varOut, ImageContainer<float> kernel, int device = -1)
{
const PixelTypeOut MIN_VAL = std::numeric_limits<PixelTypeOut>::lowest();
const PixelTypeOut MAX_VAL = std::numeric_limits<PixelTypeOut>::max();
const int NUM_BUFF_NEEDED = 3;
setUpOutIm<PixelTypeOut>(imageIn.getDims(), muOut);
setUpOutIm<PixelTypeOut>(imageIn.getDims(), varOut);
CudaDevices cudaDevs(cudaMeanAndVariance<PixelTypeIn, PixelTypeOut>, device);
size_t maxTypeSize = MAX(sizeof(PixelTypeIn), sizeof(PixelTypeOut));
std::vector<ImageChunk> chunks = calculateBuffers(imageIn.getDims(), NUM_BUFF_NEEDED, cudaDevs, maxTypeSize, kernel.getSpatialDims());
Vec<size_t> maxDeviceDims;
setMaxDeviceDims(chunks, maxDeviceDims);
omp_set_num_threads(MIN(chunks.size(), cudaDevs.getNumDevices()));
#pragma omp parallel default(shared)
{
const int CUDA_IDX = omp_get_thread_num();
const int N_THREADS = omp_get_num_threads();
const int CUR_DEVICE = cudaDevs.getDeviceIdx(CUDA_IDX);
CudaDeviceImages<PixelTypeOut> deviceImages(NUM_BUFF_NEEDED, maxDeviceDims, CUR_DEVICE);
Kernel constKernelMem(kernel, CUR_DEVICE);
for (int i = CUDA_IDX; i < chunks.size(); i += N_THREADS)
{
if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer()))
std::runtime_error("Error sending ROI to device!");
deviceImages.setAllDims(chunks[i].getFullChunkSize());
cudaMeanAndVariance<<<chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), *(deviceImages.getThirdBuffer()), constKernelMem, MIN_VAL, MAX_VAL);
chunks[i].retriveROI(muOut, deviceImages.getNextBuffer());
chunks[i].retriveROI(varOut, deviceImages.getThirdBuffer());
}
constKernelMem.clean();
}
}
......@@ -10,6 +10,8 @@
#include "Defines.h"
#include "Vec.h"
#include "CudaMeanAndVariance.cuh"
#include <cuda_runtime.h>
#include <limits>
#include <omp.h>
......@@ -22,24 +24,10 @@ __global__ void cudaMeanFilter(CudaImageContainer<PixelTypeIn> imageIn, CudaImag
if (threadCoordinate < imageIn.getDims())
{
KernelIterator kIt(threadCoordinate, imageIn.getDims(), constKernelMem.getDims());
double outVal = 0;
size_t count = 0;
for (; !kIt.end(); ++kIt)
{
Vec<float> imInPos = kIt.getImageCoordinate();
double inVal = (double)imageIn(imInPos);
Vec<size_t> coord = kIt.getKernelCoordinate();
float kernVal = constKernelMem(coord);
double mu = 0.0;
deviceMean(threadCoordinate, imageIn, constKernelMem,mu);
if (kernVal != 0.0f)
{
outVal += inVal * kernVal;
++count;
}
}
outVal /= (double)count;
imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue);
imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(mu, minValue, maxValue);
}
}
......
......@@ -10,6 +10,8 @@
#include "Defines.h"
#include "Vec.h"
#include "CudaMeanAndVariance.cuh"
#include <cuda_runtime.h>
#include <limits>
#include <omp.h>
......@@ -22,46 +24,13 @@ __global__ void cudaStdFilter(CudaImageContainer<PixelTypeIn> imageIn, CudaImage
if (threadCoordinate < imageIn.getDims())
{
KernelIterator kIt(threadCoordinate, imageIn.getDims(), constKernelMem.getDims());
double outVal = 0;
int kernVolume = 0;
for (; !kIt.end(); ++kIt)
{
Vec<float> imInPos = kIt.getImageCoordinate();
double inVal = (double)imageIn(imInPos);
Vec<size_t> coord = kIt.getKernelCoordinate();
float kernVal = constKernelMem(coord);
if (kernVal != 0.0f)
{
outVal += kernVal*inVal;
++kernVolume;
}
}
kIt.reset();
double mu = outVal / kernVolume;
double mu=0.0, var=0.0;
deviceMeanAndVariance(threadCoordinate, imageIn, constKernelMem, mu,var);
outVal = 0.0;
for (; !kIt.end(); ++kIt)
{
Vec<float> imInPos = kIt.getImageCoordinate();
double inVal = (double)imageIn(imInPos);
Vec<size_t> coord = kIt.getKernelCoordinate();
float kernVal = constKernelMem(coord);
if (kernVal != 0.0f)
{
outVal += SQR(kernVal*inVal - mu);
}
}
outVal = sqrt(outVal / MAX(1.0,((double)kernVolume-1.0)));
imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(outVal, minValue, maxValue);
imageOut(threadCoordinate) = (PixelTypeOut)CLAMP(sqrt(var), minValue, maxValue);
}
}
template <class PixelTypeIn, class PixelTypeOut>
void cStdFilter(ImageContainer<PixelTypeIn> imageIn, ImageContainer<PixelTypeOut>& imageOut, ImageContainer<float> kernel, int numIterations = 1, int device = -1)
{
......
......@@ -128,6 +128,7 @@
<ClInclude Include="Cuda\CudaEntropyFilter.cuh" />
<ClInclude Include="Cuda\CudaGetMinMax.cuh" />
<ClInclude Include="Cuda\CudaLoG.cuh" />
<ClInclude Include="Cuda\CudaMeanAndVariance.cuh" />
<ClInclude Include="Cuda\CudaMeanFilter.cuh" />
<ClInclude Include="Cuda\CudaMedianFilter.cuh" />
<ClInclude Include="Cuda\CudaOpener.cuh" />
......
......@@ -140,6 +140,9 @@
<ClInclude Include="Cuda\CudaAddTwoImages.cuh">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Cuda\CudaMeanAndVariance.cuh">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Cuda\ImageDimensions.cuh">
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment