diff --git a/.gitattributes b/.gitattributes
index abca70a88f3c90bd95e963ec6e31a2c9a14d831b..cef6a61f34be63968345836055fe384917241712 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,4 +1,7 @@
-*.mexw64 filter=lfs diff=lfs merge=lfs -text
 *.mat filter=lfs diff=lfs merge=lfs -text
 *.ai filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
+*.mexw64 filter=lfs diff=lfs merge=lfs -text
+*.mexa64 filter=lfs diff=lfs merge=lfs -text
+*.pyd filter=lfs diff=lfs merge=lfs -text
+*.so filter=lfs diff=lfs merge=lfs -text
diff --git a/src/MATLAB/+HIP/@Cuda/Mex.mexa64 b/src/MATLAB/+HIP/@Cuda/Mex.mexa64
index 3def436ab95e30defbeb9c51ddb6dc741df5a108..e588bd360d85259d9ab919d7900d1e3f5e53717d 100755
Binary files a/src/MATLAB/+HIP/@Cuda/Mex.mexa64 and b/src/MATLAB/+HIP/@Cuda/Mex.mexa64 differ
diff --git a/src/MATLAB/+HIP/@Cuda/Mex.mexw64 b/src/MATLAB/+HIP/@Cuda/Mex.mexw64
index 3701ee8f3d395e7359884d937ce9a91cd582b61c..b658d2b05218d51b0a95623eba479b62c8fa9988 100644
--- a/src/MATLAB/+HIP/@Cuda/Mex.mexw64
+++ b/src/MATLAB/+HIP/@Cuda/Mex.mexw64
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a145aed70562e248f8c065341b5f37069b8d52a66bf40e4c545cff25f2c62bc
-size 12214272
+oid sha256:75d48bac093946934c4cd46fbbe2aedd703f447d1ec8e08752c118eb23528e07
+size 12387840
diff --git a/src/MATLAB/+HSP/JANEWAY.mat b/src/MATLAB/+HSP/JANEWAY.mat
deleted file mode 100644
index daefe8d57f2e1fc54584c9059c50b2cffdc89ae7..0000000000000000000000000000000000000000
--- a/src/MATLAB/+HSP/JANEWAY.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6e3fbec3fff0ee2c8bb9052f5dfa7beae71122965170d22afb02a7e059eb49d4
-size 1972
diff --git a/src/MATLAB/+HSP/TESLA-LITE.mat b/src/MATLAB/+HSP/TESLA-LITE.mat
deleted file mode 100644
index 6ab896d0e9770a3370d8ba8bd500c634c17ea582..0000000000000000000000000000000000000000
--- a/src/MATLAB/+HSP/TESLA-LITE.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:de5bdcd862ef4791bf4075640a37c46f0fd75170041a9404bf4fa9842a09ec11
-size 4741
diff --git a/src/MATLAB/+HSP/WAITE-WW1.mat b/src/MATLAB/+HSP/WAITE-WW1.mat
deleted file mode 100644
index 525ee8efa68b867d56110be86dcc4aee91898d47..0000000000000000000000000000000000000000
--- a/src/MATLAB/+HSP/WAITE-WW1.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f074917db04ea1ea5f97f049af570c70abaa5610d9eb54cfed67b5872e5461d9
-size 8420
diff --git a/src/MATLAB/+ImProc/JANEWAY.mat b/src/MATLAB/+ImProc/JANEWAY.mat
deleted file mode 100644
index ee75084eb0e94f69153afb65eb1d3bb7617a9bc1..0000000000000000000000000000000000000000
--- a/src/MATLAB/+ImProc/JANEWAY.mat
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6f3bdad9858158dd94f001518148564141913d7257cb23293f4821fb8827d182
-size 5093
diff --git a/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m b/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m
deleted file mode 100644
index 18ca138db91ecf70a12c09c52113b9ca31adc35d..0000000000000000000000000000000000000000
--- a/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m
+++ /dev/null
@@ -1,72 +0,0 @@
-%Skeleton for Imaris batch in Matlab
-
-%ImarisLib.jar needs to be in the same folder.
-%Imaris needs to be running when executing the following code 
-
-%CC 2018-01-03 tested with Imaris 9.1.0 
-
-%Get the image folder. Only read *.ims images.
-infolder = uigetdir;
-files = [infolder '/*.ims'];
-listing = dir(files);
-nfiles = size(listing,1);
-
-%open files in Imaris sequentially
-for i = 1:nfiles
-    
-    filename = [infolder '/' listing(i).name];
-    filename = sprintf(filename);
-    vImarisApplication = StartImaris;
-    vImarisApplication.FileOpen(filename,'');
-    
-    %get dataset in Matlab
-    vDataSet = vImarisApplication.GetDataSet;
-    
-    %apply median filter to dataset
-    vImarisApplication.GetImageProcessing.MedianFilterChannel(vDataSet,0,[5 5 5]);
-    
-    %create Surfaces
-    ip = vImarisApplication.GetImageProcessing;
-    vNewSurfaces = ip.DetectSurfaces(vDataSet, [], 0, 1, 0, true, 0, '');
-    vNewSurfaces.SetName(sprintf('New Surface'));
-    vImarisApplication.GetSurpassScene.AddChild(vNewSurfaces,-1);
-    
-    %get Surface stats 
-    vSurpassComponent = vImarisApplication.GetSurpassSelection;
-    vImarisObject = vImarisApplication.GetFactory.ToSurfaces(vSurpassComponent);
-    vAllStatistics = vImarisObject.GetStatistics;
-    vNames = cell(vAllStatistics.mNames);
-    vValues = vAllStatistics.mValues;
-    disp(unique(vNames))
-    
-    %save ims file
-    newFilename = strcat(filename(1:end-4),'new.ims');
-    vImarisApplication.FileSave(newFilename,'');
-    
-    pause(5);
-    
-    %Clear java handles to clear up memory and prevent future errors
-    clear 'Imaris/IApplicationPrxHelper';
-    clear 'Imaris/IDataSetPrxHelper';
-    clear 'Imaris/IDataContainerPrxHelper';
-    clear 'Imaris/IDataItemPrxHelper';
-    clear 'Imaris/cStatisticValues';
-    clear 'ImarisLib';
-    clear 'vSurpassScene';
-    clear 'vDataSet';
-    clear 'vAllStatistics';
-    clear 'err';
-end
-
-%Quit Imaris Application after all is done
-vImarisApplication.SetVisible(~vImarisApplication.GetVisible);
-vImarisApplication.Quit;
-
-function aImarisApplication = StartImaris
-    javaaddpath ImarisLib.jar;
-    vImarisLib = ImarisLib;
-    server = vImarisLib.GetServer();
-    id = server.GetObjectID(0);
-    aImarisApplication = vImarisLib.GetApplication(id);
-    disp(id)
-end
\ No newline at end of file
diff --git a/src/MATLAB/+ImarisProc/ContrastEnhancement.m b/src/MATLAB/+ImarisProc/ContrastEnhancement.m
deleted file mode 100644
index 29c934d0cf07b988899699edd85838d16346b49a..0000000000000000000000000000000000000000
--- a/src/MATLAB/+ImarisProc/ContrastEnhancement.m
+++ /dev/null
@@ -1,50 +0,0 @@
-function ContrastEnhancement(imarisAppID)
-    imarisHandle = ImarisHelper.GetAppHandle(imarisAppID);
-    if (isempty(imarisHandle))
-        error('Imaris is not open or the wrong app id!');
-    end
-    
-    imarisDataset = imarisHandle.GetDataSet();
-    physicalSize = ImarisHelper.GetPhysicalSize(imarisDataset);
-    normSize = physicalSize./max(physicalSize);
-    defaultSigs = 35.*normSize;
-    defaultSigsStr = sprintf('[%.2f,%.2f,%.2f]',defaultSigs(1),defaultSigs(2),defaultSigs(3));
-    
-    prompts = {'Enter Channel:','Enter Gaussian Sigmas:','Enter Median Neighborhood:','Inplace:(false=0,true=1)'};
-    dlgTitle = 'Contrast Enhancement';
-    numLines = 1;
-    defaultAns = {'1',defaultSigsStr,'[3,3,3]','0'};
-    answer = inputdlg(prompts,dlgTitle,numLines,defaultAns);
-    
-    if (isempty(answer))
-        return
-    end
-    
-    chan = str2double(answer{1});
-    sigsStr = regexp(answer{2},'\[(.*),(.*),(.*)\]','tokens');
-    sigs = abs(cellfun(@(x)(str2double(x)),sigsStr{1}));
-    medStr = regexp(answer{3},'\[(.*),(.*),(.*)\]','tokens');
-    medNeighborhood = cellfun(@(x)(str2double(x)),medStr{1});
-    medNeighborhood = abs(round(medNeighborhood));
-    
-    inplace = str2double(answer{4})>0;
-    outChannel = ImarisHelper.GetNumChannels(imarisDataset) +1;
-    if (inplace)
-        outChannel = chan;
-    end
-
-    imarisHandle.DataSetPushUndo('Contrast Enhancement');
-    
-    % TODO see if there is enough memory to just capture the entire series
-    % to process faster
-    
-    numFrames = ImarisHelper.GetNumFrames(imarisDataset);
-    im = ImarisHelper.GetImageData(imarisDataset,chan,1:numFrames);
-    prgs = Utils.CmdlnProgress(numFrames,true,'smooth');
-    for t=1:numFrames
-        im(:,:,:,1,t) = ImProc.ContrastEnhancement(im(:,:,:,1,t),sigs,medNeighborhood);
-        prgs.PrintProgress(t);
-    end
-    prgs.ClearProgress(true);
-    imarisDataset = ImarisHelper.SetImage(imarisHandle,imarisDataset,im,outChannel,1:t);
-end
diff --git a/src/MATLAB/gray.bmp b/src/MATLAB/gray.bmp
deleted file mode 100644
index 21d8dec7aff359599cec0218edf9a8a78515a831..0000000000000000000000000000000000000000
Binary files a/src/MATLAB/gray.bmp and /dev/null differ
diff --git a/src/MATLAB/grayscale.tif b/src/MATLAB/grayscale.tif
deleted file mode 100644
index e4ff61a7e7cec6400c37a2f2ab0f6f1b818ce43a..0000000000000000000000000000000000000000
Binary files a/src/MATLAB/grayscale.tif and /dev/null differ
diff --git a/src/MATLAB/left2right.tif b/src/MATLAB/left2right.tif
deleted file mode 100644
index 87882fec0a38bdedd3912a9b5aeba4a466afc0a8..0000000000000000000000000000000000000000
Binary files a/src/MATLAB/left2right.tif and /dev/null differ
diff --git a/src/MATLAB/sandbox.m b/src/MATLAB/sandbox.m
deleted file mode 100644
index 69ee3b3b4c329b5240e395c223ab1ff938149a07..0000000000000000000000000000000000000000
--- a/src/MATLAB/sandbox.m
+++ /dev/null
@@ -1 +0,0 @@
-im = tiffReader('DAPI Olig2-514 GFAP-488 Dcx-647 Laminin-Cy3 Bcatenin-568 20x1 TopLeft');
\ No newline at end of file
diff --git a/src/MATLAB/seg.bmp b/src/MATLAB/seg.bmp
deleted file mode 100644
index 81ceae19e7ac4b7525c0268901483fbff3ddf1ef..0000000000000000000000000000000000000000
Binary files a/src/MATLAB/seg.bmp and /dev/null differ
diff --git a/src/MATLAB/showIm.m b/src/MATLAB/showIm.m
deleted file mode 100644
index f9e6a1718909ae61c487d626315c41ee46b4100e..0000000000000000000000000000000000000000
--- a/src/MATLAB/showIm.m
+++ /dev/null
@@ -1,13 +0,0 @@
-%% print out image
-function showIm(image,label)
-figure
-imagesc(max(image,[],3))
-
-% set(gcf,'Units','normalized');
-% set(gcf,'Position',[0 0 1 1]);
-
-colormap gray
-
-title(label)
-axis image
-end
diff --git a/src/MATLAB/template.m b/src/MATLAB/template.m
deleted file mode 100644
index f39c38e1b97abdc03a5658fa7240aeddd9b19c4d..0000000000000000000000000000000000000000
--- a/src/MATLAB/template.m
+++ /dev/null
@@ -1,10 +0,0 @@
-function [cTime,mTime,kernelName] = (im,)
-    kernelName = '';
-    cT = tic;
-    imC = HIP.(im,);
-    cTime = toc(cT);
-
-    mT = tic;
-    imM = HIP.(im,,true);
-    mTime = toc(mT);
-end
diff --git a/src/MATLAB/testChunking.m b/src/MATLAB/testChunking.m
deleted file mode 100644
index fda873f6950b853d93d846b5ff3d28fc142bdcff..0000000000000000000000000000000000000000
--- a/src/MATLAB/testChunking.m
+++ /dev/null
@@ -1,18 +0,0 @@
-figure
-
-plot(0,0,'.w');
-ax = gca;
-hold on
-
-for i=1:length(imChunks)
-    curChunk = imChunks(i);
-    
-    Utils.PlotBox(ax,Utils.SwapXY_RC(curChunk.ImageStart_rc),Utils.SwapXY_RC(curChunk.ImageEnd_rc),'-b',num2str(i));
-    Utils.PlotBox(ax,Utils.SwapXY_RC(curChunk.ImageROIstart_rc),Utils.SwapXY_RC(curChunk.ImageROIend_rc),'--r');
-    Utils.PlotBox(ax,...
-        Utils.SwapXY_RC(curChunk.ImageStart_rc) + Utils.SwapXY_RC(curChunk.ChunkROIstart_rc) - 1,...
-        Utils.SwapXY_RC(curChunk.ImageStart_rc) + Utils.SwapXY_RC(curChunk.ChunkROIstart_rc) -1 + (Utils.SwapXY_RC(curChunk.ChunkROIend_rc)-Utils.SwapXY_RC(curChunk.ChunkROIstart_rc)-1),...
-        ':g');
-end
-axis ij
-axis equal
diff --git a/src/MATLAB/top2bottom.tif b/src/MATLAB/top2bottom.tif
deleted file mode 100644
index 4ecd432bbf0134b4eaefb61209fc6cf1df488a3d..0000000000000000000000000000000000000000
Binary files a/src/MATLAB/top2bottom.tif and /dev/null differ
diff --git a/src/Python/HIP.pyd b/src/Python/HIP.pyd
index 82c45814b25402b15f5de69047786e735bf0ab4b..e224a365536bf5d06412c4c88e40337d48f20c64 100644
Binary files a/src/Python/HIP.pyd and b/src/Python/HIP.pyd differ
diff --git a/src/Python/HIP.so b/src/Python/HIP.so
new file mode 100644
index 0000000000000000000000000000000000000000..232d3f81623e0c8d826911ca2516e5ed8fc00a20
--- /dev/null
+++ b/src/Python/HIP.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:985c6897659e283bc9a0a95fbb58a1df4ae025f3b04bce2b2a5454fa1d3246ab
+size 20752712
diff --git a/src/c/Cuda/CudaLoG.cuh b/src/c/Cuda/CudaLoG.cuh
index e9f42084512df63ede4ba135e5675ee1655e27cc..446e417e95ff14d6201a89825e0dc67dd85f2d14 100644
--- a/src/c/Cuda/CudaLoG.cuh
+++ b/src/c/Cuda/CudaLoG.cuh
@@ -70,6 +70,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, ImageView<float> imageOut, Vec<double>
 			{
 				if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer()))
 					std::runtime_error("Error sending ROI to device!");
+
 				cudaMultiplySumBias<<<chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constLoGKernelMem_x, MIN_VAL, MAX_VAL, constGausKernelMem_x, true);
 				deviceImages.incrementBuffer();
 				if (sigmas.y!=0)
@@ -91,6 +92,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, ImageView<float> imageOut, Vec<double>
 			{
 				if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer()))
 					std::runtime_error("Error sending ROI to device!");
+
 				if (sigmas.x!=0)
 				{
 					cudaMultiplySum << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constGausKernelMem_x, MIN_VAL, MAX_VAL);
@@ -112,6 +114,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, ImageView<float> imageOut, Vec<double>
 			{
 				if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer()))
 					std::runtime_error("Error sending ROI to device!");
+
 				if (sigmas.x!=0)
 				{
 					cudaMultiplySum << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constGausKernelMem_x, MIN_VAL, MAX_VAL);
diff --git a/src/c/Cuda/LoGKernel.cpp b/src/c/Cuda/LoGKernel.cpp
index 402d6f7a4bb2c45d8388ceb1cc3b98b033346e5a..0a8d70c4451d40966ec5afa697f6ed1f21da4d53 100644
--- a/src/c/Cuda/LoGKernel.cpp
+++ b/src/c/Cuda/LoGKernel.cpp
@@ -17,8 +17,6 @@ float* createLoG_GausKernels(Vec<double> sigmas, Vec<std::size_t>& dimsOut)
 	float* kernelOut = new float[dimsOut.sum()*2];
 
 	Vec<double> sigmaSqr = sigmas.pwr(2);
-	Vec<double> oneOverSigSqr = Vec<double>(1.0) / sigmaSqr;
-	Vec<double> twoSigmaSqr = sigmaSqr * 2;
 	Vec<double> sigmaForth = sigmas.pwr(4);
 
 	int loGstride = dimsOut.sum();
@@ -43,11 +41,17 @@ float* createLoG_GausKernels(Vec<double> sigmas, Vec<std::size_t>& dimsOut)
 		{
 			double pos = j - mid.e[i];
 			double posSqr = SQR(pos);
-			double gauss = exp(-(posSqr / twoSigmaSqr.e[i]));
-			double logVal = (posSqr / sigmaForth.e[i] - oneOverSigSqr.e[i])*gauss;
-			kernelOut[j + stride] = (float)logVal;
-			kernelOut[j + stride + loGstride] = gauss;
-			gaussSum += gauss;
+
+			double gaussVal = exp(-(posSqr / (2.0 * sigmaSqr.e[i])));
+			double logVal = (posSqr / sigmaForth.e[i] - 1.0 / sigmaSqr.e[i]) * gaussVal;
+
+			// Multiply by sigma^2 to get a scale-invariant LoG
+			double scaleInvVal = sigmaSqr.e[i] * logVal;
+
+			gaussSum += gaussVal;
+
+			kernelOut[j + stride] = scaleInvVal;
+			kernelOut[j + stride + loGstride] = gaussVal;
 		}
 
 		double sumVal = 0.0;
diff --git a/src/c/CudaImageProcessor.vcxproj b/src/c/CudaImageProcessor.vcxproj
index 6d4cbf9a073f9fc310dadf726efecd4903640ba5..1d9413abe5cde18f5434a353550c522f11af639e 100644
--- a/src/c/CudaImageProcessor.vcxproj
+++ b/src/c/CudaImageProcessor.vcxproj
@@ -15,13 +15,13 @@
     <TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
     <Keyword>ManagedCProj</Keyword>
     <RootNamespace>CudaImageProcessor</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CLRSupport>false</CLRSupport>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
@@ -33,16 +33,16 @@
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.1.props" />
-  </ImportGroup>
+  <ImportGroup Label="ExtensionSettings" />
   <ImportGroup Label="Shared">
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="CudaVersionImport.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="CudaVersionImport.props" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@@ -201,7 +201,5 @@
     </ClInclude>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.1.targets" />
-  </ImportGroup>
+  <ImportGroup Label="ExtensionTargets" />
 </Project>
\ No newline at end of file
diff --git a/src/c/CudaMex.def b/src/c/CudaMex.def
new file mode 100644
index 0000000000000000000000000000000000000000..d4263ef131dc28048c4bbeee67348c04251f7987
--- /dev/null
+++ b/src/c/CudaMex.def
@@ -0,0 +1,3 @@
+LIBRARY	"CudaMex"
+EXPORTS DllMain
+EXPORTS mexFunction
diff --git a/src/c/CudaMex.vcxproj b/src/c/CudaMex.vcxproj
index 9af21842f528edd05fe708bf1523f20d9de543bb..dc38a993d85202492c3d963c6570316d0684ff1e 100644
--- a/src/c/CudaMex.vcxproj
+++ b/src/c/CudaMex.vcxproj
@@ -13,13 +13,13 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{6698E8EC-49D9-421E-AA87-5BCC6B466347}</ProjectGuid>
     <RootNamespace>CudaMex</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
@@ -62,11 +62,10 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalLibraryDirectories>$(MATLAB_DIR)\extern\lib\win64\microsoft;$(SolutionDir)Output\CudaImageProcessor\$(Configuration)_$(PlatformName)\</AdditionalLibraryDirectories>
       <AdditionalDependencies>CudaImageProcessor_d.lib;libmx.lib;libmex.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <ModuleDefinitionFile>
-      </ModuleDefinitionFile>
+      <ModuleDefinitionFile>CudaMex.def</ModuleDefinitionFile>
     </Link>
     <PostBuildEvent>
-      <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"
+      <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"
 copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
@@ -78,7 +77,7 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>.;$(MATLAB_DIR)\extern\include;external</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>MEX_BUILD;_WINDLL;USE_PROCESS_MUTEX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MEX_BUILD;_WINDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <PreprocessToFile>false</PreprocessToFile>
     </ClCompile>
     <Link>
@@ -87,11 +86,10 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command>
       <OptimizeReferences>true</OptimizeReferences>
       <AdditionalLibraryDirectories>$(MATLAB_DIR)\extern\lib\win64\microsoft;$(SolutionDir)Output\CudaImageProcessor\$(Configuration)_$(PlatformName)\</AdditionalLibraryDirectories>
       <AdditionalDependencies>CudaImageProcessor.lib;libmx.lib;libmex.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <ModuleDefinitionFile>
-      </ModuleDefinitionFile>
+      <ModuleDefinitionFile>CudaMex.def</ModuleDefinitionFile>
     </Link>
     <PostBuildEvent>
-      <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"
+      <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"
 copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
@@ -145,6 +143,7 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command>
     <ClCompile Include="Mex\CudaMex.cpp" />
     <ClCompile Include="Mex\MexCommandModule.cpp" />
     <ClCompile Include="ScriptCmds\ScopedProcessMutex.cpp" />
+    <None Include="CudaMex.def" />
     <None Include="Mex\_TemplateMex.cpp" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/src/c/CudaMex.vcxproj.filters b/src/c/CudaMex.vcxproj.filters
index a85fad3d387da8198668899ea4463d9b827a3066..c2c47e840223cd7deffa8271faff20c01b8c16de 100644
--- a/src/c/CudaMex.vcxproj.filters
+++ b/src/c/CudaMex.vcxproj.filters
@@ -172,5 +172,8 @@
     <None Include="Mex\_TemplateMex.cpp">
       <Filter>Source Files</Filter>
     </None>
+    <None Include="CudaMex.def">
+      <Filter>Resource Files</Filter>
+    </None>
   </ItemGroup>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/src/c/CudaPy3DLL.vcxproj b/src/c/CudaPy3DLL.vcxproj
index d1fb77e7d418063d65e5544060ccef899f0c0b0e..0cec9a3502681994bc9e927db8fc78a5c3dda309 100644
--- a/src/c/CudaPy3DLL.vcxproj
+++ b/src/c/CudaPy3DLL.vcxproj
@@ -73,26 +73,26 @@
     <ProjectGuid>{0957901A-E67A-40C2-9EF5-76DF8EFBC2D5}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>CudaPy3DLL</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
diff --git a/src/c/CudaVersionImport.props b/src/c/CudaVersionImport.props
new file mode 100644
index 0000000000000000000000000000000000000000..e658a62be7e4935caca77ac39126ec1a8140a2a0
--- /dev/null
+++ b/src/c/CudaVersionImport.props
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.2.props" />
+  </ImportGroup>
+
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.2.targets" />
+  </ImportGroup>
+</Project>
diff --git a/src/c/HIP.so b/src/c/HIP.so
index 9bdf86723856c8a86b29125d01dcaba438706359..0f6e163f05cdc77f78567a7dfb688593f8ba92e7 100755
Binary files a/src/c/HIP.so and b/src/c/HIP.so differ
diff --git a/src/c/Mex.mexa64 b/src/c/Mex.mexa64
index 3def436ab95e30defbeb9c51ddb6dc741df5a108..e588bd360d85259d9ab919d7900d1e3f5e53717d 100755
Binary files a/src/c/Mex.mexa64 and b/src/c/Mex.mexa64 differ
diff --git a/src/c/Mex.mexw64 b/src/c/Mex.mexw64
index 626808d43340b64892cc3774464bf4a30e0384cd..b658d2b05218d51b0a95623eba479b62c8fa9988 100644
--- a/src/c/Mex.mexw64
+++ b/src/c/Mex.mexw64
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0395df76896d9b773162832d127eca3bfdd7599a2c4e954dec2376e0ee349a34
-size 12214272
+oid sha256:75d48bac093946934c4cd46fbbe2aedd703f447d1ec8e08752c118eb23528e07
+size 12387840
diff --git a/src/c/ScriptCmds/ScopedProcessMutex.cpp b/src/c/ScriptCmds/ScopedProcessMutex.cpp
index d7e5bf382803b8e45dd2ade49d51f0c35e58fdfa..732e28848bcd2e398372642c072293408560cd7d 100644
--- a/src/c/ScriptCmds/ScopedProcessMutex.cpp
+++ b/src/c/ScriptCmds/ScopedProcessMutex.cpp
@@ -1,22 +1,86 @@
 #include "ScopedProcessMutex.h"
 
 #include <stdexcept>
+#include <memory>
 
-#ifdef USE_WINDOWS_IPC_MUTEX
-#include <windows.h>
+// Helpers for getting user ID
+std::string getProcessUser();
 
-#undef min
-#undef max
+#if defined(_WIN32)
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#include <sddl.h>
 
-HANDLE ScopedProcessMutex::mutexHandle = NULL;
+#undef WIN32_LEAN_AND_MEAN
+#undef NOMINMAX
+
+// Deleter for LocalAlloc-family memory; it is called with the managed T* itself.
+struct LocalFreeFunc { inline void operator() (void* ptr) const { LocalFree((HLOCAL)ptr); } };
+template <typename T> using LocalUnique = std::unique_ptr<T, LocalFreeFunc>;
+
+// Returns the string SID of the user owning this process, or "unk" on any failure.
+std::string getProcessUser()
+{
+	HANDLE hToken = nullptr;
+	if ( !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken) )
+		return "unk";
+
+	// TokenUser data is a TOKEN_USER followed by the SID it points at: reserve room for both.
+	alignas(TOKEN_USER) BYTE tuBuf[sizeof(TOKEN_USER) + SECURITY_MAX_SID_SIZE] = { 0 };
+	DWORD dwSize = sizeof(tuBuf);
+	const BOOL gotUser = GetTokenInformation(hToken, TokenUser, tuBuf, dwSize, &dwSize);
+	CloseHandle(hToken);	// finished with the token either way; do not leak the handle
+	if ( !gotUser )
+		return "unk";
+
+	// User.Sid is already a PSID: pass it directly, not the address of the member.
+	char* strSID = nullptr;
+	if ( !ConvertSidToStringSidA(reinterpret_cast<TOKEN_USER*>(tuBuf)->User.Sid, &strSID) )
+		return "unk";
+	LocalUnique<char> sidOwner(strSID);	// LocalFree()s the SID string on scope exit
+	return std::string(strSID);
+}
+
+#elif defined(__linux__)
+#include <unistd.h>
+#include <sys/types.h>
+
+// Returns the effective user id of this process as a decimal string.
+std::string getProcessUser()
+{
+	// uid_t is unsigned and can need all 10 digits of a 32-bit value, so print it
+	// as unsigned and hand snprintf the whole buffer (24 bytes fits any 64-bit value).
+	char uidStr[24];
+	snprintf(uidStr, sizeof(uidStr), "%lu", (unsigned long)geteuid());
+
+	return uidStr;
+}
+
+#endif
+
+
+#if defined(USE_WINDOWS_IPC_MUTEX)
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+
+#undef WIN32_LEAN_AND_MEAN
+#undef NOMINMAX
+
+
+HANDLE ScopedProcessMutex::mutexHandle = nullptr;
 
 ScopedProcessMutex::ScopedProcessMutex(const char* name)
 {
 	if ( !mutexHandle )
 	{
-		mutexHandle = CreateMutex(NULL, false, name);
+		// Postfix a unique user-id to the mutex name
+		std::string mtx_name = name + getProcessUser();
+
+		mutexHandle = CreateMutex(NULL, false, mtx_name.c_str());
 		if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED )
-			mutexHandle = OpenMutex(SYNCHRONIZE, false, name);
+			mutexHandle = OpenMutex(SYNCHRONIZE, false, mtx_name.c_str());
 
 		if ( !mutexHandle )
 			throw std::runtime_error("Error creating mutex handle!");
@@ -28,11 +92,10 @@ ScopedProcessMutex::ScopedProcessMutex(const char* name)
 		mutexHandle = NULL;
 		throw std::runtime_error("Error unable to acquire mutex!");
 	}
+	// MW - Treat a previous crash as ok since the GPU is likely to recover 
+	//      from process crashes at the driver level (no longer throw error)
 	else if ( waitResult == WAIT_ABANDONED )
-	{
-		mutexHandle = NULL;
-		throw std::runtime_error("Previous thread terminated without releasing mutex!");
-	}
+	{}
 }
 
 ScopedProcessMutex::~ScopedProcessMutex()
@@ -41,12 +104,290 @@ ScopedProcessMutex::~ScopedProcessMutex()
 		ReleaseMutex(mutexHandle);
 }
 
-#else
+#elif defined(USE_PTHREADS_ROBUST_MUTEX)
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <thread>
+#include <atomic>
+
+#if (ATOMIC_INT_LOCK_FREE != 2)
+ #error USE_PTHREADS_ROBUST_MUTEX implementation requires always lock-free atomic int type
+#endif
+
+struct ScopedProcessMutex::PThreadMutex
+{
+	// Robust, process-shared pthread mutex that lives in a named POSIX
+	// shared-memory object; `state` tells openers when it is safe to use.
+	enum SharedMutexState
+	{
+		Uninitialized = 0,	// value of freshly created (zero-filled) memory
+		Initializing = 1,	// creator is still setting up pthread_mutex
+		Valid = 2,		// pthread_mutex is initialized and may be locked
+	};
+
+	struct SharedMemMutex
+	{
+		std::atomic_int state;
+		pthread_mutex_t pthread_mutex;
+	};
+
+	int shm_fd;			// descriptor from shm_open, -1 when closed
+	SharedMemMutex* sharedMem;	// mmap'd view of the object, nullptr when unmapped
+	std::string shm_name;		// name passed to shm_open / shm_unlink
+
+	PThreadMutex(const char* name)
+		: shm_fd(-1), sharedMem(nullptr), shm_name(name)
+	{
+		try_create_mutex();
+	}
+
+	~PThreadMutex()
+	{
+		cleanup();
+	}
+
+	static void force_unlink(const char* name)
+	{
+		shm_unlink(name);
+	}
+
+	void lock()
+	{
+		int err = pthread_mutex_lock(&sharedMem->pthread_mutex);
+		// EOWNERDEAD: the previous owner died holding the lock; we own it now
+		if ( err == EOWNERDEAD )
+			err = pthread_mutex_consistent(&sharedMem->pthread_mutex);
+		else if ( err != 0 )
+			throw std::runtime_error("Error unable to acquire mutex!");
+	}
+
+	void unlock()
+	{
+		int err = pthread_mutex_unlock(&sharedMem->pthread_mutex);
+		if (err != 0)
+			throw std::runtime_error("Error unable to release mutex!");
+	}
+
+private:
+	void try_create_mutex()
+	{
+		errno = 0;
+
+		// Try to create shared memory-mapping (O_EXCL: fall back to open if it exists)
+		shm_fd = shm_open(shm_name.c_str(), O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR);
+		if (shm_fd < 0)
+		{
+			if ( errno == EEXIST )
+			{
+				try_open_mutex();
+				return;
+			}
+			throw std::runtime_error("Error unable to create shared memory");
+		}
+
+		int err = ftruncate(shm_fd, sizeof(SharedMemMutex));
+		if (err)
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error unable to resize shared memory for mutex");
+		}
+
+		void* mapPtr = mmap(nullptr, sizeof(SharedMemMutex), PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd, 0);
+		if (mapPtr == MAP_FAILED)
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Failed to map shared memory for mutex");
+		}
+
+		sharedMem = (SharedMemMutex*) mapPtr;
+		sharedMem->state.store(SharedMutexState::Initializing, std::memory_order_seq_cst);
+		////// Guard other processes from using mutex until it's initialized
+		pthread_mutexattr_t attrStorage;	// stack storage: the guard below only has to destroy it
+		err = pthread_mutexattr_init(&attrStorage);
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to initialize mutex attribute");
+		}
+		std::unique_ptr<pthread_mutexattr_t, int(*)(pthread_mutexattr_t*)> mtxAttr(&attrStorage, pthread_mutexattr_destroy);
+
+		err = pthread_mutexattr_setpshared(mtxAttr.get(), PTHREAD_PROCESS_SHARED);
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to set mutex shared");
+		}
+
+		err = pthread_mutexattr_setrobust(mtxAttr.get(), PTHREAD_MUTEX_ROBUST);
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to set mutex robust");
+		}
+
+		err = pthread_mutex_init(&sharedMem->pthread_mutex, mtxAttr.get());
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to initialize mutex");
+		}
+		//////
+		sharedMem->state.store(SharedMutexState::Valid, std::memory_order_seq_cst);
+	}
+
+	void try_open_mutex()
+	{
+		shm_fd = shm_open(shm_name.c_str(), O_RDWR, S_IRUSR|S_IWUSR);
+		if (shm_fd < 0)
+			throw std::runtime_error("Error unable to open shared memory");
+
+		struct stat shm_stat;
+
+		const int chkLimit = 100;
+		for (int i = 0; i < chkLimit; ++i)
+		{
+			// Wait (chkLimit polls, 10 ms apart) for the creator's ftruncate to size the object
+			int err = fstat(shm_fd, &shm_stat);
+			if (err)
+			{
+				cleanup();
+				throw std::runtime_error("Error unable to stat shared memory");
+			}
+
+			if (shm_stat.st_size > 0)
+				break;
+
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		}
+
+		// Timeout failure
+		if (shm_stat.st_size == 0)
+		{
+			cleanup();
+			throw std::runtime_error("Error timeout waiting for shared memory init");
+		}
+
+		void* mapPtr = mmap(nullptr, sizeof(SharedMemMutex), PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd, 0);
+		if (mapPtr == MAP_FAILED)
+		{
+			cleanup();
+			throw std::runtime_error("Failed to map shared memory for mutex");
+		}
+
+		sharedMem = (SharedMemMutex*)mapPtr;
+
+		// NOTE: sharedMem is already valid but mutex may not have been properly initialized yet
+		int chkState = SharedMutexState::Uninitialized;
+		for (int i = 0; i < chkLimit; ++i)
+		{
+			// Wait for the creator to publish SharedMutexState::Valid
+			chkState = sharedMem->state.load();
+			if (chkState == SharedMutexState::Valid)
+				break;
+
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		}
+
+		// Timeout failure
+		if (chkState != SharedMutexState::Valid)
+		{
+			cleanup();
+			throw std::runtime_error("Error timeout waiting for mutex init");
+		}
+	}
+
+	// Cleanup helpers
+	inline void err_create_cleanup(const char* name)
+	{
+		// Full teardown when the creating process fails: also unlinks the name
+		safe_destroy_mutex();
+		safe_unmap_mem();
+		safe_close_shm();
+		force_unlink(name);
+	}
+
+	// Normal cleanup: drop this process's mapping and descriptor only (no unlink)
+	inline void cleanup()
+	{
+		safe_unmap_mem();
+		safe_close_shm();
+	}
+
+	inline void safe_destroy_mutex()
+	{
+		// _strong: a spurious _weak failure would skip destroying a Valid mutex
+		if (!sharedMem)
+			return;
+		int chkValid = SharedMutexState::Valid;
+		if (sharedMem->state.compare_exchange_strong(chkValid, SharedMutexState::Uninitialized))
+			pthread_mutex_destroy(&sharedMem->pthread_mutex);
+	}
+
+	inline void safe_unmap_mem()
+	{
+		if (sharedMem)
+		{
+			munmap((void*)sharedMem, sizeof(SharedMemMutex));
+			sharedMem = nullptr;
+		}
+	}
+
+	inline void safe_close_shm()
+	{
+		// NOTE: Unlike force_unlink, this just closes the file descriptor
+		//       it will not invalidate the shared memory for other processes
+		if (shm_fd >= 0)
+		{
+			close(shm_fd);
+			shm_fd = -1;
+		}
+	}
+};
+
+thread_local std::unique_ptr<ScopedProcessMutex::PThreadMutex> ScopedProcessMutex::procMutex = nullptr;
+
+ScopedProcessMutex::ScopedProcessMutex(const char* name)
+{
+	// Map the shared mutex lazily, once per thread; later instances reuse it.
+	if ( procMutex == nullptr )
+	{
+		// Shared-memory key is "/" + name + the per-user suffix
+		const std::string shmKey = "/" + (name + getProcessUser());
+		procMutex.reset(new PThreadMutex(shmKey.c_str()));
+	}
+
+	if ( procMutex == nullptr )
+		throw std::runtime_error("Error unable to open/create shared mutex!");
+	procMutex->lock();
+}
+
+ScopedProcessMutex::~ScopedProcessMutex()
+{
+	// unlock() can throw; destructors are noexcept, so swallow (best-effort release)
+	try { if (procMutex) procMutex->unlock(); } catch (const std::exception&) {}
+}
+
+void ScopedProcessMutex::remove(const char* name)
+{
+	// Must unlink the exact "/<name><user>" key that the constructor opens
+	std::string mtxName = std::string("/") + name + getProcessUser();
+	PThreadMutex::force_unlink(mtxName.c_str());
+}
+
+#elif defined(USE_BOOST_IPC_MUTEX)
 using boost::interprocess::named_mutex;
 using boost::interprocess::open_or_create;
 
 ScopedProcessMutex::ScopedProcessMutex(const char* name)
-	: ipc_mutex(open_or_create, name)
+	: ipc_mutex(open_or_create, std::string(name + getProcessUser()).c_str())	// mutex name is <name> + per-user suffix
 {
 	ipc_mutex.lock();
 }
@@ -56,4 +397,10 @@ ScopedProcessMutex::~ScopedProcessMutex()
 	ipc_mutex.unlock();
 }
 
+void ScopedProcessMutex::remove(const char* name)
+{
+	// Remove the same per-user "<name><user id>" mutex the constructor opens
+	named_mutex::remove((name + getProcessUser()).c_str());
+}
+
 #endif
diff --git a/src/c/ScriptCmds/ScopedProcessMutex.h b/src/c/ScriptCmds/ScopedProcessMutex.h
index f30d315ec615d6c16da96d479570fbd7e7485f81..fb55b2230e66db80f712c3a7143471d86450622f 100644
--- a/src/c/ScriptCmds/ScopedProcessMutex.h
+++ b/src/c/ScriptCmds/ScopedProcessMutex.h
@@ -3,17 +3,27 @@
 #ifdef USE_PROCESS_MUTEX
  #define SCOPED_PROCESS_MUTEX(Name) ScopedProcessMutex Name##_mutex(#Name)
 #else
+// USE_PROCESS_MUTEX is not defined: SCOPED_PROCESS_MUTEX(Name) expands to
+// nothing, so no cross-process lock is taken. Report that at build time.
+// (Parenthesized pragma form: accepted by MSVC as well as GCC/Clang.)
-#pragma message "hi friend: no process mutex"
+#pragma message ("HIP (leverjs) ::  no process mutex in use")
+
  #define SCOPED_PROCESS_MUTEX(Name)
 #endif
 
 #ifdef _WIN32
  #define USE_WINDOWS_IPC_MUTEX (1)
+#elif defined(__linux__)
+ #define USE_PTHREADS_ROBUST_MUTEX (1)
+#else
+ #define USE_BOOST_IPC_MUTEX (1)
 #endif
 
-#ifndef USE_WINDOWS_IPC_MUTEX
-	#define BOOST_DATE_TIME_NO_LIB (1)
-	#include "boost/interprocess/sync/named_mutex.hpp"
+#if defined(USE_PTHREADS_ROBUST_MUTEX)
+ #include <memory>
+#elif defined(USE_BOOST_IPC_MUTEX)
+ #define BOOST_DATE_TIME_NO_LIB (1)
+ #include "boost/interprocess/sync/named_mutex.hpp"
 #endif
 
 class ScopedProcessMutex
@@ -32,10 +42,16 @@ public:
 
 	~ScopedProcessMutex();
 
+	// Allow force release of mutex resource (cross-process removal)
+	static void remove(const char* name);
+
 private:
 
-#ifdef USE_WINDOWS_IPC_MUTEX
+#if defined(USE_WINDOWS_IPC_MUTEX)
 	static void* mutexHandle;
+#elif defined(USE_PTHREADS_ROBUST_MUTEX)
+	struct PThreadMutex;
+	static thread_local std::unique_ptr<PThreadMutex> procMutex;
 #else
 	boost::interprocess::named_mutex ipc_mutex;
 #endif
diff --git a/src/c/ScriptCmds/ScriptCommandImpl.h b/src/c/ScriptCmds/ScriptCommandImpl.h
index 29059902cd9e0217c724e621b2d222c711dd0a4e..4752cb377b7cb8c6fda3c1e54b38a82a02599d2c 100644
--- a/src/c/ScriptCmds/ScriptCommandImpl.h
+++ b/src/c/ScriptCmds/ScriptCommandImpl.h
@@ -185,7 +185,7 @@ private:
 	{
 		// Use a scoped process-level mutex to run only a single GPU kernel at a time
 		// TODO: Figure out a scheduling system multi-process HIP calls
-		SCOPED_PROCESS_MUTEX(hip_cmd_gpu);
+		SCOPED_PROCESS_MUTEX(hip_cmd_gpu_);
 
 		static_assert(ArgConverter::has_deferred_image_inputs(), "HIP_COMPILE: Argument layout has no dynamic image inputs. Please overload default ::execute() function!");
 
diff --git a/src/c/ScriptCmds/ScriptHelpers.h b/src/c/ScriptCmds/ScriptHelpers.h
index e8c3a233a1e66f8726875d1ff16a068209ac5b9e..b2de32b34a06624aab935a2c0f82413a6453b30e 100644
--- a/src/c/ScriptCmds/ScriptHelpers.h
+++ b/src/c/ScriptCmds/ScriptHelpers.h
@@ -10,7 +10,7 @@
 #include <string>
 #include <memory>
 #include <algorithm>
-
+#include <stdexcept> 
 
 #define BEGIN_TYPE_MAP(EnumType,ScriptEngine)		\
 	typedef EnumType IdType;						\
diff --git a/src/c/makefile b/src/c/makefile
index db3b80cc54d1e7b550dbf6c4bead1daf6065360b..bf141265e9e92cf310d92f0b8d5b04edee621eea 100755
--- a/src/c/makefile
+++ b/src/c/makefile
@@ -8,6 +8,9 @@ PYTHON3_INC = /usr/include/$(PYTHON_VER)
 NUMPY3_INC = /usr/include/$(PYTHON_VER)
 PYTHON_LIB = /usr/lib/$(PYTHON_VER)/config-3.5m-x86_64-linux-gnu
 
+# Include directory for cuda toolkit
+NVCC_INC = /usr/include
+
 #########################################
 # Default files and include dirs
 #########################################
@@ -27,7 +30,7 @@ PY_CPP_OBJ = $(addprefix $(OBJ_DIR)/,$(notdir $(PY_CPP_FILES:.cpp=.o)))
 SCRPY_CPP_OBJ = $(addprefix $(OBJ_DIR)/Py,$(notdir $(SCR_CPP_FILES:.cpp=.o)))
 SCRMEX_CPP_OBJ = $(addprefix $(OBJ_DIR)/Mx,$(notdir $(SCR_CPP_FILES:.cpp=.o)))
 
-CUDA_INC = -I. -I./Cuda -I/usr/local/cuda/include
+CUDA_INC = -I. -I./Cuda -I$(NVCC_INC)
 MEX_INC = -I. -I./Mex -I./ScriptCmds -I./external -I$(MATALAB_DIR)/extern/include
 PY_INC = -I. -I./Python -I./ScriptCmds -I./external -I$(PYTHON3_INC) -I$(NUMPY3_INC)
 
@@ -52,7 +55,6 @@ LD_FLAGS = --no-undefined
 NVCC_PATH = nvcc
 NVCC_FLAGS = $(CPP_FLAGS)
 SMODEL = -arch=sm_30
-NVCC_INC = -I/usr/include 
 
 ifndef COMP
 	COMP=clang
@@ -100,7 +102,7 @@ Mex.mexa64: $(CUDA_OBJ) $(MEX_CPP_OBJ) $(SCRMEX_CPP_OBJ) $(CUDA_CPP_OBJ)
 #########################################
 $(OBJ_DIR)/%.o: Cuda/%.cu
 	@mkdir -p $(@D)
-	$(NVCC_PATH) $(NVCC_FLAGS) $(CUDA_INC) $(SMODEL) --compiler-options '$(C_FLAGS)' -c $< -o $@ $(NVCC_INC) -dc
+	$(NVCC_PATH) $(NVCC_FLAGS) $(CUDA_INC) $(SMODEL) --compiler-options '$(C_FLAGS)' -c $< -o $@ -dc
 
 $(OBJ_DIR)/%.o: Cuda/%.cpp
 	@mkdir -p $(@D)
diff --git a/tdrDelay.reg b/tdrDelay.reg
deleted file mode 100644
index 5f44d4aeaa8808b332652108661df5dad5bab0c1..0000000000000000000000000000000000000000
Binary files a/tdrDelay.reg and /dev/null differ