diff --git a/.gitattributes b/.gitattributes index abca70a88f3c90bd95e963ec6e31a2c9a14d831b..cef6a61f34be63968345836055fe384917241712 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,7 @@ -*.mexw64 filter=lfs diff=lfs merge=lfs -text *.mat filter=lfs diff=lfs merge=lfs -text *.ai filter=lfs diff=lfs merge=lfs -text *.png filter=lfs diff=lfs merge=lfs -text +*.mexw64 filter=lfs diff=lfs merge=lfs -text +*.mexa64 filter=lfs diff=lfs merge=lfs -text +*.pyd filter=lfs diff=lfs merge=lfs -text +*.so filter=lfs diff=lfs merge=lfs -text diff --git a/src/MATLAB/+HIP/@Cuda/Mex.mexa64 b/src/MATLAB/+HIP/@Cuda/Mex.mexa64 index 3def436ab95e30defbeb9c51ddb6dc741df5a108..e588bd360d85259d9ab919d7900d1e3f5e53717d 100755 Binary files a/src/MATLAB/+HIP/@Cuda/Mex.mexa64 and b/src/MATLAB/+HIP/@Cuda/Mex.mexa64 differ diff --git a/src/MATLAB/+HIP/@Cuda/Mex.mexw64 b/src/MATLAB/+HIP/@Cuda/Mex.mexw64 index 3701ee8f3d395e7359884d937ce9a91cd582b61c..b658d2b05218d51b0a95623eba479b62c8fa9988 100644 --- a/src/MATLAB/+HIP/@Cuda/Mex.mexw64 +++ b/src/MATLAB/+HIP/@Cuda/Mex.mexw64 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a145aed70562e248f8c065341b5f37069b8d52a66bf40e4c545cff25f2c62bc -size 12214272 +oid sha256:75d48bac093946934c4cd46fbbe2aedd703f447d1ec8e08752c118eb23528e07 +size 12387840 diff --git a/src/MATLAB/+HSP/JANEWAY.mat b/src/MATLAB/+HSP/JANEWAY.mat deleted file mode 100644 index daefe8d57f2e1fc54584c9059c50b2cffdc89ae7..0000000000000000000000000000000000000000 --- a/src/MATLAB/+HSP/JANEWAY.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e3fbec3fff0ee2c8bb9052f5dfa7beae71122965170d22afb02a7e059eb49d4 -size 1972 diff --git a/src/MATLAB/+HSP/TESLA-LITE.mat b/src/MATLAB/+HSP/TESLA-LITE.mat deleted file mode 100644 index 6ab896d0e9770a3370d8ba8bd500c634c17ea582..0000000000000000000000000000000000000000 --- a/src/MATLAB/+HSP/TESLA-LITE.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 
-oid sha256:de5bdcd862ef4791bf4075640a37c46f0fd75170041a9404bf4fa9842a09ec11 -size 4741 diff --git a/src/MATLAB/+HSP/WAITE-WW1.mat b/src/MATLAB/+HSP/WAITE-WW1.mat deleted file mode 100644 index 525ee8efa68b867d56110be86dcc4aee91898d47..0000000000000000000000000000000000000000 --- a/src/MATLAB/+HSP/WAITE-WW1.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f074917db04ea1ea5f97f049af570c70abaa5610d9eb54cfed67b5872e5461d9 -size 8420 diff --git a/src/MATLAB/+ImProc/JANEWAY.mat b/src/MATLAB/+ImProc/JANEWAY.mat deleted file mode 100644 index ee75084eb0e94f69153afb65eb1d3bb7617a9bc1..0000000000000000000000000000000000000000 --- a/src/MATLAB/+ImProc/JANEWAY.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f3bdad9858158dd94f001518148564141913d7257cb23293f4821fb8827d182 -size 5093 diff --git a/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m b/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m deleted file mode 100644 index 18ca138db91ecf70a12c09c52113b9ca31adc35d..0000000000000000000000000000000000000000 --- a/src/MATLAB/+ImarisProc/CC_BatchSkeleton.m +++ /dev/null @@ -1,72 +0,0 @@ -%Skeleton for Imaris batch in Matlab - -%ImarisLib.jar needs to be in the same folder. -%Imaris needs to be running when executing the following code - -%CC 2018-01-03 tested with Imaris 9.1.0 - -%Get the image folder. Only read *.ims images. 
-infolder = uigetdir; -files = [infolder '/*.ims']; -listing = dir(files); -nfiles = size(listing,1); - -%open files in Imaris sequentially -for i = 1:nfiles - - filename = [infolder '/' listing(i).name]; - filename = sprintf(filename); - vImarisApplication = StartImaris; - vImarisApplication.FileOpen(filename,''); - - %get dataset in Matlab - vDataSet = vImarisApplication.GetDataSet; - - %apply median filter to dataset - vImarisApplication.GetImageProcessing.MedianFilterChannel(vDataSet,0,[5 5 5]); - - %create Surfaces - ip = vImarisApplication.GetImageProcessing; - vNewSurfaces = ip.DetectSurfaces(vDataSet, [], 0, 1, 0, true, 0, ''); - vNewSurfaces.SetName(sprintf('New Surface')); - vImarisApplication.GetSurpassScene.AddChild(vNewSurfaces,-1); - - %get Surface stats - vSurpassComponent = vImarisApplication.GetSurpassSelection; - vImarisObject = vImarisApplication.GetFactory.ToSurfaces(vSurpassComponent); - vAllStatistics = vImarisObject.GetStatistics; - vNames = cell(vAllStatistics.mNames); - vValues = vAllStatistics.mValues; - disp(unique(vNames)) - - %save ims file - newFilename = strcat(filename(1:end-4),'new.ims'); - vImarisApplication.FileSave(newFilename,''); - - pause(5); - - %Clear java handles to clear up memory and prevent future errors - clear 'Imaris/IApplicationPrxHelper'; - clear 'Imaris/IDataSetPrxHelper'; - clear 'Imaris/IDataContainerPrxHelper'; - clear 'Imaris/IDataItemPrxHelper'; - clear 'Imaris/cStatisticValues'; - clear 'ImarisLib'; - clear 'vSurpassScene'; - clear 'vDataSet'; - clear 'vAllStatistics'; - clear 'err'; -end - -%Quit Imaris Application after all is done -vImarisApplication.SetVisible(~vImarisApplication.GetVisible); -vImarisApplication.Quit; - -function aImarisApplication = StartImaris - javaaddpath ImarisLib.jar; - vImarisLib = ImarisLib; - server = vImarisLib.GetServer(); - id = server.GetObjectID(0); - aImarisApplication = vImarisLib.GetApplication(id); - disp(id) -end \ No newline at end of file diff --git 
a/src/MATLAB/+ImarisProc/ContrastEnhancement.m b/src/MATLAB/+ImarisProc/ContrastEnhancement.m deleted file mode 100644 index 29c934d0cf07b988899699edd85838d16346b49a..0000000000000000000000000000000000000000 --- a/src/MATLAB/+ImarisProc/ContrastEnhancement.m +++ /dev/null @@ -1,50 +0,0 @@ -function ContrastEnhancement(imarisAppID) - imarisHandle = ImarisHelper.GetAppHandle(imarisAppID); - if (isempty(imarisHandle)) - error('Imaris is not open or the wrong app id!'); - end - - imarisDataset = imarisHandle.GetDataSet(); - physicalSize = ImarisHelper.GetPhysicalSize(imarisDataset); - normSize = physicalSize./max(physicalSize); - defaultSigs = 35.*normSize; - defaultSigsStr = sprintf('[%.2f,%.2f,%.2f]',defaultSigs(1),defaultSigs(2),defaultSigs(3)); - - prompts = {'Enter Channel:','Enter Gaussian Sigmas:','Enter Median Neighborhood:','Inplace:(false=0,true=1)'}; - dlgTitle = 'Contrast Enhancement'; - numLines = 1; - defaultAns = {'1',defaultSigsStr,'[3,3,3]','0'}; - answer = inputdlg(prompts,dlgTitle,numLines,defaultAns); - - if (isempty(answer)) - return - end - - chan = str2double(answer{1}); - sigsStr = regexp(answer{2},'\[(.*),(.*),(.*)\]','tokens'); - sigs = abs(cellfun(@(x)(str2double(x)),sigsStr{1})); - medStr = regexp(answer{3},'\[(.*),(.*),(.*)\]','tokens'); - medNeighborhood = cellfun(@(x)(str2double(x)),medStr{1}); - medNeighborhood = abs(round(medNeighborhood)); - - inplace = str2double(answer{4})>0; - outChannel = ImarisHelper.GetNumChannels(imarisDataset) +1; - if (inplace) - outChannel = chan; - end - - imarisHandle.DataSetPushUndo('Contrast Enhancement'); - - % TODO see if there is enough memory to just capture the entire series - % to process faster - - numFrames = ImarisHelper.GetNumFrames(imarisDataset); - im = ImarisHelper.GetImageData(imarisDataset,chan,1:numFrames); - prgs = Utils.CmdlnProgress(numFrames,true,'smooth'); - for t=1:numFrames - im(:,:,:,1,t) = ImProc.ContrastEnhancement(im(:,:,:,1,t),sigs,medNeighborhood); - prgs.PrintProgress(t); - 
end - prgs.ClearProgress(true); - imarisDataset = ImarisHelper.SetImage(imarisHandle,imarisDataset,im,outChannel,1:t); -end diff --git a/src/MATLAB/gray.bmp b/src/MATLAB/gray.bmp deleted file mode 100644 index 21d8dec7aff359599cec0218edf9a8a78515a831..0000000000000000000000000000000000000000 Binary files a/src/MATLAB/gray.bmp and /dev/null differ diff --git a/src/MATLAB/grayscale.tif b/src/MATLAB/grayscale.tif deleted file mode 100644 index e4ff61a7e7cec6400c37a2f2ab0f6f1b818ce43a..0000000000000000000000000000000000000000 Binary files a/src/MATLAB/grayscale.tif and /dev/null differ diff --git a/src/MATLAB/left2right.tif b/src/MATLAB/left2right.tif deleted file mode 100644 index 87882fec0a38bdedd3912a9b5aeba4a466afc0a8..0000000000000000000000000000000000000000 Binary files a/src/MATLAB/left2right.tif and /dev/null differ diff --git a/src/MATLAB/sandbox.m b/src/MATLAB/sandbox.m deleted file mode 100644 index 69ee3b3b4c329b5240e395c223ab1ff938149a07..0000000000000000000000000000000000000000 --- a/src/MATLAB/sandbox.m +++ /dev/null @@ -1 +0,0 @@ -im = tiffReader('DAPI Olig2-514 GFAP-488 Dcx-647 Laminin-Cy3 Bcatenin-568 20x1 TopLeft'); \ No newline at end of file diff --git a/src/MATLAB/seg.bmp b/src/MATLAB/seg.bmp deleted file mode 100644 index 81ceae19e7ac4b7525c0268901483fbff3ddf1ef..0000000000000000000000000000000000000000 Binary files a/src/MATLAB/seg.bmp and /dev/null differ diff --git a/src/MATLAB/showIm.m b/src/MATLAB/showIm.m deleted file mode 100644 index f9e6a1718909ae61c487d626315c41ee46b4100e..0000000000000000000000000000000000000000 --- a/src/MATLAB/showIm.m +++ /dev/null @@ -1,13 +0,0 @@ -%% print out image -function showIm(image,label) -figure -imagesc(max(image,[],3)) - -% set(gcf,'Units','normalized'); -% set(gcf,'Position',[0 0 1 1]); - -colormap gray - -title(label) -axis image -end diff --git a/src/MATLAB/template.m b/src/MATLAB/template.m deleted file mode 100644 index 
f39c38e1b97abdc03a5658fa7240aeddd9b19c4d..0000000000000000000000000000000000000000 --- a/src/MATLAB/template.m +++ /dev/null @@ -1,10 +0,0 @@ -function [cTime,mTime,kernelName] = (im,) - kernelName = ''; - cT = tic; - imC = HIP.(im,); - cTime = toc(cT); - - mT = tic; - imM = HIP.(im,,true); - mTime = toc(mT); -end diff --git a/src/MATLAB/testChunking.m b/src/MATLAB/testChunking.m deleted file mode 100644 index fda873f6950b853d93d846b5ff3d28fc142bdcff..0000000000000000000000000000000000000000 --- a/src/MATLAB/testChunking.m +++ /dev/null @@ -1,18 +0,0 @@ -figure - -plot(0,0,'.w'); -ax = gca; -hold on - -for i=1:length(imChunks) - curChunk = imChunks(i); - - Utils.PlotBox(ax,Utils.SwapXY_RC(curChunk.ImageStart_rc),Utils.SwapXY_RC(curChunk.ImageEnd_rc),'-b',num2str(i)); - Utils.PlotBox(ax,Utils.SwapXY_RC(curChunk.ImageROIstart_rc),Utils.SwapXY_RC(curChunk.ImageROIend_rc),'--r'); - Utils.PlotBox(ax,... - Utils.SwapXY_RC(curChunk.ImageStart_rc) + Utils.SwapXY_RC(curChunk.ChunkROIstart_rc) - 1,... - Utils.SwapXY_RC(curChunk.ImageStart_rc) + Utils.SwapXY_RC(curChunk.ChunkROIstart_rc) -1 + (Utils.SwapXY_RC(curChunk.ChunkROIend_rc)-Utils.SwapXY_RC(curChunk.ChunkROIstart_rc)-1),... 
- ':g'); -end -axis ij -axis equal diff --git a/src/MATLAB/top2bottom.tif b/src/MATLAB/top2bottom.tif deleted file mode 100644 index 4ecd432bbf0134b4eaefb61209fc6cf1df488a3d..0000000000000000000000000000000000000000 Binary files a/src/MATLAB/top2bottom.tif and /dev/null differ diff --git a/src/Python/HIP.pyd b/src/Python/HIP.pyd index 82c45814b25402b15f5de69047786e735bf0ab4b..e224a365536bf5d06412c4c88e40337d48f20c64 100644 Binary files a/src/Python/HIP.pyd and b/src/Python/HIP.pyd differ diff --git a/src/Python/HIP.so b/src/Python/HIP.so new file mode 100644 index 0000000000000000000000000000000000000000..232d3f81623e0c8d826911ca2516e5ed8fc00a20 --- /dev/null +++ b/src/Python/HIP.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985c6897659e283bc9a0a95fbb58a1df4ae025f3b04bce2b2a5454fa1d3246ab +size 20752712 diff --git a/src/c/Cuda/CudaLoG.cuh b/src/c/Cuda/CudaLoG.cuh index e9f42084512df63ede4ba135e5675ee1655e27cc..446e417e95ff14d6201a89825e0dc67dd85f2d14 100644 --- a/src/c/Cuda/CudaLoG.cuh +++ b/src/c/Cuda/CudaLoG.cuh @@ -70,6 +70,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, ImageView<float> imageOut, Vec<double> { if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer())) std::runtime_error("Error sending ROI to device!"); + cudaMultiplySumBias<<<chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constLoGKernelMem_x, MIN_VAL, MAX_VAL, constGausKernelMem_x, true); deviceImages.incrementBuffer(); if (sigmas.y!=0) @@ -91,6 +92,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, ImageView<float> imageOut, Vec<double> { if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer())) std::runtime_error("Error sending ROI to device!"); + if (sigmas.x!=0) { cudaMultiplySum << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constGausKernelMem_x, MIN_VAL, MAX_VAL); @@ -112,6 +114,7 @@ void cLoG(ImageView<PixelTypeIn> imageIn, 
ImageView<float> imageOut, Vec<double> { if (!chunks[i].sendROI(imageIn, deviceImages.getCurBuffer())) std::runtime_error("Error sending ROI to device!"); + if (sigmas.x!=0) { cudaMultiplySum << <chunks[i].blocks, chunks[i].threads >> > (*(deviceImages.getCurBuffer()), *(deviceImages.getNextBuffer()), constGausKernelMem_x, MIN_VAL, MAX_VAL); diff --git a/src/c/Cuda/LoGKernel.cpp b/src/c/Cuda/LoGKernel.cpp index 402d6f7a4bb2c45d8388ceb1cc3b98b033346e5a..0a8d70c4451d40966ec5afa697f6ed1f21da4d53 100644 --- a/src/c/Cuda/LoGKernel.cpp +++ b/src/c/Cuda/LoGKernel.cpp @@ -17,8 +17,6 @@ float* createLoG_GausKernels(Vec<double> sigmas, Vec<std::size_t>& dimsOut) float* kernelOut = new float[dimsOut.sum()*2]; Vec<double> sigmaSqr = sigmas.pwr(2); - Vec<double> oneOverSigSqr = Vec<double>(1.0) / sigmaSqr; - Vec<double> twoSigmaSqr = sigmaSqr * 2; Vec<double> sigmaForth = sigmas.pwr(4); int loGstride = dimsOut.sum(); @@ -43,11 +41,17 @@ float* createLoG_GausKernels(Vec<double> sigmas, Vec<std::size_t>& dimsOut) { double pos = j - mid.e[i]; double posSqr = SQR(pos); - double gauss = exp(-(posSqr / twoSigmaSqr.e[i])); - double logVal = (posSqr / sigmaForth.e[i] - oneOverSigSqr.e[i])*gauss; - kernelOut[j + stride] = (float)logVal; - kernelOut[j + stride + loGstride] = gauss; - gaussSum += gauss; + + double gaussVal = exp(-(posSqr / (2.0 * sigmaSqr.e[i]))); + double logVal = (posSqr / sigmaForth.e[i] - 1.0 / sigmaSqr.e[i]) * gaussVal; + + // Multiply by sigma^2 to get a scale-invariant LoG + double scaleInvVal = sigmaSqr.e[i] * logVal; + + gaussSum += gaussVal; + + kernelOut[j + stride] = scaleInvVal; + kernelOut[j + stride + loGstride] = gaussVal; } double sumVal = 0.0; diff --git a/src/c/CudaImageProcessor.vcxproj b/src/c/CudaImageProcessor.vcxproj index 6d4cbf9a073f9fc310dadf726efecd4903640ba5..1d9413abe5cde18f5434a353550c522f11af639e 100644 --- a/src/c/CudaImageProcessor.vcxproj +++ b/src/c/CudaImageProcessor.vcxproj @@ -15,13 +15,13 @@ 
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion> <Keyword>ManagedCProj</Keyword> <RootNamespace>CudaImageProcessor</RootNamespace> - <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion> + <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <ConfigurationType>StaticLibrary</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v141</PlatformToolset> + <PlatformToolset>v142</PlatformToolset> <CLRSupport>false</CLRSupport> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> @@ -33,16 +33,16 @@ <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> - <ImportGroup Label="ExtensionSettings"> - <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.1.props" /> - </ImportGroup> + <ImportGroup Label="ExtensionSettings" /> <ImportGroup Label="Shared"> </ImportGroup> <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="CudaVersionImport.props" /> </ImportGroup> <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="CudaVersionImport.props" /> </ImportGroup> <PropertyGroup Label="UserMacros" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> @@ -201,7 +201,5 @@ </ClInclude> </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> - <ImportGroup 
Label="ExtensionTargets"> - <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.1.targets" /> - </ImportGroup> + <ImportGroup Label="ExtensionTargets" /> </Project> \ No newline at end of file diff --git a/src/c/CudaMex.def b/src/c/CudaMex.def new file mode 100644 index 0000000000000000000000000000000000000000..d4263ef131dc28048c4bbeee67348c04251f7987 --- /dev/null +++ b/src/c/CudaMex.def @@ -0,0 +1,3 @@ +LIBRARY "CudaMex" +EXPORTS DllMain +EXPORTS mexFunction diff --git a/src/c/CudaMex.vcxproj b/src/c/CudaMex.vcxproj index 9af21842f528edd05fe708bf1523f20d9de543bb..dc38a993d85202492c3d963c6570316d0684ff1e 100644 --- a/src/c/CudaMex.vcxproj +++ b/src/c/CudaMex.vcxproj @@ -13,13 +13,13 @@ <PropertyGroup Label="Globals"> <ProjectGuid>{6698E8EC-49D9-421E-AA87-5BCC6B466347}</ProjectGuid> <RootNamespace>CudaMex</RootNamespace> - <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion> + <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v141</PlatformToolset> + <PlatformToolset>v142</PlatformToolset> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> @@ -62,11 +62,10 @@ <GenerateDebugInformation>true</GenerateDebugInformation> <AdditionalLibraryDirectories>$(MATLAB_DIR)\extern\lib\win64\microsoft;$(SolutionDir)Output\CudaImageProcessor\$(Configuration)_$(PlatformName)\</AdditionalLibraryDirectories> 
<AdditionalDependencies>CudaImageProcessor_d.lib;libmx.lib;libmex.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> - <ModuleDefinitionFile> - </ModuleDefinitionFile> + <ModuleDefinitionFile>CudaMex.def</ModuleDefinitionFile> </Link> <PostBuildEvent> - <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64" + <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64" copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command> </PostBuildEvent> </ItemDefinitionGroup> @@ -78,7 +77,7 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command> <IntrinsicFunctions>true</IntrinsicFunctions> <SDLCheck>true</SDLCheck> <AdditionalIncludeDirectories>.;$(MATLAB_DIR)\extern\include;external</AdditionalIncludeDirectories> - <PreprocessorDefinitions>MEX_BUILD;_WINDLL;USE_PROCESS_MUTEX;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <PreprocessorDefinitions>MEX_BUILD;_WINDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessToFile>false</PreprocessToFile> </ClCompile> <Link> @@ -87,11 +86,10 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command> <OptimizeReferences>true</OptimizeReferences> <AdditionalLibraryDirectories>$(MATLAB_DIR)\extern\lib\win64\microsoft;$(SolutionDir)Output\CudaImageProcessor\$(Configuration)_$(PlatformName)\</AdditionalLibraryDirectories> <AdditionalDependencies>CudaImageProcessor.lib;libmx.lib;libmex.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> - <ModuleDefinitionFile> - </ModuleDefinitionFile> + <ModuleDefinitionFile>CudaMex.def</ModuleDefinitionFile> </Link> <PostBuildEvent> - <Command>echo copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64" + <Command>echo 
copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64" copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command> </PostBuildEvent> </ItemDefinitionGroup> @@ -145,6 +143,7 @@ copy $(OutDir)CudaMex.dll "$(ProjectDir)Mex.mexw64"</Command> <ClCompile Include="Mex\CudaMex.cpp" /> <ClCompile Include="Mex\MexCommandModule.cpp" /> <ClCompile Include="ScriptCmds\ScopedProcessMutex.cpp" /> + <None Include="CudaMex.def" /> <None Include="Mex\_TemplateMex.cpp" /> </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> diff --git a/src/c/CudaMex.vcxproj.filters b/src/c/CudaMex.vcxproj.filters index a85fad3d387da8198668899ea4463d9b827a3066..c2c47e840223cd7deffa8271faff20c01b8c16de 100644 --- a/src/c/CudaMex.vcxproj.filters +++ b/src/c/CudaMex.vcxproj.filters @@ -172,5 +172,8 @@ <None Include="Mex\_TemplateMex.cpp"> <Filter>Source Files</Filter> </None> + <None Include="CudaMex.def"> + <Filter>Resource Files</Filter> + </None> </ItemGroup> -</Project> +</Project> \ No newline at end of file diff --git a/src/c/CudaPy3DLL.vcxproj b/src/c/CudaPy3DLL.vcxproj index d1fb77e7d418063d65e5544060ccef899f0c0b0e..0cec9a3502681994bc9e927db8fc78a5c3dda309 100644 --- a/src/c/CudaPy3DLL.vcxproj +++ b/src/c/CudaPy3DLL.vcxproj @@ -73,26 +73,26 @@ <ProjectGuid>{0957901A-E67A-40C2-9EF5-76DF8EFBC2D5}</ProjectGuid> <Keyword>Win32Proj</Keyword> <RootNamespace>CudaPy3DLL</RootNamespace> - <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion> + <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v141</PlatformToolset> + <PlatformToolset>v142</PlatformToolset> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <PropertyGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> <UseDebugLibraries>false</UseDebugLibraries> - <PlatformToolset>v141</PlatformToolset> + <PlatformToolset>v142</PlatformToolset> <WholeProgramOptimization>true</WholeProgramOptimization> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v141</PlatformToolset> + <PlatformToolset>v142</PlatformToolset> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> diff --git a/src/c/CudaVersionImport.props b/src/c/CudaVersionImport.props new file mode 100644 index 0000000000000000000000000000000000000000..e658a62be7e4935caca77ac39126ec1a8140a2a0 --- /dev/null +++ b/src/c/CudaVersionImport.props @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ImportGroup Label="ExtensionSettings"> + <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.2.props" /> + </ImportGroup> + + <ImportGroup Label="ExtensionTargets"> + <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.2.targets" /> + </ImportGroup> +</Project> diff --git a/src/c/HIP.so b/src/c/HIP.so index 9bdf86723856c8a86b29125d01dcaba438706359..0f6e163f05cdc77f78567a7dfb688593f8ba92e7 100755 Binary files a/src/c/HIP.so and b/src/c/HIP.so differ diff --git a/src/c/Mex.mexa64 b/src/c/Mex.mexa64 index 3def436ab95e30defbeb9c51ddb6dc741df5a108..e588bd360d85259d9ab919d7900d1e3f5e53717d 100755 Binary files a/src/c/Mex.mexa64 and b/src/c/Mex.mexa64 differ diff --git a/src/c/Mex.mexw64 b/src/c/Mex.mexw64 index 
626808d43340b64892cc3774464bf4a30e0384cd..b658d2b05218d51b0a95623eba479b62c8fa9988 100644 --- a/src/c/Mex.mexw64 +++ b/src/c/Mex.mexw64 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0395df76896d9b773162832d127eca3bfdd7599a2c4e954dec2376e0ee349a34 -size 12214272 +oid sha256:75d48bac093946934c4cd46fbbe2aedd703f447d1ec8e08752c118eb23528e07 +size 12387840 diff --git a/src/c/ScriptCmds/ScopedProcessMutex.cpp b/src/c/ScriptCmds/ScopedProcessMutex.cpp index d7e5bf382803b8e45dd2ade49d51f0c35e58fdfa..732e28848bcd2e398372642c072293408560cd7d 100644 --- a/src/c/ScriptCmds/ScopedProcessMutex.cpp +++ b/src/c/ScriptCmds/ScopedProcessMutex.cpp @@ -1,22 +1,86 @@ #include "ScopedProcessMutex.h" #include <stdexcept> +#include <memory> -#ifdef USE_WINDOWS_IPC_MUTEX -#include <windows.h> +// Helpers for getting user ID +std::string getProcessUser(); -#undef min -#undef max +#if defined(_WIN32) +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <Windows.h> +#include <sddl.h> -HANDLE ScopedProcessMutex::mutexHandle = NULL; +#undef WIN32_LEAN_AND_MEAN +#undef NOMINMAX + +struct LocalFreeFunc { inline void operator() (HLOCAL* ptr) { LocalFree((HLOCAL)ptr); } }; +template <typename T> using LocalUnique = std::unique_ptr<T, LocalFreeFunc>; + +std::string getProcessUser() +{ + HANDLE hToken; + + HANDLE hProc = GetCurrentProcess(); + if ( !OpenProcessToken(hProc, TOKEN_QUERY, &hToken) ) + return "unk"; + + DWORD dwSize = sizeof(TOKEN_USER); + TOKEN_USER tu = { 0 }; + + if ( !GetTokenInformation(hToken, TokenUser, &tu, dwSize, &dwSize) ) + return "unk"; + + char* strSID = nullptr; + if ( !ConvertSidToStringSidA(&tu.User.Sid, &strSID) ) + return "unk"; + + std::string outStr(strSID); + LocalFree(strSID); + + return outStr; +} + +#elif defined(__linux__) +#include <unistd.h> +#include <sys/types.h> + +std::string getProcessUser() +{ + uid_t uid = geteuid(); + + const int MAXLEN = 10; + char uidStr[MAXLEN+1]; + snprintf(uidStr, MAXLEN, "%d", uid); + + return uidStr; 
+} + +#endif + + +#if defined(USE_WINDOWS_IPC_MUTEX) +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <Windows.h> + +#undef WIN32_LEAN_AND_MEAN +#undef NOMINMAX + + +HANDLE ScopedProcessMutex::mutexHandle = nullptr; ScopedProcessMutex::ScopedProcessMutex(const char* name) { if ( !mutexHandle ) { - mutexHandle = CreateMutex(NULL, false, name); + // Postfix a unique user-id to the mutex name + std::string mtx_name = name + getProcessUser(); + + mutexHandle = CreateMutex(NULL, false, mtx_name.c_str()); if ( !mutexHandle && GetLastError() == ERROR_ACCESS_DENIED ) - mutexHandle = OpenMutex(SYNCHRONIZE, false, name); + mutexHandle = OpenMutex(SYNCHRONIZE, false, mtx_name.c_str()); if ( !mutexHandle ) throw std::runtime_error("Error creating mutex handle!"); @@ -28,11 +92,10 @@ ScopedProcessMutex::ScopedProcessMutex(const char* name) mutexHandle = NULL; throw std::runtime_error("Error unable to acquire mutex!"); } + // MW - Treat a previous crash as ok since the GPU is likely to recover + // from process crashes at the driver level (no longer throw error) else if ( waitResult == WAIT_ABANDONED ) - { - mutexHandle = NULL; - throw std::runtime_error("Previous thread terminated without releasing mutex!"); - } + {} } ScopedProcessMutex::~ScopedProcessMutex() @@ -41,12 +104,290 @@ ScopedProcessMutex::~ScopedProcessMutex() ReleaseMutex(mutexHandle); } -#else +#elif defined(USE_PTHREADS_ROBUST_MUTEX) +#include <errno.h> +#include <fcntl.h> +#include <linux/limits.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <unistd.h> +#include <stdlib.h> + +#include <thread> +#include <atomic> + +#if (ATOMIC_INT_LOCK_FREE != 2) + #error USE_PTHREADS_ROBUST_MUTEX implementation requires always lock-free atomic int type +#endif + +struct ScopedProcessMutex::PThreadMutex +{ + enum SharedMutexState + { + Uninitialized = 0, + Initializing = 1, + Valid = 2, + }; + + struct SharedMemMutex + { + std::atomic_int state; + pthread_mutex_t pthread_mutex; + }; 
+ + int shm_fd; + SharedMemMutex* sharedMem; + std::string shm_name; + + PThreadMutex(const char* name) + : shm_fd(-1), sharedMem(nullptr), shm_name(name) + { + try_create_mutex(); + } + + ~PThreadMutex() + { + cleanup(); + } + + static void force_unlink(const char* name) + { + shm_unlink(name); + } + + void lock() + { + int err = pthread_mutex_lock(&sharedMem->pthread_mutex); + if ( err == EOWNERDEAD ) + err = pthread_mutex_consistent(&sharedMem->pthread_mutex); + else if ( err != 0 ) + throw std::runtime_error("Error unable to acquire mutex!"); + } + + void unlock() + { + int err = pthread_mutex_unlock(&sharedMem->pthread_mutex); + if (err != 0) + throw std::runtime_error("Error unable to release mutex!"); + } + +private: + void try_create_mutex() + { + errno = 0; + + // Try to create shared memory-mapping + shm_fd = shm_open(shm_name.c_str(), O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR); + if (shm_fd < 0) + { + if ( errno == EEXIST ) + { + try_open_mutex(); + return; + } + else + throw std::runtime_error("Error unable to create shared memory"); + } + + int err = ftruncate(shm_fd, sizeof(SharedMemMutex)); + if (err) + { + err_create_cleanup(shm_name.c_str()); + throw std::runtime_error("Error unable to resize shared memory for mutex"); + } + + void* mapPtr = mmap(nullptr, sizeof(SharedMemMutex), PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd, 0); + if (mapPtr == MAP_FAILED) + { + err_create_cleanup(shm_name.c_str()); + throw std::runtime_error("Failed to map shared memory for mutex"); + } + + sharedMem = (SharedMemMutex*) mapPtr; + sharedMem->state.store(SharedMutexState::Initializing, std::memory_order_seq_cst); + ////// Guard other processes from using mutex until it's initialized + std::unique_ptr<pthread_mutexattr_t, int(*)(pthread_mutexattr_t*)> mtxAttr(new pthread_mutexattr_t(),pthread_mutexattr_destroy); + + err = pthread_mutexattr_init(mtxAttr.get()); + if ( err ) + { + err_create_cleanup(shm_name.c_str()); + throw std::runtime_error("Error failed to initialize 
mutex attribute");
+		}
+
+		err = pthread_mutexattr_setpshared(mtxAttr.get(), PTHREAD_PROCESS_SHARED);
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to set mutex shared");
+		}
+
+		err = pthread_mutexattr_setrobust(mtxAttr.get(), PTHREAD_MUTEX_ROBUST);
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to set mutex robust");
+		}
+
+		err = pthread_mutex_init(&sharedMem->pthread_mutex, mtxAttr.get());
+		if ( err )
+		{
+			err_create_cleanup(shm_name.c_str());
+			throw std::runtime_error("Error failed to initialize mutex");
+		}
+		//////
+		sharedMem->state.store(SharedMutexState::Valid, std::memory_order_seq_cst);
+	}
+
+	// Open the named shared-memory object (no O_CREAT), map it and wait for it to become usable.
+	// Polls up to chkLimit times (10ms apart) for a non-zero size, then again for state == Valid.
+	// Throws std::runtime_error on open/stat/map failure or timeout; local resources are released first.
+	void try_open_mutex()
+	{
+		shm_fd = shm_open(shm_name.c_str(), O_RDWR, S_IRUSR|S_IWUSR);
+		if (shm_fd < 0)
+			throw std::runtime_error("Error unable to open shared memory");
+
+		struct stat shm_stat;
+
+		const int chkLimit = 100;
+		for (int i = 0; i < chkLimit; ++i)
+		{
+			// Wait for the shared-mem to be properly resized (ftruncate)
+			int err = fstat(shm_fd, &shm_stat);
+			if (err)
+			{
+				cleanup();
+				throw std::runtime_error("Error unable to stat shared memory");
+			}
+
+			if (shm_stat.st_size > 0)
+				break;
+
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		}
+
+		// Timeout failure
+		if (shm_stat.st_size == 0)
+		{
+			cleanup();
+			throw std::runtime_error("Error timeout waiting for shared memory init");
+		}
+
+		void* mapPtr = mmap(nullptr, sizeof(SharedMemMutex), PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd, 0);
+		if (mapPtr == MAP_FAILED)
+		{
+			cleanup();
+			throw std::runtime_error("Failed to map shared memory for mutex");
+		}
+
+		sharedMem = (SharedMemMutex*)mapPtr;
+
+		// NOTE: sharedMem is already valid but mutex may not have been properly initialized yet
+		int chkState = SharedMutexState::Uninitialized;
+		for (int i = 0; i < chkLimit; ++i)
+		{
+			// Wait for the mutex to be properly initialized
+			chkState = sharedMem->state.load();
+			if (chkState == SharedMutexState::Valid)
+				break;
+
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		}
+
+		// Timeout failure
+		if (chkState != SharedMutexState::Valid)
+		{
+			cleanup();
+			throw std::runtime_error("Error timeout waiting for mutex init");
+		}
+	}
+
+	// Cleanup helpers
+	inline void err_create_cleanup(const char* name)
+	{
+		// Cleanup if creating-process errors
+		safe_destroy_mutex();
+		safe_unmap_mem();
+		safe_close_shm();
+		force_unlink(name);
+	}
+
+	// Normal cleanup: release only this process's mapping and descriptor (nothing is unlinked)
+	inline void cleanup()
+	{
+		safe_unmap_mem();
+		safe_close_shm();
+	}
+
+	// Destroy the pthread mutex only if this call flips the state flag Valid -> Uninitialized
+	inline void safe_destroy_mutex()
+	{
+		if (sharedMem)
+		{
+			int chkValid = SharedMutexState::Valid;
+			// Single-shot CAS must be _strong: a _weak CAS may fail spuriously and skip the destroy
+			if (sharedMem->state.compare_exchange_strong(chkValid, SharedMutexState::Uninitialized))
+			{
+				pthread_mutex_destroy(&sharedMem->pthread_mutex);
+			}
+		}
+	}
+
+	// Unmap the shared segment from this process (if mapped) and clear the pointer
+	inline void safe_unmap_mem()
+	{
+		if (sharedMem)
+		{
+			munmap((void*)sharedMem, sizeof(SharedMemMutex));
+			sharedMem = nullptr;
+		}
+	}
+
+	inline void safe_close_shm()
+	{
+		// NOTE: Unlike safe_unlink_shm, this just closes the file descriptor
+		// it will not invalidate the shared memory for other processes
+		if (shm_fd >= 0)
+		{
+			close(shm_fd);
+			shm_fd = -1;
+		}
+	}
+};
+
+thread_local std::unique_ptr<ScopedProcessMutex::PThreadMutex> ScopedProcessMutex::procMutex = nullptr;
+
+// Lazily construct this thread's PThreadMutex (procMutex is thread_local), then lock it
+ScopedProcessMutex::ScopedProcessMutex(const char* name)
+{
+	if ( !procMutex)
+	{
+		// Postfix a unique user-id to the mutex name
+		std::string mtx_name = std::string("/") + name + getProcessUser();
+		procMutex = std::unique_ptr<PThreadMutex>(new PThreadMutex(mtx_name.c_str()));
+	}
+
+	if ( !procMutex )
+		throw std::runtime_error("Error unable to open/create shared mutex!");
+
+	procMutex->lock();
+}
+
+// Only unlocks; procMutex stays alive for later ScopedProcessMutex instances on this thread
+ScopedProcessMutex::~ScopedProcessMutex()
+{
+	if (procMutex)
+		procMutex->unlock();
+}
+
+// Unlink the shared mutex using the same "/" + name + user-id key the constructor builds
+void ScopedProcessMutex::remove(const char* name)
+{
+	std::string mtxName = std::string("/") + name + getProcessUser();
+	PThreadMutex::force_unlink(mtxName.c_str());
+}
+
+
+#elif defined(USE_BOOST_IPC_MUTEX)
 
 using boost::interprocess::named_mutex;
 using boost::interprocess::open_or_create;
 
 ScopedProcessMutex::ScopedProcessMutex(const char* name)
-	: ipc_mutex(open_or_create, name)
+	: ipc_mutex(open_or_create, std::string(name + getProcessUser()).c_str())
 {
 	ipc_mutex.lock();
 }
@@ -56,4 +406,11 @@ ScopedProcessMutex::~ScopedProcessMutex()
 	ipc_mutex.unlock();
 }
 
+// Remove the named boost mutex, keyed by name + user-id exactly as in the constructor
+void ScopedProcessMutex::remove(const char* name)
+{
+	std::string mtxName = name + getProcessUser();
+	named_mutex::remove(mtxName.c_str());
+}
+
 #endif
diff --git a/src/c/ScriptCmds/ScopedProcessMutex.h b/src/c/ScriptCmds/ScopedProcessMutex.h
index f30d315ec615d6c16da96d479570fbd7e7485f81..fb55b2230e66db80f712c3a7143471d86450622f 100644
--- a/src/c/ScriptCmds/ScopedProcessMutex.h
+++ b/src/c/ScriptCmds/ScopedProcessMutex.h
@@ -3,17 +3,24 @@
 #ifdef USE_PROCESS_MUTEX
 #define SCOPED_PROCESS_MUTEX(Name) ScopedProcessMutex Name##_mutex(#Name)
 #else
-#pragma message "hi friend: no process mutex"
+#pragma message ("HIP (leverjs) :: no process mutex in use")
 #define SCOPED_PROCESS_MUTEX(Name)
 #endif
 
+// Select the IPC mutex backend: Win32 handle, pthreads robust mutex (Linux), otherwise boost named_mutex
 #ifdef _WIN32
 	#define USE_WINDOWS_IPC_MUTEX (1)
+#elif defined(__linux__)
+	#define USE_PTHREADS_ROBUST_MUTEX (1)
+#else
+	#define USE_BOOST_IPC_MUTEX (1)
 #endif
 
-#ifndef USE_WINDOWS_IPC_MUTEX
-	#define BOOST_DATE_TIME_NO_LIB (1)
-	#include "boost/interprocess/sync/named_mutex.hpp"
+#if defined(USE_PTHREADS_ROBUST_MUTEX)
+	#include <memory>
+#elif defined(USE_BOOST_IPC_MUTEX)
+	#define BOOST_DATE_TIME_NO_LIB (1)
+	#include "boost/interprocess/sync/named_mutex.hpp"
 #endif
 
 class ScopedProcessMutex
@@ -32,10 +39,16 @@ public:
 
 	~ScopedProcessMutex();
 
+	// Allow force release of mutex resource (cross-process removal)
+	static void remove(const char* name);
+
 private:
-#ifdef USE_WINDOWS_IPC_MUTEX
+#if defined(USE_WINDOWS_IPC_MUTEX)
 	static void* mutexHandle;
+#elif defined(USE_PTHREADS_ROBUST_MUTEX)
+	struct PThreadMutex;
+	static thread_local std::unique_ptr<PThreadMutex> procMutex;
 #else
 	boost::interprocess::named_mutex ipc_mutex;
 #endif
diff --git a/src/c/ScriptCmds/ScriptCommandImpl.h b/src/c/ScriptCmds/ScriptCommandImpl.h
index 29059902cd9e0217c724e621b2d222c711dd0a4e..4752cb377b7cb8c6fda3c1e54b38a82a02599d2c 100644
--- a/src/c/ScriptCmds/ScriptCommandImpl.h
+++ b/src/c/ScriptCmds/ScriptCommandImpl.h
@@ -185,7 +185,7 @@ private:
 	{
 		// Use a scoped process-level mutex to run only a single GPU kernel at a time
 		// TODO: Figure out a scheduling system multi-process HIP calls
-		SCOPED_PROCESS_MUTEX(hip_cmd_gpu);
+		SCOPED_PROCESS_MUTEX(hip_cmd_gpu_);
 
 		static_assert(ArgConverter::has_deferred_image_inputs(),
 			"HIP_COMPILE: Argument layout has no dynamic image inputs. Please overload default ::execute() function!");
diff --git a/src/c/ScriptCmds/ScriptHelpers.h b/src/c/ScriptCmds/ScriptHelpers.h
index e8c3a233a1e66f8726875d1ff16a068209ac5b9e..b2de32b34a06624aab935a2c0f82413a6453b30e 100644
--- a/src/c/ScriptCmds/ScriptHelpers.h
+++ b/src/c/ScriptCmds/ScriptHelpers.h
@@ -10,7 +10,7 @@
 #include <string>
 #include <memory>
 #include <algorithm>
-
+#include <stdexcept>
 
 #define BEGIN_TYPE_MAP(EnumType,ScriptEngine) \
 	typedef EnumType IdType; \
diff --git a/src/c/makefile b/src/c/makefile
index db3b80cc54d1e7b550dbf6c4bead1daf6065360b..bf141265e9e92cf310d92f0b8d5b04edee621eea 100755
--- a/src/c/makefile
+++ b/src/c/makefile
@@ -8,6 +8,9 @@ PYTHON3_INC = /usr/include/$(PYTHON_VER)
 NUMPY3_INC = /usr/include/$(PYTHON_VER)
 PYTHON_LIB = /usr/lib/$(PYTHON_VER)/config-3.5m-x86_64-linux-gnu
 
+# Include directory for cuda toolkit
+NVCC_INC = /usr/include
+
 #########################################
 # Default files and include dirs
 #########################################
@@ -27,7 +30,7 @@ PY_CPP_OBJ = $(addprefix $(OBJ_DIR)/,$(notdir $(PY_CPP_FILES:.cpp=.o)))
 SCRPY_CPP_OBJ = $(addprefix $(OBJ_DIR)/Py,$(notdir $(SCR_CPP_FILES:.cpp=.o)))
 SCRMEX_CPP_OBJ = $(addprefix $(OBJ_DIR)/Mx,$(notdir $(SCR_CPP_FILES:.cpp=.o)))
 
-CUDA_INC = -I. -I./Cuda -I/usr/local/cuda/include
+CUDA_INC = -I. -I./Cuda -I$(NVCC_INC)
 MEX_INC = -I. -I./Mex -I./ScriptCmds -I./external -I$(MATALAB_DIR)/extern/include
 PY_INC = -I. -I./Python -I./ScriptCmds -I./external -I$(PYTHON3_INC) -I$(NUMPY3_INC)
 
@@ -52,7 +55,6 @@ LD_FLAGS = --no-undefined
 NVCC_PATH = nvcc
 NVCC_FLAGS = $(CPP_FLAGS)
 SMODEL = -arch=sm_30
-NVCC_INC = -I/usr/include
 
 ifndef COMP
 COMP=clang
@@ -100,7 +102,7 @@ Mex.mexa64: $(CUDA_OBJ) $(MEX_CPP_OBJ) $(SCRMEX_CPP_OBJ) $(CUDA_CPP_OBJ)
 #########################################
 $(OBJ_DIR)/%.o: Cuda/%.cu
 	@mkdir -p $(@D)
-	$(NVCC_PATH) $(NVCC_FLAGS) $(CUDA_INC) $(SMODEL) --compiler-options '$(C_FLAGS)' -c $< -o $@ $(NVCC_INC) -dc
+	$(NVCC_PATH) $(NVCC_FLAGS) $(CUDA_INC) $(SMODEL) --compiler-options '$(C_FLAGS)' -c $< -o $@ -dc
 
 $(OBJ_DIR)/%.o: Cuda/%.cpp
 	@mkdir -p $(@D)
diff --git a/tdrDelay.reg b/tdrDelay.reg
deleted file mode 100644
index 5f44d4aeaa8808b332652108661df5dad5bab0c1..0000000000000000000000000000000000000000
Binary files a/tdrDelay.reg and /dev/null differ