Commit e5d6b410 authored by Andrew Cohen's avatar Andrew Cohen
Browse files

new open source project

parents
#ignore thumbnails created by windows
Thumbs.db
#Ignore files built by Visual Studio
*.obj
*.exe
*.pdb
*.user
*.aps
*.pch
*.vspscc
*_i.c
*_p.c
*.ncb
*.suo
*.tlb
*.tlh
*.bak
*.cache
*.ilk
*.log
[Bb]in
[Dd]ebug*/
*.lib
*.sbr
obj/
[Rr]elease*/
_ReSharper*/
[Tt]est[Rr]esult*
*.asv
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
%
% A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
% "Automatic Summarization of Changes in Biological Image Sequences
% using Algorithmic Information Theory". IEEE Transactions on Pattern
% Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen acohen@coe.drexel.edu
% GapSpectral version 1.0 (release) November 2014
%
% ******************************************************************************/
function [kGap Gap S idx] = GapSpectral(DistanceMatrix,nMaxClusters,bAlgorithmicInformationDistance)
% GapSpectral  Choose a number of clusters with a gap statistic computed
% from spectral clusterings of a distance matrix.
%
% Inputs
%   DistanceMatrix  pairwise distance matrix (passed through Regularize,
%                   which indexes it as D(i,j) and D(j,i), so it is
%                   expected to be square)
%   nMaxClusters    largest number of clusters to evaluate
%   bAlgorithmicInformationDistance
%                   optional, defaults to 1. When true the gap is computed
%                   on the within-cluster dispersion W directly; when false
%                   it is computed on log(W).
%
% Outputs
%   kGap  selected number of clusters
%   Gap   1 x nMaxClusters gap value for each k
%   S     1 x nMaxClusters spread term, std over the reference set scaled
%         by sqrt(1+1/B)
%   idx   cluster label of every point for kGap clusters
%
% Side effect: opens a new figure and plots Gap with error bars S.
if nargin<3
    bAlgorithmicInformationDistance=1;
end
B = 50; % size of Monte Carlo distribution
nPoints = size(DistanceMatrix,1);
if nMaxClusters>nPoints
    % Historical clamp to N-1 is kept, but never below 1: with a 1x1
    % matrix the old code clamped to 0, skipped the loop entirely and then
    % failed on the undefined Gap/S.
    nMaxClusters = max(1, nPoints-1);
end
D = Regularize(DistanceMatrix);

% Range of the off-diagonal distances; the diagonal is masked with NaN so
% that the zeros written by Regularize do not become the minimum.
offDiag = D;
offDiag(logical(eye(size(offDiag)))) = NaN;
a = min(min(offDiag));
b = max(max(offDiag));

% B reference matrices drawn uniformly from [a,b]. Each one is turned into
% a valid distance matrix once, up front: this does not depend on k, so
% there is no reason to redo it for every k.
UV = a + (b-a)*rand(size (D,1),size (D,2),B); % uniform distribution
for ib = 1:B
    UV(:,:,ib) = Regularize(UV(:,:,ib));
end

% Preallocate so the outputs exist (and have a fixed shape) even when the
% loop body never runs.
W   = zeros(1,nMaxClusters);
Wb  = zeros(B,nMaxClusters);
Gap = zeros(1,nMaxClusters);
S   = zeros(1,nMaxClusters);

for k=1:nMaxClusters
    % dispersion of the observed data for k clusters
    if (1==k)
        % one happy cluster
        labels = ones(size (D,1),1);
    else
        labels = SpectralCluster(D,k);
    end
    W(k)=WkSpectral(k,labels,D);

    % dispersion of every reference matrix for k clusters; separate label
    % variables are used so the returned idx is never overwritten here
    for ib =1:B
        uni = UV(:,:,ib);
        if (1==k)
            refLabels = ones(size (D,1),1);
        else
            refLabels = SpectralCluster(uni,k);
        end
        Wb(ib,k)=WkSpectral(k,refLabels,uni);
    end

    Wkb = Wb(:,k);
    if bAlgorithmicInformationDistance
        Gap(k) = 1/B*sum(Wkb) - W(k);
        sdk = std(Wkb,1);
    else
        lkb = log(Wkb);
        Gap(k) = 1/B*sum(lkb) - log(W(k));
        sdk = std(lkb,1);
    end
    S(k)=sdk * sqrt(1+1/B);
end

figure
errorbar( [1:nMaxClusters],Gap,S)
set(gca,'XTick',[1:nMaxClusters])

% smallest k whose gap is not beaten by the next gap minus its spread
k=1;
while ((k<nMaxClusters) && (Gap(k) < Gap(k+1)-S(k+1)))
    k=k+1;
end
kGap=k;

% final labelling of the observed data for the selected k
if (kGap>1)
    idx = SpectralCluster(D,kGap);
else
    idx = ones(size (D,1),1);
end
function w=WkSpectral(k,idx,DistanceMatrix)
% WkSpectral  Pooled within-cluster dispersion; called by GapSpectral.
%
%   k               number of clusters (labels 1..k are examined)
%   idx             cluster label of every point, row or column vector
%   DistanceMatrix  pairwise distances, indexed by point number
%
%   w  sum over clusters of (sum of the upper triangle, diagonal included,
%      of the cluster's distance sub-matrix) / (2 * cluster size)
D = zeros(1,k);
for r =1:k
    % find points in cluster r
    pr = find(idx==r);
    % numel, not size(pr,1): find returns a row vector when idx is a row
    % vector, and size(pr,1) would then always be 1
    nr = numel(pr);
    if nr==0
        % an empty cluster contributes nothing (instead of 0/0 = NaN)
        continue;
    end
    within = DistanceMatrix(pr,pr);
    D(r) = sum(sum(triu(within)))/(2*nr); %(2) in paper
end
w = sum(D);
/******************************************************************************
This program, "NCDM", the associated MATLAB scripts and all
provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
This program uses bzip2 compressor as a static library.
A built version for windows 64 is included. for other platforms, see
the files in the bzlib project on https://git-bioimage.coe.drexel.edu
This software may be referenced as:
A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
"Automatic Summarization of Changes in Biological Image Sequences
using Algorithmic Information Theory". IEEE Transactions on Pattern
Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
3. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
4. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Andrew R. Cohen acohen@coe.drexel.edu
GapSpectral version 1.0 (release) November 2014
******************************************************************************/
\ No newline at end of file
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
%
% A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
% "Automatic Summarization of Changes in Biological Image Sequences
% using Algorithmic Information Theory". IEEE Transactions on Pattern
% Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen acohen@coe.drexel.edu
% GapSpectral version 1.0 (release) November 2014
%
% ******************************************************************************/
% NCD
% Returns the normalized compression distance between V1,V2
% this is the pairwise NCD
function ncd = NCD(V1,V2)
% NCD  Pairwise normalized compression distance between V1 and V2.
%   The three sizes come from Press: each input on its own, then both
%   together. The distance is
%       (joint size - smaller single size) / larger single size
sizeFirst  = Press(V1,[]);
sizeSecond = Press(V2,[]);
sizeJoint  = Press(V1, V2);

smallerSize = min(sizeFirst,sizeSecond);
largerSize  = max(sizeFirst,sizeSecond);
ncd = (sizeJoint-smallerSize)/largerSize;
end
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
%
% A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
% "Automatic Summarization of Changes in Biological Image Sequences
% using Algorithmic Information Theory". IEEE Transactions on Pattern
% Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen acohen@coe.drexel.edu
% GapSpectral version 1.0 (release) November 2014
%
% ******************************************************************************/
% Helper function called by NCD
% uses the bzip2 library (loaded from bz2dll.dll) to compress V1,V2
% Returns the size in bytes of the compressed data
%
function nBytes = Press(V1,V2)
% Press  Compressed size, in bytes, of the text form of [V1;V2].
%
%   V1, V2  numeric arrays or cell arrays (cells are flattened with
%           cell2mat). They are stacked vertically, so they must have
%           compatible column counts; pass [] as V2 to compress V1 alone.
%
%   nBytes  value left in the destination-length argument after the call
%           to BZ2_bzBuffToBuffCompress, as a double.
%
% On a non-zero return code a message and the code are printed and the
% function still returns; nBytes is then whatever the library left in the
% length argument and should not be trusted.

% load the library once per session under the alias 'lib'
if ~libisloaded('lib')
    loadlibrary bz2dll.dll bzlib.h alias lib
end
if iscell(V1)
    V1=cell2mat(V1);
end
if iscell(V2)
    V2=cell2mat(V2);
end
% the data is compressed in its textual (mat2str) representation
vv= mat2str([V1;V2]);
sz=length(vv(:));
% destination capacity reported to the library: input length plus slack
dsz=sz+1024;
% any character array of at least dsz characters serves as the buffer
dest= mat2str(zeros(dsz,1));
pdest=libpointer('cstring', dest);
pdsz = libpointer('uint32Ptr',dsz);
% NOTE(review): the trailing 9,0,30 are presumably blockSize100k,
% verbosity and workFactor - confirm against the prototype in bzlib.h
rval=calllib('lib', 'BZ2_bzBuffToBuffCompress', pdest,pdsz,vv,sz,9,0,30);
if rval
    disp 'ACK! bad return from bzip'
    rval
end
nBytes=double(get(pdsz, 'Value'));
end
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
%
% A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
% "Automatic Summarization of Changes in Biological Image Sequences
% using Algorithmic Information Theory". IEEE Transactions on Pattern
% Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen acohen@coe.drexel.edu
% GapSpectral version 1.0 (release) November 2014
%
% ******************************************************************************/
% make the distance matrix symmetric (with a zero diagonal), so that its
% eigenvalues and eigenvectors are all real. needed due to slight asymmetry
% in compression for NCD
% called by GapSpectral and SpectralCluster
%
% degenerate is two elements having zero distance - violates the identity
% axiom
function [D,degenerate] = Regularize(DistanceMatrix)
% Regularize  Symmetrise a distance matrix and zero its diagonal.
%
%   D           each off-diagonal entry is the larger of DistanceMatrix(i,j)
%               and DistanceMatrix(j,i); the diagonal is 0
%   degenerate  1 when the smallest off-diagonal entry of D is exactly 0,
%               otherwise 0

% element-wise maximum of the matrix and its transpose
D = max(DistanceMatrix, DistanceMatrix.');

onDiagonal = logical(eye(size(D)));
D(onDiagonal) = 0;

% mask the diagonal so its zeros are ignored by the minimum
offDiagonal = D;
offDiagonal(onDiagonal) = NaN;
smallest = min(min(offDiagonal));

degenerate = 0;
if 0==smallest
    degenerate = 1;
end
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2014 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
%
% A.R. Cohen, C. Bjornsson, S. Temple, G. Banker, and B. Roysam,
% "Automatic Summarization of Changes in Biological Image Sequences
% using Algorithmic Information Theory". IEEE Transactions on Pattern
% Analysis and Machine Intelligence, 2009. 31(8): p. 1386-1403.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen acohen@coe.drexel.edu
% GapSpectral version 1.0 (release) November 2014
%
% ******************************************************************************/
% SpectralCluster: partition the points described by distance matrix A into k clusters
function idx= SpectralCluster(A,k)
% SpectralCluster  Label the points of matrix A with k clusters.
%
%   A    pairwise matrix (GapSpectral passes a regularized distance matrix)
%   k    number of clusters; k == 1 returns all ones without clustering
%   idx  column vector of cluster labels
nReplicates = 5; % kmeans restarts

% trivial case: everything in one cluster
if (k == 1)
    idx = ones(size(A,2),1);
    return;
end

% diagonal matrix holding the row sums of A
nRows = size(A,1);
rowSums = zeros(nRows);
for r = 1:nRows
    rowSums(r,r) = sum(A(r,:));
end

% scale A on both sides by rowSums^(-1/2)
invRoot = rowSums^(-.5);
L = invRoot*A*invRoot;
L = Regularize(L); % remove the miniscule asymmetry from L

% embedding: last eigenvector returned by eig followed by the first k-1
[eVec, ~] = eig(L);
X = [eVec(:,end) eVec(:,1:k-1)];

% scale every row of the embedding to unit length
Y = zeros(size(X));
for r = 1:size(X,1)
    Y(r,:) = X(r,:)./norm(X(r,:));
end

idx = kmeans(Y,k,'emptyaction','singleton','replicates',nReplicates);
end
File added
/*-------------------------------------------------------------*/
/*--- Public header file for the library. ---*/
/*--- bzlib.h ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
#ifndef _BZLIB_H
#define _BZLIB_H
#ifdef __cplusplus
extern "C" {
#endif
#define BZ_RUN 0
#define BZ_FLUSH 1
#define BZ_FINISH 2
#define BZ_OK 0
#define BZ_RUN_OK 1
#define BZ_FLUSH_OK 2
#define BZ_FINISH_OK 3
#define BZ_STREAM_END 4
#define BZ_SEQUENCE_ERROR (-1)
#define BZ_PARAM_ERROR (-2)
#define BZ_MEM_ERROR (-3)
#define BZ_DATA_ERROR (-4)
#define BZ_DATA_ERROR_MAGIC (-5)
#define BZ_IO_ERROR (-6)
#define BZ_UNEXPECTED_EOF (-7)
#define BZ_OUTBUFF_FULL (-8)
#define BZ_CONFIG_ERROR (-9)
/* Stream state record of the library's public interface.
   NOTE(review): the field descriptions below are inferred from the field
   names and from libbzip2's public manual, not from code visible in this
   file - confirm against the documentation shipped with this version. */
typedef
struct {
/* input side: presumably the next input byte and how many are available */
char *next_in;
unsigned int avail_in;
/* presumably a running input byte count split into low/high 32-bit halves */
unsigned int total_in_lo32;
unsigned int total_in_hi32;
/* output side: presumably the next output position and the space left */
char *next_out;
unsigned int avail_out;
/* presumably a running output byte count split into low/high 32-bit halves */
unsigned int total_out_lo32;
unsigned int total_out_hi32;
/* opaque pointer; presumably the library's private per-stream state */
void *state;
/* optional allocator hooks; opaque is presumably handed back to them as
   their first argument - TODO confirm */
void *(*bzalloc)(void *,int,int);
void (*bzfree)(void *,void *);
void *opaque;
}
bz_stream;
#ifndef BZ_IMPORT
#define BZ_EXPORT
#endif
#ifndef BZ_NO_STDIO
/* Need a definition for FILE */
#include <stdio.h>
#endif
#ifdef _WIN32
# include <windows.h>
# ifdef small
/* windows.h define small to char */
# undef small
# endif
# ifdef BZ_EXPORT
# define BZ_API(func) WINAPI func
# define BZ_EXTERN extern
# else
/* import windows dll dynamically */
# define BZ_API(func) (WINAPI * func)
# define BZ_EXTERN
# endif
#else
# define BZ_API(func) func
# define BZ_EXTERN extern
#endif
/*-- Core (low-level) library functions --*/