Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
% /******************************************************************************
%
% This program, "NCDM", the associated MATLAB scripts and all
% provided data, are copyright (C) 2018 Andrew R. Cohen, All rights reserved.
%
% This program uses bzip2 compressor as a static library.
% A built version for windows 64 is included. for other platforms, see
% the files in the bzlib project on https://git-bioimage.coe.drexel.edu
%
% This software may be referenced as:
% A.R.Cohen and P.M.B. Vitanyi, Normalized Compression Distance of Multisets
% with Applications, IEEE Transactions on Pattern Analysis and Machine
% Intelligence. 2015 Aug;37(8):1602-14,
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions
% are met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. The origin of this software must not be misrepresented; you must
% not claim that you wrote the original software. If you use this
% software in a product, an acknowledgment in the product
% documentation would be appreciated but is not required.
%
% 3. Altered source versions must be plainly marked as such, and must
% not be misrepresented as being the original software.
%
% 4. The name of the author may not be used to endorse or promote
% products derived from this software without specific prior written
% permission.
%
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
% DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
% GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% Andrew R. Cohen andrew.r.cohen@drexel.edu
%
% ******************************************************************************/
% NCDM - the multiset NCD (implemented below as the function NCD).
% Returns the normalized compression distance for the multiset X.
% Each row of X is assumed to be a single member x of the multiset X.
function ncd = NCD(X)
% NCD  Normalized compression distance of the multiset X.
%
%   ncd = NCD(X) treats every row of X as one member x of the multiset and
%   evaluates
%
%       ncd = ( G(X) - min_x G(x) ) / max_x G(X \ {x})
%
%   where each G(.) value is whatever Press(data, []) returns for that data.
%   NOTE(review): Press is defined outside this file; it is presumably the
%   compressed size of its first argument - confirm against Press.
%
%   Input:
%       X   - matrix with one multiset member per row.
%   Output:
%       ncd - the distance value; 0 when X has fewer than two rows.

numMembers = size(X,1);

% With fewer than two rows there is nothing to compare: report it on
% stdout and return a distance of zero.
if numMembers < 2
    fprintf(1,'single element x of X : d=0\n');
    ncd = 0;
    return
end

% G(X): the whole multiset. X is passed transposed, presumably so that
% MATLAB's column-major storage lays the rows out one after another
% (TODO confirm against Press).
sizeAll = Press(X',[]);

% G(x) for every individual member (row) of X.
sizeEach = arrayfun(@(r) Press(X(r,:),[]), 1:numMembers);

% G(X \ {x}): the multiset with row r left out, for every r.
sizeWithout = arrayfun( ...
    @(r) Press(X([1:r-1, r+1:numMembers],:)',[]), 1:numMembers);

% Numerator uses the smallest single-member value; the denominator uses
% the largest leave-one-out value.
ncd = (sizeAll - min(sizeEach)) / max(sizeWithout);
end