Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
OpenSource
NWD
Commits
270e30c5
Commit
270e30c5
authored
May 19, 2020
by
Andrew Cohen
Browse files
results formatting
parent
722e8884
Changes
5
Hide whitespace changes
Inline
Side-by-side
+Distance/SpectralCluster.m
View file @
270e30c5
...
...
@@ -8,7 +8,6 @@ end
for i=1:size(A,1)
D(i,i)=sum(A(i,:));
end
L=D^(-.5)*A*D^(-.5);
L=Regularize(L); % remove the miniscule asymmetry from L
...
...
+Distance/getDistancesPairs.m
View file @
270e30c5
...
...
@@ -17,6 +17,8 @@ d(length(words),length(words))=0;
d
(
find
(
isinf
(
d
)))
=
1.2
;
d
(
find
(
isnan
(
d
)))
=
1.2
;
d
=
Regularize
(
d
);
d
=
d
-
min
(
d
(:));
d
=
Regularize
(
d
);
nClasses
=
max
(
idxKey
);
idxCluster
=
Distance
.
SpectralCluster
(
d
,
nClasses
);
...
...
NWD.m
View file @
270e30c5
...
...
@@ -12,8 +12,8 @@ fexclude=[];
for
i
=
1
:
length
(
words
)
fw
(
i
)
=
GetCount
(
words
(
i
),
strServer
);
end
N
=
getN
(
'reddit'
);
N
=
getN
(
strServer
);
nwd
=
(
log2
(
max
(
fw
))
-
log2
(
fX
))
/
(
log2
(
N
)
-
log2
(
min
(
fw
)));
%
% % normalize for cardinality ?
...
...
@@ -37,7 +37,7 @@ for i=1:length(maxN)
% nwd = 1 = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
cn
(
i
)
=
2
^
((
log2
(
max
(
fw
))
-
log2
(
fX
))
+
log2
(
min
(
fw
)));
end
N
=
median
(
cn
);
N
=
1e6
*
max
(
cn
);
function
f
=
GetCount
(
words
,
strServer
)
...
...
@@ -58,7 +58,7 @@ if strcmp(strServer,'pubmed')
elseif
strcmp
(
strServer
,
'amazon'
)
f
=
doQueryAmazon
(
term
);
elseif
strcmp
(
strServer
,
'google'
)
f
=
GetCountGoogle
(
term
);
f
=
Count
.
GetCountGoogle
(
term
);
elseif
strcmp
(
strServer
,
'reddit'
)
f
=
Count
.
reddit
(
term
);
else
...
...
formatResults.m
View file @
270e30c5
% formatResults
fprintf
(
1
,
'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy\n'
);
fprintf
(
1
,
'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy
] (min search term count)
\n'
);
for
iExperiment
=
1
:
length
(
exp
)
fprintf
(
'\n'
)
fprintf
(
1
,
'%s\n'
,
exp
(
iExperiment
)
.
label
);
expCount
=
[
min
(
cc
(
iExperiment
)
.
wikipedia
),
min
(
cc
(
iExperiment
)
.
reddit
),
min
(
cc
(
iExperiment
)
.
pubmed
)];
nWords
=
sum
(
cellfun
(
@
length
,
exp
(
iExperiment
)
.
words
));
for
iServer
=
1
:
length
(
servers
)
fprintf
(
1
,
' %s:[%3.2f,%3.2f,%3.2f]\t '
,
servers
{
iServer
},
results
{
iExperiment
,
iServer
}
.
/
nWords
);
fprintf
(
1
,
' %s:[%3.2f,%3.2f,%3.2f]
(%d)
\t '
,
servers
{
iServer
},
results
{
iExperiment
,
iServer
}
.
/
nWords
,
expCount
(
iServer
)
);
end
fprintf
(
'\n'
)
end
\ No newline at end of file
runExamples.m
View file @
270e30c5
warning
(
'off'
,
'MATLAB:urlread:ReplacingSpaces'
)
servers
=
{
'wikipedia'
,
'reddit'
,
'pubmed'
};
exp
(
1
)
.
words
=
{
...
...
@@ -34,7 +34,7 @@ exp(3).words={
exp
(
3
)
.
label
=
'scientists 2'
;
%colors animals
exp
(
4
)
.
words
=
{
exp
(
4
)
.
words
=
{
{
'red'
,
'orange'
,
'yellow'
,
'green'
,
'blue'
,
'indigo'
,
'violet'
}
{
'lion'
,
'tiger'
,
'bear'
,
'monkey'
,
'zebra'
,
'elephant'
,
'aardvark'
,
'lamb'
,
'fox'
,
'ape'
,
'dog'
}
};
...
...
@@ -47,8 +47,14 @@ exp(5).words={
exp
(
5
)
.
label
=
'pres / vp 2008 us election'
;
results
=
[];
cc
=
[];
for
iExperiment
=
1
:
length
(
exp
)
ww
=
[
exp
(
iExperiment
)
.
words
{:}];
for
i
=
1
:
length
(
ww
),
cc
(
iExperiment
)
.
wikipedia
(
i
)
=
Count
.
wikipedia
(
ww
{
i
});
end
for
i
=
1
:
length
(
ww
),
cc
(
iExperiment
)
.
reddit
(
i
)
=
Count
.
reddit
(
ww
{
i
});
end
for
i
=
1
:
length
(
ww
),
cc
(
iExperiment
)
.
pubmed
(
i
)
=
Count
.
pubmed
(
ww
{
i
});
end
words
=
exp
(
iExperiment
)
.
words
;
idxKey
=
[];
for
i
=
1
:
length
(
words
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment