Commit 270e30c5 authored by Andrew Cohen
Browse files

results formatting

parent 722e8884
......@@ -8,7 +8,6 @@ end
for i=1:size(A,1)
D(i,i)=sum(A(i,:));
end
L=D^(-.5)*A*D^(-.5);
L=Regularize(L); % remove the minuscule asymmetry from L
......
......@@ -17,6 +17,8 @@ d(length(words),length(words))=0;
d(find(isinf(d)))=1.2;
d(find(isnan(d)))=1.2;
d=Regularize(d);
d=d-min(d(:));
d=Regularize(d);
nClasses=max(idxKey);
idxCluster=Distance.SpectralCluster(d,nClasses);
......
......@@ -12,8 +12,8 @@ fexclude=[];
for i=1:length(words)
fw(i)=GetCount(words(i),strServer);
end
N=getN('reddit');
N=getN(strServer);
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
%
% % normalize for cardinality ?
......@@ -37,7 +37,7 @@ for i=1:length(maxN)
% nwd = 1 = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
cn(i)=2^((log2(max(fw))-log2(fX))+log2(min(fw)));
end
N=median(cn);
N=1e6*max(cn);
function f = GetCount(words,strServer)
......@@ -58,7 +58,7 @@ if strcmp(strServer,'pubmed')
elseif strcmp(strServer,'amazon')
f=doQueryAmazon(term);
elseif strcmp(strServer,'google')
f=GetCountGoogle(term);
f=Count.GetCountGoogle(term);
elseif strcmp(strServer,'reddit')
f=Count.reddit(term);
else
......
% formatResults
fprintf(1,'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy\n');
fprintf(1,'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy] (min search term count) \n');
for iExperiment=1:length(exp)
fprintf('\n')
fprintf(1,'%s\n',exp(iExperiment).label);
expCount=[min(cc(iExperiment).wikipedia),min(cc(iExperiment).reddit),min(cc(iExperiment).pubmed)];
nWords=sum(cellfun(@length,exp(iExperiment).words));
for iServer=1:length(servers)
fprintf(1,' %s:[%3.2f,%3.2f,%3.2f]\t ',servers{iServer},results{iExperiment,iServer}./nWords);
fprintf(1,' %s:[%3.2f,%3.2f,%3.2f] (%d)\t ',servers{iServer},results{iExperiment,iServer}./nWords,expCount(iServer));
end
fprintf('\n')
end
\ No newline at end of file
warning('off','MATLAB:urlread:ReplacingSpaces')
servers={'wikipedia','reddit','pubmed'};
exp(1).words= {
......@@ -34,7 +34,7 @@ exp(3).words={
exp(3).label='scientists 2';
%colors animals
exp(4).words={
exp(4).words={
{'red','orange','yellow','green','blue','indigo','violet'}
{'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
};
......@@ -47,8 +47,14 @@ exp(5).words={
exp(5).label='pres / vp 2008 us election';
results=[];
cc=[];
for iExperiment=1:length(exp)
ww=[exp(iExperiment).words{:}];
for i=1:length(ww),cc(iExperiment).wikipedia(i)=Count.wikipedia(ww{i});end
for i=1:length(ww),cc(iExperiment).reddit(i)=Count.reddit(ww{i});end
for i=1:length(ww),cc(iExperiment).pubmed(i)=Count.pubmed(ww{i});end
words=exp(iExperiment).words;
idxKey=[];
for i=1:length(words)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment