Commit 270e30c5 authored by Andrew Cohen
Browse files

results formatting

parent 722e8884
...@@ -8,7 +8,6 @@ end ...@@ -8,7 +8,6 @@ end
for i=1:size(A,1) for i=1:size(A,1)
D(i,i)=sum(A(i,:)); D(i,i)=sum(A(i,:));
end end
L=D^(-.5)*A*D^(-.5); L=D^(-.5)*A*D^(-.5);
L=Regularize(L); % remove the minuscule asymmetry from L L=Regularize(L); % remove the minuscule asymmetry from L
......
...@@ -17,6 +17,8 @@ d(length(words),length(words))=0; ...@@ -17,6 +17,8 @@ d(length(words),length(words))=0;
d(find(isinf(d)))=1.2; d(find(isinf(d)))=1.2;
d(find(isnan(d)))=1.2; d(find(isnan(d)))=1.2;
d=Regularize(d); d=Regularize(d);
d=d-min(d(:));
d=Regularize(d);
nClasses=max(idxKey); nClasses=max(idxKey);
idxCluster=Distance.SpectralCluster(d,nClasses); idxCluster=Distance.SpectralCluster(d,nClasses);
......
...@@ -12,8 +12,8 @@ fexclude=[]; ...@@ -12,8 +12,8 @@ fexclude=[];
for i=1:length(words) for i=1:length(words)
fw(i)=GetCount(words(i),strServer); fw(i)=GetCount(words(i),strServer);
end end
N=getN('reddit'); N=getN(strServer);
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
% %
% % normalize for cardinality ? % % normalize for cardinality ?
...@@ -37,7 +37,7 @@ for i=1:length(maxN) ...@@ -37,7 +37,7 @@ for i=1:length(maxN)
% nwd = 1 = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % nwd = 1 = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
cn(i)=2^((log2(max(fw))-log2(fX))+log2(min(fw))); cn(i)=2^((log2(max(fw))-log2(fX))+log2(min(fw)));
end end
N=median(cn); N=1e6*max(cn);
function f = GetCount(words,strServer) function f = GetCount(words,strServer)
...@@ -58,7 +58,7 @@ if strcmp(strServer,'pubmed') ...@@ -58,7 +58,7 @@ if strcmp(strServer,'pubmed')
elseif strcmp(strServer,'amazon') elseif strcmp(strServer,'amazon')
f=doQueryAmazon(term); f=doQueryAmazon(term);
elseif strcmp(strServer,'google') elseif strcmp(strServer,'google')
f=GetCountGoogle(term); f=Count.GetCountGoogle(term);
elseif strcmp(strServer,'reddit') elseif strcmp(strServer,'reddit')
f=Count.reddit(term); f=Count.reddit(term);
else else
......
% formatResults % formatResults
fprintf(1,'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy\n'); fprintf(1,'[multiset NWD accuracy, spectral (pairwise NWD) accuracy, nearest neighbor (pairwise NWD) accuracy] (min search term count) \n');
for iExperiment=1:length(exp) for iExperiment=1:length(exp)
fprintf('\n') fprintf('\n')
fprintf(1,'%s\n',exp(iExperiment).label); fprintf(1,'%s\n',exp(iExperiment).label);
expCount=[min(cc(iExperiment).wikipedia),min(cc(iExperiment).reddit),min(cc(iExperiment).pubmed)];
nWords=sum(cellfun(@length,exp(iExperiment).words)); nWords=sum(cellfun(@length,exp(iExperiment).words));
for iServer=1:length(servers) for iServer=1:length(servers)
fprintf(1,' %s:[%3.2f,%3.2f,%3.2f]\t ',servers{iServer},results{iExperiment,iServer}./nWords); fprintf(1,' %s:[%3.2f,%3.2f,%3.2f] (%d)\t ',servers{iServer},results{iExperiment,iServer}./nWords,expCount(iServer));
end end
fprintf('\n') fprintf('\n')
end end
\ No newline at end of file
warning('off','MATLAB:urlread:ReplacingSpaces')
servers={'wikipedia','reddit','pubmed'}; servers={'wikipedia','reddit','pubmed'};
exp(1).words= { exp(1).words= {
...@@ -34,7 +34,7 @@ exp(3).words={ ...@@ -34,7 +34,7 @@ exp(3).words={
exp(3).label='scientists 2'; exp(3).label='scientists 2';
%colors animals %colors animals
exp(4).words={ exp(4).words={
{'red','orange','yellow','green','blue','indigo','violet'} {'red','orange','yellow','green','blue','indigo','violet'}
{'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'} {'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
}; };
...@@ -47,8 +47,14 @@ exp(5).words={ ...@@ -47,8 +47,14 @@ exp(5).words={
exp(5).label='pres / vp 2008 us election'; exp(5).label='pres / vp 2008 us election';
results=[]; results=[];
cc=[];
for iExperiment=1:length(exp) for iExperiment=1:length(exp)
ww=[exp(iExperiment).words{:}];
for i=1:length(ww),cc(iExperiment).wikipedia(i)=Count.wikipedia(ww{i});end
for i=1:length(ww),cc(iExperiment).reddit(i)=Count.reddit(ww{i});end
for i=1:length(ww),cc(iExperiment).pubmed(i)=Count.pubmed(ww{i});end
words=exp(iExperiment).words; words=exp(iExperiment).words;
idxKey=[]; idxKey=[];
for i=1:length(words) for i=1:length(words)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment