Commit 2d3057d9 authored by Andrew Cohen
Browse files

wikipedia/amazon and more

parent 6a3c8525
......@@ -19,6 +19,7 @@ cc=strcmp(QueryCache.Queries,term);
if ~isempty(cc) && any(cc)
idx=find(cc);
f=QueryCache.Count(idx);
f=f-1;
nCacheCount=nCacheCount+1;
return
end
......@@ -30,6 +31,9 @@ elseif strcmp(strServer,'amazon')
else
f=doQueryWikipedia(term);
end
% f=max(f,1); % no zero counts - sends nwd to NaN
f=f+1;
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
......
......@@ -33,7 +33,7 @@ for i=1:length(words)
end
d = rgNWDx-rgNWD;
idx=find(isnan(d));
idx=find(isnan(d) | isinf(d));
d(idx)=inf;
[mm idx]=min(d(:,i));
fprintf('%d,',idx);
......
......@@ -18,13 +18,28 @@ for i=1:length(words)
end
N=GetCount({'the'});
% N=GetCount({'the'});
% N=11e6 ;
if bNWDmin
nwd = (log2(min(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % from xx.pdf
else
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv
end
% normalize for cardinality
N=4776196; % wikipedia!
% N=25672211*1e3; % amazon
% if bNWDmin
% nwd = (log2(min(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % from xx.pdf
% else
% nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv
% end
% nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fexclude))); % III.3 from arxiv
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw)));
%
% % normalize for cardinality
nwd = nwd/(length(words)-1);
% nwd=(log2(max(fw))-log2(fX))/( log2(N)-log2(max(fexclude)) );
% ww = length(words);
% renorm = log2(N/ww)/log2(ww);
% % renorm = log2( (1-1/ww) *N ) / log2(N/ww);
% nwd = nwd / renorm;
4;
......@@ -19,6 +19,7 @@ end
d(length(words),length(words))=0;
d(find(isinf(d)))=1.2;
d(find(isnan(d)))=1.2;
d=Regularize(d);
[kGap Gap S idx] = GapSpectral(d,6,1);
fprintf('kGap=%d\n',kGap);
......@@ -40,7 +41,7 @@ for i=1:size(idxPerms,1)
end
[nCorrect iBest]=max(rgCorrect);
idxWrong = rgWrong(iBest);
idx=idxCluster;
fid=fopen('results.txt','a');
fprintf(fid,'\n\n-------------------------\n')
fprintf(fid,'GetDistances::(pairs) \n');
......
......@@ -9,13 +9,15 @@ global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
nobels2014={'Isamu Akasaki', 'Hiroshi Amano', 'Shuji Nakamura','Eric Betzig', 'Stefan W. Hell', 'William E. Moerner'}
nobels2014={'Isamu Akasaki', 'Hiroshi Amano', 'Shuji Nakamura','Eric Betzig', 'Stefan W. Hell', 'William E. Moerner','John O''Keefe', 'May-Britt Moser', 'Edvard I. Moser' }
physics={'Albert Einstein','Isaac Newton','Stephen Hawking','Nils Bohr','James Maxwell'}
chemistry={'Amedeo Avogadro','Louis Pasteur','Linus Pauling','Robert Boyle'}
medicine={'Elizabeth Blackwell','William Harvey','Carl Jung','Richard Lister'}
idxKey = [ones(1,length(nobels2014)) 2*ones(1,length(physics)) 2*ones(1,length(chemistry))...
2*ones(1,length(medicine))]
idxKey = [ones(1,length(nobels2014)) 2*ones(1,length(physics)) 2*ones(1,length(chemistry))]
[d]=GetDistances([nobels2014,physics,chemistry,medicine],idxKey);
[d]=GetDistances([nobels2014,physics,chemistry],idxKey);
idx=getDistancesPairs([nobels2014,physics,chemistry],idxKey);
idx=getDistancesPairs([nobels2014,physics,chemistry,medicine],idxKey)
......@@ -9,7 +9,7 @@ global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
words1={'Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
words1={'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
words2={'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann',} %physicists
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment