Commit a3c1022e authored by Andrew Cohen
Browse files

updated with new folder structure

parent 0b81ac9c
......@@ -35,10 +35,15 @@ count=sum(cellfun(@str2double,{dparse.count}));
mapCache(term)=count;
function dd=query(term,creds)
% QUERY  Fetch the NCBI egquery response for a search term.
%   dd = query(term,creds) requests egquery.fcgi with TERM as the 'term'
%   parameter and creds.email / creds.appID as the 'email' / 'tool'
%   parameters.
%
%   Inputs:
%     term  - char vector, concatenated into the URL as-is (it is not
%             URL-encoded here, so it must already be URL-safe).
%     creds - struct with char fields 'email' and 'appID'.
%
%   Output:
%     dd    - whatever webread returns for the request, or [] if the
%             request throws for any reason.

% Timeout is set to Inf so a slow response is not cut off by the default
% webread timeout.
queryOptions=weboptions('Timeout',Inf);
queryStr=['http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi?term=' term ...
    '&email=' creds.email '&tool=' creds.appID];

% Issue exactly one request, inside the try block. A second, unguarded
% webread(queryStr) would double the traffic to the server and let a
% network error escape instead of being mapped to [].
try
    dd=webread(queryStr,queryOptions);
catch
    dd=[];
end
function [email,appID]=getPubmedCredentials()
email='you@yourServer.net';
appID='myInterestingAppName';
......
......@@ -72,8 +72,8 @@ if isempty(term) | strcmp(term,'')
return
end
queryStr=['https://oauth.reddit.com/search?q=' term '&sort=top&limit=100'];
queryOptions=weboptions('HeaderFields',{'Authorization',['bearer ' token.access_token]});
tic;dd=webread(queryStr,queryOptions);toc
queryOptions=weboptions('Timeout',Inf,'HeaderFields',{'Authorization',['bearer ' token.access_token]});
dd=webread(queryStr,queryOptions);
function dd=queryAll(token)
queryStr=['https://oauth.reddit.com/r/all/top/?t=all&limit=100'];
......
% reddit query
function count=wikipedia(term)
persistent mapCache
persistent mapCache timerVal
if isempty(mapCache)
mapCache=containers.Map();
......@@ -12,8 +12,18 @@ if mapCache.isKey(term)
return;
end
tThrottle=0.1; % 0.1 seconds max per query. picked this to be polite...
if ~isempty(timerVal)
elapsedTime=toc(timerVal);
deltaT=tThrottle-elapsedTime;
if deltaT>0
pause(deltaT);
end
end
URL=['https://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
timerVal=tic();
try
idx=strfind(str,'div class="results-info"');
idx=idx(1);
......
......@@ -17,11 +17,9 @@ d(length(words),length(words))=0;
d(find(isinf(d)))=1.2;
d(find(isnan(d)))=1.2;
d=Regularize(d);
[kGap Gap S idx] = GapSpectral(d,6,1);
fprintf('kGap=%d\n',kGap);
nClasses=max(idxKey);
idxCluster=SpectralCluster(d,nClasses);
idxCluster=Distance.SpectralCluster(d,nClasses);
idxPerms=perms(1:nClasses);
rgCorrect=[];
......
......@@ -39,3 +39,34 @@ for i=1:length(maxN)
end
N=median(cn);
function f = GetCount(words,strServer)
% GETCOUNT  Hit count (plus one) for a set of words on a search backend.
%   f = GetCount(words) joins the entries of WORDS with '+', replaces any
%   spaces with '+', and queries Count.wikipedia with the resulting term.
%   f = GetCount(words,strServer) selects the backend:
%       'pubmed'  -> Count.pubmed
%       'amazon'  -> doQueryAmazon
%       'google'  -> GetCountGoogle
%       'reddit'  -> Count.reddit
%       anything else -> Count.wikipedia
%
%   Inputs:
%     words     - cell array of char vectors, or a single char vector
%                 (treated as a one-element cell).
%     strServer - optional backend name, default 'wikipedia'.
%
%   Output:
%     f - the backend's count plus 1.
%
%   Raises GetCount:emptyWords when WORDS is empty.

if nargin<2
    strServer='wikipedia';   % terminated with ';' so the default is not echoed
end

% Accept a bare string as a single-word query.
if ischar(words)
    words={words};
end
if isempty(words)
    error('GetCount:emptyWords','GetCount requires at least one word.');
end

% Build the query term: words joined by '+', embedded spaces also become '+'.
term=strjoin(words(:).','+');
term=strrep(term,' ','+');

if strcmp(strServer,'pubmed')
    f=Count.pubmed(term);
elseif strcmp(strServer,'amazon')
    f=doQueryAmazon(term);
elseif strcmp(strServer,'google')
    f=GetCountGoogle(term);
elseif strcmp(strServer,'reddit')
    f=Count.reddit(term);
else
    f=Count.wikipedia(term);
end

% f=max(f,1); % no zero counts - sends nwd to NaN
% Add one so the returned count is never zero.
f=f+1;
% Driver script: pick one labelled word set, build the ground-truth class
% index for it, and run the three distance-based evaluations against one
% search backend.
%
% Exactly one 'words' assignment below should be active; the alternatives
% are kept commented out so they can be swapped in by hand.

% authors
% words= {
% {'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
% 'The Merchant of Venice','A Midsummer Nights Dream',...
% 'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }...
% {'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
% 'Firestarter','Cujo'}...
% {'Adventures of Huckleberry Finn','A Connecticut Yankee in King Arthurs Court','Life on the Mississippi'...
% 'Puddnhead Wilson'}...
% {'The Old Man and The Sea','The Sun Also Rises','For Whom the Bell Tolls','A Farewell To Arms'}...
% {'Anna Karenina','War and Peace','The Death of Ivan Ilyich'}...
% };
%
% famous
% words={
% {'Isamu Akasaki', 'Hiroshi Amano', 'Shuji Nakamura','Eric Betzig', 'Stefan W. Hell', 'William E. Moerner','John O''Keefe', 'May-Britt Moser', 'Edvard I. Moser' }
% {'Albert Einstein','Isaac Newton','Stephen Hawking','Nils Bohr','James Maxwell'}
% {'Amedeo Avogadro','Louis Pasteur','Linus Pauling','Robert Boyle'}
% {'Elizabeth Blackwell','William Harvey','Carl Jung','Richard Lister'}
% } ;
%
% scientists
% words={
% {'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
% {'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann',} %physicists
% {'Freud','Pavlov','Skinner','Jung'} %psychologists
% {'Turing','Lovelace','Knuth','Hopper'} % computer scientists
% {'Darwin','Lamarck','Linnaeus','Mendel'} % biology
% };
%
% colors animals
% words={
% {'red','orange','yellow','green','blue','indigo','violet'}
% {'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
% };

% Active word set: two groups of names. This was already the only
% assignment that took effect, because it overwrote the two before it.
words={
    {'Barack Obama','Hillary Clinton','John Edwards','Joe Biden','Chris Dodd','Mike Gravel'}
    {'John McCain','Mitt Romney','Mike Huckabee','Ron Paul','Fred Thompson','Alan Keyes'}
    };

% idxKey(k) is the index of the group that the k-th flattened word belongs to.
idxKey=[];
for i=1:length(words)
    idxKey=[idxKey,repmat(i,1,length(words{i}))];
end

% Backend and results are intentionally left unsuppressed so they print.
strServer='reddit'

% The evaluation functions are addressed through the Distance package.
[d,Confusion,nCorrect]=Distance.GetDistances([words{:}],idxKey,strServer)
[idx,nCorrect]=Distance.getDistancesPairs([words{:}],idxKey,strServer)
[idxNN,nCorrectNN]=Distance.getDistanceNearestNeighbor([words{:}],idxKey,strServer)
This diff is collapsed.
% Experiment runner: for every labelled word set and every search backend,
% run the three Distance evaluations and print/store the number of correct
% assignments reported by each.
%
% Outputs left in the workspace:
%   experiments - struct array with fields 'words' (cell of word groups)
%                 and 'label' (char).
%   results     - cell array, results{iExperiment,iServer} is
%                 [nCorrectMulti,nCorrectPairs,nCorrectNN].

servers={'wikipedia','reddit','pubmed'};

% Named 'experiments' rather than 'exp' so the builtin exp() function is not
% shadowed. It is reset explicitly so a re-run does not inherit stale entries.
experiments=struct('words',{},'label',{});

% novels, grouped by author
experiments(1).words= {
    {'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
    'The Merchant of Venice','A Midsummer Nights Dream',...
    'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }...
    {'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
    'Firestarter','Cujo'}...
    {'Adventures of Huckleberry Finn','A Connecticut Yankee in King Arthurs Court','Life on the Mississippi'...
    'Puddnhead Wilson'}...
    {'The Old Man and The Sea','The Sun Also Rises','For Whom the Bell Tolls','A Farewell To Arms'}...
    {'Anna Karenina','War and Peace','The Death of Ivan Ilyich'}...
    };
experiments(1).label='novels';

% famous
experiments(2).words={
    {'Isamu Akasaki', 'Hiroshi Amano', 'Shuji Nakamura','Eric Betzig', 'Stefan W. Hell', 'William E. Moerner','John O''Keefe', 'May-Britt Moser', 'Edvard I. Moser' }
    {'Albert Einstein','Isaac Newton','Stephen Hawking','Nils Bohr','James Maxwell'}
    {'Amedeo Avogadro','Louis Pasteur','Linus Pauling','Robert Boyle'}
    {'Elizabeth Blackwell','William Harvey','Carl Jung','Richard Lister'}
    };
experiments(2).label='scientists 1';

% scientists
experiments(3).words={
    {'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
    {'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann',} %physicists
    {'Freud','Pavlov','Skinner','Jung'} %psychologists
    {'Turing','Lovelace','Knuth','Hopper'} % computer scientists
    {'Darwin','Lamarck','Linnaeus','Mendel'} % biology
    };
experiments(3).label='scientists 2';

% colors animals
experiments(4).words={
    {'red','orange','yellow','green','blue','indigo','violet'}
    {'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
    };
experiments(4).label='colors v animals';

experiments(5).words={
    {'Barack Obama','Hillary Clinton','John Edwards','Joe Biden','Chris Dodd','Mike Gravel'}
    {'John McCain','Mitt Romney','Mike Huckabee','Ron Paul','Fred Thompson','Alan Keyes'}
    };
experiments(5).label='pres / vp 2008 us election';

% One cell per (experiment, server) pair. Allocated as a cell up front,
% because it is brace-indexed below.
results=cell(length(experiments),length(servers));

for iExperiment=1:length(experiments)
    words=experiments(iExperiment).words;

    % idxKey(k) is the index of the group that the k-th flattened word
    % belongs to.
    idxKey=[];
    for i=1:length(words)
        idxKey=[idxKey,repmat(i,1,length(words{i}))];
    end

    for iServer=1:length(servers)
        strServer=servers{iServer};
        [d,Confusion,nCorrectMulti]=Distance.GetDistances([words{:}],idxKey,strServer);
        [idx,nCorrectPairs]=Distance.getDistancesPairs([words{:}],idxKey,strServer);
        [idxNN,nCorrectNN]=Distance.getDistanceNearestNeighbor([words{:}],idxKey,strServer);
        resSet=[nCorrectMulti,nCorrectPairs,nCorrectNN];
        results{iExperiment,iServer}=resSet;
        fprintf(1,'%s: %s -> %s\n',experiments(iExperiment).label,strServer,mat2str(resSet));
    end
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment