Commit 14a34e61 authored by Andrew Cohen
Browse files

unified server side handling - wikipedia, pubmed or amazon

parent 14e63f51
function f = GetCount(words)
global nQueryCount nCacheCount
global nQueryCount nCacheCount strServer
if isempty(strServer)
strServer='wikipedia'
end
term=[words{1}];
for i=2:length(words)
......@@ -19,46 +23,98 @@ if ~isempty(cc) && any(cc)
return
end
% http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=blue+OR+elephant+&fulltext=Search
% URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
% http://www.ncbi.nlm.nih.gov/gquery/?term=WHSC1%2CNSD1%2CASH1L%2CSETD2%2C
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
% URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
% URL = [ 'http://www.ncbi.nlm.nih.gov/protein/?term=' term];
for i=1:5
if strcmp(strServer,'pubmed')
f=doQueryPubmed(term);
elseif strcmp(strServer,'amazon')
f=doQueryAmazon(term);
else
f=doQueryWikipedia(term);
end
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
end
function f=doQueryWikipedia(term)
% Fetches the English Wikipedia full-text search page for term and tries to
% parse a result count out of the returned HTML into f.
% term is concatenated into the URL as-is.
% NOTE(review): as captured here, this body appears to interleave two
% versions of the parsing code (an Amazon-style segment followed by the
% Wikipedia segment) plus trailing cache statements - confirm against the
% actual file before relying on the comments below.
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
try
%amazon
% NOTE(review): this segment searches for the same 'results for <span class="'
% marker that doQueryAmazon uses; the nx it computes is overwritten further
% down before it is ever converted. If the marker is absent, idx(1) throws
% and control jumps to the catch branch.
idx=strfind(str,'results for <span class="');
idx=idx(1)-2;
idxStart=idx;
while(str(idxStart)~=' ')
idxStart=idxStart-1;
end
nx=str(idxStart:idx);
% Wikipedia parsing: keep only the page text from the first
% 'div class="results-info"' occurrence onwards.
idx=strfind(str,'div class="results-info"');
idx=idx(1);
str2=str(idx:end);
% WIKIPEDIA%
% idx=strfind(str,'div class="results-info"');
% idx=idx(1);
% str2=str(idx:end);
%
% idx2=strfind(str2,'of <b>');
% str3=str2(idx2+6:end);
%
% idx3=strfind(str3,'</b>');
% idx3=idx3(1);
%
% nx=str3(1:idx3-1);
% END WIKIPEDIA
% The count is taken as the text between 'of <b>' (6 characters long) and
% the next '</b>'.
idx2=strfind(str2,'of <b>');
str3=str2(idx2+6:end);
idx3=strfind(str3,'</b>');
idx3=idx3(1);
nx=str3(1:idx3-1);
f=str2double(nx);
% NOTE(review): no enclosing loop is visible inside this function for this
% break - TODO confirm whether a retry loop like the one in doQueryAmazon
% is meant to wrap the fetch/parse.
break;
catch
% Any indexing error above lands here: report 0 and wait one second.
f=0;
pause(1);
end
end
% NOTE(review): the three statements below repeat the bookkeeping that
% GetCount performs after calling the doQuery* helpers, and nQueryCount /
% QueryCache are not declared global in this function - presumably leftover
% lines; confirm.
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
function f=doQueryPubmed(term)
%DOQUERYPUBMED Result count reported by the NCBI gquery page for a term.
%   f = doQueryPubmed(term) fetches
%   http://www.ncbi.nlm.nih.gov/gquery/?term=<term> and returns the number
%   found between the first '<h2>About ' marker and the following
%   'search results for' text.
%
%   term  Character vector appended to the URL as-is.
%   f     Parsed count, or 0 when a marker is missing or the text between
%         the markers is not a number.
%
%   Errors raised by urlread (e.g. no network) still propagate to the
%   caller, as before.
URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
str = urlread(URL);
f=0;
try
    key = '<h2>About ';
    idxKey=strfind(str,key);
    if isempty(idxKey)
        % Marker missing: keep the 0 fallback instead of letting empty
        % indices flow through to str2double('') == NaN.
        return
    end
    % Use the first occurrence explicitly rather than relying on how the
    % colon operator treats a vector of matches.
    idx1=idxKey(1)+length(key);
    idx2=strfind(str(idx1:end), 'search results for');
    if isempty(idx2)
        return
    end
    % idx2 is relative to idx1; stop one character before 'search'.
    nx=str(idx1:idx1+idx2(1)-2);
    count=str2double(nx);
    % str2double signals a parse failure with NaN, not an error, so the
    % catch branch alone never sees it.
    if ~isnan(count)
        f=count;
    end
catch
    f=0;
end
end
function f=doQueryAmazon(term)
%DOQUERYAMAZON Result count reported by an Amazon search page for a term.
%   f = doQueryAmazon(term) fetches the Amazon search page for term and
%   returns the number that immediately precedes the text
%   'results for <span class="' in the returned HTML.
%
%   term  Character vector inserted into the URL as-is (twice: as
%         field-keywords and inside the rh parameter).
%   f     Parsed count, or 0 if every attempt fails.
%
%   The fetch and parse are attempted up to 5 times, pausing one second
%   between attempts. A failed download, a missing marker and an
%   unparseable number all count as a failed attempt.
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
maxAttempts=5;
f=0;
for attempt=1:maxAttempts
    try
        % urlread is inside the try so a transient network error is
        % retried instead of aborting the whole query.
        str = urlread(URL);
        marker=strfind(str,'results for <span class="');
        % Two characters before the marker is the last character of the
        % count; marker(1) throws (and triggers a retry) if it is absent.
        idxEnd=marker(1)-2;
        idxStart=idxEnd;
        % Walk backwards to the space that precedes the count.
        while(str(idxStart)~=' ')
            idxStart=idxStart-1;
        end
        count=str2double(str(idxStart:idxEnd));
        % str2double returns NaN rather than throwing; treat that as a
        % failed attempt so it is retried and never returned.
        if isnan(count)
            error('doQueryAmazon:parseFailed','Could not parse result count.');
        end
        f=count;
        break;
    catch
        f=0;
        % No point waiting after the final attempt.
        if attempt<maxAttempts
            pause(1);
        end
    end
end
end
......@@ -4,7 +4,7 @@ tic
%
%
% %
% words1={'Upsal', 'Carpenter','Allen Lane','St Martins','Highland','Chestnut Hill West'}
% words1={'Upsal','Carpenter','Allen Lane','St Martins','Highland','Chestnut Hill West'}
% words2={'Stenton','Sedgwick','Mount Airy','Wyndmoor','Gravers',' Chestnut Hill East'}
% words2={'Fox Chase','Ryers','Cheltenham','Lawndale','Olney'}
% words1={'148991' '327941' '195539' '704017' '260207' '988649'}
......@@ -12,28 +12,30 @@ tic
% words1={'29' '37' '59' '71' '97' '23' }
% words2={'88' '44' '12' '56' '6' '62'}
% words1={'Gerald Ford' 'Jimmy Carter' 'Ronald Reagan' 'George H. W. Bush' 'Bill Clinton' 'George W. Bush'}
% words1={'Gerald Ford' 'Jimmy Carter' 'Ronald Reagan' 'George H. W. Bush' 'Bill Clinton' 'George W. Bush'}
% words2={'Mitt Romney' 'John McCain' 'John Kerry' 'Al Gore' 'Bob Dole' 'Michael Dukakis'}
%
% Driver section: selects the search backend, then runs GetDistances and
% getDistancesPairs on several pairs of word lists. Statements here have no
% trailing semicolon, so the assigned values are echoed to the command window.
% NOTE(review): GetCount reads strServer through a `global` declaration, so
% this assignment only reaches it if this script also declares
% `global strServer` (not visible in this excerpt) - confirm.
% NOTE(review): several statements below appear twice in slightly different
% forms (e.g. words2, the GetDistances calls); the later one overwrites the
% earlier. This looks like two revisions captured together - confirm which
% lines belong in the file.
strServer='wikipedia'
% Colours vs. animals.
words1={'red','orange','yellow','green','blue','indigo'}
words2={'lion', 'tiger', 'bear', 'monkey', 'zebra', 'elephant'}
[d1, d2]=GetDistances(words1,words2);
words2={'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
% Colours vs. shapes.
words1={'red','orange','yellow','green','blue','indigo'}
words1={'red','orange','yellow','green','blue','indigo','violet','purple','tan','white'}
words2={'square','circle','rectangle','ellipse','triangle','rhombus'}
[d1, d2]=GetDistances(words1,words2);
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
% Two lists of politicians' names.
words1={'Barack Obama', 'Hillary Clinton','John Edwards','Joe Biden','Chris Dodd','Mike Gravel'}
words1={'Barack Obama','Hillary Clinton','John Edwards','Joe Biden','Chris Dodd','Mike Gravel'}
words2={'John McCain','Mitt Romney','Mike Huckabee','Ron Paul','Fred Thompson','Alan Keyes'}
[d1, d2]=GetDistances(words1,words2);
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
%
% words1={'Labidochromis caeruleus','Sciaenochromis fryeri','Betta splendens','Carassius auratus','Melanochromis cyaneorhabdos'}
% words2={'Ecsenius bicolor','Pictichromis paccagnellae','Amphiprion ocellaris ','Paracanthurus hepatus','Chromis viridis'}
% [d1, d2]=GetDistances(words1,words2);
% [d1,d2]=GetDistances(words1,words2);
% idx=getDistancesPairs(words1,words2);
% Report the elapsed time since the matching tic.
toc
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment