Skip to content
Snippets Groups Projects
Commit b89695e0 authored by Andrew Cohen's avatar Andrew Cohen
Browse files

don't use cache if it's not setup. what to use for 'N'. Added google back in

parent 2d3057d9
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,7 @@ idx=strfind(term,' ');
term(idx)='+';
%check cache
global QueryCache
if ~isempty(QueryCache)
cc=strcmp(QueryCache.Queries,term);
if ~isempty(cc) && any(cc)
idx=find(cc);
......@@ -23,26 +24,31 @@ if ~isempty(cc) && any(cc)
nCacheCount=nCacheCount+1;
return
end
end
if strcmp(strServer,'pubmed')
f=doQueryPubmed(term);
elseif strcmp(strServer,'amazon')
f=doQueryAmazon(term);
elseif strcmp(strServer,'google')
f=GetCountGoogle(term);
else
f=doQueryWikipedia(term);
end
% f=max(f,1); % no zero counts - sends nwd to NaN
f=f+1;
if ~isempty(QueryCache)
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
end
end
function f=doQueryWikipedia(term)
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
URL=['https://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
try
idx=strfind(str,'div class="results-info"');
......
function f = GetCountGoogle(term)
% GETCOUNTGOOGLE  Scrape the reported number of hits for a search term.
%
%   f = GetCountGoogle(term) loads a search-results page for TERM in an
%   embedded 'Shell.Explorer.2' ActiveX browser control, waits until the
%   result-count marker shows up in the page body, and parses the number
%   found between that marker and the word 'results'.
%
%   Input
%     term  Character vector appended verbatim to the query URL. No URL
%           escaping is done here, so the caller must pass a URL-safe
%           string.
%
%   Output
%     f     Parsed hit count, or 0 when every attempt failed.
%
%   Each attempt is abandoned after maxWaitSec seconds if the marker never
%   appears, so a changed page layout or a block page can no longer hang
%   the function forever. Up to maxAttempts attempts are made.
%
%   An alternative is the Google Custom Search JSON API:
%     GET https://www.googleapis.com/customsearch/v1?key={INSERT-YOUR-KEY}&cx={ENGINE-ID}&q=lectures
%   (The API key is deliberately not recorded in this file; supply your own.)

    bBing       = 0;    % 0: scrape Google, 1: scrape Bing
    maxAttempts = 5;    % same retry limit as before
    maxWaitSec  = 60;   % upper bound on waiting for one page to show the marker

    f = 0;              % value returned when all attempts fail

    hBrowser = actxcontrol('Shell.Explorer.2',[0 0 1600 800]);
    % Release the control however the function exits (normal return, error,
    % or Ctrl-C while waiting).
    cleanupBrowser = onCleanup(@() delete(hBrowser)); %#ok<NASGU>

    nrep  = 0;
    bDone = 0;
    while ~bDone && (nrep < maxAttempts)
        try
            if ~bBing
                URL = ['https://www.google.com/#q=' term];
                q   = 'id="resultStats">About';
            else
                URL = ['http://www.bing.com/search?q=' term '&go=Submit+Query'];
                if nrep > 0
                    % Retries use a different query string than the first try.
                    URL = [URL '&first=11*ff&FORM=PERE2'];
                end
                q = '<span class="sb_count">';
            end

            hBrowser.Navigate(URL);
            if ~bBing
                % Fixed delay before polling the Google page; presumably gives
                % the results time to render -- TODO confirm it is still needed.
                pause(5);
            end

            % Poll until the document is complete and contains the marker,
            % giving up after maxWaitSec so the retry logic can take over.
            tWait = tic;
            while ( ~strcmp(hBrowser.readyState,'READYSTATE_COMPLETE') ...
                    || isempty(hBrowser.Document.body) ...
                    || isempty(strfind(hBrowser.Document.body.innerHTML,q)) )
                if toc(tWait) > maxWaitSec
                    error('GetCountGoogle:timeout', ...
                          'Marker "%s" not found within %g seconds.', q, maxWaitSec);
                end
                pause(.1)
            end

            % The count is the text between the marker and the first
            % following occurrence of 'results'.
            str = hBrowser.Document.body.innerHTML;
            idx = strfind(str,q);
            sx  = str(idx(1)+length(q):end);
            wx  = strfind(sx,'results');
            nx  = str2double(sx(1:wx(1)-1));
            if isnan(nx)
                % Treat an unparseable count as a failed attempt instead of
                % handing NaN back to the caller.
                error('GetCountGoogle:parse', ...
                      'Could not parse a result count from the page.');
            end

            f     = nx;
            bDone = 1;
        catch
            % Any failure (navigation, timeout, parse) counts as one attempt.
            pause(1);
            nrep = nrep + 1;
        end
    end
end
\ No newline at end of file
......@@ -18,10 +18,10 @@ for i=1:length(words)
end
% N=GetCount({'the'});
N=GetCount({'the'});
% N=11e6 ;
N=4776196; % wikipedia!
% N=4776196; % wikipedia!
% N=25672211*1e3; % amazon
% if bNWDmin
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment