Commit b89695e0 authored by Andrew Cohen's avatar Andrew Cohen
Browse files

don't use cache if it's not setup. what to use for 'N'. Added google back in

parent 2d3057d9
......@@ -15,34 +15,40 @@ idx=strfind(term,' ');
term(idx)='+';
%check cache
global QueryCache
cc=strcmp(QueryCache.Queries,term);
if ~isempty(cc) && any(cc)
idx=find(cc);
f=QueryCache.Count(idx);
f=f-1;
nCacheCount=nCacheCount+1;
return
if ~isempty(QueryCache)
cc=strcmp(QueryCache.Queries,term);
if ~isempty(cc) && any(cc)
idx=find(cc);
f=QueryCache.Count(idx);
f=f-1;
nCacheCount=nCacheCount+1;
return
end
end
if strcmp(strServer,'pubmed')
f=doQueryPubmed(term);
elseif strcmp(strServer,'amazon')
f=doQueryAmazon(term);
elseif strcmp(strServer,'google')
f=GetCountGoogle(term);
else
f=doQueryWikipedia(term);
end
% f=max(f,1); % no zero counts - sends nwd to NaN
f=f+1;
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
if ~isempty(QueryCache)
nQueryCount=nQueryCount+1;
QueryCache.Queries=[QueryCache.Queries {term}];
QueryCache.Count=[QueryCache.Count f];
end
end
function f=doQueryWikipedia(term)
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
URL=['https://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
try
idx=strfind(str,'div class="results-info"');
......
function f = GetCountGoogle(term)
% GetCountGoogle  Scrape the reported result count for a web search.
%
%   f = GetCountGoogle(term) loads the search-results page for TERM in an
%   embedded 'Shell.Explorer.2' ActiveX browser control, waits for the
%   result-count marker to appear in the page HTML, and parses the number
%   that follows it.
%
%   Input:
%     term - query text, concatenated verbatim into the URL (no escaping
%            is performed here; NOTE(review): callers appear to replace
%            spaces with '+' beforehand - confirm).
%
%   Output:
%     f    - parsed result count, or 0 if no attempt succeeded.
%
%   Each attempt that errors, times out, or yields an unparseable count is
%   retried after a 1 s pause, up to maxAttempts attempts in total.

% Declared by the original code but not referenced in this function; kept
% so the global workspace is affected exactly as before.
global nQueryCount nCacheCount %#ok<NUSED>

bBing = 0;          % 0: scrape google.com, 1: scrape bing.com
maxAttempts = 5;    % total number of page-load attempts
maxWaitSec = 30;    % upper bound on waiting for one page to show the marker

% Alternative (disabled): Google Custom Search JSON API.
% GET https://www.googleapis.com/customsearch/v1?key={INSERT-YOUR-KEY}&cx={INSERT-YOUR-CX}&q=lectures
% (A literal API key that used to sit in this comment was removed; do not
% commit credentials to source control.)

nrep = 0;
bDone = 0;
hBrowser = actxcontrol('Shell.Explorer.2',[0 0 1600 800]);
% Release the browser control however this function exits (normal return,
% unexpected error, or Ctrl-C), not only on the success path.
cleanupBrowser = onCleanup(@() delete(hBrowser)); %#ok<NASGU>
f = 0;
while ~bDone && (nrep < maxAttempts)
    try
        bDone = 1;
        if ~bBing
            URL = ['https://www.google.com/#q=' term];
            q = 'id="resultStats">About';
        else
            URL = ['http://www.bing.com/search?q=' term '&go=Submit+Query'];
            if nrep > 0
                URL = [URL '&first=11*ff&FORM=PERE2'];
            end
            q = '<span class="sb_count">';
        end
        hBrowser.Navigate(URL);
        if ~bBing
            pause(5);
        end
        % Poll until the document is loaded and contains the marker q.
        % Bounded by maxWaitSec: without a limit, a page that never shows
        % the marker would block here forever and the retry logic below
        % would never run.
        tStart = tic;
        while ( ~strcmp(hBrowser.readyState,'READYSTATE_COMPLETE') || isempty(hBrowser.Document.body) ...
                || isempty(strfind(hBrowser.Document.body.innerHTML,q)) )
            if toc(tStart) > maxWaitSec
                error('GetCountGoogle:timeout', ...
                    'Result-count marker not found within %g seconds.', maxWaitSec);
            end
            pause(.1)
        end
        str = hBrowser.Document.body.innerHTML;
        idx = strfind(str,q);
        idx = idx(1);
        % Text after the marker up to the first occurrence of 'results'
        % is expected to hold the number.
        sx = str(idx+length(q):end);
        wx = strfind(sx,'results');
        nx = str2double(sx(1:wx(1)-1));
        if isnan(nx)
            % str2double returns NaN for text it cannot parse; treat that
            % as a failed attempt instead of returning NaN as a count.
            error('GetCountGoogle:parse', 'Could not parse a result count.');
        end
        f = nx;
    catch
        % Any failure in this attempt: wait briefly and try again.
        bDone = 0;
        pause(1);
        nrep = nrep + 1;
    end
end
end
\ No newline at end of file
......@@ -18,10 +18,10 @@ for i=1:length(words)
end
% N=GetCount({'the'});
N=GetCount({'the'});
% N=11e6 ;
N=4776196; % wikipedia!
% N=4776196; % wikipedia!
% N=25672211*1e3; % amazon
% if bNWDmin
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment