Commit 118b0276 authored by Andrew Cohen
Browse files

n class support

parent ee894148
function f = GetCount(words)
% GETCOUNT  Result count for a list of search words on the selected server.
%   f = GetCount(words) joins the cell array WORDS with '+', turns any
%   spaces inside the words into '+', and returns the number of results
%   reported for that query.
%
%   Globals used:
%     strServer   - backend selector: 'pubmed', 'amazon', anything else is
%                   treated as Wikipedia. Set to 'wikipedia' when empty.
%     QueryCache  - struct with fields Queries (cell of terms) and Count
%                   (matching counts); consulted before any web request.
%     nQueryCount - incremented for every query sent to a server.
%     nCacheCount - incremented for every query answered from the cache.
global nQueryCount nCacheCount strServer QueryCache
if isempty(strServer)
    strServer = 'wikipedia';   % ';' keeps the default from being echoed
end
% A global that was never assigned is [], and []+1 stays [], so give the
% counters and the cache a usable starting value.
if isempty(nQueryCount)
    nQueryCount = 0;
end
if isempty(nCacheCount)
    nCacheCount = 0;
end
if isempty(QueryCache)
    QueryCache = struct('Queries', {{}}, 'Count', []);
end

% Build the '+'-separated query term.
term = [words{1}];
for i = 2:length(words)
    term = [term '+' words{i}];
end
idx = strfind(term, ' ');
term(idx) = '+';

% check cache
cc = strcmp(QueryCache.Queries, term);
if ~isempty(cc) && any(cc)
    idx = find(cc);
    f = QueryCache.Count(idx);
    nCacheCount = nCacheCount + 1;
    return
end

% Cache miss: ask the selected server.
if strcmp(strServer, 'pubmed')
    f = doURLQueryPubmed(term);
elseif strcmp(strServer, 'amazon')
    f = doURLQueryAmazon(term);
else
    f = doURLQueryWikipedia(term);
end

% Remember the answer for next time.
nQueryCount = nQueryCount + 1;
QueryCache.Queries = [QueryCache.Queries {term}];
QueryCache.Count = [QueryCache.Count f];
end

function f=doURLQueryWikipedia(term)
% DOURLQUERYWIKIPEDIA  Result count from the Wikipedia full-text search page.
%   Reads the number that follows 'of <b>' after the 'results-info' div.
%   Returns 0 when the page cannot be parsed. urlread is called outside
%   the try block, so an error it raises propagates to the caller.
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
try
    idx = strfind(str, 'div class="results-info"');
    idx = idx(1);
    str2 = str(idx:end);
    idx2 = strfind(str2, 'of <b>');
    str3 = str2(idx2+6:end);      % 6 == length('of <b>')
    idx3 = strfind(str3, '</b>');
    idx3 = idx3(1);
    nx = str3(1:idx3-1);
    f = str2double(nx);
catch
    f = 0;
end
end

function f=doURLQueryPubmed(term)
% DOURLQUERYPUBMED  Result count from the NCBI gquery page.
%   Converts the text between '<h2>About ' and 'search results for' with
%   str2double. Returns 0 only if the parsing code throws. urlread is
%   called outside the try block, so an error it raises propagates.
URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
str = urlread(URL);
try
    key = '<h2>About ';
    idx1 = strfind(str, key);
    idx1 = idx1 + length(key);
    idx2 = strfind(str(idx1:end), 'search results for');
    nx = str(idx1:idx1+idx2-2);
    f = str2double(nx);
catch
    f = 0;
end
end
function f=doURLQueryAmazon(term)
% DOURLQUERYAMAZON  Result count from an Amazon keyword search page.
%   Fetches the search page up to five times. On each attempt it looks for
%   the text 'results for <span class="' and converts the token that ends
%   two characters before it (scanning back to the preceding space) with
%   str2double. A failed parse sets f to 0, waits one second and retries.
%   urlread sits outside the try block, so an error it raises propagates.
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
for attempt = 1:5
    page = urlread(URL);
    try
        hits = strfind(page, 'results for <span class="');
        iStop = hits(1) - 2;
        % Walk backwards to the space that precedes the number.
        iStart = iStop;
        while page(iStart) ~= ' '
            iStart = iStart - 1;
        end
        f = str2double(page(iStart:iStop));
        break;
    catch
        f = 0;
        pause(1);
    end
end
end
function f = GetCount(words)
% GETCOUNT  Result count for a list of search words (NCBI gquery backend).
%   f = GetCount(words) joins the cell array WORDS with '+', replaces
%   spaces with '+', and returns the count parsed from the NCBI gquery
%   page. Answers are kept in the global QueryCache (fields Queries and
%   Count); nCacheCount counts cache hits and nQueryCount counts web
%   queries.
%
%   Example query:
%   http://www.ncbi.nlm.nih.gov/gquery/?term=WHSC1%2CNSD1%2CASH1L%2CSETD2%2C
global nQueryCount nCacheCount
global QueryCache

% Assemble the '+'-separated search term.
term = [words{1}];
for k = 2:length(words)
    term = [term '+' words{k}];
end
term(strfind(term, ' ')) = '+';

% Answer from the cache when the term has been seen before.
isHit = strcmp(QueryCache.Queries, term);
if ~isempty(isHit) && any(isHit)
    hitPos = find(isHit);
    f = QueryCache.Count(hitPos);
    nCacheCount = nCacheCount + 1;
    return
end

% Not cached: fetch the page and read the text between the two markers.
URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
page = urlread(URL);
try
    key = '<h2>About ';
    iFrom = strfind(page, key);
    iFrom = iFrom + length(key);
    iLen = strfind(page(iFrom:end), 'search results for');
    f = str2double(page(iFrom:iFrom+iLen-2));
catch
    f = 0;
end

% Record the query and its result.
nQueryCount = nQueryCount + 1;
QueryCache.Queries = [QueryCache.Queries {term}];
QueryCache.Count = [QueryCache.Count f];
function f = GetCount(words)
% GETCOUNT  Result count for a list of search words on the selected server.
%   f = GetCount(words) joins the cell array WORDS with '+', turns any
%   spaces inside the words into '+', and returns the number of results
%   reported for that query.
%
%   Globals used:
%     strServer   - backend selector: 'pubmed', 'amazon', anything else is
%                   treated as Wikipedia. Set to 'wikipedia' when empty.
%     QueryCache  - struct with fields Queries (cell of terms) and Count
%                   (matching counts); consulted before any web request.
%     nQueryCount - incremented for every query sent to a server.
%     nCacheCount - incremented for every query answered from the cache.
global nQueryCount nCacheCount strServer QueryCache
if isempty(strServer)
    strServer = 'wikipedia';   % ';' keeps the default from being echoed
end
% A global that was never assigned is [], and []+1 stays [], so give the
% counters and the cache a usable starting value.
if isempty(nQueryCount)
    nQueryCount = 0;
end
if isempty(nCacheCount)
    nCacheCount = 0;
end
if isempty(QueryCache)
    QueryCache = struct('Queries', {{}}, 'Count', []);
end

% Build the '+'-separated query term.
term = [words{1}];
for i = 2:length(words)
    term = [term '+' words{i}];
end
idx = strfind(term, ' ');
term(idx) = '+';

% check cache
cc = strcmp(QueryCache.Queries, term);
if ~isempty(cc) && any(cc)
    idx = find(cc);
    f = QueryCache.Count(idx);
    nCacheCount = nCacheCount + 1;
    return
end

% Cache miss: ask the selected server.
if strcmp(strServer, 'pubmed')
    f = doURLQueryPubmed(term);
elseif strcmp(strServer, 'amazon')
    f = doURLQueryAmazon(term);
else
    f = doURLQueryWikipedia(term);
end

% Remember the answer for next time.
nQueryCount = nQueryCount + 1;
QueryCache.Queries = [QueryCache.Queries {term}];
QueryCache.Count = [QueryCache.Count f];
end
function f=doURLQueryWikipedia(term)
% DOURLQUERYWIKIPEDIA  Result count from the Wikipedia full-text search page.
%   Locates the first 'div class="results-info"' marker, then reads the
%   text between the following 'of <b>' and the next '</b>' and converts
%   it with str2double. Returns 0 if the parsing code throws. urlread is
%   called outside the try block, so an error it raises propagates.
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
page = urlread(URL);
try
    infoPos = strfind(page, 'div class="results-info"');
    infoTail = page(infoPos(1):end);
    ofPos = strfind(infoTail, 'of <b>');
    afterOf = infoTail(ofPos+6:end);      % 6 == length('of <b>')
    closePos = strfind(afterOf, '</b>');
    f = str2double(afterOf(1:closePos(1)-1));
catch
    f = 0;
end
end
function f=doURLQueryPubmed(term)
% DOURLQUERYPUBMED  Result count from the NCBI gquery page.
%   Converts the text between '<h2>About ' and 'search results for' with
%   str2double. Returns 0 only if the parsing code throws. urlread is
%   called outside the try block, so an error it raises propagates.
URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
page = urlread(URL);
try
    marker = '<h2>About ';
    iFrom = strfind(page, marker);
    iFrom = iFrom + length(marker);       % first character after the marker
    iLen = strfind(page(iFrom:end), 'search results for');
    f = str2double(page(iFrom:iFrom+iLen-2));
catch
    f = 0;
end
end
function f=doURLQueryAmazon(term)
% DOURLQUERYAMAZON  Result count from an Amazon keyword search page.
%   Fetches the search page up to five times. On each attempt it looks for
%   the text 'results for <span class="' and converts the token that ends
%   two characters before it (scanning back to the preceding space) with
%   str2double. A failed parse sets f to 0, waits one second and retries.
%   urlread sits outside the try block, so an error it raises propagates.
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
for attempt = 1:5
    page = urlread(URL);
    try
        hits = strfind(page, 'results for <span class="');
        iStop = hits(1) - 2;
        % Walk backwards to the space that precedes the number.
        iStart = iStop;
        while page(iStart) ~= ' '
            iStart = iStart - 1;
        end
        f = str2double(page(iStart:iStop));
        break;
    catch
        f = 0;
        pause(1);
    end
end
end
function f = GetCount(words)
% GETCOUNT  Result count for a list of search words (Amazon backend).
%   f = GetCount(words) joins the cell array WORDS with '+', replaces
%   spaces with '+', and returns the count parsed from an Amazon keyword
%   search page. Answers are kept in the global QueryCache (fields Queries
%   and Count); nCacheCount counts cache hits and nQueryCount counts web
%   queries.
%
%   The page is fetched up to five times; a failed parse sets f to 0,
%   waits one second and retries.
global nQueryCount nCacheCount
global QueryCache

% Assemble the '+'-separated search term.
term = [words{1}];
for k = 2:length(words)
    term = [term '+' words{k}];
end
term(strfind(term, ' ')) = '+';

% Answer from the cache when the term has been seen before.
isHit = strcmp(QueryCache.Queries, term);
if ~isempty(isHit) && any(isHit)
    hitPos = find(isHit);
    f = QueryCache.Count(hitPos);
    nCacheCount = nCacheCount + 1;
    return
end

% Not cached: query Amazon.
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
for attempt = 1:5
    page = urlread(URL);
    try
        hits = strfind(page, 'results for <span class="');
        iStop = hits(1) - 2;
        % Walk backwards to the space that precedes the number.
        iStart = iStop;
        while page(iStart) ~= ' '
            iStart = iStart - 1;
        end
        f = str2double(page(iStart:iStop));
        break;
    catch
        f = 0;
        pause(1);
    end
end

% Record the query and its result.
nQueryCount = nQueryCount + 1;
QueryCache.Queries = [QueryCache.Queries {term}];
QueryCache.Count = [QueryCache.Count f];
function f = GetCount(words)
% GETCOUNT  Result count for a list of search words on the selected server.
%   f = GetCount(words) joins the cell array WORDS with '+', turns any
%   spaces inside the words into '+', and returns the number of results
%   reported for that query.
%
%   Globals used:
%     strServer   - backend selector: 'pubmed', 'amazon', anything else is
%                   treated as Wikipedia. Set to 'wikipedia' when empty.
%     QueryCache  - struct with fields Queries (cell of terms) and Count
%                   (matching counts); consulted before any web request.
%     nQueryCount - incremented for every query sent to a server.
%     nCacheCount - incremented for every query answered from the cache.
global nQueryCount nCacheCount strServer QueryCache
if isempty(strServer)
    strServer = 'wikipedia';   % ';' keeps the default from being echoed
end
% A global that was never assigned is [], and []+1 stays [], so give the
% counters and the cache a usable starting value.
if isempty(nQueryCount)
    nQueryCount = 0;
end
if isempty(nCacheCount)
    nCacheCount = 0;
end
if isempty(QueryCache)
    QueryCache = struct('Queries', {{}}, 'Count', []);
end

% Build the '+'-separated query term.
term = [words{1}];
for i = 2:length(words)
    term = [term '+' words{i}];
end
idx = strfind(term, ' ');
term(idx) = '+';

% check cache
cc = strcmp(QueryCache.Queries, term);
if ~isempty(cc) && any(cc)
    idx = find(cc);
    f = QueryCache.Count(idx);
    nCacheCount = nCacheCount + 1;
    return
end

% Cache miss: ask the selected server.
if strcmp(strServer, 'pubmed')
    f = doURLQueryPubmed(term);
elseif strcmp(strServer, 'amazon')
    f = doURLQueryAmazon(term);
else
    f = doURLQueryWikipedia(term);
end

% Remember the answer for next time.
nQueryCount = nQueryCount + 1;
QueryCache.Queries = [QueryCache.Queries {term}];
QueryCache.Count = [QueryCache.Count f];
end

function f=doURLQueryWikipedia(term)
% DOURLQUERYWIKIPEDIA  Result count from the Wikipedia full-text search page.
%   Reads the number that follows 'of <b>' after the 'results-info' div.
%   Returns 0 when the page cannot be parsed. urlread is called outside
%   the try block, so an error it raises propagates to the caller.
URL=['http://en.wikipedia.org/w/index.php?title=Special%3ASearch&profile=default&search=' term '+&fulltext=Search'];
str = urlread(URL);
try
    idx = strfind(str, 'div class="results-info"');
    idx = idx(1);
    str2 = str(idx:end);
    idx2 = strfind(str2, 'of <b>');
    str3 = str2(idx2+6:end);      % 6 == length('of <b>')
    idx3 = strfind(str3, '</b>');
    idx3 = idx3(1);
    nx = str3(1:idx3-1);
    f = str2double(nx);
catch
    f = 0;
end
end

function f=doURLQueryPubmed(term)
% DOURLQUERYPUBMED  Result count from the NCBI gquery page.
%   Converts the text between '<h2>About ' and 'search results for' with
%   str2double. Returns 0 only if the parsing code throws. urlread is
%   called outside the try block, so an error it raises propagates.
URL = [ 'http://www.ncbi.nlm.nih.gov/gquery/?term=' term];
str = urlread(URL);
try
    key = '<h2>About ';
    idx1 = strfind(str, key);
    idx1 = idx1 + length(key);
    idx2 = strfind(str(idx1:end), 'search results for');
    nx = str(idx1:idx1+idx2-2);
    f = str2double(nx);
catch
    f = 0;
end
end
function f=doURLQueryAmazon(term)
% DOURLQUERYAMAZON  Result count from an Amazon keyword search page.
%   Fetches the search page up to five times. On each attempt it looks for
%   the text 'results for <span class="' and converts the token that ends
%   two characters before it (scanning back to the preceding space) with
%   str2double. A failed parse sets f to 0, waits one second and retries.
%   urlread sits outside the try block, so an error it raises propagates.
URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
for attempt = 1:5
    page = urlread(URL);
    try
        hits = strfind(page, 'results for <span class="');
        iStop = hits(1) - 2;
        % Walk backwards to the space that precedes the number.
        iStart = iStop;
        while page(iStart) ~= ' '
            iStart = iStart - 1;
        end
        f = str2double(page(iStart:iStop));
        break;
    catch
        f = 0;
        pause(1);
    end
end
end
function [d1, d2]=GetDistances(words1,words2)
function [d]=GetDistances(words,idxKey)
words=[words1 words2];
d1=[];
d2=[];
NWD1x=[];NWD1=[];
NWD2x=[];NWD2=[];
global QueryCache
if isempty(QueryCache)
......@@ -17,72 +11,39 @@ global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
d=[];
rgNWDx=[];rgNWD=[];
nClasses=max(idxKey);
for i=1:length(words)
if (i<=length(words1))
% words{i} belongs to words1
w1=words1;
w1(i)=[];
% w1=w1(1:NW);
w1x=[words(i) w1];
% w2=words2(1:NW);
w2=words2;
w2x=[words(i) w2 ];
else
% words{i} belongs to words2
% w1=words1(1:NW);
w1=words1;
w1x=[words(i) w1];
w2=words2;
w2(i-length(words1))=[];
% w2=w2(1:NW);
w2x=[words(i) w2];
for j=1:nClasses
idx=find(idxKey==j);
idxIncluded=find(idx==i);
if ~isempty(idxIncluded)
wordListX=words(idx);
idx(idxIncluded)=[];
wordList=words(idx);
else
wordList=words(idx);
wordListX=[words(i) words(idx)];
end
rgNWDx(j,i)=NWD(wordListX);
rgNWD(j,i)=NWD(wordList);
end
% NW=4;
% w1=w1(1:NW);
% w1x=w1x(1:NW);
% w2=w2(1:NW);
% w2x=w2x(1:NW);
NWD1x(i)=NWD(w1x)-NWD(w1x,1);
NWD1(i)=NWD(w1)-NWD(w1,1);
NWD2x(i)=NWD(w2x)-NWD(w2x,1);
NWD2(i)=NWD(w2)-NWD(w2,1);
% NWD1x(i)=NWD(w1x);
% NWD1(i)=NWD(w1);
% NWD2x(i)=NWD(w2x);
% NWD2(i)=NWD(w2);
d1(i)=(NWD1x(i)-NWD1(i));
if isnan(d1(i))
d1(i)=inf;
end
d2(i)=(NWD2x(i)-NWD2(i));
if isnan(d2(i)) % f(X)=0 --> -log2(0)==Inf
d2(i)=Inf;
end
if d1(i)<d2(i)
% if abs(d1(i))<abs(d2(i)) % magnitude?
fprintf('1,');
else
fprintf('2,');
end
d = rgNWDx-rgNWD;
idx=find(isnan(d));
d(idx)=inf;
[mm idx]=min(d(:,i));
fprintf('%d,',idx);
end
fprintf('\n');
Confusion=zeros(2,2);
Confusion=zeros(nClasses,nClasses);
for i=1:length(words)
if i<=length(words1)
nTrue=1;
else
nTrue=2;
end
%if abs(d1(i))<abs(d2(i)) % magnitude?
if (d1(i))<(d2(i))
nPred=1;
else
nPred=2;
end
nTrue = idxKey(i);
[mm nPred] = min(d(:,i));
Confusion(nTrue,nPred)=Confusion(nTrue,nPred)+1;
end
......@@ -101,4 +62,3 @@ c=mat2str(Confusion);
fprintf(fid,'%s\n',c);
fclose(fid);
end
\ No newline at end of file
function idx=GetDistancesPairs(words1,words2)