Select Git revision
getDistancesPairs.m
Andrew Cohen authored
getDistancesPairs.m 1.46 KiB
function [idx,nCorrect]=GetDistancesPairs(words,idxKey,strServer)
% GetDistancesPairs  Cluster words from pairwise NWD distances and score the
% clustering against a reference labelling.
%
%   [idx,nCorrect] = GetDistancesPairs(words,idxKey)
%   [idx,nCorrect] = GetDistancesPairs(words,idxKey,strServer)
%
%   Inputs:
%     words     - collection of words; element pairs [words(i),words(j)] are
%                 handed to NWD. Expected to be a cell array of strings (the
%                 optional logging code indexes it as words{i}).
%     idxKey    - reference class label for each word. Labels are treated as
%                 the integers 1..max(idxKey).
%     strServer - optional, passed through to NWD. Defaults to 'wikipedia'.
%
%   Outputs:
%     idx      - labels returned by Distance.SpectralCluster, transposed and
%                renumbered using the label permutation that agrees best
%                with idxKey.
%     nCorrect - number of words whose cluster agrees with idxKey under that
%                best permutation.
%
%   NWD, Regularize and Distance.SpectralCluster are defined elsewhere in the
%   project; their exact semantics are not visible from this file.

if ~exist('strServer','var')
    strServer='wikipedia';
end

nWords=length(words);
if nWords==0
    % The distance matrix cannot be built for an empty word list; fail with
    % a clear message instead of an obscure indexing error.
    error('GetDistancesPairs:emptyInput','words must contain at least one entry.');
end

% Append the sibling Gap directory to the search path. fullfile is used so
% the separator is correct on every platform ('..\Gap' on Windows).
path(path,fullfile('..','Gap'));

% Pairwise distances. Only the strict upper triangle is filled; the diagonal
% and lower triangle stay zero before Regularize is applied.
d=zeros(nWords);
for i=1:nWords
    for j=i+1:nWords
        d(i,j)=NWD([words(i),words(j)],strServer);
    end
end

% Replace distances that came back as Inf or NaN with the fixed value 1.2.
d(isinf(d)|isnan(d))=1.2;

% Regularize, shift so the smallest entry is zero, then regularize again.
d=Regularize(d);
d=d-min(d(:));
d=Regularize(d);

nClasses=max(idxKey);
idxCluster=Distance.SpectralCluster(d,nClasses);

% Cluster labels are arbitrary, so try every relabelling of the key classes
% and count how many words agree with the clustering under each one.
idxPerms=perms(1:nClasses);
nPerms=size(idxPerms,1);
rgCorrect=zeros(1,nPerms);
for i=1:nPerms
    idxMap=idxKey;
    for j=1:size(idxPerms,2)
        idxMap(idxKey==j)=idxPerms(i,j);
    end
    % Compare as columns so the count does not depend on whether idxKey and
    % idxCluster happen to be row or column vectors.
    rgCorrect(i)=nnz(idxMap(:)==idxCluster(:));
end
[nCorrect,iBest]=max(rgCorrect);

% Set the output clustering to match the permutation with the minimum error
% rate: cluster label idxMap(i) is renamed to key class i. The mask is taken
% from the untouched idxCluster so earlier renames cannot cascade.
idx=idxCluster';
idxMap=idxPerms(iBest,:);
for i=1:length(idxMap)
    idx(idxCluster==idxMap(i))=i;
end

% Optional result logging, disabled by default.
bWriteFile=false;
if bWriteFile
    fid=fopen('results.txt','a');
    if fid==-1
        % Logging is a side channel; do not lose the computed result over it.
        warning('GetDistancesPairs:logOpenFailed','Could not open results.txt for appending.');
    else
        closeLog=onCleanup(@() fclose(fid)); % close the file even if a write fails
        fprintf(fid,'\n\n-------------------------\n');
        fprintf(fid,'GetDistances::(pairs) \n');
        c=num2str(clock);
        fprintf(fid,'%s\n',c);
        for i=1:length(words)
            fprintf(fid,'%s,',words{i});
        end
        % The original printed an undefined variable (kGap) here, which would
        % raise an error; just terminate the word list line instead.
        fprintf(fid,'\n');
        % fprintf(fid,'nQueryCount=%d, nCacheCount=%d\n',nQueryCount, nCacheCount);
        % fprintf(fid,'nCorrect=%d\n',nCorrect);
    end
end