Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

getDistancesPairs.m

Blame
  • ac_31's avatar
    Andrew Cohen authored
    270e30c5
    History
    getDistancesPairs.m 1.46 KiB
    function [idx,nCorrect]=GetDistancesPairs(words,idxKey,strServer)
    % GetDistancesPairs  Cluster words from their pairwise NWD values and score
    % the clustering against a reference labelling.
    %
    %   [idx,nCorrect] = GetDistancesPairs(words,idxKey)
    %   [idx,nCorrect] = GetDistancesPairs(words,idxKey,strServer)
    %
    %   Inputs
    %     words     - cell array of strings; each pair {words(i),words(j)} is
    %                 handed to NWD, and words{i} is printed with '%s'.
    %     idxKey    - reference class label for each word. Labels 1..max(idxKey)
    %                 are the ones that get permuted when scoring.
    %     strServer - forwarded unchanged to NWD; defaults to 'wikipedia'.
    %
    %   Outputs
    %     idx       - transposed result of Distance.SpectralCluster, relabelled
    %                 so that it lines up with idxKey under the best-scoring
    %                 label permutation.
    %     nCorrect  - number of words whose cluster label agrees with idxKey
    %                 under that best permutation.
    %
    %   NOTE(review): NWD, Regularize and Distance.SpectralCluster are defined
    %   outside this file; their exact contracts are assumed, not verified here.
    
    if ~exist('strServer','var')
        strServer='wikipedia';
    end
    
    % fullfile yields '..\Gap' on Windows (as before) and a valid relative
    % path on platforms that use '/' as the separator.
    path(path,fullfile('..','Gap'));
    
    % Upper-triangular matrix of pairwise values; the diagonal and the lower
    % triangle stay zero. Preallocated instead of grown element by element.
    nWords=length(words);
    d=zeros(nWords);
    for i=1:nWords
        for j=i+1:nWords
            d(i,j)=NWD([words(i),words(j)],strServer);
        end
    end
    
    % Non-finite results are replaced by the fixed value 1.2 before the matrix
    % is passed through Regularize, shifted so its minimum is 0, and passed
    % through Regularize again.
    d(isinf(d))=1.2;
    d(isnan(d))=1.2;
    d=Regularize(d);
    d=d-min(d(:));
    d=Regularize(d);
    
    nClasses=max(idxKey);
    idxCluster=Distance.SpectralCluster(d,nClasses);
    
    % Cluster labels are arbitrary, so try every relabelling of the key and
    % keep the one that agrees with the clustering most often. Note that
    % perms produces factorial(nClasses) rows.
    idxPerms=perms(1:nClasses);
    nPerms=size(idxPerms,1);
    rgCorrect=zeros(1,nPerms);
    
    % Compare as columns so the count does not depend on whether idxKey and
    % idxCluster arrive as rows or columns (a row/column mix would otherwise
    % expand to a full matrix and inflate the count).
    keyCol=idxKey(:);
    clusterCol=idxCluster(:);
    for i=1:nPerms
        idxMap=keyCol;
        for j=1:size(idxPerms,2)
            idxMap(keyCol==j)=idxPerms(i,j);
        end
        rgCorrect(i)=nnz(idxMap==clusterCol);
    end
    [nCorrect,iBest]=max(rgCorrect);
    
    % set the output clustering to match the permutation with the min error
    % rate: cluster label idxMap(i) is renamed to key label i. The mask is
    % taken from the untouched idxCluster so earlier renames cannot cascade.
    idx=idxCluster';
    idxMap=idxPerms(iBest,:);
    for i=1:length(idxMap)
        idx(idxCluster==idxMap(i))=i;
    end
    
    % Optional append-only run log; disabled by default.
    bWriteFile=false;
    if bWriteFile
        fid=fopen('results.txt','a');
        fprintf(fid,'\n\n-------------------------\n');
        fprintf(fid,'GetDistances::(pairs) \n');
        c=clock;
        c=num2str(c);
        fprintf(fid,'%s\n',c);
        for i=1:length(words)
            fprintf(fid,'%s,',words{i});
        end
        % NOTE(review): kGap is never assigned in this function; unless it
        % resolves to something on the MATLAB path this line will error when
        % logging is switched on -- confirm before enabling bWriteFile.
        fprintf(fid,'kGap=%d\n',kGap);
        % fprintf(fid,'nQueryCount=%d, nCacheCount=%d\n',nQueryCount, nCacheCount);
        % fprintf(fid,'nCorrect=%d\n',nCorrect);
        fclose(fid);
    end