Commit 945d30e7 authored by Andrew Cohen
Browse files

update for exclude terms on nwd, goRDK

parent e3fc05c4
...@@ -12,9 +12,9 @@ fw=[]; ...@@ -12,9 +12,9 @@ fw=[];
fexclude=[]; fexclude=[];
for i=1:length(words) for i=1:length(words)
fw(i)=GetCount(words(i)); fw(i)=GetCount(words(i));
ww=words; % ww=words;
ww(i)=[]; % ww(i)=[];
fexclude(i)=GetCount(ww); % fexclude(i)=GetCount(ww);
end end
......
% wikiScientistsGoRDK.m
% load wiki_scientists_9_12_18.mat
%
% Section 1: assemble the labelled word list and the pairwise NWD matrix.
% Five hand-picked groups of scientist surnames are concatenated into
% `words`; `idxKey` carries the group number (1..5) of every entry so the
% clustering result can be scored against it later on.
% (Assignments below are deliberately left unsuppressed so they echo to the console.)
words1={'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
words2={'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann'} %physicists
words3={'Freud','Pavlov','Skinner','Jung'} %psychologists
words4={'Turing','Lovelace','Knuth','Hopper'} % computer scientists
words5={'Darwin','Lamarck','Linnaeus','Mendel'} % biology
% group label for each word, in the same order the groups are concatenated
idxKey = [  ones(1,numel(words1)), ...
          2*ones(1,numel(words2)), ...
          3*ones(1,numel(words3)), ...
          4*ones(1,numel(words4)), ...
          5*ones(1,numel(words5))]
words=horzcat(words1,words2,words3,words4,words5);
% Fill only the strict upper triangle: one NWD call per unordered pair.
dx=[];
for i=1:numel(words)
    for j=i+1:numel(words)
        dx(i,j)=NWD(words([i j]));
    end
end
% The last row is never written by the loop above (j would start past the
% end), so dx comes out one row short; pad it with zeros to make it square.
dx(numel(words),:)=0;
% NOTE(review): Regularize is defined elsewhere; presumably it symmetrises /
% conditions the matrix for clustering -- confirm against its source.
dx=Regularize(dx);
% Section 2: sweep the number of clusters KK = 1..15. For every KK:
%   * cluster the words with SpectralCluster(dx,KK),
%   * compute the NWD of each cluster's word set (hki) and its size (ni),
%   * count how many members of each cluster disagree with the cluster's
%     majority ground-truth group (ek), and
%   * record hk(KK) = max(hki) and errorK(KK) = sum(ek).
% Requires `dx`, `words` and `idxKey` from the preceding section.

% Append ../Gap to the end of the search path.
% NOTE(review): presumably this is where SpectralCluster lives -- confirm.
path(path,'../Gap');
hk=[];
% Reset alongside hk so values left in the workspace by an earlier run
% cannot leak into this run's error plot.
errorK=[];
for KK=1:15
    idxSpectral=SpectralCluster(dx,KK);
    hki=[];
    ni=[];
    % go through each cluster, compute the NWD for that cluster
    for i=1:KK
        idxi=find(idxSpectral==i);
        ni(i)=length(idxi);
        if length(idxi)<2
            % Singleton (or empty) cluster: there is no multi-word set to
            % score, so mark it NaN and move on to the next cluster.
            % `continue` rather than `break`: breaking here would skip every
            % later cluster, truncating ni/hki and making max(hki) depend on
            % the arbitrary order of the cluster labels.
            hki(i)=NaN;
            continue;
        end
        hki(i)=NWD(words(idxi));
    end
    % max() skips NaN entries, so singleton clusters do not affect h; the
    % result is NaN only when every cluster is a singleton.
    fprintf(1,'KK=%d : h=%0.3f, hki=%s, ni=%s\n',KK,max(hki),mat2str(hki,2),mat2str(ni,2));
    ek=[];
    for k=1:KK
        idxk=find(idxSpectral==k);
        % print the ground-truth group labels of this cluster's members
        fprintf(1,' %s\n',mat2str([idxKey(idxk)]));
        idxTrueI=idxKey(idxk);
        % the cluster's predicted group is the most frequent true label in it
        pred=mode(idxTrueI);
        ek(k)=length(find(idxTrueI~=pred));
        if 1==length(idxk)
            % A singleton is marked NaN; because sum() propagates NaN this
            % makes errorK(KK) NaN for any KK that produces a singleton.
            ek(k)=NaN;
        end
    end
    errorK(KK)=sum(ek);
    fprintf(1,'errorK(%d)=%0.2f\n',KK,errorK(KK)/length(words));
    hk(KK)=max(hki);
end
% Section 3: de-trend the per-KK score and plot the results.
% Requires `hk` and `errorK` from the cluster sweep and `words` from setup.
% (Assignments are left unsuppressed so intermediate values echo to the console.)

% Drop the KK values whose score is NaN, keeping x (the KK index) aligned
% with the surviving entries of hkn.
x=[1:length(hk)]'
hkn=hk
hkn(isnan(hk))=[]
x(isnan(hk))=[]
hkn=hkn'
% Fit a straight line to the score versus KK and subtract it ("de-bias"),
% then take first differences of the residual.
p=polyfit(x,hkn,1);
hkd=hkn-(p(1).*x+p(2))
hkd=diff(hkd);
% NOTE(review): hk is filled with max(hki) in the sweep, so the 'average'
% in this title does not match the plotted quantity -- confirm which is meant.
figure(1);clf;plot(hk,'-*')
title('average per cluster NWD')
% Normalise by the number of words, matching the errorK printout in the
% sweep. (The previous divisor, length(tx), referenced a variable that this
% script never defines.)
figure(2);clf;plot(errorK./length(words),'-+');
title('error rate')
figure(3);clf;plot(hkd,'-^')
title('1st derivative of de-biased average per cluster NWD')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment