Skip to content
Snippets Groups Projects
Commit c4b3bafb authored by ac_fx's avatar ac_fx
Browse files

added nist k supervised

parent f44a8042
Branches
No related tags found
No related merge requests found
function [kOD,mx,sd]=getK(xDigits,kMax,NUMBER_OF_TRIALS) function [kOD,mx,sd,clusterConfusion]=getK(xDigits,kMax,NUMBER_OF_SAMPLES,clusterConfusion)
dx=[]; dx=[];
idx=[]; idx=[];
idxsp=[]; idxsp=[];
kTrue=length(unique([xDigits.idxTrue]));
dDigits=MNISTdistance(xDigits); dDigits=MNISTdistance(xDigits);
for k=1:kMax for k=1:kMax
idxsp=Cluster.SpectralCluster(dDigits,k); idxsp=Cluster.SpectralCluster(dDigits,k);
idxTrials=cell(1,NUMBER_OF_TRIALS); % ntrial, ik, 1:5 if k==kTrue
idxKey=cell(1,NUMBER_OF_TRIALS); % ntrial, ik, 1:5 clusterConfusion=updateConfusion(xDigits,clusterConfusion,kTrue,kMax,idxsp);
end
idxTrials=cell(1,NUMBER_OF_SAMPLES); % ntrial, ik, 1:5
idxKey=cell(1,NUMBER_OF_SAMPLES); % ntrial, ik, 1:5
% compute dx % compute dx
for ik=1:k for ik=1:k
idx=find(idxsp==ik); idx=find(idxsp==ik);
for ntrial=1:NUMBER_OF_TRIALS for ntrial=1:NUMBER_OF_SAMPLES
trialCardinality=4+mod(ntrial,4); trialCardinality=4+mod(ntrial,4);
trialCardinality=min(length(idx),trialCardinality); trialCardinality=min(length(idx),trialCardinality);
idxTrials{ntrial}=[idxTrials{ntrial};idx(randperm(length(idx),trialCardinality))]; idxTrials{ntrial}=[idxTrials{ntrial};idx(randperm(length(idx),trialCardinality))];
...@@ -21,7 +25,7 @@ for k=1:kMax ...@@ -21,7 +25,7 @@ for k=1:kMax
end end
end end
parfor i=1:NUMBER_OF_TRIALS parfor i=1:NUMBER_OF_SAMPLES
% for i=1:NUMBER_OF_TRIALS % for i=1:NUMBER_OF_TRIALS
dx(k,i)=deltaK([xDigits(idxTrials{i})],idxKey{i}); dx(k,i)=deltaK([xDigits(idxTrials{i})],idxKey{i});
end end
...@@ -35,7 +39,7 @@ end ...@@ -35,7 +39,7 @@ end
mx=mean(dx,2,'omitnan'); mx=mean(dx,2,'omitnan');
sd=std(dx,0,2,'omitnan'); sd=std(dx,0,2,'omitnan');
% account for simulation error, as in gap statistic % account for simulation error, as in gap statistic
sd=sd.*sqrt(1+1/NUMBER_OF_TRIALS); sd=sd.*sqrt(1+1/NUMBER_OF_SAMPLES);
kOD=find(mx(1:end-1)-sd(1:end-1)>mx(2:end),1,'first'); kOD=find(mx(1:end-1)-sd(1:end-1)>mx(2:end),1,'first');
if ~isempty(kOD) if ~isempty(kOD)
......
...@@ -4,13 +4,17 @@ IMDIM=28*28; ...@@ -4,13 +4,17 @@ IMDIM=28*28;
IM1D=28; IM1D=28;
% read training images % read training images
file = fopen('../../../lib/train-labels.idx1-ubyte','rb'); thisFile=mfilename('fullpath');
thisFolder=fileparts(thisFile);
labelFile=fullfile(thisFolder,'../../../lib/train-labels.idx1-ubyte');
file = fopen(labelFile,'rb');
labels=fread(file); labels=fread(file);
fclose(file); fclose(file);
% labels[9] onwards are 1 byte values (0..9) specifying digits in test file % labels[9] onwards are 1 byte values (0..9) specifying digits in test file
labels=labels(9:end); labels=labels(9:end);
file = fopen('../../../lib/train-images.idx3-ubyte','rb'); digitFile=fullfile(thisFolder,'../../../lib/train-images.idx3-ubyte');
file = fopen(digitFile,'rb');
images=fread(file); images=fread(file);
fclose(file); fclose(file);
......
...@@ -8,7 +8,7 @@ end ...@@ -8,7 +8,7 @@ end
tblResults=table(); tblResults=table();
maxDigits=[0,1,2,3,8,9]; maxDigits=[0,1,2,3,8,9];
targetSize=100; targetSize=100;
clusterConfusion={};
rgResults=[]; % maxDigit,trial,iteration rgResults=[]; % maxDigit,trial,iteration
kMax=12; kMax=12;
NUMBER_OF_SAMPLES=2000; NUMBER_OF_SAMPLES=2000;
...@@ -17,7 +17,7 @@ p=startParallel(256); ...@@ -17,7 +17,7 @@ p=startParallel(256);
outname=['getK5k' datestr(now,'mm-dd-yyyy') '.mat'] outname=['getK5k' datestr(now,'mm-dd-yyyy') '.mat']
NDIGITS=length(maxDigits); NDIGITS=length(maxDigits);
confusionMatrix=zeros(10,10); kConfusion=zeros(10,kMax);
for nTrial=1:NUMBER_OF_TRIALS for nTrial=1:NUMBER_OF_TRIALS
for d=1:NDIGITS for d=1:NDIGITS
target=[0:maxDigits(d)]; target=[0:maxDigits(d)];
...@@ -25,15 +25,16 @@ for nTrial=1:NUMBER_OF_TRIALS ...@@ -25,15 +25,16 @@ for nTrial=1:NUMBER_OF_TRIALS
cardinality=ceil(targetSize/length(target)); cardinality=ceil(targetSize/length(target));
xDigits=getNISTdigits(target,cardinality); xDigits=getNISTdigits(target,cardinality);
[kpred,mx,sd]=getK(xDigits,kMax,NUMBER_OF_SAMPLES); [kpred,mx,sd,clusterConfusion]=getK(xDigits,kMax,NUMBER_OF_SAMPLES,clusterConfusion);
rgResults(d,nTrial)=kpred; rgResults(d,nTrial)=kpred;
confusionMatrix(K,kpred)=confusionMatrix(K,kpred)+1; kConfusion(K,kpred)=kConfusion(K,kpred)+1;
drawResults(kpred,K,mx,sd,outfolder,nTrial); drawResults(kpred,K,mx,sd,outfolder,nTrial);
5; 5;
end end
if 0==mod(nTrial,20) if 0==mod(nTrial,20)
clusterConfusion;
kConfusion
mean(rgResults,2) mean(rgResults,2)
confusionMatrix
end end
5; 5;
end end
......
function Mdl = getEnsemble(xTraining,yTraining)
% GETENSEMBLE Fit a boosted tree ensemble and print its training-set fit.
%   Mdl = getEnsemble(xTraining,yTraining) fits an AdaBoostM2 ensemble of
%   decision trees (at most 12 splits per tree) to the predictors xTraining
%   and the labels yTraining, then echoes the training confusion matrix
%   (rows = true label, columns = predicted label) and the fraction of
%   training samples on its diagonal.
%   Labels are used directly as matrix subscripts, so they are assumed to be
%   positive integers - TODO confirm against callers.
%
% Alternatives tried earlier:
%   fitcensemble(xTraining,yTraining,'method','subspace','learner','knn')
%   fitcensemble(xTraining,yTraining)
weakLearner = templateTree('MaxNumSplits',12);
Mdl = fitcensemble(xTraining,yTraining,'Method','AdaBoostM2','Learners',weakLearner);
yPred = predict(Mdl,xTraining);
% Tally (true, predicted) pairs over the training data.
cm = zeros(10);
nObs = length(yPred);
for obs = 1:nObs
    r = yTraining(obs);
    c = yPred(obs);
    cm(r,c) = cm(r,c)+1;
end
% Deliberately unterminated so both values are echoed to the console.
cm
sum(diag(cm))/nObs
\ No newline at end of file
function [net,cm]=getNetwork(xTraining,yTraining)
% GETNETWORK Train a pattern-recognition network and print its training fit.
%   [net,cm] = getNetwork(xTraining,yTraining) creates a patternnet with
%   hidden layer sizes [12,6] and the 'trainbr' training function, trains it
%   on xTraining (one sample per column) against one-hot targets built from
%   yTraining, and returns the trained network plus the training confusion
%   matrix cm (rows = true label, columns = predicted label).
%   Labels are used as row subscripts of a 10-row target matrix, so they are
%   assumed to be positive integers - TODO confirm against callers.
nSamples = size(xTraining,2);
% One-hot targets: row yTraining(s) of column s is set to 1.
yResponse = zeros(10,nSamples);
for s = 1:nSamples
    yResponse(yTraining(s),s) = 1;
end
net = patternnet([12,6],'trainbr');
% net.trainParam.showWindow = false;
% net.performFcn='sse'; % for trainlm
net = train(net,xTraining,yResponse,'useParallel','yes');
yPred = net(xTraining);
% length(find(yTraining==round(yPred)))/length(yPred)
% The predicted label of a sample is the row index of its largest output.
[~,yPredActual] = max(yPred,[],1);
cm = zeros(10);
nPred = length(yPredActual);
for s = 1:nPred
    r = yTraining(s);
    c = yPredActual(s);
    cm(r,c) = cm(r,c)+1;
end
% Deliberately unterminated so both values are echoed to the console.
cm
sum(diag(cm))/nPred
% no-op statement kept from the original (handy breakpoint anchor)
4;
% Script: evaluate a neural-network predictor of the number of digit classes.
% Trains getNetwork on two saved training sets, then repeatedly draws digit
% sets, runs getK on them and tallies the network's prediction against the
% true class count K.
% NOTE(review): NDIGITS, maxDigits, targetSize, kMax, NUMBER_OF_SAMPLES and
% clusterConfusion are never assigned in this script; presumably they are
% restored into the workspace by the load() below - confirm.

% append the parent folder to the MATLAB search path
path(path,'..');
% first training set goes straight into the workspace ...
load('digitsTrainingInc.mat');
% ... the second one is loaded into a struct so the two can be merged
A=load('digitsTrainingInc_30.mat');
% transpose both sets and join them side by side (getNetwork counts samples
% along the second dimension)
xTraining=[xTraining',A.xTraining'];
yTraining=[yTraining',A.yTraining'];
[net]=getNetwork(xTraining,yTraining);
NUMBER_OF_TRIALS=100;
% kConfusion(K,predicted) counts how often each prediction occurred for true K
kConfusion=zeros(10);
for nTrial=1:NUMBER_OF_TRIALS
for d=1:NDIGITS
% digit classes 0..maxDigits(d); K is the true number of classes
target=[0:maxDigits(d)];
K=length(target);
% number requested per class so the total is about targetSize
% (exact meaning depends on getNISTdigits - verify)
cardinality=ceil(targetSize/length(target));
xDigits=getNISTdigits(target,cardinality);
[kpred,mx,sd,clusterConfusion]=getK(xDigits,kMax,NUMBER_OF_SAMPLES,clusterConfusion);
% feature vector for the network: mx and sd stacked into a single column
xTest=[mx',sd']';
yTest=net(xTest);
% index of the largest network output is taken as the predicted label
[~,yTestActual]=max(yTest);
% no trailing semicolon: the running matrix is echoed on every iteration
kConfusion(K,yTestActual)=kConfusion(K,yTestActual)+1
% running fraction of counts on the diagonal (echoed as ans)
sum(diag(kConfusion))/sum(kConfusion(:))
% no-op statement (breakpoint anchor)
5;
end
end
% Script: compare a tree ensemble and a neural network as predictors of the
% number of digit classes. Both models are trained on the same saved
% training set and then scored on freshly drawn digit sets.
% NOTE(review): NDIGITS, maxDigits, targetSize, kMax, NUMBER_OF_SAMPLES and
% clusterConfusion are never assigned in this script; presumably they are
% restored into the workspace by the load() below - confirm.

% append the parent folder to the MATLAB search path
path(path,'..');
load('digitsTraining2k.mat');
Mdl = getEnsemble(xTraining,yTraining);
% getNetwork counts samples along the second dimension, hence the transposes
[net]=getNetwork(xTraining',yTraining');
NUMBER_OF_TRIALS=100;
% one confusion matrix per model, indexed (true K, predicted label)
kConfusionEns=zeros(10);
kConfusionNN=zeros(10);
for nTrial=1:NUMBER_OF_TRIALS
for d=1:NDIGITS
% digit classes 0..maxDigits(d); K is the true number of classes
target=[0:maxDigits(d)];
K=length(target);
% number requested per class so the total is about targetSize
% (exact meaning depends on getNISTdigits - verify)
cardinality=ceil(targetSize/length(target));
xDigits=getNISTdigits(target,cardinality);
[kpred,mx,sd,clusterConfusion]=getK(xDigits,kMax,NUMBER_OF_SAMPLES,clusterConfusion);
% feature row for the ensemble: mx and sd side by side
xTest=[mx',sd'];
yTestEns=predict(Mdl,xTest);
% no trailing semicolon: the running matrix is echoed on every iteration
kConfusionEns(K,yTestEns)=kConfusionEns(K,yTestEns)+1
% the network receives the same features as a column
yTestNN=net(xTest');
% index of the largest network output is taken as the predicted label
[~,yTestActual]=max(yTestNN);
kConfusionNN(K,yTestActual)=kConfusionNN(K,yTestActual)+1
% running diagonal fraction for [ensemble, network], echoed each iteration
accEnsNN=[sum(diag(kConfusionEns))/sum(kConfusionEns(:)),sum(diag(kConfusionNN))/sum(kConfusionNN(:))]
% no-op statement (breakpoint anchor)
5;
end
end
% Script: build a supervised training set for predicting the number of digit
% classes. For every trial and every entry of maxDigits it draws a digit set,
% runs getK, and appends the resulting [mx',sd'] row to xTraining and the
% true class count K to yTraining. The workspace is saved to outfile every
% 50 trials and once more at the end.

% append the parent folder to the MATLAB search path
path(path,'..');
% each entry m produces the target digit set 0:m
maxDigits=[0,1,2,3,8,9];
targetSize=100;
% not referenced again in this script
rgResults=[]; % maxDigit,trial,iteration
kMax=12;
NUMBER_OF_SAMPLES=2000;
NUMBER_OF_TRIALS=1000;
p=startParallel(256);
outfile='digitsTraining2k.mat';
% no trailing semicolon: the dated name is echoed; it is only used by the
% commented-out save near the end
outname=['getK5k' datestr(now,'mm-dd-yyyy') '.mat']
NDIGITS=length(maxDigits);
% not referenced again in this script
kConfusion=zeros(10,kMax);
% one row per (trial, digit set): features in xTraining, true K in yTraining
xTraining=[];
yTraining=[];
% accumulator threaded through every getK call
clusterConfusion={};
for nTrial=1:NUMBER_OF_TRIALS
tStart=tic();
for d=1:NDIGITS
% digit classes 0..maxDigits(d); K is the true number of classes
target=[0:maxDigits(d)];
K=length(target);
% number requested per class so the total is about targetSize
% (exact meaning depends on getNISTdigits - verify)
cardinality=ceil(targetSize/length(target));
xDigits=getNISTdigits(target,cardinality);
[kpred,mx,sd,clusterConfusion]=getK(xDigits,kMax,NUMBER_OF_SAMPLES,clusterConfusion);
% append one feature row and its label
xTraining=[xTraining;mx',sd'];
yTraining=[yTraining;K];
% no-op statement (breakpoint anchor)
5;
end
% checkpoint: save() without a variable list writes the whole workspace
if 0==mod(nTrial,50)
save(outfile);
end
tElapsed=toc(tStart);
% elapsed time for this trial, printed with no decimals
fprintf(1,'trial %d elapsed time=%0.f\n',nTrial,tElapsed);
% no-op statement (breakpoint anchor)
5;
end
% final save of the whole workspace
save(outfile);
% Disabled analysis code kept for reference:
% save(outname);
% idxKPred=tblResults.kPred==K;
% idxKGap=tblResults.kGap==K;
% idxKSP=tblResults.kSP==K;
% accPredGapSP=[length(find(idxKPred))/length(idxKPred),length(find(idxKGap))/length(idxKGap)...
% length(find(idxKSP))/length(idxKSP)]
% sciPred=bootci(20000,@mean,idxKPred)
% sciGap=bootci(20000,@mean,idxKGap)
% sciSP=bootci(20000,@mean,idxKSP)
function clusterConfusion=updateConfusion(xDigits,clusterConfusion,kTrue,kMax,idxsp)
% UPDATECONFUSION Accumulate a cluster-vs-ground-truth confusion matrix.
%   clusterConfusion = updateConfusion(xDigits,clusterConfusion,kTrue,kMax,idxsp)
%   adds one count per element of idxsp to clusterConfusion{kTrue} and
%   returns the updated cell array.
%
%   xDigits          struct array with a field idxTrue holding each element's
%                    ground-truth class. It is used as a matrix subscript, so
%                    it is assumed to be a positive integer - TODO confirm.
%   clusterConfusion cell array; cell kTrue holds the running matrix
%                    (rows = ground truth, columns = predicted class).
%   kTrue            number of true classes; selects the cell to update.
%   kMax             unused; kept so existing callers keep working.
%   idxsp            vector of cluster indices, one per element of xDigits.
%
%   The predicted class of an element is the mode of idxTrue over all
%   elements assigned to the same cluster.

% Create the matrix when the slot is missing OR present but still empty.
% A cell array that was grown for a larger kTrue leaves the lower slots as
% [], and incrementing an entry of [] would fail with an out-of-bounds error.
if numel(clusterConfusion)<kTrue || isempty(clusterConfusion{kTrue})
    clusterConfusion{kTrue}=zeros(kTrue,kTrue);
end
counts=clusterConfusion{kTrue};
% The mode depends only on the cluster, so compute it once per cluster
% instead of once per element.
clusterIds=unique(idxsp);
for c=1:numel(clusterIds)
    members=find(idxsp==clusterIds(c));
    cPredicted=mode([xDigits(members).idxTrue]);
    for m=members(:)'
        cGT=xDigits(m).idxTrue;
        counts(cGT,cPredicted)=counts(cGT,cPredicted)+1;
    end
end
clusterConfusion{kTrue}=counts;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment