Skip to content
Snippets Groups Projects
Commit 17258d6c authored by Andrew Cohen's avatar Andrew Cohen
Browse files

removed spare parts

parent 0bf58d85
No related branches found
No related tags found
No related merge requests found
Showing with 0 additions and 1031 deletions
function [kGap Gap S idx] = GapSpectral(DistanceMatrix,nMaxClusters,bAlgorithmicInformationDistance)
% GAPSPECTRAL  Pick a cluster count by comparing the within-cluster
% dispersion of the data against uniformly random reference matrices.
%
%   DistanceMatrix  - pairwise distances; passed through Regularize first
%   nMaxClusters    - largest k tried; if it exceeds the row count it is
%                     reduced to rows-1 (a value equal to the row count is
%                     left unchanged)
%   bAlgorithmicInformationDistance - optional, default 1. Nonzero: the gap
%                     is computed on raw dispersions; zero: on their logs.
%
%   kGap - selected number of clusters
%   Gap  - gap value for each k tried
%   S    - spread term for each k (std of the reference dispersions,
%          scaled by sqrt(1+1/B))
%   idx  - labels for kGap clusters (all ones when kGap is 1)
if nargin<3
    bAlgorithmicInformationDistance=1;
end

B = 50; % number of random reference matrices

if nMaxClusters>size(DistanceMatrix,1)
    nMaxClusters = size(DistanceMatrix,1)-1;
end

D = Regularize(DistanceMatrix);

% Range of the off-diagonal distances (diagonal masked with NaN so that
% min/max skip it).
offDiag = D;
for d=1:size(offDiag,1)
    offDiag(d,d)=NaN;
end
lo = min(min(offDiag));
hi = max(max(offDiag));

% B reference matrices drawn uniformly from [lo, hi].
UV = lo + (hi-lo)*rand(size(D,1),size(D,2),B);

for k=1:nMaxClusters
    bSingle = (1==k); % k==1 needs no clustering call

    % Dispersion of the real data at this k.
    if bSingle
        labels = ones(size(D,1),1);
    else
        labels = SpectralCluster(D,k);
    end
    W(k)=WkSpectral(k,labels,D);

    % Dispersion of each reference matrix at this k.
    for ib=1:B
        ref = Regularize(UV(:,:,ib)); % make the draw a valid distance matrix
        if bSingle
            refLabels = ones(size(D,1),1);
        else
            refLabels = SpectralCluster(ref,k);
        end
        Wb(ib,k)=WkSpectral(k,refLabels,ref);
    end

    Wkb = Wb(:,k);
    if bAlgorithmicInformationDistance
        Gap(k) = 1/B*sum(Wkb) - W(k);
        sdk = std(Wkb,1);
    else
        lkb = log(Wkb);
        Gap(k) = 1/B*sum(lkb) - log(W(k));
        sdk = std(lkb,1);
    end
    S(k)=sdk * sqrt(1+1/B);
end

% Advance while the next gap, less its spread, still beats the current one.
kGap=1;
while ((kGap<nMaxClusters) && (Gap(kGap) < Gap(kGap+1)-S(kGap+1)))
    kGap=kGap+1;
end

% Final labelling for the chosen cluster count.
if (kGap>1)
    idx = SpectralCluster(D,kGap);
else
    idx = ones(size (D,1),1);
end
function f = GetCount(words,strServer)
% GETCOUNT  Count returned by a search backend for a set of terms, plus one.
%
%   words     - non-empty cell array of terms. Terms are joined with '+',
%               and any space inside a term is also replaced by '+'.
%   strServer - optional backend name: 'pubmed', 'amazon', 'google' or
%               'reddit'. Any other value (including omitted, or a
%               non-char such as 0) selects Count.wikipedia.
%
%   f         - backend count + 1 (the +1 avoids zero counts).
if ~exist('strServer','var')
    strServer='wikipedia'; % terminated so the default is not echoed on every call
end

if isempty(words)
    error('GetCount:emptyQuery','GetCount requires at least one search term.');
end

% Build the query string: term1+term2+..., with inner spaces also as '+'.
term=strrep(strjoin(words,'+'),' ','+');

if strcmp(strServer,'pubmed')
    f=Count.pubmed(term);
elseif strcmp(strServer,'amazon')
    % NOTE(review): the only doQueryAmazon visible next to this function is
    % commented out ("amazon no longer returning search totals as of
    % 1/2019"), so this branch fails unless the helper exists elsewhere.
    f=doQueryAmazon(term);
elseif strcmp(strServer,'google')
    f=GetCountGoogle(term);
elseif strcmp(strServer,'reddit')
    f=Count.reddit(term);
else
    f=Count.wikipedia(term);
end

% f=max(f,1); % no zero counts - sends nwd to NaN
f=f+1;
end
% amazon no longer returning search totals as of 1/2019...
% function f=doQueryAmazon(term)
%
% URL=['http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' term '&rh=i%3Aaps%2Ck%3A' term];
% for i=1:5
%
% str = urlread(URL);
% try
% %amazon
% idx=strfind(str,'results for <span>');
% idx=idx(1)-2;
% idxStart=idx;
% chPrev = str(idxStart-1);
% while(isNumeric(chPrev))
% idxStart=idxStart-1;
% chPrev = str(idxStart-1);
% end
% nx=str(idxStart:idx);
% f=str2double(nx);
% break;
% catch
% f=0;
% pause(1);
% end
% end
%
% end
function fX=GetSynthesizedCount(words)
% GETSYNTHESIZEDCOUNT  Alternating-sign sum of GetCount over every
% non-empty subset of words.
%
%   words - cell array of terms
%   fX    - sum over subsets of sign * GetCount(subset), where sign is +1
%           for subsets of odd size and -1 for subsets of even size
%
% Subsets are enumerated by the bits of 1..2^k-1, least-significant bit
% first (bit n set <=> words(n) is in the subset). bitget is used so that
% no toolbox function is required.
k=length(words);
fX=0;
for mask=1:2^k-1
    idx=find(bitget(mask,1:k));
    if 0==mod(length(idx),2)
        bAdd=-1;
    else
        bAdd=1;
    end
    % The second argument 0 matches none of GetCount's server names, so
    % GetCount takes its fall-through (wikipedia) branch.
    fX=fX+bAdd*GetCount(words(idx),0);
end
function [D,degenerate] = Regularize(DistanceMatrix)
% REGULARIZE  Turn a raw (e.g. NCD) matrix into a symmetric distance matrix
% with a zero diagonal.
%
%   D          - copy of DistanceMatrix whose lower triangle mirrors the
%                upper triangle and whose diagonal is 0
%   degenerate - 1 if the smallest off-diagonal entry of D is exactly 0,
%                otherwise 0
D=DistanceMatrix;
nRows=size(D,1);
for row=1:nRows
    % Mirror the part of column `row` above the diagonal into row `row`.
    D(row,1:row-1)=D(1:row-1,row).';
    D(row,row)=0;
end

% Mask the diagonal so that min() only sees off-diagonal entries.
offDiag=D;
for n=1:size(offDiag,1)
    offDiag(n,n)=NaN;
end
smallest=min(min(offDiag));

degenerate=0;
if 0==smallest
    degenerate=1;
end
function w=WkSpectral(k,idx,DistanceMatrix)
% WKSPECTRAL  Pooled within-cluster dispersion of a labelling.
% Called by GapSpectral.
%
%   k              - number of clusters; labels 1..k are examined
%   idx            - vector of cluster labels (row or column)
%   DistanceMatrix - pairwise distances, indexed by position in idx
%
%   w - sum over clusters r of
%         (sum of DistanceMatrix(p,q) over members p,q of r, each
%          unordered pair once, diagonal included) / (2 * size of r)
%
% A label in 1..k with no members gives 0/0, so w is NaN in that case.
D = zeros(1,k);
for r =1:k
    % find points in cluster r
    pr = find(idx==r);
    % numel rather than size(pr,1): find returns a row vector when idx is
    % a row vector, and size(pr,1) would then always be 1.
    nr = numel(pr);
    for i=1:nr
        for j=i:nr
            D(r) = D(r)+DistanceMatrix(pr(i),pr(j));
        end
    end
    D(r) = D(r)/(2*nr); %(2) in paper
end
w = sum(D);
% Script: builds five word lists (they appear to be book titles grouped by
% author), labels each list 1..5 in idxKey, and passes the combined list to
% GetDistances and getDistancesPairs.
% Statements without a trailing semicolon echo their value to the console.
tic
% goNWD
%
%
% Backend selection is published through a global.
global strServer
strServer='amazon'
% Create the shared query cache only if it does not exist yet, so results
% from earlier runs in the same session are kept.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the per-run counters.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
words1={'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
'The Merchant of Venice','A Midsummer Nights Dream',...
'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }
words2={'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
'Firestarter','Cujo'}
words3={'Adventures of Huckleberry Finn','A Connecticut Yankee in King Arthurs Court','Life on the Mississippi'...
'Puddnhead Wilson'}
words4 = {'The Old Man and The Sea','The Sun Also Rises','For Whom the Bell Tolls','A Farewell To Arms'}
words5={'Anna Karenina','War and Peace','The Death of Ivan Ilyich'}
% One label per word: 1 for every entry of words1, 2 for words2, and so on.
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2)) 3*ones(1,length(words3)) 4*ones(1,length(words4)) 5*ones(1,length(words5))]
% NOTE(review): these calls pass no server argument. If they resolve to the
% two-argument GetCount(words,strServer), that function falls back to its
% own default rather than reading the global set above - confirm.
GetCount(words1)
GetCount(words2)
%
% GetDistances / getDistancesPairs are defined elsewhere; presumably they
% compute the distances and compare a clustering against idxKey - verify.
[d]=GetDistances([words1,words2,words3,words4,words5],idxKey);
idx=getDistancesPairs([words1,words2,words3,words4,words5],idxKey);
% Script: runs GetDistances and getDistancesPairs on three two-group word
% sets (colors vs animals, colors vs shapes, two lists of politicians).
% idxKey labels the first list 1 and the second list 2 in each case.
% Statements without a trailing semicolon echo their value to the console.
tic
% goNWD
%
%
% NOTE(review): strServer is assigned here without a `global strServer`
% declaration in this script, so this only changes the global if it was
% declared earlier in the same workspace - confirm.
strServer='wikipedia'
% Set 1: colors vs animals.
words1={'red','orange','yellow','green','blue','indigo'}
words2={'lion','tiger','bear','monkey','zebra','elephant','aardvark','lamb','fox','ape','dog'}
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2))]
d=GetDistances([words1,words2],idxKey);
idx=getDistancesPairs([words1,words2],idxKey);
% Set 2: colors vs shapes.
words1={'red','orange','yellow','green','blue','indigo','violet','purple','cyan','white'}
words2={'square','circle','rectangle','ellipse','triangle','rhombus'}
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2))]
d=GetDistances([words1,words2],idxKey);
idx=getDistancesPairs([words1,words2],idxKey);
% Set 3: two lists of politicians.
words1={'Barack Obama','Hillary Clinton','John Edwards','Joe Biden','Chris Dodd','Mike Gravel'}
words2={'John McCain','Mitt Romney','Mike Huckabee','Ron Paul','Fred Thompson','Alan Keyes'}
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2))]
d=GetDistances([words1,words2],idxKey);
idx=getDistancesPairs([words1,words2],idxKey);
%
% Disabled set (species names); it uses an older two-list call signature.
% words1={'Labidochromis caeruleus','Sciaenochromis fryeri','Betta splendens','Carassius auratus','Melanochromis cyaneorhabdos'}
% words2={'Ecsenius bicolor','Pictichromis paccagnellae','Amphiprion ocellaris ','Paracanthurus hepatus','Chromis viridis'}
% [d1,d2]=GetDistances(words1,words2);
% idx=getDistancesPairs(words1,words2);
% Elapsed time since the tic at the top of the script.
toc
% Script: NWD between every ordered pair of per-disease SNP (rs-id) lists,
% with pubmed selected as the backend. Writes the distance matrix and its
% row-wise de-biased version side by side to rr.xls.
global strServer
strServer='pubmed'

% Schizophrenia
% http://www.nature.com/nature/journal/v511/n7510/full/nature13595.html
% supp table 2, in ranked order
shizophrenia = { ...
    'rs1702294', 'rs11191419', 'rs2007044', 'rs4129585', 'rs35518360' };

% Leukemia
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3250464/ (table 1)
leukemia = { ...
    'rs17483466', 'rs13397985', 'rs757978', 'rs2456449', 'rs735665', ...
    'rs783540', 'rs305061', 'rs391525', 'rs1036935', 'rs11083846' };

% Alzheimers
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3365264/ (table 1)
% http://www.nature.com/tp/journal/v2/n5/fig_tab/tp201245t1.html#figure-title
alzheimers = { ...
    'rs4420638', 'rs7561528', 'rs17817600', 'rs3748140', ...
    'rs12808148', 'rs6856768', 'rs11738335', 'rs1357692' };

% Obesity
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2858696/ (table 1)
obesity = { ...
    'rs10926984', 'rs12145833', 'rs2783963', ...
    'rs11127485', 'rs17150703', 'rs13278851' };

% Neuroblastoma - earlier list, disabled:
% % http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3716226/ (table 3)
% neuroblastoma={'rs6939340','rs4712653','rs6435862','rs3768716','rs7585356'};
% Neuroblastoma 2 (in use)
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2742373/table/T1/
neuroblastoma = { ...
    'rs6939340', 'rs4712653', 'rs9295536', 'rs3790171', 'rs7272481' };

% Parkinsons
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3305333/
% table 2, rs entries, by pvalue
% now trying http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3852568/table/tbl03/
% p<.1% in 2/3
% parkinsons={'rs356219','rs10847864','rs1491942','rs947211','rs2390669'}
parkinsons = { 'rs356219', 'rs10847864', 'rs2942168', 'rs11724635' }

% ALS
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/table/T1/
als = { ...
    'rs2303565', 'rs1344642', 'rs2814707', 'rs3849942', ...
    'rs2453556', 'rs1971791', 'rs8056742' };

% One cell per disease; the order fixes the row/column order of d.
words = { alzheimers; parkinsons; als; shizophrenia; leukemia; obesity; neuroblastoma }

% Shared query cache: created only when absent.
global QueryCache
if isempty(QueryCache)
    QueryCache = struct('Queries',{{}},'Count',[]);
end

% Per-run counters.
global nQueryCount nCacheCount
nQueryCount = 0;
nCacheCount = 0;

% d(i,j): NWD of list i concatenated with list j.
d = [];
for i = 1:length(words)
    for j = 1:length(words)
        d(i,j) = NWD( [words{i},words{j}]);
    end
end

% dx(i,j) = d(i,j) - d(i,i): subtract each row's self-distance.
dx = bsxfun(@minus,d,diag(d));

xlswrite('rr.xls',[d dx])
% Script: leave-one-out NWD on a single word list, followed by GapKM.
% Appends the sibling Gap folder to the search path (backslash separator).
path(path,'..\Gap')
% goTypical
% Create the shared query cache only if it does not exist yet.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the per-run counters.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
% Alternative word lists kept for reference:
% words1={'red','orange','yellow','green','blue','vermilion','chartreuse'}
% words1={'red','orange','yellow','green','blue','indigo','violet'}
words1={'red','orange','yellow','green','blue'}
% words1={'red','orange','yellow','green','blue','indigo','texas'}
% NWD of the full list (echoed because the line is unterminated).
dx=NWD(words1)
% d(i): NWD of the list with word i removed.
d=[];
for i=1:length(words1)
wordsX=words1;
wordsX(i)=[];
d(i)=NWD(wordsX);
end
% Silences every warning for the rest of the session.
warning off all
% Column vector: full-list NWD minus each leave-one-out NWD.
d=dx-d';
% GapKM is defined elsewhere; it receives the differences and a maximum of
% length(words1)-1 clusters.
[K Gap S idx] = GapKM(d,length(words1)-1);
% Script: runs GetDistances and getDistancesPairs on four lists of names
% (nobels2014, physics, chemistry, medicine).
% Statements without a trailing semicolon echo their value to the console.
tic
% Create the shared query cache only if it does not exist yet.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the per-run counters.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
nobels2014={'Isamu Akasaki', 'Hiroshi Amano', 'Shuji Nakamura','Eric Betzig', 'Stefan W. Hell', 'William E. Moerner','John O''Keefe', 'May-Britt Moser', 'Edvard I. Moser' }
physics={'Albert Einstein','Isaac Newton','Stephen Hawking','Nils Bohr','James Maxwell'}
chemistry={'Amedeo Avogadro','Louis Pasteur','Linus Pauling','Robert Boyle'}
medicine={'Elizabeth Blackwell','William Harvey','Carl Jung','Richard Lister'}
% Labels: nobels2014 entries get 1; physics, chemistry and medicine entries
% all get 2. NOTE(review): confirm the shared label 2 is intended (two
% groups) rather than distinct labels per field.
idxKey = [ones(1,length(nobels2014)) 2*ones(1,length(physics)) 2*ones(1,length(chemistry))...
2*ones(1,length(medicine))]
[d]=GetDistances([nobels2014,physics,chemistry,medicine],idxKey);
% Unterminated: the returned idx is echoed.
idx=getDistancesPairs([nobels2014,physics,chemistry,medicine],idxKey)
% Script: runs GetDistances and getDistancesPairs on five lists of surnames,
% labelled 1..5 in idxKey.
% Statements without a trailing semicolon echo their value to the console.
tic
% Create the shared query cache only if it does not exist yet.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the per-run counters.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
words1={'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
words2={'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann',} %physicists
words3={'Freud','Pavlov','Skinner','Jung'} %psychologists
words4={'Turing','Lovelace','Knuth','Hopper'} % computer scientists
words5={'Darwin','Lamarck','Linnaeus','Mendel'} % biology
% One label per word: 1 for every entry of words1, 2 for words2, and so on.
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2)) 3*ones(1,length(words3)) 4*ones(1,length(words4)) 5*ones(1,length(words5))]
[d]=GetDistances([words1,words2,words3,words4,words5],idxKey);
idx=getDistancesPairs([words1,words2,words3,words4,words5],idxKey);
File deleted
File deleted
-------------------------
GetDistances::(multiples) NW=5
2014 5 1 16 51 5.961
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=49, nCacheCount=335
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 1 16 51 57.306
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,kGap=2
nQueryCount=66, nCacheCount=198
nCorrect=11
-------------------------
GetDistances::(multiples) NW=5
2014 5 1 16 52 14.308
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=30, nCacheCount=354
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 1 16 52 53.707
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,kGap=2
nQueryCount=51, nCacheCount=213
nCorrect=12
-------------------------
GetDistances::(multiples) NW=5
2014 5 1 16 53 30.481
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=48, nCacheCount=336
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 1 16 54 30.869
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=198
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 5 1 16 54 55.778
Labidochromis caeruleus,Sciaenochromis fryeri,Betta splendens,Carassius auratus,Melanochromis cyaneorhabdos,Ecsenius bicolor,Pictichromis paccagnellae,Amphiprion ocellaris ,Paracanthurus hepatus,Chromis viridis,nQueryCount=40, nCacheCount=240
[0 5;0 5]
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 17 48 59.115
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=49, nCacheCount=335
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 26 17 49 47.104
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,kGap=2
nQueryCount=66, nCacheCount=198
nCorrect=11
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 17 50 3.128
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=30, nCacheCount=354
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 26 17 50 38.946
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,kGap=2
nQueryCount=51, nCacheCount=213
nCorrect=12
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 17 51 15.729
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=48, nCacheCount=336
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 26 17 52 16.695
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=198
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 17 52 38.784
Labidochromis caeruleus,Sciaenochromis fryeri,Betta splendens,Carassius auratus,Melanochromis cyaneorhabdos,Ecsenius bicolor,Pictichromis paccagnellae,Amphiprion ocellaris ,Paracanthurus hepatus,Chromis viridis,nQueryCount=40, nCacheCount=240
[0 5;0 5]
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 18 9 46.311
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=150, nCacheCount=474
[0 6;6 0]
-------------------------
GetDistances::(pairs)
2014 5 26 18 9 48.012
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,kGap=4
nQueryCount=0, nCacheCount=330
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 18 10 50.192
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=111, nCacheCount=513
[0 6;4 2]
-------------------------
GetDistances::(pairs)
2014 5 26 18 10 51.87
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,kGap=3
nQueryCount=0, nCacheCount=330
nCorrect=6
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 18 12 44.564
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=150, nCacheCount=474
[1 5;6 0]
-------------------------
GetDistances::(pairs)
2014 5 26 18 12 46.22
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=0, nCacheCount=330
nCorrect=8
-------------------------
GetDistances::(multiples) NW=5
2014 5 26 18 13 46.902
Labidochromis caeruleus,Sciaenochromis fryeri,Betta splendens,Carassius auratus,Melanochromis cyaneorhabdos,Ecsenius bicolor,Pictichromis paccagnellae,Amphiprion ocellaris ,Paracanthurus hepatus,Chromis viridis,nQueryCount=100, nCacheCount=340
[0 5;0 5]
-------------------------
GetDistances::(pairs)
2014 5 26 18 13 48.428
Labidochromis caeruleus,Sciaenochromis fryeri,Betta splendens,Carassius auratus,Melanochromis cyaneorhabdos,Ecsenius bicolor,Pictichromis paccagnellae,Amphiprion ocellaris ,Paracanthurus hepatus,Chromis viridis,kGap=3
nQueryCount=0, nCacheCount=225
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 5 27 11 29 56.627
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=0, nCacheCount=672
[6 0;3 3]
-------------------------
GetDistances::(pairs)
2014 5 27 11 29 58.3
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,kGap=1
nQueryCount=0, nCacheCount=396
nCorrect=10
-------------------------
GetDistances::(multiples) NW=5
2014 5 27 11 29 58.339
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=0, nCacheCount=672
[6 0;2 4]
-------------------------
GetDistances::(pairs)
2014 5 27 11 30 0.027
red,orange,yellow,green,blue,indigo,square,circle,rectangle,ellipse,triangle,rhombus,kGap=2
nQueryCount=0, nCacheCount=396
nCorrect=9
-------------------------
GetDistances::(multiples) NW=5
2014 5 27 11 30 0.066
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=0, nCacheCount=672
[6 0;0 6]
-------------------------
GetDistances::(pairs)
2014 5 27 11 30 1.765
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=0, nCacheCount=396
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 5 27 11 30 1.794
Labidochromis caeruleus,Sciaenochromis fryeri,Betta splendens,Carassius auratus,Melanochromis cyaneorhabdos,Ecsenius bicolor,Pictichromis paccagnellae,Amphiprion ocellaris ,Paracanthurus hepatus,Chromis viridis,nQueryCount=0, nCacheCount=480
[0 5;0 5]
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 12 52 27.35
WHSC1,NSD1,ASH1l,SETD2,MLL5,CREBBP,EP300,MLL,PHIP(2),BRWD3(2),nQueryCount=141, nCacheCount=339
[0 5;0 5]
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 13 3 7.572
WHSC1,NSD1,ASH1l,SETD2,MLL5,CREBBP,EP300,MLL,PHIP(2),BRWD3(2),nQueryCount=130, nCacheCount=350
[2 3;0 5]
-------------------------
GetDistances::(pairs)
2014 7 2 13 3 23.862
WHSC1,NSD1,ASH1l,SETD2,MLL5,CREBBP,EP300,MLL,PHIP(2),BRWD3(2),kGap=1
nQueryCount=45, nCacheCount=225
nCorrect=9
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 13 9 10.186
ELP3,KAT2B,MYST4,KAT5,HAT1,CREBBP,EP300,MLL,PHIP(2),BRWD3(2),nQueryCount=100, nCacheCount=380
[4 1;2 3]
-------------------------
GetDistances::(pairs)
2014 7 2 13 9 24.264
ELP3,KAT2B,MYST4,KAT5,HAT1,CREBBP,EP300,MLL,PHIP(2),BRWD3(2),kGap=1
nQueryCount=35, nCacheCount=235
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 15 0 45.002
ELP3,KAT2B,MYST4,KAT5,HAT1,CREBBP,EP300,MLL,PHIP,BRWD3,nQueryCount=141, nCacheCount=339
[3 2;2 3]
-------------------------
GetDistances::(pairs)
2014 7 2 15 1 0.536
ELP3,KAT2B,MYST4,KAT5,HAT1,CREBBP,EP300,MLL,PHIP,BRWD3,kGap=1
nQueryCount=45, nCacheCount=225
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 15 4 42.089
PPRV,CDV,RPV,MeV,hPIV2,SV41,PIV5,MuV,PorPV,MprPV,nQueryCount=141, nCacheCount=339
[2 2;4 2]
-------------------------
GetDistances::(pairs)
2014 7 2 15 4 53.022
PPRV,CDV,RPV,MeV,hPIV2,SV41,PIV5,MuV,PorPV,MprPV,kGap=1
nQueryCount=39, nCacheCount=231
nCorrect=5
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 15 25 40.952
ONAC001,ONAC003,ONAC005,ONAC014,ONAC028,ONAC010,ONAC016,ONAC024,ONAC058,ONAC088,nQueryCount=141, nCacheCount=339
[0 5;0 5]
-------------------------
GetDistances::(multiples) NW=5
2014 7 2 15 31 4.826
SwObihiro92,Leningrad91,Stockholm90,Yamagata89,SwNebraska92,SwStHyacinthe91,SwQuebec90,SwIowa88,nQueryCount=92, nCacheCount=228
[0 4;0 4]
-------------------------
GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 8 12 11 31 57.976
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=389, nCacheCount=903
[6 0;0 11]
=======
2014 7 7 15 3 13.02
brm,CG17652,BRWD3,Hem,br,velo,E2f,Dp,ctp,nQueryCount=116, nCacheCount=280
[2 2;0 5]
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 8 12 11 33 12.318
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
=======
2014 7 7 15 3 24.91
brm,CG17652,BRWD3,Hem,br,velo,E2f,Dp,ctp,kGap=1
nQueryCount=30, nCacheCount=186
nCorrect=6
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
-------------------------
GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 8 12 11 36 37.787
red,orange,yellow,green,blue,indigo,violet,purple,tan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
[9 1;0 6]
=======
2014 7 7 15 4 40.842
Cap-G,aub,vas,neb,br,velo,E2f,Dp,ctp,nQueryCount=75, nCacheCount=321
[1 3;1 4]
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 8 12 11 37 33.886
red,orange,yellow,green,blue,indigo,violet,purple,tan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=2
nQueryCount=105, nCacheCount=615
nCorrect=16
=======
2014 7 7 15 4 48.483
Cap-G,aub,vas,neb,br,velo,E2f,Dp,ctp,kGap=1
nQueryCount=20, nCacheCount=196
nCorrect=5
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
-------------------------
GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 8 12 11 39 45.674
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
[6 0;0 6]
=======
2014 7 25 6 22 5.7
Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,Cadherin,Ependymin,Integrin,NCAM,Selectin,nQueryCount=168, nCacheCount=404
[4 2;3 2]
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 8 12 11 40 28.955
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=330
nCorrect=7
=======
2014 7 25 6 22 36.598
Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,Cadherin,Ependymin,Integrin,NCAM,Selectin,kGap=1
nQueryCount=55, nCacheCount=275
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 6 24 12.565
Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,Cadherin,Ependymin,Integrin,NCAM,Selectin,nQueryCount=0, nCacheCount=572
[4 2;3 2]
-------------------------
GetDistances::(pairs)
2014 7 25 6 24 14.609
Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,Cadherin,Ependymin,Integrin,NCAM,Selectin,kGap=1
nQueryCount=0, nCacheCount=330
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 6 59 53.568
cytoskeleton,Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,adhesion,Cadherin,Ependymin,Integrin,NCAM,Selectin,nQueryCount=232, nCacheCount=548
[5 2;4 2]
-------------------------
GetDistances::(pairs)
2014 7 25 7 0 26.173
cytoskeleton,Actin,Arp2/3,Coronin,Dystrophin,FtsZ,Keratin,adhesion,Cadherin,Ependymin,Integrin,NCAM,Selectin,kGap=1
nQueryCount=78, nCacheCount=390
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 11 26 29.498
DRD2,GRM3,GRIN2A,SRR,SNCA,LRRK2,PINK1,ATP13A2,nQueryCount=87, nCacheCount=233
[3 1;4 0]
-------------------------
GetDistances::(pairs)
2014 7 25 11 26 36.154
DRD2,GRM3,GRIN2A,SRR,SNCA,LRRK2,PINK1,ATP13A2,kGap=1
nQueryCount=16, nCacheCount=152
nCorrect=7
-------------------------
GetDistances::(pairs)
2014 7 25 11 28 39.277
DRD2,GRM3,GRIN2A,SRR,SNCA,LRRK2,PINK1,ATP13A2,kGap=1
nQueryCount=6, nCacheCount=196
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 11 35 8.274
Schizophrenia,DRD2,GRM3,GRIN2A,SRR,Parkinsons,LRRK2,PINK1,ATP13A2,MAPT,nQueryCount=105, nCacheCount=375
[5 0;2 3]
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 11 52 50.767
Schizophrenia,DRD2,GRM3,GRIN2A,SRR,CACNA1I,RIMS1,KCTD13,Parkinsons,LRRK2,PINK1,ATP13A2,MAPT,SNCA,HLA-DRB5,BST1,GAK,nQueryCount=376, nCacheCount=916
[8 0;1 8]
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 11 55 21.349
DRD2,GRM3,GRIN2A,SRR,CACNA1I,RIMS1,KCTD13,LRRK2,PINK1,ATP13A2,MAPT,SNCA,HLA-DRB5,BST1,GAK,nQueryCount=244, nCacheCount=776
[4 3;5 3]
-------------------------
GetDistances::(multiples) NW=5
2014 7 25 15 19 39.235
DRD2,GRM3,GRIN2A,SRR,CACNA1I,RIMS1,KCTD13,LRRK2,PINK1,ATP13A2,MAPT,SNCA,HLA-DRB5,BST1,GAK,ACMSD,STK39,MCCC1/LAMP3,SYT11,CCDC62/HIP1R,nQueryCount=390, nCacheCount=1370
[3 4;8 5]
-------------------------
GetDistances::(multiples) NW=5
2014 8 4 16 2 31.248
rs17483466,rs13397985,rs757978,rs872071,rs2456449,rs735665,rs6939340,rs4712653,rs3768716,rs7585356,rs2070094,rs2229571,nQueryCount=199, nCacheCount=473
[6 0;2 4]
-------------------------
GetDistances::(multiples) NW=5
2014 8 4 16 23 16.536
rs17483466,rs13397985,rs757978,rs872071,rs2456449,rs735665,rs4420638,rs7561528,rs17817600,rs3748140,rs12808148,rs6856768,rs11738335,rs1357692,nQueryCount=161, nCacheCount=735
[3 3;5 3]
-------------------------
GetDistances::(multiples) NW=5
2014 8 4 16 41 56.569
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=199, nCacheCount=473
[0 6;0 6]
-------------------------
GetDistances::(multiples) NW=5
2014 8 4 16 42 20.597
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=0, nCacheCount=672
[0 6;0 6]
-------------------------
GetDistances::(multiples) NW=5
2014 8 4 17 2 36.636
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,nQueryCount=191, nCacheCount=481
[6 0;1 5]
-------------------------
GetDistances::(pairs)
2014 8 4 17 5 6.979
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,kGap=2
nQueryCount=66, nCacheCount=330
nCorrect=9
>>>>>>> bef2f507c6358a5ceee1c070702fd6d9ada135b7
File deleted
File deleted
% wikiScientistsGoRDK.m
% For K = 1..15, spectrally cluster 22 scientist surnames on pairwise NWD,
% then report per-cluster NWD and a label-purity error against idxKey.
% Plots: max per-cluster NWD vs K, error rate vs K, and the first
% difference of the linearly de-trended NWD curve.
% Statements without a trailing semicolon echo their value to the console.
% load wiki_scientists_9_12_18.mat

% Added before the first helper call so that functions living in ../Gap
% can be resolved (appending does not change precedence of existing paths).
path(path,'../Gap');

words1={'Kolmogorov','Fermat','Hilbert', 'Godel', 'Riemann','Gauss'} % mathematicians
words2={'Einstein','Newton','Hawking','Bohr','Maxwell','Boltzmann',} %physicists
words3={'Freud','Pavlov','Skinner','Jung'} %psychologists
words4={'Turing','Lovelace','Knuth','Hopper'} % computer scientists
words5={'Darwin','Lamarck','Linnaeus','Mendel'} % biology
% One label per word: 1 for every entry of words1, 2 for words2, and so on.
idxKey = [ones(1,length(words1)) 2*ones(1,length(words2)) 3*ones(1,length(words3)) 4*ones(1,length(words4)) 5*ones(1,length(words5))]
words=[words1,words2,words3,words4,words5];

% Upper triangle of pairwise NWD.
dx=[];
for i=1:length(words)
    for j=i+1:length(words)
        dx(i,j)=NWD([words(i),words(j)]);
    end
end
% ack! pad d
% The loops never write the last row; add it so dx is square.
dx(length(words),:)=0;
dx=Regularize(dx);

hk=[];
errorK=[]; % initialised like hk so a stale workspace value cannot leak in
for KK=1:15
    idxSpectral=SpectralCluster(dx,KK);
    hki=[];
    ni=[];
    % go through each cluster, compute the NWD for that cluster
    for i=1:KK
        idxi=find(idxSpectral==i);
        ni(i)=length(idxi);
        if length(idxi)<2
            % singleton: mark NaN and stop evaluating further clusters
            hki(i)=NaN;
            break;
        end
        hki(i)=NWD(words(idxi));
    end
    fprintf(1,'KK=%d : h=%0.3f, hki=%s, ni=%s\n',KK,max(hki),mat2str(hki,2),mat2str(ni,2));

    % Error per cluster: members whose true label differs from the
    % cluster's most frequent true label. Singleton clusters give NaN.
    ek=[];
    for k=1:KK
        idxk=find(idxSpectral==k);
        fprintf(1,' %s\n',mat2str([idxKey(idxk)]));
        idxTrueI=idxKey(idxk);
        pred=mode(idxTrueI);
        ek(k)=length(find(idxTrueI~=pred));
        if 1==length(idxk)
            ek(k)=NaN;
        end
    end
    errorK(KK)=sum(ek);
    fprintf(1,'errorK(%d)=%0.2f\n',KK,errorK(KK)/length(words));
    hk(KK)=max(hki);
end

% Drop NaN entries, fit a line to hk vs K, and difference the residuals.
x=[1:length(hk)]'
hkn=hk
hkn(isnan(hk))=[]
x(isnan(hk))=[]
hkn=hkn'
p=polyfit(x,hkn,1);
hkd=hkn-(p(1).*x+p(2))
hkd=diff(hkd);

figure(1);clf;plot(hk,'-*')
title('average per cluster NWD')
% Normalise by the number of words, matching the errorK printout in the
% loop (the original divided by length(tx), a name this script never sets).
figure(2);clf;plot(errorK./length(words),'-+');
title('error rate')
figure(3);clf;plot(hkd,'-^')
title('1st derivative of de-biased average per cluster NWD')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment