Skip to content
Snippets Groups Projects
Commit ee894148 authored by Andrew Cohen's avatar Andrew Cohen
Browse files

Merge remote-tracking branch 'origin/pubmed' into wikipedia_AND

Conflicts:
	GetDistances.m
	NWD.m
	results.txt
parents 2ab7ed0c 15e5e7ca
No related branches found
No related tags found
No related merge requests found
...@@ -28,3 +28,4 @@ obj/ ...@@ -28,3 +28,4 @@ obj/
_ReSharper*/ _ReSharper*/
[Tt]est[Rr]esult* [Tt]est[Rr]esult*
*.asv *.asv
~$*
...@@ -6,7 +6,7 @@ if isempty(strServer) ...@@ -6,7 +6,7 @@ if isempty(strServer)
strServer='wikipedia' strServer='wikipedia'
end end
term=[words{1}]; term=[ words{1} ];
for i=2:length(words) for i=2:length(words)
term=[ term '+' words{i}]; term=[ term '+' words{i}];
end end
...@@ -85,28 +85,15 @@ for i=1:5 ...@@ -85,28 +85,15 @@ for i=1:5
str = urlread(URL); str = urlread(URL);
try try
%amazon %amazon
idx=strfind(str,'results for <span class="'); idx=strfind(str,'results for <span>');
idx=idx(1)-2; idx=idx(1)-2;
idxStart=idx; idxStart=idx;
while(str(idxStart)~=' ') chPrev = str(idxStart-1);
while(isNumeric(chPrev))
idxStart=idxStart-1; idxStart=idxStart-1;
chPrev = str(idxStart-1);
end end
nx=str(idxStart:idx); nx=str(idxStart:idx);
% WIKIPEDIA%
% idx=strfind(str,'div class="results-info"');
% idx=idx(1);
% str2=str(idx:end);
%
% idx2=strfind(str2,'of <b>');
% str3=str2(idx2+6:end);
%
% idx3=strfind(str3,'</b>');
% idx3=idx3(1);
%
% nx=str3(1:idx3-1);
% END WIKIPEDIA
f=str2double(nx); f=str2double(nx);
break; break;
catch catch
...@@ -117,4 +104,18 @@ end ...@@ -117,4 +104,18 @@ end
end end
function bNumber = isNumeric(ch)
% ISNUMERIC  Report whether CH may be part of a formatted integer count.
%
%   bNumber = isNumeric(ch) returns 1 when CH is a non-empty char value
%   whose characters are all decimal digits ('0'-'9') or commas (the
%   thousands separator used in counts such as "1,234"), and 0 otherwise.
%
%   Input:
%     ch      - character to classify (callers pass a single character).
%   Output:
%     bNumber - 1 (digit or comma) or 0 (anything else), as a double so it
%               can be used directly as a loop condition.
%
%   Note: an earlier version tested ~isnan(str2double(ch)). str2double
%   parses a lone 'i' or 'j' as the imaginary unit, which is not NaN, so
%   those letters were wrongly reported as numeric. Testing the character
%   range directly avoids that. Multi-character text such as '1.5' or
%   '1e3' is no longer accepted; only digits and commas are.
bNumber = 0;
% Non-text or empty input can never be part of a count.
if ~ischar(ch) || isempty(ch)
    return
end
bIsDigit = (ch >= '0') & (ch <= '9');
bIsComma = (ch == ',');
if all(bIsDigit(:) | bIsComma(:))
    bNumber = 1;
end
end
...@@ -54,10 +54,10 @@ for i=1:length(words) ...@@ -54,10 +54,10 @@ for i=1:length(words)
% NWD2(i)=NWD(w2); % NWD2(i)=NWD(w2);
d1(i)=(NWD1x(i)-NWD1(i)); d1(i)=(NWD1x(i)-NWD1(i));
d2(i)=(NWD2x(i)-NWD2(i)); if isnan(d1(i))
if isnan(d1(i)) % f(X)=0 --> -log2(0)==Inf , Inf-Inf=NaN d1(i)=inf;
d1(i)=Inf;
end end
d2(i)=(NWD2x(i)-NWD2(i));
if isnan(d2(i)) % f(X)=0 --> -log2(0)==Inf if isnan(d2(i)) % f(X)=0 --> -log2(0)==Inf
d2(i)=Inf; d2(i)=Inf;
end end
......
...@@ -26,5 +26,5 @@ else ...@@ -26,5 +26,5 @@ else
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv
end end
% normalize for cardinality % normalize for cardinality
nwd = nwd/(length(words)-1); nwd = nwd/(length(words)-1);
% nwd=(log2(max(fw))-log2(fX))/( log2(N)-log2(max(fexclude)) );
...@@ -19,6 +19,7 @@ for i=1:length(words) ...@@ -19,6 +19,7 @@ for i=1:length(words)
end end
d(length(words),length(words))=0; d(length(words),length(words))=0;
d(find(isinf(d)))=10;
d=Regularize(d); d=Regularize(d);
[kGap Gap S idx] = GapSpectral(d,6,1); [kGap Gap S idx] = GapSpectral(d,6,1);
fprintf('kGap=%d\n',kGap); fprintf('kGap=%d\n',kGap);
......
% Start a stopwatch timer. No matching toc appears in the lines shown here.
tic
% goNWD
%
% Driver script: sets up the global query state, defines two groups of
% titles, then calls the count and distance routines on them.
% GetCount, GetDistances and getDistancesPairs are defined elsewhere.
%
% Name of the search backend, shared through a global.
% NOTE(review): presumably read by the query code to pick the site to
% query - confirm against the function that builds the URL.
% No trailing semicolon, so the assigned value is echoed to the console.
global strServer
strServer='amazon'
% Create the cache struct only when the global is still empty, so a cache
% left over from an earlier run in the same session is kept.
% NOTE(review): Queries/Count presumably hold query strings and their
% result counts - confirm against the code that fills the cache.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset both counters to zero at the start of every run.
% NOTE(review): presumably incremented by the query/cache code - confirm.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
% First group of titles. No trailing semicolon: the cell array is echoed.
words1={'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
'The Merchant of Venice','A Midsummer Nights Dream',...
'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }
% Second group of titles, also echoed.
words2={'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
'Firestarter','Cujo'}
% Called once per group; with no semicolon any returned value is echoed.
GetCount(words1)
GetCount(words2)
%
% Run both distance routines on the two groups; outputs are kept in
% d1, d2 and idx and not displayed.
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
global strServer
strServer='pubmed'
%http://www.nature.com/nature/journal/v511/n7510/full/nature13595.html %http://www.nature.com/nature/journal/v511/n7510/full/nature13595.html
% supp table 2, in ranked order % supp table 2, in ranked order
...@@ -37,7 +40,11 @@ neuroblastoma = {'rs6939340','rs4712653','rs9295536','rs3790171','rs7272481'}; ...@@ -37,7 +40,11 @@ neuroblastoma = {'rs6939340','rs4712653','rs9295536','rs3790171','rs7272481'};
% parkinsons={'rs356219','rs10847864','rs1491942','rs947211','rs2390669'} % parkinsons={'rs356219','rs10847864','rs1491942','rs947211','rs2390669'}
parkinsons={'rs356219','rs10847864','rs2942168','rs11724635'} parkinsons={'rs356219','rs10847864','rs2942168','rs11724635'}
words={ alzheimers;parkinsons;shizophrenia;leukemia;obesity;neuroblastoma} % http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/table/T1/
als = {'rs2303565','rs1344642','rs2814707','rs3849942','rs2453556', 'rs1971791', 'rs8056742'};
words={ alzheimers;parkinsons;als;shizophrenia;leukemia;obesity;neuroblastoma}
global QueryCache global QueryCache
if isempty(QueryCache) if isempty(QueryCache)
......
No preview for this file type
No preview for this file type
<<<<<<< HEAD
------------------------- -------------------------
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
2014 9 5 22 18 10.438 2014 9 5 22 18 10.438
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292 red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292
=======
% no normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 18 14.692
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
>>>>>>> origin/pubmed
[6 0;0 11] [6 0;0 11]
------------------------- -------------------------
<<<<<<< HEAD
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
2014 9 5 22 21 31.57 2014 9 5 22 21 31.57
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=320, nCacheCount=832 red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=320, nCacheCount=832
=======
GetDistances::(pairs)
2014 8 13 12 19 27.056
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 22 38.066
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
>>>>>>> origin/pubmed
[10 0;0 6] [10 0;0 6]
------------------------- -------------------------
<<<<<<< HEAD
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
2014 9 5 22 23 50.569 2014 9 5 22 23 50.569
=======
GetDistances::(pairs)
2014 8 13 12 23 32.038
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615
nCorrect=16
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 25 37.418
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474 Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
[6 0;0 6] [6 0;0 6]
...@@ -26,23 +62,41 @@ Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John ...@@ -26,23 +62,41 @@ Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John
------------------------- -------------------------
GetDistances::(pairs) GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 24 38.426 2014 9 5 22 24 38.426
=======
2014 8 13 12 26 19.956
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2 Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=330 nQueryCount=66, nCacheCount=330
nCorrect=7 nCorrect=7
<<<<<<< HEAD
------------------------- -------------------------
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
2014 9 5 22 29 12.913 2014 9 5 22 29 12.913
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292 red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292
=======
% normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 37 22.423
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
>>>>>>> origin/pubmed
[6 0;0 11] [6 0;0 11]
------------------------- -------------------------
GetDistances::(pairs) GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 30 36.213 2014 9 5 22 30 36.213
=======
2014 8 13 12 38 38.851
>>>>>>> origin/pubmed
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2 red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680 nQueryCount=136, nCacheCount=680
nCorrect=17 nCorrect=17
...@@ -50,15 +104,24 @@ nCorrect=17 ...@@ -50,15 +104,24 @@ nCorrect=17
------------------------- -------------------------
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 9 5 22 30 36.384 2014 9 5 22 30 36.384
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=0, nCacheCount=1152 red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=0, nCacheCount=1152
=======
2014 8 13 12 41 55.419
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
>>>>>>> origin/pubmed
[10 0;0 6] [10 0;0 6]
------------------------- -------------------------
GetDistances::(pairs) GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 31 38.331 2014 9 5 22 31 38.331
=======
2014 8 13 12 42 49.628
>>>>>>> origin/pubmed
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1 red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615 nQueryCount=105, nCacheCount=615
nCorrect=16 nCorrect=16
...@@ -66,15 +129,56 @@ nCorrect=16 ...@@ -66,15 +129,56 @@ nCorrect=16
------------------------- -------------------------
GetDistances::(multiples) NW=5 GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 9 5 22 31 38.411 2014 9 5 22 31 38.411
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=0, nCacheCount=672 Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=0, nCacheCount=672
=======
2014 8 13 12 44 55.56
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
>>>>>>> origin/pubmed
[6 0;0 6] [6 0;0 6]
------------------------- -------------------------
GetDistances::(pairs) GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 31 40.923 2014 9 5 22 31 40.923
=======
2014 8 13 12 45 38.156
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2 Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=0, nCacheCount=396 nQueryCount=0, nCacheCount=396
nCorrect=7 nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 10 33 53.142
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=344, nCacheCount=808
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 14 25 25.227
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=153, nCacheCount=999
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 16 13 15.418
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=37, nCacheCount=1255
[10 0;0 7]
-------------------------
GetDistances::(pairs)
2014 9 19 16 21 31.009
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment