Commit ee894148 authored by Andrew Cohen
Browse files

Merge remote-tracking branch 'origin/pubmed' into wikipedia_AND

Conflicts:
	GetDistances.m
	NWD.m
	results.txt
parents 2ab7ed0c 15e5e7ca
......@@ -28,3 +28,4 @@ obj/
_ReSharper*/
[Tt]est[Rr]esult*
*.asv
~$*
......@@ -6,7 +6,7 @@ if isempty(strServer)
strServer='wikipedia'
end
term=[words{1}];
term=[ words{1} ];
for i=2:length(words)
term=[ term '+' words{i}];
end
......@@ -85,28 +85,15 @@ for i=1:5
str = urlread(URL);
try
%amazon
idx=strfind(str,'results for <span class="');
idx=strfind(str,'results for <span>');
idx=idx(1)-2;
idxStart=idx;
while(str(idxStart)~=' ')
chPrev = str(idxStart-1);
while(isNumeric(chPrev))
idxStart=idxStart-1;
chPrev = str(idxStart-1);
end
nx=str(idxStart:idx);
% WIKIPEDIA%
% idx=strfind(str,'div class="results-info"');
% idx=idx(1);
% str2=str(idx:end);
%
% idx2=strfind(str2,'of <b>');
% str3=str2(idx2+6:end);
%
% idx3=strfind(str3,'</b>');
% idx3=idx3(1);
%
% nx=str3(1:idx3-1);
% END WIKIPEDIA
f=str2double(nx);
break;
catch
......@@ -117,4 +104,18 @@ end
end
function bNumber = isNumeric(ch)
% ISNUMERIC  Report whether CH can be part of a formatted integer count.
%
%   bNumber = isNumeric(ch) returns 1 when CH converts to a real number
%   via str2double (e.g. a digit character) or when CH is the thousands
%   separator ','. It returns 0 otherwise.
%
%   Input:
%     ch      - text to classify; the visible caller passes one character
%               at a time while scanning backwards over a result count.
%   Output:
%     bNumber - 1 if CH is accepted as numeric text, 0 if not.
%
%   NOTE: the name differs from the MATLAB built-in ISNUMERIC only by
%   case; the built-in tests the data type, not the text content.

    bNumber = 1;

    value = str2double(ch);
    % str2double parses 'i' and 'j' as the imaginary unit (0+1i), which is
    % not NaN. Require a real result so those letters are not mistaken for
    % digits and the backwards scan stops at them.
    if isreal(value)
        if ~isnan(value)
            return
        end
    end

    % A lone comma is NaN for str2double but is valid inside "1,234".
    if ch == ','
        return
    end

    bNumber = 0;
end
......@@ -54,10 +54,10 @@ for i=1:length(words)
% NWD2(i)=NWD(w2);
d1(i)=(NWD1x(i)-NWD1(i));
d2(i)=(NWD2x(i)-NWD2(i));
if isnan(d1(i)) % f(X)=0 --> -log2(0)==Inf , Inf-Inf=NaN
d1(i)=Inf;
if isnan(d1(i))
d1(i)=inf;
end
d2(i)=(NWD2x(i)-NWD2(i));
if isnan(d2(i)) % f(X)=0 --> -log2(0)==Inf
d2(i)=Inf;
end
......
......@@ -26,5 +26,5 @@ else
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv
end
% normalize for cardinality
nwd = nwd/(length(words)-1);
nwd = nwd/(length(words)-1);
% nwd=(log2(max(fw))-log2(fX))/( log2(N)-log2(max(fexclude)) );
......@@ -19,6 +19,7 @@ for i=1:length(words)
end
d(length(words),length(words))=0;
d(find(isinf(d)))=10;
d=Regularize(d);
[kGap Gap S idx] = GapSpectral(d,6,1);
fprintf('kGap=%d\n',kGap);
......
tic
% goNWD
% Driver script: selects the 'amazon' server, prepares the shared query
% cache and counters, defines two lists of titles, and runs the count and
% distance routines on them.
% NOTE(review): tic starts a timer here; no matching toc is visible in this
% part of the file.
% Server selection is shared with other functions through a global.
% No trailing semicolon, so the assigned value is echoed to the console.
global strServer
strServer='amazon'
% Create the cache structure only if it is still empty, so previously
% cached entries are kept between runs.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the per-run counters.
% presumably these count live queries vs. cache hits; confirm against the
% code that increments them.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
% Two groups of titles to compare (they appear to be Shakespeare plays and
% Stephen King novels). Both assignments are echoed (no semicolon).
words1={'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
'The Merchant of Venice','A Midsummer Nights Dream',...
'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }
words2={'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
'Firestarter','Cujo'}
% Results of GetCount are echoed to the console (no semicolon).
GetCount(words1)
GetCount(words2)
% Run both analyses on the two groups; outputs are kept in the workspace.
% NOTE(review): the meaning of d1, d2 and idx is defined by GetDistances and
% getDistancesPairs, which are not shown here.
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
global strServer
strServer='pubmed'
%http://www.nature.com/nature/journal/v511/n7510/full/nature13595.html
% supp table 2, in ranked order
......@@ -37,7 +40,11 @@ neuroblastoma = {'rs6939340','rs4712653','rs9295536','rs3790171','rs7272481'};
% parkinsons={'rs356219','rs10847864','rs1491942','rs947211','rs2390669'}
parkinsons={'rs356219','rs10847864','rs2942168','rs11724635'}
words={ alzheimers;parkinsons;shizophrenia;leukemia;obesity;neuroblastoma}
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/table/T1/
als = {'rs2303565','rs1344642','rs2814707','rs3849942','rs2453556', 'rs1971791', 'rs8056742'};
words={ alzheimers;parkinsons;als;shizophrenia;leukemia;obesity;neuroblastoma}
global QueryCache
if isempty(QueryCache)
......
No preview for this file type
No preview for this file type
<<<<<<< HEAD
-------------------------
GetDistances::(multiples) NW=5
2014 9 5 22 18 10.438
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292
=======
% no normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 18 14.692
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
>>>>>>> origin/pubmed
[6 0;0 11]
-------------------------
<<<<<<< HEAD
GetDistances::(multiples) NW=5
2014 9 5 22 21 31.57
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=320, nCacheCount=832
=======
GetDistances::(pairs)
2014 8 13 12 19 27.056
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 22 38.066
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
>>>>>>> origin/pubmed
[10 0;0 6]
-------------------------
<<<<<<< HEAD
GetDistances::(multiples) NW=5
2014 9 5 22 23 50.569
=======
GetDistances::(pairs)
2014 8 13 12 23 32.038
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615
nCorrect=16
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 25 37.418
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
[6 0;0 6]
......@@ -26,23 +62,41 @@ Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 24 38.426
=======
2014 8 13 12 26 19.956
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=330
nCorrect=7
<<<<<<< HEAD
-------------------------
GetDistances::(multiples) NW=5
2014 9 5 22 29 12.913
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=0, nCacheCount=1292
=======
% normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 13 12 37 22.423
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
>>>>>>> origin/pubmed
[6 0;0 11]
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 30 36.213
=======
2014 8 13 12 38 38.851
>>>>>>> origin/pubmed
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
......@@ -50,15 +104,24 @@ nCorrect=17
-------------------------
GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 9 5 22 30 36.384
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=0, nCacheCount=1152
=======
2014 8 13 12 41 55.419
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
>>>>>>> origin/pubmed
[10 0;0 6]
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 31 38.331
=======
2014 8 13 12 42 49.628
>>>>>>> origin/pubmed
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615
nCorrect=16
......@@ -66,15 +129,56 @@ nCorrect=16
-------------------------
GetDistances::(multiples) NW=5
<<<<<<< HEAD
2014 9 5 22 31 38.411
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=0, nCacheCount=672
=======
2014 8 13 12 44 55.56
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
>>>>>>> origin/pubmed
[6 0;0 6]
-------------------------
GetDistances::(pairs)
<<<<<<< HEAD
2014 9 5 22 31 40.923
=======
2014 8 13 12 45 38.156
>>>>>>> origin/pubmed
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=0, nCacheCount=396
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 10 33 53.142
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=344, nCacheCount=808
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 14 25 25.227
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=153, nCacheCount=999
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 16 13 15.418
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=37, nCacheCount=1255
[10 0;0 7]
-------------------------
GetDistances::(pairs)
2014 9 19 16 21 31.009
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment