Commit 15e5e7ca authored by Andrew Cohen's avatar Andrew Cohen
Browse files

added new amazon project

parent 0c03089d
......@@ -28,3 +28,4 @@ obj/
_ReSharper*/
[Tt]est[Rr]esult*
*.asv
~$*
......@@ -6,7 +6,7 @@ if isempty(strServer)
strServer='wikipedia'
end
term=[words{1}];
term=[ words{1} ];
for i=2:length(words)
term=[ term '+' words{i}];
end
......@@ -85,28 +85,15 @@ for i=1:5
str = urlread(URL);
try
%amazon
idx=strfind(str,'results for <span class="');
idx=strfind(str,'results for <span>');
idx=idx(1)-2;
idxStart=idx;
while(str(idxStart)~=' ')
chPrev = str(idxStart-1);
while(isNumeric(chPrev))
idxStart=idxStart-1;
chPrev = str(idxStart-1);
end
nx=str(idxStart:idx);
% WIKIPEDIA%
% idx=strfind(str,'div class="results-info"');
% idx=idx(1);
% str2=str(idx:end);
%
% idx2=strfind(str2,'of <b>');
% str3=str2(idx2+6:end);
%
% idx3=strfind(str3,'</b>');
% idx3=idx3(1);
%
% nx=str3(1:idx3-1);
% END WIKIPEDIA
f=str2double(nx);
break;
catch
......@@ -117,4 +104,18 @@ end
end
function bNumber = isNumeric(ch)
% isNumeric  Report whether CH is made up only of characters that can occur
% in a comma-grouped integer such as '1,234'.
%
%   ch      - character (or char array) to classify.
%   bNumber - 1 if CH is a non-empty char value and every character is a
%             decimal digit '0'-'9' or a comma; 0 otherwise.
%
% The check compares character codes directly instead of relying on
% str2double: str2double('i') and str2double('j') parse as the imaginary
% unit (a non-NaN value), so those letters would be misreported as numeric.
bNumber=0;
if ~ischar(ch) || isempty(ch)
    % Nothing to classify (also covers indexing results that came back empty).
    return
end
bDigit = (ch >= '0') & (ch <= '9');
bComma = (ch == ',');   % thousands separator used in the scraped result counts
if all(bDigit(:) | bComma(:))
    bNumber=1;
end
end
......@@ -52,8 +52,13 @@ for i=1:length(words)
% d1(i)=(NWD1x(i)-NWD1(i))./max(abs(NWD1x(i)),abs(NWD1(i)));
% d2(i)=(NWD2x(i)-NWD2(i))./max(abs(NWD2x(i)),abs(NWD2(i)));;
d1(i)=(NWD1x(i)-NWD1(i));
if isnan(d1(i))
d1(i)=inf;
end
d2(i)=(NWD2x(i)-NWD2(i));
if isnan(d2(i))
d2(i)=inf;
end
if d1(i)<d2(i)
% if abs(d1(i))<abs(d2(i))
fprintf('1,');
......
......@@ -23,5 +23,5 @@ end
N=GetCount({'N'});
nwd = (log2(max(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % III.3 from arxiv
% normalize for cardinality
% nwd = nwd/(length(words)-1);
nwd = nwd/(length(words)-1);
% nwd=(log2(max(fw))-log2(fX))/( log2(N)-log2(max(fexclude)) );
......@@ -19,6 +19,7 @@ for i=1:length(words)
end
d(length(words),length(words))=0;
d(find(isinf(d)))=10;
d=Regularize(d);
[kGap Gap S idx] = GapSpectral(d,6,1);
fprintf('kGap=%d\n',kGap);
......
% Driver script: selects the 'amazon' server, prepares the global query
% cache and counters, then runs the count and distance routines on two
% groups of titles (words1 and words2).
% Start the stopwatch timer (no matching toc is visible in this section).
tic
% goNWD
%
%
% Select which backend the query routines use; strServer is a global that is
% read elsewhere. No trailing semicolon, so MATLAB echoes the assigned value.
global strServer
strServer='amazon'
% Create the global QueryCache struct only if it is still empty, so a cache
% left over from an earlier run in the same session is kept.
% NOTE(review): presumably GetCount fills/reads this cache - confirm.
global QueryCache
if isempty(QueryCache)
QueryCache.Queries={};
QueryCache.Count=[];
end
% Reset the global counters for this run.
% NOTE(review): presumably these count live queries vs. cache hits - confirm.
global nQueryCount nCacheCount
nQueryCount=0;
nCacheCount=0;
% First group of titles (Shakespeare plays). Apostrophes are omitted from the
% titles as written. Echoed to the console (no semicolon).
words1={'Macbeth','The Tempest','Othello','King Lear','Hamlet'...
'The Merchant of Venice','A Midsummer Nights Dream',...
'Much Ado About Nothing', 'Taming of the Shrew','Twelfth Night' }
% Second group of titles (Stephen King novels). Echoed to the console.
words2={'Carrie','Salems Lot','The Shining','The Stand','The Dead Zone',...
'Firestarter','Cujo'}
% Call GetCount on each whole group; the results are displayed because the
% calls are not terminated with a semicolon.
GetCount(words1)
GetCount(words2)
%
% Run both distance routines on the two groups.
[d1,d2]=GetDistances(words1,words2);
idx=getDistancesPairs(words1,words2);
global strServer
strServer='pubmed'
%http://www.nature.com/nature/journal/v511/n7510/full/nature13595.html
% supp table 2, in ranked order
......@@ -37,7 +40,11 @@ neuroblastoma = {'rs6939340','rs4712653','rs9295536','rs3790171','rs7272481'};
% parkinsons={'rs356219','rs10847864','rs1491942','rs947211','rs2390669'}
parkinsons={'rs356219','rs10847864','rs2942168','rs11724635'}
words={ alzheimers;parkinsons;shizophrenia;leukemia;obesity;neuroblastoma}
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/
% http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3839234/table/T1/
als = {'rs2303565','rs1344642','rs2814707','rs3849942','rs2453556', 'rs1971791', 'rs8056742'};
words={ alzheimers;parkinsons;als;shizophrenia;leukemia;obesity;neuroblastoma}
global QueryCache
if isempty(QueryCache)
......
No preview for this file type
No preview for this file type
% below with normalization
% no normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 18 2.463
2014 8 13 12 18 14.692
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
[6 0;0 11]
......@@ -10,7 +10,7 @@ red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardva
-------------------------
GetDistances::(pairs)
2014 8 12 14 19 11.238
2014 8 13 12 19 27.056
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
......@@ -18,7 +18,7 @@ nCorrect=17
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 22 21.367
2014 8 13 12 22 38.066
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
[10 0;0 6]
......@@ -26,15 +26,15 @@ red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,recta
-------------------------
GetDistances::(pairs)
2014 8 12 14 23 16.088
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=2
2014 8 13 12 23 32.038
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615
nCorrect=16
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 25 20.728
2014 8 13 12 25 37.418
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
[6 0;0 6]
......@@ -42,17 +42,17 @@ Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John
-------------------------
GetDistances::(pairs)
2014 8 12 14 26 2.281
2014 8 13 12 26 19.956
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=330
nCorrect=7
%below without normalization
% normalization!
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 35 5.938
2014 8 13 12 37 22.423
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,nQueryCount=394, nCacheCount=898
[6 0;0 11]
......@@ -60,7 +60,7 @@ red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardva
-------------------------
GetDistances::(pairs)
2014 8 12 14 36 17.183
2014 8 13 12 38 38.851
red,orange,yellow,green,blue,indigo,lion,tiger,bear,monkey,zebra,elephant,aardvark,lamb,fox,ape,dog,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
......@@ -68,7 +68,7 @@ nCorrect=17
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 39 32.895
2014 8 13 12 41 55.419
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,nQueryCount=342, nCacheCount=810
[10 0;0 6]
......@@ -76,7 +76,7 @@ red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,recta
-------------------------
GetDistances::(pairs)
2014 8 12 14 40 25.069
2014 8 13 12 42 49.628
red,orange,yellow,green,blue,indigo,violet,purple,cyan,white,square,circle,rectangle,ellipse,triangle,rhombus,kGap=1
nQueryCount=105, nCacheCount=615
nCorrect=16
......@@ -84,7 +84,7 @@ nCorrect=16
-------------------------
GetDistances::(multiples) NW=5
2014 8 12 14 42 30.533
2014 8 13 12 44 55.56
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,nQueryCount=198, nCacheCount=474
[6 0;0 6]
......@@ -92,7 +92,39 @@ Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John
-------------------------
GetDistances::(pairs)
2014 8 12 14 43 12.117
2014 8 13 12 45 38.156
Barack Obama,Hillary Clinton,John Edwards,Joe Biden,Chris Dodd,Mike Gravel,John McCain,Mitt Romney,Mike Huckabee,Ron Paul,Fred Thompson,Alan Keyes,kGap=2
nQueryCount=66, nCacheCount=330
nCorrect=7
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 10 33 53.142
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=344, nCacheCount=808
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 14 25 25.227
Alls Well That Ends Well,As You Like It,The Comedy of Errors,Measure for Measure,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=153, nCacheCount=999
[8 1;0 7]
-------------------------
GetDistances::(multiples) NW=5
2014 9 19 16 13 15.418
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,nQueryCount=37, nCacheCount=1255
[10 0;0 7]
-------------------------
GetDistances::(pairs)
2014 9 19 16 21 31.009
Macbeth,The Tempest,Othello,King Lear,Hamlet,The Merchant of Venice,A Midsummer Nights Dream,Much Ado About Nothing,Taming of the Shrew,Twelfth Night,Carrie,Salems Lot,The Shining,The Stand,The Dead Zone,Firestarter,Cujo,kGap=2
nQueryCount=136, nCacheCount=680
nCorrect=17
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment