Commit e14d6f76 authored by Andrew Cohen's avatar Andrew Cohen
Browse files

get count - 'the'

new confidence intervals
parent 3e757678
% Compute confidence intervals for a 2-class classification problem
% using the Wilson score interval.
%
% Witten, I. H. and E. Frank (2005). Data Mining: Practical Machine Learning Tools and Techniques
% pp147-149
%
% Each row of fN is a pair [f N]; following the cited reference, f is taken
% to be the observed proportion and N the number of samples it was measured
% on. For every row the script prints f, N and the lower (p1) and upper (p2)
% bounds of the interval at confidence level c.
c = 0.95; % confidence level
p = (1-c)/2; % probability mass left in each tail
z = -1*norminv(p,0,1); % positive standard-normal quantile for the two-sided level c
% Alternative data sets kept for reference:
%fN=[ 1-282/656,656; 1-57/656,656 ; 1,86 ; 1,72 ; .6,82;.98,82;.92 1000;.92 78 ]
% fN = [.99 72;.87 86;.83 78;1 72;1 86;.92 78;1-282/656 656;1-57/656 656;.82 1000;.85 1000;.991 1000;.57 88;.97 88;.81 10000;.82 10000]
fN = [ 1 17;1 12;.58 12;27/28 28;22/28 28];
for i=1:size(fN,1)
    f=fN(i,1);
    N=fN(i,2);
    % Shared terms of the two bounds; evaluation order matches the
    % original single-line expressions so the results are unchanged.
    centre = f + z^2/(2*N);
    halfWidth = z*sqrt(f/N - f^2/N + z^2/(4*N^2));
    scale = 1 + z^2/N;
    p1 = (centre - halfWidth) / scale; % lower bound
    p2 = (centre + halfWidth) / scale; % upper bound
    fprintf(1,'f=%f N=%d p1=%f p2=%f\n',f,N,p1,p2);
end
......@@ -18,7 +18,7 @@ for i=1:length(words)
end
N=GetCount({'N'});
N=GetCount({'the'});
if bNWDmin
nwd = (log2(min(fw))-log2(fX)) / (log2(N)-log2(min(fw))); % from xx.pdf
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment