matthiasm@8
|
1 function [bandwidth,density,xmesh]=kde(data,n,MIN,MAX)
|
matthiasm@8
|
2 % Reliable and extremely fast kernel density estimator for 1 dimensional data;
|
matthiasm@8
|
3 % Gaussian kernel is assumed and the bandwidth is chosen automatically;
|
matthiasm@8
|
4 %
|
matthiasm@8
|
5 % INPUTS:
|
matthiasm@8
|
6 % data - a vector of data from which the density estimate is constructed;
|
matthiasm@8
|
7 % MIN, MAX - defines the interval [MIN,MAX] on which the density estimate is constructed;
|
matthiasm@8
|
8 % the default values of MIN and MAX are:
|
matthiasm@8
|
9 % MIN=min(data)-Range/100 and MAX=max(data)+Range/100, where Range=max(data)-min(data);
|
matthiasm@8
|
10 % n - the number of mesh points used in the uniform discretization of the
|
matthiasm@8
|
11 % interval [MIN, MAX]; n has to be a power of two; if n is not a power of two, then
|
matthiasm@8
|
12 % n is rounded up to the next power of two, i.e., n is set to n=2^ceil(log2(n));
|
matthiasm@8
|
13 % the default value of n is n=2^12;
|
matthiasm@8
|
14 % OUTPUTS:
|
matthiasm@8
|
15 % bandwidth - the optimal bandwidth (Gaussian kernel assumed);
|
matthiasm@8
|
16 % density - column vector of length 'n' with the values of the density
|
matthiasm@8
|
17 % estimate at the grid points;
|
matthiasm@8
|
18 % xmesh - the grid over which the density estimate is computed;
|
matthiasm@8
|
19 % Reference: Botev, Z. I.,
|
matthiasm@8
|
20 % "A Novel Nonparametric Density Estimator",Technical Report,The University of Queensland
|
matthiasm@8
|
21 % http://espace.library.uq.edu.au/view.php?pid=UQ:12535
|
matthiasm@8
|
22 %
|
matthiasm@8
|
23 % Example:
|
matthiasm@8
|
24 % data=randn(1000,1);
|
matthiasm@8
|
25 % [bandwidth,density,xmesh]=kde(data,2^12,min(data)-1,max(data)+1);
|
matthiasm@8
|
26 % plot(xmesh,density)
|
matthiasm@8
|
27
|
matthiasm@8
|
28 data=data(:); %make data a column vector
|
matthiasm@8
|
29 if nargin<2 % if n is not supplied switch to the default
|
matthiasm@8
|
30 n=2^12;
|
matthiasm@8
|
31 end
|
matthiasm@8
|
32 n=2^ceil(log2(n)); % round up n to the next power of 2;
|
matthiasm@8
|
33
|
matthiasm@8
|
34 if nargin<4 %define the default interval [MIN,MAX]
|
matthiasm@8
|
35 minimum=min(data); maximum=max(data);
|
matthiasm@8
|
36 Range=maximum-minimum;
|
matthiasm@8
|
37 MIN=minimum-Range/10; MAX=maximum+Range/10;
|
matthiasm@8
|
38 end
|
matthiasm@8
|
39 % set up the grid over which the density estimate is computed;
|
matthiasm@8
|
40 R=MAX-MIN; dx=R/(n-1); xmesh=MIN+[0:dx:R]; N=length(data);
|
matthiasm@8
|
41 %bin the data uniformly using the grid define above;
|
matthiasm@8
|
42 initial_data=histc(data,xmesh)/N;
|
matthiasm@8
|
43 a=dct1d(initial_data); % discrete cosine transform of initial data
|
matthiasm@8
|
44 % now compute the optimal bandwidth^2 using the GCE method
|
matthiasm@8
|
45 t_star=gce(a,n,N);
|
matthiasm@8
|
46 % smooth the discrete cosine transform of initial data using t_star
|
matthiasm@8
|
47 a_t=a.*exp(-[0:n-1]'.^2*pi^2*t_star/2);
|
matthiasm@8
|
48 % now apply the inverse discrete cosine transform
|
matthiasm@8
|
49 if nargout>1
|
matthiasm@8
|
50 density=idct1d(a_t)/R;
|
matthiasm@8
|
51 end
|
matthiasm@8
|
52 bandwidth=sqrt(t_star)*R;
|
matthiasm@8
|
53 end
|
matthiasm@8
|
54 function t_star=gce(a,n,N)
|
matthiasm@8
|
55 a=a(2:end)/2;
|
matthiasm@8
|
56 I=[1:n-1]'.^2;
|
matthiasm@8
|
57 a2=a.^2;
|
matthiasm@8
|
58 Var_a=zeros(n-1,1);
|
matthiasm@8
|
59 Var_a(1:n/2-1)=(1/2+1/2*a(2:2:n-1)-a2(1:n/2-1))/N;
|
matthiasm@8
|
60
|
matthiasm@8
|
61 t_star=fzero(@mise,[0,1]);
|
matthiasm@8
|
62 NORM=2*pi^4*sum(I.^2.*a2.*exp(-I*pi^2*t_star));
|
matthiasm@8
|
63 %NORM=.5*pi^4*sum([1:n-1]'.^4.*a_t(2:end).^2)/R^5;
|
matthiasm@8
|
64 t_star=[2*N*sqrt(pi)*NORM]^(-2/5);
|
matthiasm@8
|
65 function out=mise(t)
|
matthiasm@8
|
66
|
matthiasm@8
|
67 out=sum((a2+Var_a).*(1-exp(-I*pi^2*t/2)).^2./I)+...
|
matthiasm@8
|
68 sqrt(t/pi)/N*(pi^2/2)-sum(Var_a./I);
|
matthiasm@8
|
69 end
|
matthiasm@8
|
70 end
|
matthiasm@8
|
71
|
matthiasm@8
|
72 function data=dct1d(data)
|
matthiasm@8
|
73 % computes the discrete cosine transform of the column vector data
|
matthiasm@8
|
74 [nrows,ncols]= size(data);
|
matthiasm@8
|
75 % Compute weights to multiply DFT coefficients
|
matthiasm@8
|
76 weight = [1;2*(exp(-i*(1:nrows-1)*pi/(2*nrows))).'];
|
matthiasm@8
|
77 % Re-order the elements of the columns of x
|
matthiasm@8
|
78 data = [ data(1:2:end,:); data(end:-2:2,:) ];
|
matthiasm@8
|
79 % Multiply FFT by weights:
|
matthiasm@8
|
80 data= real(weight.* fft(data));
|
matthiasm@8
|
81 end
|
matthiasm@8
|
82 function out = idct1d(data)
|
matthiasm@8
|
83 % computes the inverse discrete cosine transform
|
matthiasm@8
|
84 [nrows,ncols]=size(data);
|
matthiasm@8
|
85 % Compute weights
|
matthiasm@8
|
86 weights = nrows*exp(i*(0:nrows-1)*pi/(2*nrows)).';
|
matthiasm@8
|
87 % Compute x tilde using equation (5.93) in Jain
|
matthiasm@8
|
88 data = real(ifft(weights.*data));
|
matthiasm@8
|
89 % Re-order elements of each column according to equations (5.93) and
|
matthiasm@8
|
90 % (5.94) in Jain
|
matthiasm@8
|
91 out = zeros(nrows,1);
|
matthiasm@8
|
92 out(1:2:nrows) = data(1:nrows/2);
|
matthiasm@8
|
93 out(2:2:nrows) = data(nrows:-1:nrows/2+1);
|
matthiasm@8
|
94 % Reference:
|
matthiasm@8
|
95 % A. K. Jain, "Fundamentals of Digital Image
|
matthiasm@8
|
96 % Processing", pp. 150-153.
|
matthiasm@8
|
97 end
|
matthiasm@8
|
98
|