To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Revision:

root / _misc / kde.m

History | View | Annotate | Download (3.79 KB)

1
function [bandwidth,density,xmesh]=kde(data,n,MIN,MAX)
2
% Reliable and extremely fast kernel density estimator for 1 dimensional data;
3
%        Gaussian kernel is assumed and the bandwidth is chosen automatically;
4
%
5
% INPUTS:
6
%     data    - a vector of data from which the density estimate is constructed;
7
%   MIN, MAX  - defines the interval [MIN,MAX] on which the density estimate is constructed;
8
%               the default values of MIN and MAX are: 
9
%               MIN=min(data)-Range/100 and MAX=max(data)+Range/100, where Range=max(data)-min(data);   
10
%          n  - the number of mesh points used in the uniform discretization of the
11
%               interval [MIN, MAX]; n has to be a power of two; if n is not a power of two, then
12
%               n is rounded up to the next power of two, i.e., n is set to n=2^ceil(log2(n));
13
%               the default value of n is n=2^12;
14
% OUTPUTS:
15
%   bandwidth - the optimal bandwidth (Gaussian kernel assumed);
16
%     density - column vector of length 'n' with the values of the density
17
%               estimate at the grid points;
18
%     xmesh   - the grid over which the density estimate is computed;
19
%  Reference: Botev, Z. I.,
20
%             "A Novel Nonparametric Density Estimator",Technical Report,The University of Queensland
21
%             http://espace.library.uq.edu.au/view.php?pid=UQ:12535
22
%
23
%  Example:
24
%    data=randn(1000,1); 
25
%    [bandwidth,density,xmesh]=kde(data,2^12,min(data)-1,max(data)+1);
26
%    plot(xmesh,density)
27

    
28
data=data(:); %make data a column vector
29
if nargin<2 % if n is not supplied switch to the default
30
    n=2^12;
31
end
32
n=2^ceil(log2(n)); % round up n to the next power of 2;
33

    
34
if nargin<4 %define the default  interval [MIN,MAX]
35
    minimum=min(data); maximum=max(data);
36
    Range=maximum-minimum;
37
    MIN=minimum-Range/10; MAX=maximum+Range/10;
38
end
39
% set up the grid over which the density estimate is computed;
40
R=MAX-MIN; dx=R/(n-1); xmesh=MIN+[0:dx:R]; N=length(data);
41
%bin the data uniformly using the grid define above;
42
initial_data=histc(data,xmesh)/N;
43
a=dct1d(initial_data); % discrete cosine transform of initial data
44
% now compute the optimal bandwidth^2 using the GCE method
45
t_star=gce(a,n,N);
46
% smooth the discrete cosine transform of initial data using t_star
47
a_t=a.*exp(-[0:n-1]'.^2*pi^2*t_star/2);
48
% now apply the inverse discrete cosine transform
49
  if nargout>1
50
    density=idct1d(a_t)/R;
51
  end
52
bandwidth=sqrt(t_star)*R;
53
end
54
function t_star=gce(a,n,N)
55
a=a(2:end)/2;
56
I=[1:n-1]'.^2;
57
a2=a.^2;
58
Var_a=zeros(n-1,1);
59
Var_a(1:n/2-1)=(1/2+1/2*a(2:2:n-1)-a2(1:n/2-1))/N;
60

    
61
t_star=fzero(@mise,[0,1]);
62
NORM=2*pi^4*sum(I.^2.*a2.*exp(-I*pi^2*t_star));
63
%NORM=.5*pi^4*sum([1:n-1]'.^4.*a_t(2:end).^2)/R^5;
64
t_star=[2*N*sqrt(pi)*NORM]^(-2/5);
65
    function  out=mise(t)
66

    
67
        out=sum((a2+Var_a).*(1-exp(-I*pi^2*t/2)).^2./I)+...
68
            sqrt(t/pi)/N*(pi^2/2)-sum(Var_a./I);
69
    end
70
end
71

    
72
function data=dct1d(data)
73
% computes the discrete cosine transform of the column vector data
74
[nrows,ncols]= size(data);
75
% Compute weights to multiply DFT coefficients
76
weight = [1;2*(exp(-i*(1:nrows-1)*pi/(2*nrows))).'];
77
% Re-order the elements of the columns of x
78
data = [ data(1:2:end,:); data(end:-2:2,:) ];
79
% Multiply FFT by weights:
80
data= real(weight.* fft(data));
81
end
82
function out = idct1d(data)
83
% computes the inverse discrete cosine transform
84
[nrows,ncols]=size(data);
85
% Compute weights
86
weights = nrows*exp(i*(0:nrows-1)*pi/(2*nrows)).';
87
% Compute x tilde using equation (5.93) in Jain
88
data = real(ifft(weights.*data));
89
% Re-order elements of each column according to equations (5.93) and
90
% (5.94) in Jain
91
out = zeros(nrows,1);
92
out(1:2:nrows) = data(1:nrows/2);
93
out(2:2:nrows) = data(nrows:-1:nrows/2+1);
94
%   Reference: 
95
%      A. K. Jain, "Fundamentals of Digital Image
96
%      Processing", pp. 150-153.
97
end
98