
CADEE is a method for estimating the differential Shannon entropy of a continuous random variable using independent samples.


The method is based on decomposing the distribution into a product of its marginal distributions and the joint dependency structure, known as the copula. The entropies of the marginals are estimated using one-dimensional methods. The entropy of the copula is estimated recursively by splitting the data along dimensions that are statistically dependent on others. Numerical examples demonstrate that the method is accurate for distributions with both compact and non-compact support, which is important when the support is unknown or of mixed type (compact in some dimensions and unbounded in others).

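Concretely, if c denotes the copula density of (X_1, ..., X_D), the decomposition reads H(X_1, ..., X_D) = H(X_1) + ... + H(X_D) + H(c), where H(c) <= 0 is the entropy of the copula and equals minus the mutual information between the coordinates. The code below estimates the marginal terms and the copula term separately.
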
In high dimensions (greater than 20), our method is not only more accurate but also significantly more efficient than previous approaches.


A MATLAB implementation is given below; a short usage example follows the listing.

function [H] = copulasH(xs,range,level)
%Evaluates the entropy using recursive copula splitting.
%Input:
% xs: Samples. Each row is an iid sample.
% range: The support of the distribution.
%   A 2xD matrix. First row contains the lower limits.
%                 Second row contains the upper limits.
%                  [] if not known.
% level: Depth in the tree. Default is 0.
%
%Output:
% H: Estimated entropy.

if nargin<=1 % Default range value (unknown).
    range=[];
end
if nargin<=2 % Default level (=0).
    level=0;
end
D=size(xs,2);   % The dimension.
n=size(xs,1);   % Number of samples.
if n==0
    H=0;
    return;
end

%Algorithm parameters
params.randomLevels=false;                       % If true, the splitting dimension is chosen at random; otherwise it cycles with the recursion level.
params.nbins1=1000;                              % Max number of bins for 1D entropy.
params.pValuesForIndependence=0.05;              % p-Value for independence using correlations.
params.exponentForH2D=0.62;                      % Scaling exponent for pair-wise entropy.
params.acceptenceThresholdForIndependence=-0.7;  % Threshold for pair-wise entropy to accept the hypothesis of independence with p-value 0.05.
params.minSamplesToProcessCopula=5*D;            % With fewer samples, assume the dimensions are independent.
params.numberOfPartitionsPerDim=2;               % For recursion: partition copula (along the chosen dimension) into numberOfPartitionsPerDim equal parts.
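
%Overall flow: (1) estimate the 1D entropy of each marginal and map the samples to
%their normalized ranks (the empirical copula); (2) if the dimensions look independent,
%stop and return the sum of marginal entropies; (3) otherwise, group mutually dependent
%dimensions into blocks and/or split the copula along a highly correlated dimension
%and recurse on the parts.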

%Calculate the empirical cumulative distribution along each dimension
%   (i.e., the integral transform of marginals).
H1s=0;
ys=zeros(n,D);
y1=zeros(n,1);
for d=1:D
    x1=xs(:,d);
    [x1s,idx]=sort(x1);
    
    % 1D entropy of marginals
    if isempty(range)
        % The range is not known, use m_n spacing method.
        H1=H1Dspacings(x1s);
    else
        % The range is given, use binning.
        H1=H1Dbins(x1s,params,[range(1,d) range(2,d)]);
    end
    H1s=H1s+H1;
    
    % Normalized ranks of the marginal (empirical probability integral transform).
    y1(idx)=( (0:n-1)+0.5 )/n;
    ys(:,d)=y1';
end
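
% ys now approximates a sample from the copula: each column holds uniform ranks in (0,1),
% so all remaining entropy comes from the dependence structure between the columns.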

clear x1 xs; %No need for these any longer.
if n<params.minSamplesToProcessCopula || D==1
    H=H1s;
    return;
end

[R,P]=corr(ys); % Calculate the Pearson correlation coefficient of all pairs.
                % Since the ys are the ranks, it is equal to the Spearman correlation of the xs.
                % P is the P-value matrix of the hypothesis that dimension pairs are independent.
P(eye(D)==1)=0;
isCorrelated=P<params.pValuesForIndependence; % A Boolean matrix: true if the corresponding pair is correlated.

if sum(isCorrelated(:))<D^2
    % Do more checks for pairs that are not correlated
    [ci,cj]=find(isCorrelated==0);
    nFactor=n^params.exponentForH2D;
    for k=1:length(ci)
        i=ci(k);
        j=cj(k);
        if i>j
            %Calculate the pair-wise entropy, which equals minus the mutual information (because the marginals are U(0,1)).
            H2=H2D([ys(:,i),ys(:,j)],params);
            isCorrelated(i,j)=H2*nFactor<params.acceptenceThresholdForIndependence;
            isCorrelated(j,i)=isCorrelated(i,j);
        end
    end
end %if sum(isCorrelated(:))<D^2

% Partition isCorrelated into blocks
nCorrelated=sum(isCorrelated);
L=isCorrelated-diag(nCorrelated);
Z=null(L,'r');
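% L is (minus) the graph Laplacian of the dependency graph defined by isCorrelated.
% Its null space is spanned by the indicator vectors of the connected components, so
% each column of the rational basis Z marks one block of mutually dependent dimensions.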
nBlocks=size(Z,2);
if nBlocks>=2
    H=H1s;
    %Split into blocks
    for c=1:nBlocks
        clusterSize=sum(Z(:,c));
        if clusterSize>1 % Blocks of size 1 are a marginal. The distribution is U(0,1), so its entropy is 0.
            dimsInCluster=find(Z(:,c)==1);
            ysmall=ys(:,dimsInCluster);     % Samples of the block
            smallD=length(dimsInCluster);   % The dimension of the block
            unitCube=[zeros(1,smallD) ; ones(1,smallD)];  % The support is always [0,1]^smallD.
            Hpart=copulasH(ysmall,unitCube,level+1);      % Calculate the entropy of the reduced block.
            
            % Add the entropy of the block
            H=H+Hpart;
        end
    end
    
    return;
end

if n<params.minSamplesToProcessCopula
    H=H1s;
    return;
end

% Find which dimensions are most correlated with others.
R2=R.^2;
R2(eye(D)==1)=0;
Rsum=sum(R2);
largeDims=find(max(Rsum)-Rsum<0.1 & Rsum>0); % List of dimensions that are highly correlated with others (its length can be 0, 2, 3, ..., but not 1).
if isempty(largeDims)
    % Correlations are small, yet variables are dependent. All dims are equally problematic.
    largeDims=1:D;
end
nLargeDims=length(largeDims);
% Pick one of the dims in largeDims
if params.randomLevels
    % Choose randomly.
    maxCorrsDim=largeDims(randi(nLargeDims));
else
    % Choose sequentially.
    maxCorrsDim=largeDims(mod(level,nLargeDims)+1);
end

unitCube=[zeros(1,D) ; ones(1,D)];
Hparts=zeros(params.numberOfPartitionsPerDim,1);
%Split the data along dimension maxCorrsDim into params.numberOfPartitionsPerDim equal parts.
for prt=1:params.numberOfPartitionsPerDim
    % Range of data is [f,l)
    f=(prt-1)/params.numberOfPartitionsPerDim;
    if prt==params.numberOfPartitionsPerDim
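        % Any value >1 works here: the ranks lie in [0,1), so this simply ensures
        % the last part captures the top of the range.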
        l=2;
    else
        l=prt/params.numberOfPartitionsPerDim;
    end
    
    mask=ys(:,maxCorrsDim)>=f & ys(:,maxCorrsDim)<l;
    nIn1=sum(mask);
    maskM=repmat(mask,[1 D]);
    
    % Subset of data.
    y1=reshape(ys(maskM),[nIn1,D]);
    % Scale back to [0,1].
    y1(:,maxCorrsDim)=(y1(:,maxCorrsDim)-f)*params.numberOfPartitionsPerDim;
    % Entropy of subset.
    Hparts(prt)=copulasH(y1,unitCube,level+1);
end

% Add the entropies of all subsets.
H=H1s+sum(Hparts)/params.numberOfPartitionsPerDim;

end

function [H] = H1Dbins(xs,params,range)
%Calculate the entropy of 1D data using bins.
n=length(xs);
nbins=max([min([params.nbins1 floor(n^0.4) floor(n/10)]) 1]);  % Number of bins.
if nargin==2
    % No range.
    [p,edges]=histcounts(xs,nbins,'Normalization','pdf');
else
    % Range given.
    edges=linspace(range(1),range(2),nbins+1);
    p=histcounts(xs,edges,'Normalization','pdf');
end

mask=p>0;
H=-(edges(2)-edges(1))*sum( p(mask).*log(p(mask)) );
end

function [H] = H1Dspacings(xs)
%Calculate the entropy of 1D data using m_N spacings.
%   Assume xs is ordered.
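%   Estimator: H ~ (1/n) * sum_{i=1..n-m} log( (n/m)*(x_(i+m)-x_(i)) ),
%   with m = floor(n^(1/3-0.01)).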
n=length(xs);
mn=floor(n^(1/3-0.01));
mnSpacings=xs(mn+1:end)-xs(1:end-mn);
H=( sum(log(mnSpacings)) + log(n/mn)*(n-mn) )/n;
end

function [H] = H2D(xs,params)
% Calculate the entropy of 2D data compactly supported in [0,1]^2.
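%   Plug-in histogram estimate on an nbins-by-nbins grid; used by the pair-wise
%   independence test in copulasH.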
n=size(xs,1);
if n<4
    H=0;
    return;
end

nbins=max([min([params.nbins1 floor(n^0.2) floor(n/10)]) 2]);  % Number of bins.
pys=floor(xs*nbins);         % The bin index (0..nbins-1) in each dimension.
py1=pys(:,1)+pys(:,2)*nbins; % Enumerate the cells from 0 to nbins^2-1.
edges=-0.5:(nbins^2-0.5);
counts=histcounts(py1,edges);% 1D histogram

mask=counts>0;
H=-sum( counts(mask).*log(counts(mask)) )/n + log(n/nbins/nbins);
end
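
As a quick sanity check (an illustrative example, not part of the original listing), the estimate can be compared with the exact entropy of a correlated bivariate Gaussian. The snippet assumes the Statistics and Machine Learning Toolbox is available (for mvnrnd; the corr call above requires it as well):

% Example: estimate the entropy of a correlated 2D Gaussian and compare with
% the exact value 0.5*log((2*pi*e)^2*det(C)).
rho = 0.5;
C = [1 rho; rho 1];
xs = mvnrnd([0 0], C, 100000);   % 1e5 iid samples
Hest = copulasH(xs);             % support unknown, so marginals use the spacings estimator
Hexact = 0.5*log((2*pi*exp(1))^2*det(C));
fprintf('estimated H = %.3f, exact H = %.3f\n', Hest, Hexact);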