Skip to content

Commit

Permalink
some cleanup and new functions
Browse files Browse the repository at this point in the history
  • Loading branch information
CPernet committed Nov 30, 2017
1 parent f521c6f commit 7e8a3e7
Show file tree
Hide file tree
Showing 21 changed files with 630 additions and 408 deletions.
4 changes: 0 additions & 4 deletions HZmvntest.m
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,6 @@
return;
end

% remove NaNs
% -----------
X(find(sum(isnan(X),2)),:) = [];

if c == 1 %covariance matrix normalizes by (n) [=default]
S = cov(X,1);
else %covariance matrix normalizes by (n-1)
Expand Down
80 changes: 80 additions & 0 deletions MC_corrpval.asv
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
function [p_alpha,v] = MC_corrpval(n,p,method,alphav,pairs,D)

% function to compute the alpha quantile estimate of the distribution of
% minimal p-values under the null of correlations in a n*p matrix with null
% covariance but variance D (I by default)
%
% FORMAT p_alpha = MC_corrpval(n,p,D)
%
% INPUT n the number of observations
% p the number of variables
% method can be 'Pearson', 'Spearman', 'Skipped Pearson', 'Skipped Spearman'
% pairs a m*2 matrix of variables to correlate (optional)
% D the variance of each variable (optional)
%
% p_alpha the alpha quantile estimate of the distribution of
% minimal p-values
%
%
% Cyril Pernet v3 - Novembre 2017
% ---------------------------------------------------
% Copyright (C) Corr_toolbox 2017

%% deal with inputs
if nargin == 0
help MC_corrpval
elsie nargin < 2
error('at least 2 inputs requested see help MC_corrpval');
end

if ~exist('pairs','var') || isempty(pairs)
pairs = nchoosek([1:p],2);
end

if ~exist('alphav','var')
alphav = 5/100;
end

%% generate the variance
SIGMA = eye(p);
if exist('D','var')
if length(D) ~= p
error('the vector D of variance must be of the same size as the number of variables p')
else
SIGMA(SIGMA==1) = D;
end
end

%% run the Monte Carlo simulation and keep smallest p values
v = NaN(1,1000);
parfor MC = 1:1000
fprintf('Running Monte Carlo %g\n',MC)
MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution
if strcmp(method,'Pearson')
[~,~,pval] = Pearson(MVN,pairs);
elseif strcmp(method,'Pearson')
[~,~,pval] = Spearman(MVN,pairs);
elseif strcmp(method,'Skipped Pearson')
[r,t,pval] = skipped_Pearson(MVN,pairs);
elseif strcmp(method,'Skipped Spearman')
[r,t,pval] = skipped_Spearman(MVN,pairs);
end
v(MC) = min(pval);

end

%% get the Harell-Davis estimate of the alpha quantile
n = length(v);
for l=1:length(alphav)
q = alphav(l)*10; % for a decile
m1 = (n+1).*q;
m2 = (n+1).*(1-q);
vec = 1:n;
w = betacdf(vec./n,m1,m2)-betacdf((vec-1)./n,m1,m2);
y = sort(v);
p_alpha(l) = sum(w(:).*y(:));
end

% For p=6, n=20 alpha =.05 .025 .01 get
% 0.01122045 0.004343809 0.002354744
% 0.0240 0.0069 0.000027
85 changes: 85 additions & 0 deletions MC_corrpval.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
function [r_alpha,t_alpha,p_alpha,vv,vvv,v] = MC_corrpval(n,p,method,alphav,pairs,D)

% function to compute the alpha quantile estimate of the distribution of
% minimal p-values under the null of correlations in a n*p matrix with null
% covariance but variance D (I by default)
%
% FORMAT p_alpha = MC_corrpval(n,p,D)
%
% INPUT n the number of observations
% p the number of variables
% method can be 'Pearson', 'Spearman', 'Skipped Pearson', 'Skipped Spearman'
% pairs a m*2 matrix of variables to correlate (optional)
% D the variance of each variable (optional)
%
% p_alpha the alpha quantile estimate of the distribution of
% minimal p-values
%
%
% Cyril Pernet v3 - Novembre 2017
% ---------------------------------------------------
% Copyright (C) Corr_toolbox 2017

%% deal with inputs
if nargin == 0
help MC_corrpval
elsie nargin < 2
error('at least 2 inputs requested see help MC_corrpval');
end

if ~exist('pairs','var') || isempty(pairs)
pairs = nchoosek([1:p],2);
end

if ~exist('alphav','var')
alphav = 5/100;
end

%% generate the variance
SIGMA = eye(p);
if exist('D','var')
if length(D) ~= p
error('the vector D of variance must be of the same size as the number of variables p')
else
SIGMA(SIGMA==1) = D;
end
end

%% run the Monte Carlo simulation and keep smallest p values
v = NaN(1,1000);
parfor MC = 1:1000
fprintf('Running Monte Carlo %g\n',MC)
MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution
if strcmp(method,'Pearson')
[~,~,pval] = Pearson(MVN,pairs);
elseif strcmp(method,'Pearson')
[~,~,pval] = Spearman(MVN,pairs);
elseif strcmp(method,'Skipped Pearson')
[r,t,pval] = skipped_Pearson(MVN,pairs);
elseif strcmp(method,'Skipped Spearman')
[r,t,pval] = skipped_Spearman(MVN,pairs);
end
v(MC) = min(pval);
vv(MC) = max(r);
vvv(MC) = max(t);
end

%% get the Harell-Davis estimate of the alpha quantile
n = length(v);
for l=1:length(alphav)
q = alphav(l)*10; % for a decile
m1 = (n+1).*q;
m2 = (n+1).*(1-q);
vec = 1:n;
w = betacdf(vec./n,m1,m2)-betacdf((vec-1)./n,m1,m2);
y = sort(v);
p_alpha(l) = sum(w(:).*y(:));
y = sort(vv);
r_alpha(l) = sum(w(:).*y(:));
y = sort(vvv);
t_alpha(l) = sum(w(:).*y(:));
end

% For p=6, n=20 alpha =.05 .025 .01 get
% 0.01122045 0.004343809 0.002354744
% 0.0240 0.0069 0.000027
43 changes: 14 additions & 29 deletions Pearson.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [r,t,pval,hboot,CI] = Pearson(X,Y,fig_flag,level)
function [r,t,pval,hboot,CI,pboot] = Pearson(X,Y,fig_flag,level)

% compute the Pearson correlation along with the bootstrap CI
%
Expand All @@ -22,15 +22,11 @@
%
% If X and Y are matrices of size [n p], p correlations are computed
% consequently, the CI are adjusted at a level alpha/p (Bonferonni
% correction) and hboot is based on these adjusted CI but pval remains
% uncorrected - note also that if some values are NaN, the adjustement
% is based on the largest n
%
% This function requires the nansum.m function
% from the matlab stat toolbox.
%
% Cyril Pernet v2 (10-01-2014 - deals with NaN)
% ---------------------------------------------
% correction) and hboot is based on these adjusted CI (pval remain
% uncorrected)

% Cyril Pernet v1
% ---------------------------------
% Copyright (C) Corr_toolbox 2012

%% data check
Expand Down Expand Up @@ -61,27 +57,20 @@
end

[n p] = size(X);
if p==1
[X Y]=pairwise_cleanup(X,Y);
[n p] = size(X);
end

%% basic Pearson

% compute r
r = nansum(demean(X).*demean(Y)) ./ ...
(nansum(demean(X).^2).*nansum(demean(Y).^2)).^(1/2);
r = sum(detrend(X,'constant').*detrend(Y,'constant')) ./ ...
(sum(detrend(X,'constant').^2).*sum(detrend(Y,'constant').^2)).^(1/2);
t = r.*sqrt((n-2)./(1-r.^2));
pval = 2*tcdf(-abs(t),n-2);

%% bootstrap
if nargout > 3
% adjust boot parameters
if p == 1
nboot = 599;
% adjust percentiles following Wilcox
% even if NaN n is fine since data have
% been cleaned up
if n < 40
low = 7 ; high = 593;
elseif n >= 40 && n < 80
Expand All @@ -105,19 +94,15 @@
high = nboot - low;
end
end

% compute hboot and CI
table = randi(n,n,nboot);
for B=1:nboot
rb(B,:) = nansum(demean(X(table(:,B),:)).*demean(Y(table(:,B),:))) ./ ...
(nansum(demean(X(table(:,B),:)).^2).*nansum(demean(Y(table(:,B),:)).^2)).^(1/2);
if fig_flag ~= 0 % to make a nice figure get regression values
for c=1:size(X,2)
tmp = [X(table(:,B),c) Y(table(:,B),c)]; % take a pair
tmp(find(sum(isnan(tmp),2)),:) = []; % remove NaNs
b = pinv([tmp(:,1) ones(size(tmp,1),1)])*tmp(:,2); % solve
slope(B,c) = b(1); intercept(B,c) = b(2,:);
end
rb(B,:) = sum(detrend(X(table(:,B),:),'constant').*detrend(Y(table(:,B),:),'constant')) ./ ...
(sum(detrend(X(table(:,B),:),'constant').^2).*sum(detrend(Y(table(:,B),:),'constant').^2)).^(1/2);
for c=1:size(X,2)
b = pinv([X(table(:,B),c) ones(n,1)])*Y(table(:,B),c);
slope(B,c) = b(1);
intercept(B,c) = b(2,:);
end
end

Expand Down
44 changes: 17 additions & 27 deletions Spearman.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
% If X and Y are matrices of size [n p], p correlations are computed
% and the CIs are adjusted at the alpha/p level (Bonferonni
% correction); hboot is based on these adjusted CIs but pval remains
% uncorrected - note also that if some values are NaN, the adjustement
% is based on the largest n
% uncorrected.
%
% This function requires the tiedrank.m and nansum.m functions
% from the matlab stat toolbox.
% This function requires the tiedrank.m function from the matlab stat toolbox.
%
% Cyril Pernet v2 (10-01-2014 - deals with NaN)
% ---------------------------------------------
% See also TIEDRANK.

% Cyril Pernet v1
% ---------------------------------
% Copyright (C) Corr_toolbox 2012

%% data check
Expand Down Expand Up @@ -58,27 +58,21 @@
end

[n p] = size(X);
if p==1
[X Y]=pairwise_cleanup(X,Y);
[n p] = size(X);
end

%% basic Spearman

% The corr function in the stat toolbox uses
% permutations for n<10 and some other fancy
% things when n>10 and there are no ties among
% ranks - we just do the standard way.

% compute r (default)
xrank = tiedrank(X,0);
yrank = tiedrank(Y,0);
r = nansum(demean(xrank).*demean(yrank)) ./ ...
(nansum(demean(xrank).^2).*nansum(demean(yrank).^2)).^(1/2);
r = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ...
(sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2);
t = r.*(sqrt(n-2)) ./ sqrt((1-r.^2));
pval = 2*tcdf(-abs(t),n-2);
% The corr function in the stat toolbox uses
% permutations for n<10 and some other fancy
% things when n>10 and there are no ties among
% ranks - we just do the standard way.

%% bootstrap
if nargout > 3
nboot = 1000;
if p > 1
Expand All @@ -96,15 +90,11 @@
for B=1:nboot
xrank = tiedrank(X(table(:,B),:),0);
yrank = tiedrank(Y(table(:,B),:),0);
rb(B,:) = nansum(demean(xrank).*demean(yrank)) ./ ...
(nansum(demean(xrank).^2).*nansum(demean(yrank).^2)).^(1/2);
if fig_flag ~= 0 % to make a nice figure get regression values
for c=1:size(X,2)
tmp = [xrank(:,c) yrank(:,c)]; % take a pair
tmp(find(sum(isnan(tmp),2)),:) = []; % remove NaNs
b = pinv([tmp(:,1) ones(size(tmp,1),1)])*tmp(:,2); % solve
slope(B,c) = b(1); intercept(B,c) = b(2,:);
end
rb(B,:) = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ...
(sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2);
for c=1:size(X,2)
b = pinv([xrank(:,c) ones(n,1)])*yrank(:,c);
slope(B,c) = b(1); intercept(B,c) = b(2,:);
end
end

Expand Down
7 changes: 3 additions & 4 deletions bendcorr.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
% pH is the p value for an omnibus test of independence between all pairs
%
% The percentage bend correlation is a robust method that protects against
% outliers among the marginal distributions. If NaN values are present, the
% smallest available n is used across all pairs.
%
% outliers among the marginal distributions.

% Cyril Pernet and Guillaume Rousselet 26-01-2011
% Reformatted for Corr_toolbox 02-7-2012
% Reformatted for Corr_toolbox 02--7-2012
% ----------------------------------------------
% Copyright (C) Corr_toolbox 2012

Expand Down
47 changes: 47 additions & 0 deletions bivariate_outliers.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
function flag = bivariate_outliers(X)

% routine that find the bivariate outliers using orthogonal projection and
% box plot rule

% find the centre of the data cloud using mid-covariance determinant
n = size(X,1);
result = mcdcov(X,'cor',1,'plots',0,'h',floor((n+size(X,2)*2+1)/2));
center = result.center;

% orthogonal projection to the lines joining the center
% followed by outlier detection using box plot rule

gval = sqrt(chi2inv(0.975,2)); % in fact depends on size(X,2) but here always = 2
for i=1:n % for each row
dis = NaN(n,1);
B = (X(i,:)-center)';
BB = B.^2;
bot = sum(BB);
if bot~=0
for j=1:n
A = (X(j,:)-center)';
dis(j)= norm(A'*B/bot.*B);
end
% IQR rule
[ql,qu]=idealf(dis);
record{i} = (dis > median(dis)+gval.*(qu-ql)) ; % + (dis < median(dis)-gval.*(qu-ql));
end
end

try
flag = nan(n,1);
flag = sum(cell2mat(record),2); % if any point is flagged

catch ME % this can happen to have an empty cell so loop
flag = nan(n,size(record,2));
index = 1;
for s=1:size(record,2)
if ~isempty(record{s})
flag(:,index) = record{s};
index = index+1;
end
end
flag(:,index:end) = [];
flag = sum(flag,2);
end

Loading

0 comments on commit 7e8a3e7

Please sign in to comment.