forked from CPernet/Robust-Correlations
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
630 additions
and
408 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
function [p_alpha,v] = MC_corrpval(n,p,method,alphav,pairs,D)

% Monte Carlo estimate of the alpha quantile of the distribution of
% minimal p-values under the null of correlations in a n*p matrix with null
% covariance but variance D (I by default)
%
% FORMAT [p_alpha,v] = MC_corrpval(n,p,method,alphav,pairs,D)
%
% INPUT  n       the number of observations
%        p       the number of variables
%        method  can be 'Pearson' (default), 'Spearman', 'Skipped Pearson',
%                'Skipped Spearman'
%        alphav  the alpha level(s), scalar or vector (optional, default 5/100)
%        pairs   a m*2 matrix of variables to correlate (optional, default
%                all pairs among the p variables)
%        D       the variance of each variable, p values (optional)
%
% OUTPUT p_alpha the alpha quantile estimate(s) of the distribution of
%                minimal p-values, one per element of alphav
%        v       the minimal p-value obtained in each Monte Carlo draw
%
%
% Cyril Pernet v3 - Novembre 2017
% ---------------------------------------------------
% Copyright (C) Corr_toolbox 2017

%% deal with inputs
if nargin == 0
    help MC_corrpval
    return % nothing to compute without inputs
elseif nargin < 2
    error('at least 2 inputs requested see help MC_corrpval');
end

valid_methods = {'Pearson','Spearman','Skipped Pearson','Skipped Spearman'};
if ~exist('method','var') || isempty(method)
    method = 'Pearson';
elseif ~any(strcmp(method,valid_methods))
    % fail early rather than with an undefined variable inside the parfor
    error('unknown method, use ''Pearson'', ''Spearman'', ''Skipped Pearson'' or ''Skipped Spearman''')
end

if ~exist('alphav','var') || isempty(alphav)
    alphav = 5/100;
end

if ~exist('pairs','var') || isempty(pairs)
    pairs = nchoosek(1:p,2);
end

%% generate the variance
SIGMA = eye(p);
if exist('D','var') && ~isempty(D)
    if numel(D) ~= p
        error('the vector D of variance must be of the same size as the number of variables p')
    else
        SIGMA = diag(D(:)); % variances on the diagonal, null covariances
    end
end

%% run the Monte Carlo simulation and keep smallest p values
nMC = 1000; % number of Monte Carlo draws
v   = NaN(1,nMC);
parfor MC = 1:nMC
    fprintf('Running Monte Carlo %g\n',MC)
    MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution
    switch method
        case 'Pearson'
            [~,~,pval] = Pearson(MVN,pairs);
        case 'Spearman'
            [~,~,pval] = Spearman(MVN,pairs);
        case 'Skipped Pearson'
            [~,~,pval] = skipped_Pearson(MVN,pairs);
        case 'Skipped Spearman'
            [~,~,pval] = skipped_Spearman(MVN,pairs);
    end
    v(MC) = min(pval);
end

%% get the Harell-Davis estimate of the alpha quantile
% weights are increments of a beta cdf with parameters (nMC+1)*q and
% (nMC+1)*(1-q), applied to the sorted minimal p-values
y       = sort(v);
vec     = 1:nMC;
p_alpha = NaN(1,length(alphav));
for l=1:length(alphav)
    % NOTE(review): q is alphav*10 (alphav = 0.05 gives q = 0.5, and
    % alphav >= 0.1 gives q >= 1, i.e. invalid beta parameters) - scaling
    % kept as in the original, confirm it is the intended quantile
    q  = alphav(l)*10; % for a decile
    m1 = (nMC+1).*q;
    m2 = (nMC+1).*(1-q);
    w  = betacdf(vec./nMC,m1,m2)-betacdf((vec-1)./nMC,m1,m2);
    p_alpha(l) = sum(w(:).*y(:));
end

% For p=6, n=20 alpha =.05 .025 .01 get
% 0.01122045 0.004343809 0.002354744
% 0.0240 0.0069 0.000027
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
function [r_alpha,t_alpha,p_alpha,vv,vvv,v] = MC_corrpval(n,p,method,alphav,pairs,D)

% Monte Carlo estimate of the alpha quantile of the distribution of
% minimal p-values (and of maximal r and t values) under the null of
% correlations in a n*p matrix with null covariance but variance D
% (I by default)
%
% FORMAT [r_alpha,t_alpha,p_alpha,vv,vvv,v] = MC_corrpval(n,p,method,alphav,pairs,D)
%
% INPUT  n       the number of observations
%        p       the number of variables
%        method  can be 'Pearson' (default), 'Spearman', 'Skipped Pearson',
%                'Skipped Spearman'
%        alphav  the alpha level(s), scalar or vector (optional, default 5/100)
%        pairs   a m*2 matrix of variables to correlate (optional, default
%                all pairs among the p variables)
%        D       the variance of each variable, p values (optional)
%
% OUTPUT r_alpha the quantile estimate(s) of the distribution of maximal r
%        t_alpha the quantile estimate(s) of the distribution of maximal t
%        p_alpha the alpha quantile estimate(s) of the distribution of
%                minimal p-values
%        vv      the maximal r obtained in each Monte Carlo draw
%        vvv     the maximal t obtained in each Monte Carlo draw
%        v       the minimal p-value obtained in each Monte Carlo draw
%
%
% Cyril Pernet v3 - Novembre 2017
% ---------------------------------------------------
% Copyright (C) Corr_toolbox 2017

%% deal with inputs
if nargin == 0
    help MC_corrpval
    return % nothing to compute without inputs
elseif nargin < 2
    error('at least 2 inputs requested see help MC_corrpval');
end

valid_methods = {'Pearson','Spearman','Skipped Pearson','Skipped Spearman'};
if ~exist('method','var') || isempty(method)
    method = 'Pearson';
elseif ~any(strcmp(method,valid_methods))
    % fail early rather than with an undefined variable inside the parfor
    error('unknown method, use ''Pearson'', ''Spearman'', ''Skipped Pearson'' or ''Skipped Spearman''')
end

if ~exist('alphav','var') || isempty(alphav)
    alphav = 5/100;
end

if ~exist('pairs','var') || isempty(pairs)
    pairs = nchoosek(1:p,2);
end

%% generate the variance
SIGMA = eye(p);
if exist('D','var') && ~isempty(D)
    if numel(D) ~= p
        error('the vector D of variance must be of the same size as the number of variables p')
    else
        SIGMA = diag(D(:)); % variances on the diagonal, null covariances
    end
end

%% run the Monte Carlo simulation and keep extreme values
nMC = 1000; % number of Monte Carlo draws
v   = NaN(1,nMC); % smallest p values
vv  = NaN(1,nMC); % largest r values
vvv = NaN(1,nMC); % largest t values
parfor MC = 1:nMC
    fprintf('Running Monte Carlo %g\n',MC)
    MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution
    % r and t are needed below for every method, so all branches capture them
    % NOTE(review): assumes Pearson and Spearman return [r,t,pval] in the
    % same order as the skipped versions - confirm against those functions
    switch method
        case 'Pearson'
            [r,t,pval] = Pearson(MVN,pairs);
        case 'Spearman'
            [r,t,pval] = Spearman(MVN,pairs);
        case 'Skipped Pearson'
            [r,t,pval] = skipped_Pearson(MVN,pairs);
        case 'Skipped Spearman'
            [r,t,pval] = skipped_Spearman(MVN,pairs);
    end
    v(MC)   = min(pval);
    vv(MC)  = max(r);
    vvv(MC) = max(t);
end

%% get the Harell-Davis estimate of the alpha quantile
% weights are increments of a beta cdf with parameters (nMC+1)*q and
% (nMC+1)*(1-q); the same weights are applied to the sorted p, r and t values
sorted_p = sort(v);
sorted_r = sort(vv);
sorted_t = sort(vvv);
vec      = 1:nMC;
p_alpha  = NaN(1,length(alphav));
r_alpha  = NaN(1,length(alphav));
t_alpha  = NaN(1,length(alphav));
for l=1:length(alphav)
    % NOTE(review): q is alphav*10 (alphav = 0.05 gives q = 0.5, and
    % alphav >= 0.1 gives q >= 1, i.e. invalid beta parameters) - scaling
    % kept as in the original, confirm it is the intended quantile
    q  = alphav(l)*10; % for a decile
    m1 = (nMC+1).*q;
    m2 = (nMC+1).*(1-q);
    w  = betacdf(vec./nMC,m1,m2)-betacdf((vec-1)./nMC,m1,m2);
    p_alpha(l) = sum(w(:).*sorted_p(:));
    r_alpha(l) = sum(w(:).*sorted_r(:));
    t_alpha(l) = sum(w(:).*sorted_t(:));
end

% For p=6, n=20 alpha =.05 .025 .01 get
% 0.01122045 0.004343809 0.002354744
% 0.0240 0.0069 0.000027
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
function flag = bivariate_outliers(X)

% routine that finds the bivariate outliers using orthogonal projection and
% the box plot rule
%
% FORMAT flag = bivariate_outliers(X)
%
% INPUT  X    a n*2 matrix, one observation per row
%
% OUTPUT flag a n*1 vector, flag(j) is the number of projection lines on
%             which observation j was marked as outlier (0 = never flagged)

% find the centre of the data cloud using mcdcov
% (presumably the minimum covariance determinant estimator)
n      = size(X,1);
result = mcdcov(X,'cor',1,'plots',0,'h',floor((n+size(X,2)*2+1)/2));
center = result.center;

% orthogonal projection to the lines joining the center
% followed by outlier detection using box plot rule

gval    = sqrt(chi2inv(0.975,2)); % in fact depends on size(X,2) but here always = 2
flagged = false(n,n);             % column i = outliers found on the line through row i
for i=1:n % for each row
    B   = (X(i,:)-center)';
    bot = sum(B.^2);
    if bot~=0 % a point sitting on the centre defines no line, its column stays false
        dis = NaN(n,1);
        for j=1:n
            A = (X(j,:)-center)';
            dis(j)= norm(A'*B/bot.*B); % length of the projection of A onto B
        end
        % IQR rule (upper side only)
        [ql,qu]=idealf(dis);
        flagged(:,i) = dis > median(dis)+gval.*(qu-ql);
    end
end

% count, for each observation, how many projections flagged it
flag = sum(flagged,2);
Oops, something went wrong.