diff --git a/HZmvntest.m b/HZmvntest.m index 8d95759..549393c 100644 --- a/HZmvntest.m +++ b/HZmvntest.m @@ -184,10 +184,6 @@ return; end -% remove NaNs -% ----------- -X(find(sum(isnan(X),2)),:) = []; - if c == 1 %covariance matrix normalizes by (n) [=default] S = cov(X,1); else %covariance matrix normalizes by (n-1) diff --git a/MC_corrpval.asv b/MC_corrpval.asv new file mode 100644 index 0000000..f1c1fdb --- /dev/null +++ b/MC_corrpval.asv @@ -0,0 +1,80 @@ +function [p_alpha,v] = MC_corrpval(n,p,method,alphav,pairs,D) + +% function to compute the alpha quantile estimate of the distribution of +% minimal p-values under the null of correlations in a n*p matrix with null +% covariance but variance D (I by default) +% +% FORMAT p_alpha = MC_corrpval(n,p,D) +% +% INPUT n the number of observations +% p the number of variables +% method can be 'Pearson', 'Spearman', 'Skipped Pearson', 'Skipped Spearman' +% pairs a m*2 matrix of variables to correlate (optional) +% D the variance of each variable (optional) +% +% p_alpha the alpha quantile estimate of the distribution of +% minimal p-values +% +% +% Cyril Pernet v3 - Novembre 2017 +% --------------------------------------------------- +% Copyright (C) Corr_toolbox 2017 + +%% deal with inputs +if nargin == 0 + help MC_corrpval + elsie nargin < 2 + error('at least 2 inputs requested see help MC_corrpval'); +end + +if ~exist('pairs','var') || isempty(pairs) + pairs = nchoosek([1:p],2); +end + +if ~exist('alphav','var') + alphav = 5/100; +end + +%% generate the variance +SIGMA = eye(p); +if exist('D','var') + if length(D) ~= p + error('the vector D of variance must be of the same size as the number of variables p') + else + SIGMA(SIGMA==1) = D; + end +end + +%% run the Monte Carlo simulation and keep smallest p values +v = NaN(1,1000); +parfor MC = 1:1000 + fprintf('Running Monte Carlo %g\n',MC) + MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution + if strcmp(method,'Pearson') + [~,~,pval] = Pearson(MVN,pairs); + elseif strcmp(method,'Pearson') + [~,~,pval] = Spearman(MVN,pairs); + elseif strcmp(method,'Skipped Pearson') + [r,t,pval] = skipped_Pearson(MVN,pairs); + elseif strcmp(method,'Skipped Spearman') + [r,t,pval] = skipped_Spearman(MVN,pairs); + end + v(MC) = min(pval); + +end + +%% get the Harell-Davis estimate of the alpha quantile + n = length(v); +for l=1:length(alphav) + q = alphav(l)*10; % for a decile + m1 = (n+1).*q; + m2 = (n+1).*(1-q); + vec = 1:n; + w = betacdf(vec./n,m1,m2)-betacdf((vec-1)./n,m1,m2); + y = sort(v); + p_alpha(l) = sum(w(:).*y(:)); +end + +% For p=6, n=20 alpha =.05 .025 .01 get +% 0.01122045 0.004343809 0.002354744 +% 0.0240 0.0069 0.000027 diff --git a/MC_corrpval.m b/MC_corrpval.m new file mode 100644 index 0000000..98c84b3 --- /dev/null +++ b/MC_corrpval.m @@ -0,0 +1,85 @@ +function [r_alpha,t_alpha,p_alpha,vv,vvv,v] = MC_corrpval(n,p,method,alphav,pairs,D) + +% function to compute the alpha quantile estimate of the distribution of +% minimal p-values under the null of correlations in a n*p matrix with null +% covariance but variance D (I by default) +% +% FORMAT p_alpha = MC_corrpval(n,p,D) +% +% INPUT n the number of observations +% p the number of variables +% method can be 'Pearson', 'Spearman', 'Skipped Pearson', 'Skipped Spearman' +% pairs a m*2 matrix of variables to correlate (optional) +% D the variance of each variable (optional) +% +% p_alpha the alpha quantile estimate of the distribution of +% minimal p-values +% +% +% Cyril Pernet v3 - Novembre 2017 +% --------------------------------------------------- +% Copyright (C) Corr_toolbox 2017 + +%% deal with inputs +if nargin == 0 + help MC_corrpval + elsie nargin < 2 + error('at least 2 inputs requested see help MC_corrpval'); +end + +if ~exist('pairs','var') || isempty(pairs) + pairs = nchoosek([1:p],2); +end + +if ~exist('alphav','var') + alphav = 5/100; +end + +%% generate the variance +SIGMA = eye(p); +if exist('D','var') + if length(D) ~= p + error('the vector D of variance must be of the same size as the number of variables p') + else + SIGMA(SIGMA==1) = D; + end +end + +%% run the Monte Carlo simulation and keep smallest p values +v = NaN(1,1000); +parfor MC = 1:1000 + fprintf('Running Monte Carlo %g\n',MC) + MVN = mvnrnd(zeros(1,p),SIGMA,n); % a multivariate normal distribution + if strcmp(method,'Pearson') + [~,~,pval] = Pearson(MVN,pairs); + elseif strcmp(method,'Pearson') + [~,~,pval] = Spearman(MVN,pairs); + elseif strcmp(method,'Skipped Pearson') + [r,t,pval] = skipped_Pearson(MVN,pairs); + elseif strcmp(method,'Skipped Spearman') + [r,t,pval] = skipped_Spearman(MVN,pairs); + end + v(MC) = min(pval); + vv(MC) = max(r); + vvv(MC) = max(t); +end + +%% get the Harell-Davis estimate of the alpha quantile + n = length(v); +for l=1:length(alphav) + q = alphav(l)*10; % for a decile + m1 = (n+1).*q; + m2 = (n+1).*(1-q); + vec = 1:n; + w = betacdf(vec./n,m1,m2)-betacdf((vec-1)./n,m1,m2); + y = sort(v); + p_alpha(l) = sum(w(:).*y(:)); + y = sort(vv); + r_alpha(l) = sum(w(:).*y(:)); + y = sort(vvv); + t_alpha(l) = sum(w(:).*y(:)); +end + +% For p=6, n=20 alpha =.05 .025 .01 get +% 0.01122045 0.004343809 0.002354744 +% 0.0240 0.0069 0.000027 diff --git a/Pearson.m b/Pearson.m index 7e56bc4..4b6545b 100644 --- a/Pearson.m +++ b/Pearson.m @@ -1,4 +1,4 @@ -function [r,t,pval,hboot,CI] = Pearson(X,Y,fig_flag,level) +function [r,t,pval,hboot,CI,pboot] = Pearson(X,Y,fig_flag,level) % compute the Pearson correlation along with the bootstrap CI % @@ -22,15 +22,11 @@ % % If X and Y are matrices of size [n p], p correlations are computed % consequently, the CI are adjusted at a level alpha/p (Bonferonni -% correction) and hboot is based on these adjusted CI but pval remains -% uncorrected - note also that if some values are NaN, the adjustement -% is based on the largest n -% -% This function requires the nansum.m function -% from the matlab stat toolbox. -% -% Cyril Pernet v2 (10-01-2014 - deals with NaN) -% --------------------------------------------- +% correction) and hboot is based on these adjusted CI (pval remain +% uncorrected) + +% Cyril Pernet v1 +% --------------------------------- % Copyright (C) Corr_toolbox 2012 %% data check @@ -61,27 +57,20 @@ end [n p] = size(X); -if p==1 - [X Y]=pairwise_cleanup(X,Y); - [n p] = size(X); -end %% basic Pearson % compute r -r = nansum(demean(X).*demean(Y)) ./ ... - (nansum(demean(X).^2).*nansum(demean(Y).^2)).^(1/2); +r = sum(detrend(X,'constant').*detrend(Y,'constant')) ./ ... + (sum(detrend(X,'constant').^2).*sum(detrend(Y,'constant').^2)).^(1/2); t = r.*sqrt((n-2)./(1-r.^2)); pval = 2*tcdf(-abs(t),n-2); -%% bootstrap if nargout > 3 % adjust boot parameters if p == 1 nboot = 599; % adjust percentiles following Wilcox - % even if NaN n is fine since data have - % been cleaned up if n < 40 low = 7 ; high = 593; elseif n >= 40 && n < 80 @@ -105,19 +94,15 @@ high = nboot - low; end end - % compute hboot and CI table = randi(n,n,nboot); for B=1:nboot - rb(B,:) = nansum(demean(X(table(:,B),:)).*demean(Y(table(:,B),:))) ./ ... - (nansum(demean(X(table(:,B),:)).^2).*nansum(demean(Y(table(:,B),:)).^2)).^(1/2); - if fig_flag ~= 0 % to make a nice figure get regression values - for c=1:size(X,2) - tmp = [X(table(:,B),c) Y(table(:,B),c)]; % take a pair - tmp(find(sum(isnan(tmp),2)),:) = []; % remove NaNs - b = pinv([tmp(:,1) ones(size(tmp,1),1)])*tmp(:,2); % solve - slope(B,c) = b(1); intercept(B,c) = b(2,:); - end + rb(B,:) = sum(detrend(X(table(:,B),:),'constant').*detrend(Y(table(:,B),:),'constant')) ./ ... + (sum(detrend(X(table(:,B),:),'constant').^2).*sum(detrend(Y(table(:,B),:),'constant').^2)).^(1/2); + for c=1:size(X,2) + b = pinv([X(table(:,B),c) ones(n,1)])*Y(table(:,B),c); + slope(B,c) = b(1); + intercept(B,c) = b(2,:); end end diff --git a/Spearman.m b/Spearman.m index 390d9a9..3cd578a 100644 --- a/Spearman.m +++ b/Spearman.m @@ -20,14 +20,14 @@ % If X and Y are matrices of size [n p], p correlations are computed % and the CIs are adjusted at the alpha/p level (Bonferonni % correction); hboot is based on these adjusted CIs but pval remains -% uncorrected - note also that if some values are NaN, the adjustement -% is based on the largest n +% uncorrected. % -% This function requires the tiedrank.m and nansum.m functions -% from the matlab stat toolbox. +% This function requires the tiedrank.m function from the matlab stat toolbox. % -% Cyril Pernet v2 (10-01-2014 - deals with NaN) -% --------------------------------------------- +% See also TIEDRANK. + +% Cyril Pernet v1 +% --------------------------------- % Copyright (C) Corr_toolbox 2012 %% data check @@ -58,27 +58,21 @@ end [n p] = size(X); -if p==1 - [X Y]=pairwise_cleanup(X,Y); - [n p] = size(X); -end %% basic Spearman -% The corr function in the stat toolbox uses -% permutations for n<10 and some other fancy -% things when n>10 and there are no ties among -% ranks - we just do the standard way. - % compute r (default) xrank = tiedrank(X,0); yrank = tiedrank(Y,0); -r = nansum(demean(xrank).*demean(yrank)) ./ ... - (nansum(demean(xrank).^2).*nansum(demean(yrank).^2)).^(1/2); +r = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ... + (sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2); t = r.*(sqrt(n-2)) ./ sqrt((1-r.^2)); pval = 2*tcdf(-abs(t),n-2); +% The corr function in the stat toolbox uses +% permutations for n<10 and some other fancy +% things when n>10 and there are no ties among +% ranks - we just do the standard way. -%% bootstrap if nargout > 3 nboot = 1000; if p > 1 @@ -96,15 +90,11 @@ for B=1:nboot xrank = tiedrank(X(table(:,B),:),0); yrank = tiedrank(Y(table(:,B),:),0); - rb(B,:) = nansum(demean(xrank).*demean(yrank)) ./ ... - (nansum(demean(xrank).^2).*nansum(demean(yrank).^2)).^(1/2); - if fig_flag ~= 0 % to make a nice figure get regression values - for c=1:size(X,2) - tmp = [xrank(:,c) yrank(:,c)]; % take a pair - tmp(find(sum(isnan(tmp),2)),:) = []; % remove NaNs - b = pinv([tmp(:,1) ones(size(tmp,1),1)])*tmp(:,2); % solve - slope(B,c) = b(1); intercept(B,c) = b(2,:); - end + rb(B,:) = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ... + (sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2); + for c=1:size(X,2) + b = pinv([xrank(:,c) ones(n,1)])*yrank(:,c); + slope(B,c) = b(1); intercept(B,c) = b(2,:); end end diff --git a/bendcorr.m b/bendcorr.m index f36ef59..615a0fb 100644 --- a/bendcorr.m +++ b/bendcorr.m @@ -20,11 +20,10 @@ % pH is the p value for an omnibus test of independence between all pairs % % The percentage bend correlation is a robust method that protects against -% outliers among the marginal distributions. If NaN values are present, the -% smallest available n is used across all pairs. -% +% outliers among the marginal distributions. + % Cyril Pernet and Guillaume Rousselet 26-01-2011 -% Reformatted for Corr_toolbox 02-7-2012 +% Reformatted for Corr_toolbox 02--7-2012 % ---------------------------------------------- % Copyright (C) Corr_toolbox 2012 diff --git a/bivariate_outliers.m b/bivariate_outliers.m new file mode 100644 index 0000000..796cfd5 --- /dev/null +++ b/bivariate_outliers.m @@ -0,0 +1,47 @@ +function flag = bivariate_outliers(X) + +% routine that find the bivariate outliers using orthogonal projection and +% box plot rule + +% find the centre of the data cloud using mid-covariance determinant +n = size(X,1); +result = mcdcov(X,'cor',1,'plots',0,'h',floor((n+size(X,2)*2+1)/2)); +center = result.center; + +% orthogonal projection to the lines joining the center +% followed by outlier detection using box plot rule + +gval = sqrt(chi2inv(0.975,2)); % in fact depends on size(X,2) but here always = 2 +for i=1:n % for each row + dis = NaN(n,1); + B = (X(i,:)-center)'; + BB = B.^2; + bot = sum(BB); + if bot~=0 + for j=1:n + A = (X(j,:)-center)'; + dis(j)= norm(A'*B/bot.*B); + end + % IQR rule + [ql,qu]=idealf(dis); + record{i} = (dis > median(dis)+gval.*(qu-ql)) ; % + (dis < median(dis)-gval.*(qu-ql)); + end +end + +try + flag = nan(n,1); + flag = sum(cell2mat(record),2); % if any point is flagged + +catch ME % this can happen to have an empty cell so loop + flag = nan(n,size(record,2)); + index = 1; + for s=1:size(record,2) + if ~isempty(record{s}) + flag(:,index) = record{s}; + index = index+1; + end + end + flag(:,index:end) = []; + flag = sum(flag,2); +end + diff --git a/conditional.m b/conditional.m index 51e1aec..258dfae 100644 --- a/conditional.m +++ b/conditional.m @@ -11,7 +11,7 @@ % OUTPUTS: values are the conditioned variables X and Y % variances are the conditional variances % -% + % Cyril Pernet v1 21/05/2012 % --------------------------------- % Copyright (C) Corr_toolbox 2012 @@ -20,11 +20,11 @@ error('X and Y must have the same size') end -r = Pearson(X,Y,0); -Xhat = r*nanstd(X)*Y / nanstd(Y); -Yhat = r*nanstd(Y)*X / nanstd(X); -Cond_stdX = (1-r^2)*nanstd(X); -Cond_stdY = (1-r^2)*nanstd(Y); +r = corr(X,Y); +Xhat = r*std(X)*Y / std(Y); +Yhat = r*std(Y)*X / std(X); +Cond_stdX = (1-r^2)*std(X); +Cond_stdY = (1-r^2)*std(Y); values = [Xhat Yhat]; variance = [Cond_stdX^2 Cond_stdY^2]; diff --git a/corr_normplot.m b/corr_normplot.m index 6f5b527..2c01959 100644 --- a/corr_normplot.m +++ b/corr_normplot.m @@ -15,8 +15,6 @@ function corr_normplot(x,y) error('X and Y must have the same size') end -[x,y]=pairwise_cleanup(x,y); - [r c] = size(x); if r == 1 && c > 1 x = x'; diff --git a/detect_outliers.m b/detect_outliers.m index f26601d..664ffa9 100644 --- a/detect_outliers.m +++ b/detect_outliers.m @@ -77,7 +77,6 @@ error('vector must be of the same length') end -[X,Y]=pairwise_cleanup(X,Y); [n,p]=size(X); %% ------------------------------------------------------------------ diff --git a/iqr_method.m b/iqr_method.m index d8c6d96..e5c01a3 100644 --- a/iqr_method.m +++ b/iqr_method.m @@ -33,10 +33,7 @@ % Cyril Pernet / Guillaume Rousselet % --------------------------------- % Copyright (C) Corr_toolbox 2012 - -if nargin == 1 - type = 2; -end + a=a(:);n=length(a); [q1,q2]=idealf(a); value=q2-q1; diff --git a/joint_density.m b/joint_density.m index ee627ea..b65bf8b 100644 --- a/joint_density.m +++ b/joint_density.m @@ -7,10 +7,10 @@ % % INPUTS: x and y are two vectors of the same length % flag if 1 (default) plot both the mesh and isocontour else only plot isocontrour -% + % Ref: Martinez, W.L. & Martinez, A.R. 2008. % Computational Statistics Handbook with Matlab. 2nd Ed. -% + % Cyril Pernet v2 23/07/2012 % ----------------------------- % Copyright (C) Corr_toolbox 2012 @@ -23,8 +23,6 @@ error('X and Y must have the same size') end -[x,y]=pairwise_cleanup(x,y); - [r c] = size(x); if r == 1 && c > 1 x = x'; diff --git a/normal_no_outlier/normal_no_outlier/results.mat b/normal_no_outlier/normal_no_outlier/results.mat new file mode 100644 index 0000000..0e666ad Binary files /dev/null and b/normal_no_outlier/normal_no_outlier/results.mat differ diff --git a/normal_no_outlier/results.mat b/normal_no_outlier/results.mat new file mode 100644 index 0000000..172f5f4 Binary files /dev/null and b/normal_no_outlier/results.mat differ diff --git a/normal_no_outlier/simulation1.asv b/normal_no_outlier/simulation1.asv new file mode 100644 index 0000000..2e2b884 --- /dev/null +++ b/normal_no_outlier/simulation1.asv @@ -0,0 +1,39 @@ +% Simulations for normally distributed data +% It is expected that all methods perform well +% with Pearson's being the best + +mkdir('normal_no_outlier') +cd('normal_no_outlier') + +n = 20; +p= 6; +SIGMA = eye(p); +SIGMA(SIGMA==1) = abs(randn(p,1)); + +tic +[r_alpha,t_alpha,p_alpha,vr,vt,vp] = MC_corrpval(n,p,'Skipped Pearson'); +time = toc +save results + +parfor MC = 1:1000 + fprintf('running simulations MC %g\n',MC) + X = mvnrnd(zeros(1,p),SIGMA,n); + [r(:,MC),t(:,MC),pval(:,MC)] = skipped_Pearson(X,[],'ECP',5/100); +end + +FWE_p = length(find(sum(pval<=p_alpha,1)))/1000; +FWE_r = length(find(sum(r>quantile(vr,0.05),1)))/1000; +FWE_t = length(find(sum(t>=t_alpha,1)))/1000; +save results + +parfor MC = 1:600 + fprintf('running simulations MC %g\n',MC) + X = mvnrnd(zeros(1,p),SIGMA,n); + p_alpha2 = MC_corrpval(n,p,'Skipped Pearson',5/100,[],diag(cov(X))); + [~,~,~,h2(MC,:)] = skipped_Pearson(X,[],'ECP',5/100,p_alpha2); +end +FWE2 = sum(h2)/600; +save results + + + diff --git a/normal_no_outlier/simulation1.m b/normal_no_outlier/simulation1.m new file mode 100644 index 0000000..92d32db --- /dev/null +++ b/normal_no_outlier/simulation1.m @@ -0,0 +1,39 @@ +% Simulations for normally distributed data +% It is expected that all methods perform well +% with Pearson's being the best + +mkdir('normal_no_outlier') +cd('normal_no_outlier') + +n = 20; +p= 6; +SIGMA = eye(p); +SIGMA(SIGMA==1) = abs(randn(p,1)); + +tic +[r_alpha,t_alpha,p_alpha,vr,vt,vp] = MC_corrpval(n,p,'Skipped Pearson'); +time = toc +save results + +parfor MC = 1:1000 + fprintf('running simulations MC %g\n',MC) + X = mvnrnd(zeros(1,p),SIGMA,n); + [r(:,MC),t(:,MC),pval(:,MC)] = skipped_Pearson(X,[],'ECP',5/100); +end + +FWE_p = length(find(sum(pval<=rst_hd(vp,0.05),1)))/1000; +FWE_r = length(find(sum(r>rst_hd(vr,0.95),1)))/1000; +FWE_t = length(find(sum(t>=rst_hd(vt,0.95),1)))/1000; +save results + +parfor MC = 1:600 + fprintf('running simulations MC %g\n',MC) + X = mvnrnd(zeros(1,p),SIGMA,n); + p_alpha2 = MC_corrpval(n,p,'Skipped Pearson',5/100,[],diag(cov(X))); + [~,~,~,h2(MC,:)] = skipped_Pearson(X,[],'ECP',5/100,p_alpha2); +end +FWE2 = sum(h2)/600; +save results + + + diff --git a/robust_correlation.m b/robust_correlation.m index 2579dae..635785b 100644 --- a/robust_correlation.m +++ b/robust_correlation.m @@ -44,7 +44,7 @@ elseif r > 1 && c > 1 error('X and Y must be 2 vectors, more than 1 column/row detected') end -% [X,Y]=pairwise_cleanup(X,Y); + level = 5/100; diff --git a/skipped_Pearson.m b/skipped_Pearson.m new file mode 100644 index 0000000..0774d2c --- /dev/null +++ b/skipped_Pearson.m @@ -0,0 +1,181 @@ +function [rp,tp,pval,h,CI,outid]=skipped_Pearson(varargin) + +% performs a robust Pearson correlation on data cleaned up for bivariate outliers, +% that is after finding the central point in the distribution using the mid covariance +% determinant, orthogonal distances are computed to this point, and any data outside the +% bound defined by the idealf estimator of the interquartile range is removed. +% +% FORMAT: [r,t,p,h,CI,outid] = skipped_Pearson(X,pairs,method,alphav,p_alpha); +% +% INPUTS: X is a matrix and corelations between all pairs (default) are computed +% pairs (optional) is a n*2 matrix of pairs of column to correlate +% method (optional) is 'ECP' or 'Hochberg' (only for n>60) +% alphav (optional, 5% by default) is the requested alpha level +% p_alpha (optional) the critical p_value to correct for multiple +% comparisons (see MC_corrpval) +% +% OUTPUTS: r is the pearson/spearman correlation +% t is the T value associated to the skipped correlation +% p is the p value of that pair +% h is the significance after correction for multiple comparisons +% CI is the robust confidence interval computed by bootstrapping the +% cleaned-up data set and taking the .95 centile values +% outid is the index of bivariate outliers +% +% This code rely on the mid covariance determinant as implemented in LIBRA +% - Verboven, S., Hubert, M. (2005), LIBRA: a MATLAB Library for Robust Analysis, +% Chemometrics and Intelligent Laboratory Systems, 75, 127-136. +% - Rousseeuw, P.J. (1984), "Least Median of Squares Regression," +% Journal of the American Statistical Association, Vol. 79, pp. 871-881. +% +% The quantile of observations whose covariance is minimized is +% floor((n+size(X,2)*2+1)/2)), +% i.e. ((number of observations + number of variables*2)+1) / 2, +% thus for a correlation this is floor(n/2 + 5/2). +% +% See also MCDCOV, IDEALF. +% +% Cyril Pernet v3 - Novembre 2017 +% --------------------------------------------------- +% Copyright (C) Corr_toolbox 2017 + +%% check the data input + +% _if no input simply return the help, otherwise load the data X_ +if nargin <1 + help skipped_Pearson + return +else + x = varargin{1}; + [n,p]=size(x); +end + +% _set the default options_ +method = 'ECP'; +alphav = 5/100; +pairs = nchoosek([1:p],2); +nboot = 599; + +% _check other inputs of the function_ +for inputs = 2:nargin + if inputs == 2 + pairs = varargin{inputs}; + elseif inputs == 3 + method = varargin{inputs}; + elseif inputs == 4 + alphav = varargin{inputs}; + elseif inputs == 5 + p_alpha = varargin{inputs}; + end +end + +% _do a quick quality check_ +if isempty(pairs) + pairs = nchoosek([1:p],2); +end + +if size(pairs,2)~=2 + pairs = pairs'; +end + +if sum(strcmpi(method,{'ECP','Hochberg'})) == 0 + error('unknown method selected, see help skipped_Pearson') +end + +if strcmp(method,'Hochberg') && n<60 || strcmp(method,'Hochberg') && n<60 && alphav == 5/100 + error('Hochberg is only valid for n>60 and aplha 5%') +end + +%% start the algorithm + +% _create a table of resamples_ +boot_index = 1; +while boot_index <= nboot + resample = randi(n,n,1); + if length(unique(resample)) > p % always more observations than variables + boostrap_sampling(:,boot_index) = resample; + boot_index = boot_index +1; + end +end +lower_bound = round((alphav*nboot)/2); +upper_bound = nboot - lower_bound; + +% now for each pair to test, get the observed and boostrapped r and t +% values, then derive the p value from the bootstrap (and hboot and CI if +% requested) + +% place holders +outid = cell(size(pairs,1),1); +rp = NaN(size(pairs,1),1); +tp = NaN(size(pairs,1),1); +CI = NaN(size(pairs,1),2); +pval = NaN(size(pairs,1),1); + +% loop for each pair to test +for row = 1:size(pairs,1) + + % select relevant columns + X = [x(:,pairs(row,1)) x(:,pairs(row,2))]; + % get the bivariate outliers + flag = bivariate_outliers(X); + vec = 1:n; + if sum(flag)==0 + outid{row}=[]; + else + flag=(flag>=1); + outid{row}=vec(flag); + end + keep=vec(~flag); % the vector of data to keep + + % Pearson correlation on cleaned data + rp(row) = sum(detrend(X(keep,1),'constant').*detrend(X(keep,2),'constant')) ./ ... + (sum(detrend(X(keep,1),'constant').^2).*sum(detrend(X(keep,2),'constant').^2)).^(1/2); + tp(row) = rp(row)*sqrt((n-2)/(1-rp(row).^2)); + + if nargout > 2 + % redo this for bootstrap samples + % fprintf('computing p values by bootstrapping data, pair %g %g\n',pairs(row,1),pairs(row,2)) + parfor b=1:nboot + Xb = X(boostrap_sampling(:,b),:); + r(b) = sum(detrend(Xb(keep,1),'constant').*detrend(Xb(keep,2),'constant')) ./ ... + (sum(detrend(Xb(keep,1),'constant').^2).*sum(detrend(Xb(keep,2),'constant').^2)).^(1/2); + end + + % get the CI + r = sort(r); + CI(row,:) = [r(lower_bound) r(upper_bound)]; + + % get the p value + Q = sum(r<0)/nboot; + pval(row) = 2*min([Q 1-Q]); + end +end + + +%% once we have all the r and t values, we need to adjust for multiple comparisons +if nargout > 3 + if strcmp(method,'ECP') + if exist('p_alpha','var') + h = pval < p_alpha; + else + disp('ECP method requested, computing p alpha ... (takes a while)') + p_alpha = MC_corrpval(n,p,'Skipped Pearson',alphav,pairs); + end + elseif strcmp('method','Hochberg') + [sorted_pval,index] = sort(pval,'descend'); + k = 1; sig = 0; h = zeros(1,p); + while sig == 0 + if sorted_pval(k) < alphav/k + h(k:end) = 1; sig = 1; + else + k = k+1; + end + end + h = h(index); + end +end + +%% quick clean-up of individual p-values +pval(pval==0) = 1/nboot; + +disp('Skipped Pearson done') diff --git a/skipped_correlation.m b/skipped_correlation.m index c8cdce4..0b33c07 100644 --- a/skipped_correlation.m +++ b/skipped_correlation.m @@ -1,4 +1,4 @@ -function [r,t,h,outid,hboot,CI]=skipped_correlation(x,y,fig_flag,estimator) +function [r,t,h,outid,hboot,CI]=skipped_correlation(X,fig_flag) % performs a robust correlation using pearson/spearman correlation on % data cleaned up for bivariate outliers - that is after finding the @@ -7,22 +7,18 @@ % bound defined by the idealf estimator of the interquartile range is removed. % % FORMAT: -% [r,t,h] = skipped_correlation(X,Y); -% [r,t,h] = skipped_correlation(X,Y,fig_flag); -% [r,t,h] = skipped_correlation(X,Y,fig_flag,'estimator'); +% [r,t,h] = skipped_correlation(X); +% [r,t,h] = skipped_correlation(X,fig_flag); % [r,t,h,outid,hboot,CI] = skipped_correlation(X,Y,fig_flag); % -% INPUTS: X and Y are 2 vectors or matrices, in the latter case, -% Spearman correlations are computed column-wise (only Spearman -% because there is no good control over type 1 error with Pearson) -% fig_flag (1/0) indicates to plot the data or not -% 'estimator' is either 'Pearson' or 'Spearman' +% INPUTS: X is a matrix and corelations between all pairs (default) are computed +% pairs (optional) is a n*2 matrix of pairs of column to correlate +% fig_flag (optional, ( by default) indicates to plot the data or not % % OUTPUTS: % r is the pearson/spearman correlation % t is the T value associated to the skipped correlation % h is the hypothesis of no association at alpha = 5% -% if NaN, h depends on the largest n across columns % outid is the index of bivariate outliers % % optional: @@ -43,9 +39,8 @@ % thus for a correlation this is floor(n/2 + 5/2). % % See also MCDCOV, IDEALF. -% + % Cyril Pernet & Guillaume Rousselet, v1 - April 2012 -% Cyril Pernet v2 (10-01-2014 - deals with NaN) % --------------------------------------------------- % Copyright (C) Corr_toolbox 2012 @@ -55,24 +50,13 @@ error('not enough input arguments'); elseif nargin == 2 fig_flag = 1; - estimator = []; -elseif nargin > 4 +elseif nargin > 3 error('too many input arguments'); end % transpose if x or y are not in column if size(x,1) == 1 && size(x,2) > 1; x = x'; end if size(y,1) == 1 && size(y,2) > 1; y = y'; end -if (size(x,2)+size(y,2)) == 2 % input is 2 vectors - [x,y] = pairwise_cleanup(x, y); % check for NaN here, otherwise deal with NaN column wise -end - -if nargin == 4 && (size(x,2)+size(y,2)) ~= 2 % input is not 2 vectors - if strcmp(estimator,'Pearson') == 1 - errordlg('Pearson cannot be used to control the type 1 error on multiple variables - switching to Spearman'); - estimator = []; - end -end % if X a vector and Y a matrix, % repmat X to perform multiple tests on Y (or the other around) @@ -109,58 +93,11 @@ fprintf('skipped correlation: processing pair %g \n',column); end - % get the centre of the bivariate distributions - [xx,yy] = pairwise_cleanup(x(:,column), y(:,column)); - X = [xx yy]; n = size(X,1); - if n < 10 - error('after NaN removal, there are less than 10 observations - robust correlation can''t be computed') - end - result=mcdcov(X,'cor',1,'plots',0,'h',floor((n+size(X,2)*2+1)/2)); - center = result.center; - - - % orthogonal projection to the lines joining the center - % followed by outlier detection using the boxplot rule - - vec=1:n; - for i=1:n % for each row - dis=NaN(n,1); - B = (X(i,:)-center)'; - BB = B.^2; - bot = sum(BB); - if bot~=0 - for j=1:n - A = (X(j,:)-center)'; - dis(j)= norm(A'*B/bot.*B); - end - - % MAD median rule - % [outliers,value] = madmedianrule(dis,2); - % record{i} = dis > (median(dis)+gval.*value); - - % IQR rule - [ql,qu]=idealf(dis); - record{i} = (dis > median(dis)+gval.*(qu-ql)) ; % + (dis < median(dis)-gval.*(qu-ql)); - end - end - - try - flag = nan(n,1); - flag = sum(cell2mat(record),2); % if any point is flagged - - catch ME % this can happen to have an empty cell so loop - flag = nan(n,size(record,2)); - index = 1; - for s=1:size(record,2) - if ~isempty(record{s}) - flag(:,index) = record{s}; - index = index+1; - end - end - flag(:,index:end) = []; - flag = sum(flag,2); - end - + X = [x(:,column) y(:,column)]; + % flag bivariate outliers + flag = bivariate_outliers(X); + % remove outliers + vec=1:n; if sum(flag)==0 outid{column}=[]; else @@ -172,53 +109,37 @@ %% Pearson/Spearman correlation if p == 1 % in the special case of a single test Pearson is valid too - a{column} = xx(keep); - b{column} = yy(keep); + a{column} = x(keep); + b{column} = y(keep); - rp = sum(demean(a{column}).*demean(b{column})) ./ ... - (sum(demean(a{column}).^2).*sum(demean(b{column}).^2)).^(1/2); + rp = sum(detrend(a{column},'constant').*detrend(b{column},'constant')) ./ ... + (sum(detrend(a{column},'constant').^2).*sum(detrend(b{column},'constant').^2)).^(1/2); tp = rp*sqrt((n-2)/(1-rp.^2)); r.Pearson = rp; t.Pearson = tp; xrank = tiedrank(a{column},0); yrank = tiedrank(b{column},0); - rs = sum(demean(xrank).*demean(yrank)) ./ ... - (sum(demean(xrank).^2).*sum(demean(yrank).^2)).^(1/2); + rs = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ... + (sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2); ts = rs*sqrt((n-2)/(1-rs.^2)); r.Spearman = rs; t.Spearman = ts; - - % update outputs if estimator selected - if strcmp(estimator,'Pearson') - clear r t; r = rp; t = tp; - elseif strcmp(estimator,'Spearman') - clear r t; r = rs; t = ts; - end else % multiple tests, only use Spearman to control type 1 error - a{column} = xx(keep); xrank = tiedrank(a{column},0); - b{column} = yy(keep); yrank = tiedrank(b{column},0); - r(column) = sum(demean(xrank).*demean(yrank)) ./ ... - (sum(demean(xrank).^2).*sum(demean(yrank).^2)).^(1/2); + a{column} = x(keep,column); xrank = tiedrank(a{column},0); + b{column} = y(keep,column); yrank = tiedrank(b{column},0); + r(column) = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ... + (sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2); t(column) = r(column)*sqrt((n-2)/(1-r(column).^2)); end - - clear X result centre dis record - end %% get h % the default test of 0 correlation is for alpha = 5% -n = size(x,1); % readjust n to the largest available + c = 6.947 / n + 2.3197; % valid for n between 10 and 200 if p == 1 - if strcmp(estimator,'Pearson') - h = abs(tp) >= c; - elseif strcmp(estimator,'Spearman') - h = abs(ts) >= c; - else - h.Pearson = abs(tp) >= c; - h.Spearman = abs(ts) >= c; - end + h.Pearson = abs(tp) >= c; + h.Spearman = abs(ts) >= c; else h= abs(t) >= c; end @@ -280,15 +201,15 @@ % do Spearman tmp1 = a{column}; xrank = tiedrank(tmp1(table(:,B)),0); tmp2 = b{column}; yrank = tiedrank(tmp2(table(:,B)),0); - rsb(B,column) = sum(demean(xrank).*demean(yrank)) ./ ... - (sum(demean(xrank).^2).*sum(demean(yrank).^2)).^(1/2); + rsb(B,column) = sum(detrend(xrank,'constant').*detrend(yrank,'constant')) ./ ... + (sum(detrend(xrank,'constant').^2).*sum(detrend(yrank,'constant').^2)).^(1/2); % get regression lines for Spearman coef = pinv([xrank ones(length(a{column}),1)])*yrank; sslope(B,column) = coef(1); sintercept(B,column) = coef(2,:); if p == 1 % ie only 1 correlation thus Pearson is good too - rpb(B,column) = nansum(demean(tmp1(table(:,B))).*demean(tmp2(table(:,B)))) ./ ... - (nansum(demean(tmp1(table(:,B))).^2).*nansum(demean(tmp2(table(:,B))).^2)).^(1/2); + rpb(B,column) = sum(detrend(tmp1(table(:,B)),'constant').*detrend(tmp2(table(:,B)),'constant')) ./ ... + (sum(detrend(tmp1(table(:,B)),'constant').^2).*sum(detrend(tmp2(table(:,B)),'constant').^2)).^(1/2); coef = pinv([tmp1(table(:,B)) ones(length(a{column}),1)])*tmp2(table(:,B)); pslope(B,column) = coef(1); pintercept(B,column) = coef(2,:); end @@ -346,24 +267,12 @@ % update outputs tmp = hboot; clear hboot; - if strcmp(estimator,'Pearson') - hboot = hbootp; - elseif strcmp(estimator,'Spearman') - hboot = tmp; - else - hboot.Spearman = tmp; - hboot.Pearson = hbootp; - end + hboot.Spearman = tmp; + hboot.Pearson = hbootp; tmp = CI; clear CI - if strcmp(estimator,'Pearson') - CI = CIp'; - elseif strcmp(estimator,'Spearman') - CI = tmp'; - else - CI.Spearman = tmp'; - CI.Pearson = CIp'; - end + CI.Spearman = tmp'; + CI.Pearson = CIp'; end end @@ -378,187 +287,72 @@ set(gcf,'Color','w'); end + if nargout>4 + if ~isnan(r.Pearson); subplot(1,3,1); end + M = sprintf('Skipped correlation \n Pearson r=%g CI=[%g %g] \n Spearman r=%g CI=[%g %g]',r.Pearson,CI.Pearson(1),CI.Pearson(2),r.Spearman,CI.Spearman(1),CI.Spearman(2)); + else + M = sprintf('Skipped correlation \n Pearson r=%g h=%g \n Spearman r=%g h=%g',r.Pearson,h.Pearson,r.Spearman,h.Spearman); + end - if strcmp(estimator,'Pearson') == 1 - if nargout>4 - if ~isnan(r); subplot(1,2,1); end - M = sprintf('Skipped correlation \n Pearson r=%g CI=[%g %g]',r,CI(1),CI(2)); - else - M = sprintf('Skipped correlation \n Pearson r=%g h=%g',r,h); - end - - scatter(a{1},b{1},100,'b','fill'); - grid on; hold on; - hh = lsline; set(hh,'Color','r','LineWidth',4); - try - [XEmin, YEmin] = ellipse(a{column},b{column}); - plot(real(XEmin), real(YEmin),'LineWidth',2); - MM = [min(XEmin) max(XEmin) min(YEmin) max(YEmin)]; - catch ME - text(min(x)+0.01*(min(x)),max(y),'no ellipse found','Fontsize',12) - MM = []; - end - xlabel('X','Fontsize',12); ylabel('Y','Fontsize',12); - title(M,'Fontsize',16); - - % add outliers and scale axis - scatter(x(outid{1}),y(outid{1}),100,'r','filled'); - MM2 = [min(x) max(x) min(y) max(y)]; - if isempty(MM); MM = MM2; end - A = floor(min([MM(:,1);MM2(:,1)]) - min([MM(:,1);MM2(:,1)])*0.01); - B = ceil(max([MM(:,2);MM2(:,2)]) + max([MM(:,2);MM2(:,2)])*0.01); - C = floor(min([MM(:,3);MM2(:,3)]) - min([MM(:,3);MM2(:,3)])*0.01); - D = ceil(max([MM(:,4);MM2(:,4)]) + max([MM(:,4);MM2(:,4)])*0.01); - axis([A B C D]); - box on;set(gca,'Fontsize',14) - - if nargout>4 && sum(~isnan(CIpslope))==2 - % add CI - y1 = refline(CIpslope(1),CIpintercept(1)); set(y1,'Color','r'); - y2 = refline(CIpslope(2),CIpintercept(2)); set(y2,'Color','r'); - y1 = get(y1); y2 = get(y2); - xpoints=[[y1.XData(1):y1.XData(2)],[y2.XData(2):-1:y2.XData(1)]]; - step1 = y1.YData(2)-y1.YData(1); step1 = step1 / (y1.XData(2)-y1.XData(1)); - step2 = y2.YData(2)-y2.YData(1); step2 = step2 / (y2.XData(2)-y2.XData(1)); - filled=[[y1.YData(1):step1:y1.YData(2)],[y2.YData(2):-step2:y2.YData(1)]]; - hold on; fillhandle=fill(xpoints,filled,[1 0 0]); - set(fillhandle,'EdgeColor',[1 0 0],'FaceAlpha',0.2,'EdgeAlpha',0.8);%set edge color - - % add histograms of bootstrap - subplot(1,2,2); k = round(1 + log2(length(rpb))); hist(rpb,k); grid on; - mytitle = sprintf('Bootstrapped \n Pearsons'' corr h=%g', hboot); - title(mytitle,'FontSize',16); hold on - xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) - plot(repmat(CI(1),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); - plot(repmat(CI(2),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); - axis tight; colormap([.4 .4 1]) - box on;set(gca,'Fontsize',14) - end - - elseif strcmp(estimator,'Spearman') == 1 - - if nargout>4 - if ~isnan(r); subplot(1,2,1); end - M = sprintf('Skipped correlation \n Spearman r=%g CI=[%g %g]',r,CI(1),CI(2)); - else - M = sprintf('Skipped correlation \n Spearman r=%g h=%g',r,h); - end - - scatter(a{1},b{1},100,'b','fill'); - grid on; hold on; - hh = lsline; set(hh,'Color','r','LineWidth',4); - try - [XEmin, YEmin] = ellipse(a{column},b{column}); - plot(real(XEmin), real(YEmin),'LineWidth',2); - MM = [min(XEmin) max(XEmin) min(YEmin) max(YEmin)]; - catch ME - text(min(x)+0.01*(min(x)),max(y),'no ellipse found','Fontsize',12) - MM = []; - end - xlabel('X','Fontsize',12); ylabel('Y','Fontsize',12); - title(M,'Fontsize',16); + scatter(a{1},b{1},100,'b','fill'); + grid on; hold on; + hh = lsline; set(hh,'Color','r','LineWidth',4); + try + [XEmin, YEmin] = ellipse(a{column},b{column}); + plot(real(XEmin), real(YEmin),'LineWidth',2); + MM = [min(XEmin) max(XEmin) min(YEmin) max(YEmin)]; + catch ME + text(min(x)+0.01*(min(x)),max(y),'no ellipse found','Fontsize',12) + MM = []; + end + xlabel('X','Fontsize',12); ylabel('Y','Fontsize',12); + title(M,'Fontsize',16); + + % add outliers and scale axis + scatter(x(outid{1}),y(outid{1}),100,'r','filled'); + MM2 = [min(x) max(x) min(y) max(y)]; + if isempty(MM); MM = MM2; end + A = floor(min([MM(:,1);MM2(:,1)]) - min([MM(:,1);MM2(:,1)])*0.01); + B = ceil(max([MM(:,2);MM2(:,2)]) + max([MM(:,2);MM2(:,2)])*0.01); + C = floor(min([MM(:,3);MM2(:,3)]) - min([MM(:,3);MM2(:,3)])*0.01); + D = ceil(max([MM(:,4);MM2(:,4)]) + max([MM(:,4);MM2(:,4)])*0.01); + axis([A B C D]); + box on;set(gca,'Fontsize',14) + + if nargout>4 && sum(~isnan(CIpslope))==2 + % add CI + y1 = refline(CIpslope(1),CIpintercept(1)); set(y1,'Color','r'); + y2 = refline(CIpslope(2),CIpintercept(2)); set(y2,'Color','r'); + y1 = get(y1); y2 = get(y2); + xpoints=[[y1.XData(1):y1.XData(2)],[y2.XData(2):-1:y2.XData(1)]]; + step1 = y1.YData(2)-y1.YData(1); step1 = step1 / (y1.XData(2)-y1.XData(1)); + step2 = y2.YData(2)-y2.YData(1); step2 = step2 / (y2.XData(2)-y2.XData(1)); + filled=[[y1.YData(1):step1:y1.YData(2)],[y2.YData(2):-step2:y2.YData(1)]]; + hold on; fillhandle=fill(xpoints,filled,[1 0 0]); + set(fillhandle,'EdgeColor',[1 0 0],'FaceAlpha',0.2,'EdgeAlpha',0.8);%set edge color - % add outliers and scale axis - scatter(x(outid{1}),y(outid{1}),100,'r','filled'); - MM2 = [min(x) max(x) min(y) max(y)]; - if isempty(MM); MM = MM2; end - A = floor(min([MM(:,1);MM2(:,1)]) - min([MM(:,1);MM2(:,1)])*0.01); - B = ceil(max([MM(:,2);MM2(:,2)]) + max([MM(:,2);MM2(:,2)])*0.01); - C = floor(min([MM(:,3);MM2(:,3)]) - min([MM(:,3);MM2(:,3)])*0.01); - D = ceil(max([MM(:,4);MM2(:,4)]) + max([MM(:,4);MM2(:,4)])*0.01); - axis([A B C D]); + % add histograms of bootstrap + subplot(1,3,2); k = round(1 + log2(length(rpb))); hist(rpb,k); grid on; + mytitle = sprintf('Bootstrapped \n Pearsons'' corr h=%g', hboot.Pearson); + title(mytitle,'FontSize',16); hold on + xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) + plot(repmat(CI.Pearson(1),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); + plot(repmat(CI.Pearson(2),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); + axis tight; colormap([.4 .4 1]) box on;set(gca,'Fontsize',14) - if nargout>4 && sum(~isnan(CIpslope))==2 - % add CI - y1 = refline(CIsslope(1),CIsintercept(1)); set(y1,'Color','r'); - y2 = refline(CIsslope(2),CIsintercept(2)); set(y2,'Color','r'); - y1 = get(y1); y2 = get(y2); - xpoints=[[y1.XData(1):y1.XData(2)],[y2.XData(2):-1:y2.XData(1)]]; - step1 = y1.YData(2)-y1.YData(1); step1 = step1 / (y1.XData(2)-y1.XData(1)); - step2 = y2.YData(2)-y2.YData(1); step2 = step2 / (y2.XData(2)-y2.XData(1)); - filled=[[y1.YData(1):step1:y1.YData(2)],[y2.YData(2):-step2:y2.YData(1)]]; - hold on; fillhandle=fill(xpoints,filled,[1 0 0]); - set(fillhandle,'EdgeColor',[1 0 0],'FaceAlpha',0.2,'EdgeAlpha',0.8);%set edge color - - % add histograms of bootstrap - subplot(1,2,2); k = round(1 + log2(length(rsb))); hist(rsb,k); grid on; - mytitle = sprintf('Bootstrapped \n Spearmans'' corr h=%g', hboot); - title(mytitle,'FontSize',16); hold on - xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) - plot(repmat(CI(1),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); - plot(repmat(CI(2),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); - axis tight; colormap([.4 .4 1]) - box on;set(gca,'Fontsize',14) - end - - else - if nargout>4 - if ~isnan(r.Pearson); subplot(1,3,1); end - M = sprintf('Skipped correlation \n Pearson r=%g CI=[%g %g] \n Spearman r=%g CI=[%g %g]',r.Pearson,CI.Pearson(1),CI.Pearson(2),r.Spearman,CI.Spearman(1),CI.Spearman(2)); - else - M = sprintf('Skipped correlation \n Pearson r=%g h=%g \n Spearman r=%g h=%g',r.Pearson,h.Pearson,r.Spearman,h.Spearman); - end - - scatter(a{1},b{1},100,'b','fill'); - grid on; hold on; - hh = lsline; set(hh,'Color','r','LineWidth',4); - try - [XEmin, YEmin] = ellipse(a{column},b{column}); - plot(real(XEmin), real(YEmin),'LineWidth',2); - MM = [min(XEmin) max(XEmin) min(YEmin) max(YEmin)]; - catch ME - text(min(x)+0.01*(min(x)),max(y),'no ellipse found','Fontsize',12) - MM = []; - end - xlabel('X','Fontsize',12); ylabel('Y','Fontsize',12); - title(M,'Fontsize',16); - - % add outliers and scale axis - scatter(x(outid{1}),y(outid{1}),100,'r','filled'); - MM2 = [min(x) max(x) min(y) max(y)]; - if isempty(MM); MM = MM2; end - A = floor(min([MM(:,1);MM2(:,1)]) - min([MM(:,1);MM2(:,1)])*0.01); - B = ceil(max([MM(:,2);MM2(:,2)]) + max([MM(:,2);MM2(:,2)])*0.01); - C = floor(min([MM(:,3);MM2(:,3)]) - min([MM(:,3);MM2(:,3)])*0.01); - D = ceil(max([MM(:,4);MM2(:,4)]) + max([MM(:,4);MM2(:,4)])*0.01); - axis([A B C D]); + subplot(1,3,3); k = round(1 + log2(length(rsb))); hist(rsb,k); grid on; + mytitle = sprintf('Bootstrapped \n Spearmans'' corr h=%g', hboot.Spearman); + title(mytitle,'FontSize',16); hold on + xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) + plot(repmat(CI.Spearman(1),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); + plot(repmat(CI.Spearman(2),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); + axis tight; colormap([.4 .4 1]) box on;set(gca,'Fontsize',14) - - if nargout>4 && sum(~isnan(CIpslope))==2 - % add CI - y1 = refline(CIpslope(1),CIpintercept(1)); set(y1,'Color','r'); - y2 = refline(CIpslope(2),CIpintercept(2)); set(y2,'Color','r'); - y1 = get(y1); y2 = get(y2); - xpoints=[[y1.XData(1):y1.XData(2)],[y2.XData(2):-1:y2.XData(1)]]; - step1 = y1.YData(2)-y1.YData(1); step1 = step1 / (y1.XData(2)-y1.XData(1)); - step2 = y2.YData(2)-y2.YData(1); step2 = step2 / (y2.XData(2)-y2.XData(1)); - filled=[[y1.YData(1):step1:y1.YData(2)],[y2.YData(2):-step2:y2.YData(1)]]; - hold on; fillhandle=fill(xpoints,filled,[1 0 0]); - set(fillhandle,'EdgeColor',[1 0 0],'FaceAlpha',0.2,'EdgeAlpha',0.8);%set edge color - - % add histograms of bootstrap - subplot(1,3,2); k = round(1 + log2(length(rpb))); hist(rpb,k); grid on; - mytitle = sprintf('Bootstrapped \n Pearsons'' corr h=%g', hboot.Pearson); - title(mytitle,'FontSize',16); hold on - xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) - plot(repmat(CI.Pearson(1),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); - plot(repmat(CI.Pearson(2),max(hist(rpb,k)),1),[1:max(hist(rpb,k))],'r','LineWidth',4); - axis tight; colormap([.4 .4 1]) - box on;set(gca,'Fontsize',14) - - subplot(1,3,3); k = round(1 + log2(length(rsb))); hist(rsb,k); grid on; - mytitle = sprintf('Bootstrapped \n Spearmans'' corr h=%g', hboot.Spearman); - title(mytitle,'FontSize',16); hold on - xlabel('boot correlations','FontSize',14);ylabel('frequency','FontSize',14) - plot(repmat(CI.Spearman(1),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); - plot(repmat(CI.Spearman(2),max(hist(rsb,k)),1),[1:max(hist(rsb,k))],'r','LineWidth',4); - axis tight; colormap([.4 .4 1]) - box on;set(gca,'Fontsize',14) - end end end + if strcmp(answer,'yes') for f = 1:p if fig_flag == 1 @@ -757,7 +551,7 @@ % Newton iterative method theta = pi; error = 1; - while abs(error) > 1e-4 + while abs(error) > 1e-6 cth = cos(theta); sth = sin(theta); a1 = a(1) * cth + b(1) * sth; b1 = -a(1) * sth + b(1) * cth; a2 = a(2) * cth + b(2) * sth; b2 = -a(2) * sth + b(2) * cth; @@ -802,4 +596,4 @@ end end end -end +end \ No newline at end of file diff --git a/univar.m b/univar.m index 02d1e84..7be8fc0 100644 --- a/univar.m +++ b/univar.m @@ -1,32 +1,29 @@ -function [nu,x,h,xp,yp]=univar(X) +function [nu,x,h,xp,yp]=univar(data) % Computes the univariate pdf of data and the histogram values. % Returns the frequency of data per bin (nu), the position of the bins (x) % and their size (h). The pdf is returned in yp for the xp values -% -% Cyril Pernet v2 (10-01-2014 - deals with NaN) + +% Cyril Pernet v1 % --------------------------------- % Copyright (C) Corr_toolbox 2012 -for c=1:size(X,2) - data = X(~isnan(X(:,c)),c); - mu = mean(data); - v = var(data); - - % get the normal pdf for this distribution - xp = linspace(min(data),max(data)); - if v <= 0 - error('Variance must be greater than zero') - return - end - arg = ((xp-mu).^2)/(2*v); - cons = sqrt(2*pi)*sqrt(v); - yp = (1/cons)*exp(-arg); - - % get histogram info using Surges' rule: - k = round(1 + log2(length(data))); - [nu{c},x{c}]=hist(data,k); - h{c} = x{c}(2) - x{c}(1); + +mu = mean(data); +v = var(data); + +% get the normal pdf for this distribution +xp = linspace(min(data),max(data)); +if v <= 0 + error('Variance must be greater than zero') + return end +arg = ((xp-mu).^2)/(2*v); +cons = sqrt(2*pi)*sqrt(v); +yp = (1/cons)*exp(-arg); -if c==1; nu = nu{1}; x = x{1}; h = h{1}; end +% get histogram info using Surges' rule: +k = round(1 + log2(length(data))); +[nu,x]=hist(data,k); +h = x(2) - x(1); +end \ No newline at end of file diff --git a/variance_homogeneity.m b/variance_homogeneity.m index 29de698..622553b 100644 --- a/variance_homogeneity.m +++ b/variance_homogeneity.m @@ -18,15 +18,13 @@ % Copyright (C) Corr_toolbox 2012 if nargin == 2 - condition = 1; + condition = 1 end if size(x)~=size(y) error('X and Y must have the same size') end -[x,y]=pairwise_cleanup(x,y); - % computes nboot = 600; nm = length(x);