Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
moon by the stars authored May 23, 2018
1 parent efb0104 commit 1a47564
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 0 deletions.
210 changes: 210 additions & 0 deletions l1eq_pd.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
% l1eq_pd.m
%
% Solve
% min_x ||x||_1 s.t. Ax = b
%
% Recast as linear program
% min_{x,u} sum(u) s.t. -u <= x <= u, Ax=b
% and use primal-dual interior point method
%
% Usage: xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)
%
% x0 - Nx1 vector, initial point.
%
% A - Either a handle to a function that takes a N vector and returns a K
% vector , or a KxN matrix. If A is a function handle, the algorithm
% operates in "largescale" mode, solving the Newton systems via the
% Conjugate Gradients algorithm.
%
% At - Handle to a function that takes a K vector and returns an N vector.
% If A is a KxN matrix, At is ignored.
%
% b - Kx1 vector of observations.
%
% pdtol - Tolerance for primal-dual algorithm (algorithm terminates if
% the duality gap is less than pdtol).
% Default = 1e-3.
%
% pdmaxiter - Maximum number of primal-dual iterations.
% Default = 50.
%
% cgtol - Tolerance for Conjugate Gradients; ignored if A is a matrix.
% Default = 1e-8.
%
% cgmaxiter - Maximum number of iterations for Conjugate Gradients; ignored
% if A is a matrix.
% Default = 200.
%
% Written by: Justin Romberg, Caltech
% Email: [email protected]
% Created: October 2005
%

function xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)

largescale = isa(A,'function_handle');

if (nargin < 5), pdtol = 1e-3; end
if (nargin < 6), pdmaxiter = 50; end
if (nargin < 7), cgtol = 1e-8; end
if (nargin < 8), cgmaxiter = 200; end

N = length(x0);

alpha = 0.01;
beta = 0.5;
mu = 10;

gradf0 = [zeros(N,1); ones(N,1)];

% starting point --- make sure that it is feasible
if (largescale)
if (norm(A(x0)-b)/norm(b) > cgtol)
disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
AAt = @(z) A(At(z));
[w, cgres, cgiter] = cgsolve(AAt, b, cgtol, cgmaxiter, 0);
if (cgres > 1/2)
disp('A*At is ill-conditioned: cannot find starting point');
xp = x0;
return;
end
x0 = At(w);
end
else
if (norm(A*x0-b)/norm(b) > cgtol)
disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
opts.POSDEF = true; opts.SYM = true;
[w, hcond] = linsolve(A*A', b, opts);
if (hcond < 1e-14)
disp('A*At is ill-conditioned: cannot find starting point');
xp = x0;
return;
end
x0 = A'*w;
end
end
x = x0;
u = (0.95)*abs(x0) + (0.10)*max(abs(x0));

% set up for the first iteration
fu1 = x - u;
fu2 = -x - u;
lamu1 = -1./fu1;
lamu2 = -1./fu2;
if (largescale)
v = -A(lamu1-lamu2);
Atv = At(v);
rpri = A(x) - b;
else
v = -A*(lamu1-lamu2);
Atv = A'*v;
rpri = A*x - b;
end

sdg = -(fu1'*lamu1 + fu2'*lamu2);
tau = mu*2*N/sdg;

rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
resnorm = norm([rdual; rcent; rpri]);

pditer = 0;
done = (sdg < pdtol) | (pditer >= pdmaxiter);
while (~done)

pditer = pditer + 1;

w1 = -1/tau*(-1./fu1 + 1./fu2) - Atv;
w2 = -1 - 1/tau*(1./fu1 + 1./fu2);
w3 = -rpri;

sig1 = -lamu1./fu1 - lamu2./fu2;
sig2 = lamu1./fu1 - lamu2./fu2;
sigx = sig1 - sig2.^2./sig1;

if (largescale)
w1p = w3 - A(w1./sigx - w2.*sig2./(sigx.*sig1));
h11pfun = @(z) -A(1./sigx.*At(z));
[dv, cgres, cgiter] = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0);
if (cgres > 1/2)
disp('Cannot solve system. Returning previous iterate. (See Section 4 of notes for more information.)');
xp = x;
return
end
dx = (w1 - w2.*sig2./sig1 - At(dv))./sigx;
Adx = A(dx);
Atdv = At(dv);
else
w1p = -(w3 - A*(w1./sigx - w2.*sig2./(sigx.*sig1)));
H11p = A*(sparse(diag(1./sigx))*A');
opts.POSDEF = true; opts.SYM = true;
%[dv,hcond] = linsolve(H11p, w1p, opts);
[dv,hcond] = linsolve(H11p, w1p);
if (hcond < 1e-14)
disp('Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)');
xp = x;
return
end
dx = (w1 - w2.*sig2./sig1 - A'*dv)./sigx;
Adx = A*dx;
Atdv = A'*dv;
end

du = (w2 - sig2.*dx)./sig1;

dlamu1 = (lamu1./fu1).*(-dx+du) - lamu1 - (1/tau)*1./fu1;
dlamu2 = (lamu2./fu2).*(dx+du) - lamu2 - 1/tau*1./fu2;

% make sure that the step is feasible: keeps lamu1,lamu2 > 0, fu1,fu2 < 0
indp = find(dlamu1 < 0); indn = find(dlamu2 < 0);
s = min([1; -lamu1(indp)./dlamu1(indp); -lamu2(indn)./dlamu2(indn)]);
indp = find((dx-du) > 0); indn = find((-dx-du) > 0);
s = (0.99)*min([s; -fu1(indp)./(dx(indp)-du(indp)); -fu2(indn)./(-dx(indn)-du(indn))]);

% backtracking line search
suffdec = 0;
backiter = 0;
while (~suffdec)
xp = x + s*dx; up = u + s*du;
vp = v + s*dv; Atvp = Atv + s*Atdv;
lamu1p = lamu1 + s*dlamu1; lamu2p = lamu2 + s*dlamu2;
fu1p = xp - up; fu2p = -xp - up;
rdp = gradf0 + [lamu1p-lamu2p; -lamu1p-lamu2p] + [Atvp; zeros(N,1)];
rcp = [-lamu1p.*fu1p; -lamu2p.*fu2p] - (1/tau);
rpp = rpri + s*Adx;
suffdec = (norm([rdp; rcp; rpp]) <= (1-alpha*s)*resnorm);
s = beta*s;
backiter = backiter + 1;
if (backiter > 32)
disp('Stuck backtracking, returning last iterate. (See Section 4 of notes for more information.)')
xp = x;
return
end
end


% next iteration
x = xp; u = up;
v = vp; Atv = Atvp;
lamu1 = lamu1p; lamu2 = lamu2p;
fu1 = fu1p; fu2 = fu2p;

% surrogate duality gap
sdg = -(fu1'*lamu1 + fu2'*lamu2);
tau = mu*2*N/sdg;
rpri = rpp;
rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
resnorm = norm([rdual; rcent; rpri]);

done = (sdg < pdtol) | (pditer >= pdmaxiter);

disp(sprintf('Iteration = %d, tau = %8.3e, Primal = %8.3e, PDGap = %8.3e, Dual res = %8.3e, Primal res = %8.3e',...
pditer, tau, sum(u), sdg, norm(rdual), norm(rpri)));
if (largescale)
disp(sprintf(' CG Res = %8.3e, CG Iter = %d', cgres, cgiter));
else
disp(sprintf(' H11p condition number = %8.3e', hcond));
end

end
46 changes: 46 additions & 0 deletions process.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
%Import our original data and divide it into sub block. For each block, rebuild it by recover.m

%{
input£ºk£ºAssumed sparsity level of the original data. Used to determine pool numbers.
pcell£ºApproximately equal to the number of cells used in each pool.
p£ºDetermining the columns of M together with pcell.
alpha£ºNoise. When the value is 1, there is no noise.
output£ºX£ºThe original data of each sub block
M£ºThe measurement matrix for each sub block
R£ºThe inference of each sub block
%}

function [X,M,R] = process(k,pcell,p,alpha)

original = importdata('count4070.txt');
XX = [original.data];
XX = XX';

ncolM = floor(pcell/p); % the number of columns per M

C = {};

% If the column size of the last block is bigger than half of the previous one,
% it will become a new seperate block, else merge it into previous one.
ni = floor(size(XX,1)/ncolM);
for i = 1:ni
C{i} = XX((ncolM*(i-1)+1):ncolM*i,:);
end
if size(XX,1)-ncolM*ni>ncolM/2
C{ni+1} = XX(ncolM*ni+1:size(XX,1),:);
else
C{ni} = [C{ni};XX(ncolM*ni+1:size(XX,1),:)];
end


M = {};% Used to save the measurement matrix of each block
R = {};% Used to save the inference of each block
X = {};% Used to save the original data of each sub block
for i = 1:length(C)
XX = C{i};
X = [X XX];
[MM,recoverXX] = recover(floor(k*size(XX,1))*2,size(XX,1),p,XX,alpha);
M = [M MM];
R = [R recoverXX];
end

48 changes: 48 additions & 0 deletions recover.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
%For each sub block, we generate a measurement matrix M and add noise e.
%Using the noisy measurement matrix eM together observed value y to inference recoverXX.

%{
input:m:The number of rows for each sub block.
n:The number of columns of each sub block.
p:Determine the proportion of 1 in the M matrix.
XX:Original data. Is used to generate the observed value y.
alpha:Noise. When the value is 1, there is no noise.
output:M:the measurement matrix of each sub block.
recoverXX:inference sub X.
%}

function [M,recoverXX] = recover(m,n,p,XX,alpha)
recoverXX = [];
M = double(rand(m,n)<p); %Use uniform distribution of [0,1] to randomly generate M (m×n).
%For each element in M, if value is less than p, we set to 1, otherwise 0.


%The code below is used to generate the full observation matrix.
%{
a(1:round(n/4))=1;
a(round(n/4)+1:round(2*n/4))=0.1;
a(round(2*n/4)+1:round(3*n/4))=0.01;
a(round(3*n/4)+1:n)=0.001;
M=[];
for i=1:m
a = a(randperm(length(a)));
M = [M;a];
end
%}

e = unifrnd(alpha,2-alpha,m,n); %Use uniform distribution of [alpha,2-alpha] to randomly generate erorr(m×n).
eM = e.*M;
for i = 1:size(XX,2)
y = eM*XX(:,i);
if sum(y) == 0
xp = 0*XX(:,i);
recoverXX = [recoverXX xp];
else
[w, hcond] = linsolve(M*M',y);
x0 = M'*w;
xp = l1eq_pd(x0, M, [], y);
recoverXX = [recoverXX xp];
end
end
end

52 changes: 52 additions & 0 deletions star.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
%{
input£º
k£ºAssumed sparsity level of the original data. Used to determine pool numbers.
pcell£ºApproximately equal to the number of cells used in each pool.
p£ºDetermining the columns of M together with pcell.
out£º A mat file include:
X(The original data of each sub block)
M(The measurement matrix for each sub block)
R(The inference of each sub block)
median correlation of all genes
%}

function star(k,pcell,p,alpha)
k = str2num(k);
pcell = str2num(pcell);
p = str2num(p);
alpha = str2num(alpha);

[X,M,R] = process(k,pcell,p,alpha);


%Correlation analysis
XX = [];
recoverXX = []; % Combine each sub block to original dimension
for i = 1:length(R)
XX = [XX;X{i}];
recoverXX = [recoverXX;R{i}];
end

recoverXX(recoverXX<0) = 0; % Cleaning our reconstructed data
recoverXX = round(recoverXX);

CC=[];
for i = 1:size(XX,2)
C = corrcoef(XX(:,i),recoverXX(:,i));
C = C(1,2);
CC = [CC;C];
end

data = CC;
data(isnan(data)) = 1;


an = mean(data);
dian = median(data);


fname = ['k_',num2str(k),'_pcell_',num2str(pcell),'_p_',num2str(p),'_a_',num2str(alpha),'_pj_',num2str(an),'_zw_',num2str(dian),'.mat'];
save(fname,'X','M','R','an','dian','-mat');
%end

0 comments on commit 1a47564

Please sign in to comment.