-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
moon by the stars
authored
May 23, 2018
1 parent
efb0104
commit 1a47564
Showing
4 changed files
with
356 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
% l1eq_pd.m | ||
% | ||
% Solve | ||
% min_x ||x||_1 s.t. Ax = b | ||
% | ||
% Recast as linear program | ||
% min_{x,u} sum(u) s.t. -u <= x <= u, Ax=b | ||
% and use primal-dual interior point method | ||
% | ||
% Usage: xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter) | ||
% | ||
% x0 - Nx1 vector, initial point. | ||
% | ||
% A - Either a handle to a function that takes a N vector and returns a K | ||
% vector , or a KxN matrix. If A is a function handle, the algorithm | ||
% operates in "largescale" mode, solving the Newton systems via the | ||
% Conjugate Gradients algorithm. | ||
% | ||
% At - Handle to a function that takes a K vector and returns an N vector. | ||
% If A is a KxN matrix, At is ignored. | ||
% | ||
% b - Kx1 vector of observations. | ||
% | ||
% pdtol - Tolerance for primal-dual algorithm (algorithm terminates if | ||
% the duality gap is less than pdtol). | ||
% Default = 1e-3. | ||
% | ||
% pdmaxiter - Maximum number of primal-dual iterations. | ||
% Default = 50. | ||
% | ||
% cgtol - Tolerance for Conjugate Gradients; ignored if A is a matrix. | ||
% Default = 1e-8. | ||
% | ||
% cgmaxiter - Maximum number of iterations for Conjugate Gradients; ignored | ||
% if A is a matrix. | ||
% Default = 200. | ||
% | ||
% Written by: Justin Romberg, Caltech | ||
% Email: [email protected] | ||
% Created: October 2005 | ||
% | ||
|
||
function xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter) | ||
|
||
largescale = isa(A,'function_handle'); | ||
|
||
if (nargin < 5), pdtol = 1e-3; end | ||
if (nargin < 6), pdmaxiter = 50; end | ||
if (nargin < 7), cgtol = 1e-8; end | ||
if (nargin < 8), cgmaxiter = 200; end | ||
|
||
N = length(x0); | ||
|
||
alpha = 0.01; | ||
beta = 0.5; | ||
mu = 10; | ||
|
||
gradf0 = [zeros(N,1); ones(N,1)]; | ||
|
||
% starting point --- make sure that it is feasible | ||
if (largescale) | ||
if (norm(A(x0)-b)/norm(b) > cgtol) | ||
disp('Starting point infeasible; using x0 = At*inv(AAt)*y.'); | ||
AAt = @(z) A(At(z)); | ||
[w, cgres, cgiter] = cgsolve(AAt, b, cgtol, cgmaxiter, 0); | ||
if (cgres > 1/2) | ||
disp('A*At is ill-conditioned: cannot find starting point'); | ||
xp = x0; | ||
return; | ||
end | ||
x0 = At(w); | ||
end | ||
else | ||
if (norm(A*x0-b)/norm(b) > cgtol) | ||
disp('Starting point infeasible; using x0 = At*inv(AAt)*y.'); | ||
opts.POSDEF = true; opts.SYM = true; | ||
[w, hcond] = linsolve(A*A', b, opts); | ||
if (hcond < 1e-14) | ||
disp('A*At is ill-conditioned: cannot find starting point'); | ||
xp = x0; | ||
return; | ||
end | ||
x0 = A'*w; | ||
end | ||
end | ||
x = x0; | ||
u = (0.95)*abs(x0) + (0.10)*max(abs(x0)); | ||
|
||
% set up for the first iteration | ||
fu1 = x - u; | ||
fu2 = -x - u; | ||
lamu1 = -1./fu1; | ||
lamu2 = -1./fu2; | ||
if (largescale) | ||
v = -A(lamu1-lamu2); | ||
Atv = At(v); | ||
rpri = A(x) - b; | ||
else | ||
v = -A*(lamu1-lamu2); | ||
Atv = A'*v; | ||
rpri = A*x - b; | ||
end | ||
|
||
sdg = -(fu1'*lamu1 + fu2'*lamu2); | ||
tau = mu*2*N/sdg; | ||
|
||
rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau); | ||
rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)]; | ||
resnorm = norm([rdual; rcent; rpri]); | ||
|
||
pditer = 0; | ||
done = (sdg < pdtol) | (pditer >= pdmaxiter); | ||
while (~done) | ||
|
||
pditer = pditer + 1; | ||
|
||
w1 = -1/tau*(-1./fu1 + 1./fu2) - Atv; | ||
w2 = -1 - 1/tau*(1./fu1 + 1./fu2); | ||
w3 = -rpri; | ||
|
||
sig1 = -lamu1./fu1 - lamu2./fu2; | ||
sig2 = lamu1./fu1 - lamu2./fu2; | ||
sigx = sig1 - sig2.^2./sig1; | ||
|
||
if (largescale) | ||
w1p = w3 - A(w1./sigx - w2.*sig2./(sigx.*sig1)); | ||
h11pfun = @(z) -A(1./sigx.*At(z)); | ||
[dv, cgres, cgiter] = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0); | ||
if (cgres > 1/2) | ||
disp('Cannot solve system. Returning previous iterate. (See Section 4 of notes for more information.)'); | ||
xp = x; | ||
return | ||
end | ||
dx = (w1 - w2.*sig2./sig1 - At(dv))./sigx; | ||
Adx = A(dx); | ||
Atdv = At(dv); | ||
else | ||
w1p = -(w3 - A*(w1./sigx - w2.*sig2./(sigx.*sig1))); | ||
H11p = A*(sparse(diag(1./sigx))*A'); | ||
opts.POSDEF = true; opts.SYM = true; | ||
%[dv,hcond] = linsolve(H11p, w1p, opts); | ||
[dv,hcond] = linsolve(H11p, w1p); | ||
if (hcond < 1e-14) | ||
disp('Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)'); | ||
xp = x; | ||
return | ||
end | ||
dx = (w1 - w2.*sig2./sig1 - A'*dv)./sigx; | ||
Adx = A*dx; | ||
Atdv = A'*dv; | ||
end | ||
|
||
du = (w2 - sig2.*dx)./sig1; | ||
|
||
dlamu1 = (lamu1./fu1).*(-dx+du) - lamu1 - (1/tau)*1./fu1; | ||
dlamu2 = (lamu2./fu2).*(dx+du) - lamu2 - 1/tau*1./fu2; | ||
|
||
% make sure that the step is feasible: keeps lamu1,lamu2 > 0, fu1,fu2 < 0 | ||
indp = find(dlamu1 < 0); indn = find(dlamu2 < 0); | ||
s = min([1; -lamu1(indp)./dlamu1(indp); -lamu2(indn)./dlamu2(indn)]); | ||
indp = find((dx-du) > 0); indn = find((-dx-du) > 0); | ||
s = (0.99)*min([s; -fu1(indp)./(dx(indp)-du(indp)); -fu2(indn)./(-dx(indn)-du(indn))]); | ||
|
||
% backtracking line search | ||
suffdec = 0; | ||
backiter = 0; | ||
while (~suffdec) | ||
xp = x + s*dx; up = u + s*du; | ||
vp = v + s*dv; Atvp = Atv + s*Atdv; | ||
lamu1p = lamu1 + s*dlamu1; lamu2p = lamu2 + s*dlamu2; | ||
fu1p = xp - up; fu2p = -xp - up; | ||
rdp = gradf0 + [lamu1p-lamu2p; -lamu1p-lamu2p] + [Atvp; zeros(N,1)]; | ||
rcp = [-lamu1p.*fu1p; -lamu2p.*fu2p] - (1/tau); | ||
rpp = rpri + s*Adx; | ||
suffdec = (norm([rdp; rcp; rpp]) <= (1-alpha*s)*resnorm); | ||
s = beta*s; | ||
backiter = backiter + 1; | ||
if (backiter > 32) | ||
disp('Stuck backtracking, returning last iterate. (See Section 4 of notes for more information.)') | ||
xp = x; | ||
return | ||
end | ||
end | ||
|
||
|
||
% next iteration | ||
x = xp; u = up; | ||
v = vp; Atv = Atvp; | ||
lamu1 = lamu1p; lamu2 = lamu2p; | ||
fu1 = fu1p; fu2 = fu2p; | ||
|
||
% surrogate duality gap | ||
sdg = -(fu1'*lamu1 + fu2'*lamu2); | ||
tau = mu*2*N/sdg; | ||
rpri = rpp; | ||
rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau); | ||
rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)]; | ||
resnorm = norm([rdual; rcent; rpri]); | ||
|
||
done = (sdg < pdtol) | (pditer >= pdmaxiter); | ||
|
||
disp(sprintf('Iteration = %d, tau = %8.3e, Primal = %8.3e, PDGap = %8.3e, Dual res = %8.3e, Primal res = %8.3e',... | ||
pditer, tau, sum(u), sdg, norm(rdual), norm(rpri))); | ||
if (largescale) | ||
disp(sprintf(' CG Res = %8.3e, CG Iter = %d', cgres, cgiter)); | ||
else | ||
disp(sprintf(' H11p condition number = %8.3e', hcond)); | ||
end | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
%Import our original data and divide it into sub block. For each block, rebuild it by recover.m | ||
|
||
%{ | ||
input£ºk£ºAssumed sparsity level of the original data. Used to determine pool numbers. | ||
pcell£ºApproximately equal to the number of cells used in each pool. | ||
p£ºDetermining the columns of M together with pcell. | ||
alpha£ºNoise. When the value is 1, there is no noise. | ||
output£ºX£ºThe original data of each sub block | ||
M£ºThe measurement matrix for each sub block | ||
R£ºThe inference of each sub block | ||
%} | ||
|
||
function [X,M,R] = process(k,pcell,p,alpha) | ||
|
||
original = importdata('count4070.txt'); | ||
XX = [original.data]; | ||
XX = XX'; | ||
|
||
ncolM = floor(pcell/p); % the number of columns per M | ||
|
||
C = {}; | ||
|
||
% If the column size of the last block is bigger than half of the previous one, | ||
% it will become a new seperate block, else merge it into previous one. | ||
ni = floor(size(XX,1)/ncolM); | ||
for i = 1:ni | ||
C{i} = XX((ncolM*(i-1)+1):ncolM*i,:); | ||
end | ||
if size(XX,1)-ncolM*ni>ncolM/2 | ||
C{ni+1} = XX(ncolM*ni+1:size(XX,1),:); | ||
else | ||
C{ni} = [C{ni};XX(ncolM*ni+1:size(XX,1),:)]; | ||
end | ||
|
||
|
||
M = {};% Used to save the measurement matrix of each block | ||
R = {};% Used to save the inference of each block | ||
X = {};% Used to save the original data of each sub block | ||
for i = 1:length(C) | ||
XX = C{i}; | ||
X = [X XX]; | ||
[MM,recoverXX] = recover(floor(k*size(XX,1))*2,size(XX,1),p,XX,alpha); | ||
M = [M MM]; | ||
R = [R recoverXX]; | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
%For each sub block, we generate a measurement matrix M and add noise e. | ||
%Using the noisy measurement matrix eM together observed value y to inference recoverXX. | ||
|
||
%{ | ||
input:m:The number of rows for each sub block. | ||
n:The number of columns of each sub block. | ||
p:Determine the proportion of 1 in the M matrix. | ||
XX:Original data. Is used to generate the observed value y. | ||
alpha:Noise. When the value is 1, there is no noise. | ||
output:M:the measurement matrix of each sub block. | ||
recoverXX:inference sub X. | ||
%} | ||
|
||
function [M,recoverXX] = recover(m,n,p,XX,alpha) | ||
recoverXX = []; | ||
M = double(rand(m,n)<p); %Use uniform distribution of [0,1] to randomly generate M (m×n). | ||
%For each element in M, if value is less than p, we set to 1, otherwise 0. | ||
|
||
|
||
%The code below is used to generate the full observation matrix. | ||
%{ | ||
a(1:round(n/4))=1; | ||
a(round(n/4)+1:round(2*n/4))=0.1; | ||
a(round(2*n/4)+1:round(3*n/4))=0.01; | ||
a(round(3*n/4)+1:n)=0.001; | ||
M=[]; | ||
for i=1:m | ||
a = a(randperm(length(a))); | ||
M = [M;a]; | ||
end | ||
%} | ||
|
||
e = unifrnd(alpha,2-alpha,m,n); %Use uniform distribution of [alpha,2-alpha] to randomly generate erorr(m×n). | ||
eM = e.*M; | ||
for i = 1:size(XX,2) | ||
y = eM*XX(:,i); | ||
if sum(y) == 0 | ||
xp = 0*XX(:,i); | ||
recoverXX = [recoverXX xp]; | ||
else | ||
[w, hcond] = linsolve(M*M',y); | ||
x0 = M'*w; | ||
xp = l1eq_pd(x0, M, [], y); | ||
recoverXX = [recoverXX xp]; | ||
end | ||
end | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
%{ | ||
input£º | ||
k£ºAssumed sparsity level of the original data. Used to determine pool numbers. | ||
pcell£ºApproximately equal to the number of cells used in each pool. | ||
p£ºDetermining the columns of M together with pcell. | ||
out£º A mat file include: | ||
X(The original data of each sub block) | ||
M(The measurement matrix for each sub block) | ||
R(The inference of each sub block) | ||
median correlation of all genes | ||
%} | ||
|
||
function star(k,pcell,p,alpha) | ||
k = str2num(k); | ||
pcell = str2num(pcell); | ||
p = str2num(p); | ||
alpha = str2num(alpha); | ||
|
||
[X,M,R] = process(k,pcell,p,alpha); | ||
|
||
|
||
%Correlation analysis | ||
XX = []; | ||
recoverXX = []; % Combine each sub block to original dimension | ||
for i = 1:length(R) | ||
XX = [XX;X{i}]; | ||
recoverXX = [recoverXX;R{i}]; | ||
end | ||
|
||
recoverXX(recoverXX<0) = 0; % Cleaning our reconstructed data | ||
recoverXX = round(recoverXX); | ||
|
||
CC=[]; | ||
for i = 1:size(XX,2) | ||
C = corrcoef(XX(:,i),recoverXX(:,i)); | ||
C = C(1,2); | ||
CC = [CC;C]; | ||
end | ||
|
||
data = CC; | ||
data(isnan(data)) = 1; | ||
|
||
|
||
an = mean(data); | ||
dian = median(data); | ||
|
||
|
||
fname = ['k_',num2str(k),'_pcell_',num2str(pcell),'_p_',num2str(p),'_a_',num2str(alpha),'_pj_',num2str(an),'_zw_',num2str(dian),'.mat']; | ||
save(fname,'X','M','R','an','dian','-mat'); | ||
%end |