Add files via upload

Irenexzwen · May 23, 2018 · 1a47564 · 1a47564
1 parent efb0104
commit 1a47564
Show file tree

Hide file tree

Showing 4 changed files with 356 additions and 0 deletions.
diff --git a/l1eq_pd.m b/l1eq_pd.m
@@ -0,0 +1,210 @@
+% l1eq_pd.m
+%
+% Solve
+% min_x ||x||_1  s.t.  Ax = b
+%
+% Recast as linear program
+% min_{x,u} sum(u)  s.t.  -u <= x <= u,  Ax=b
+% and use primal-dual interior point method
+%
+% Usage: xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)
+%
+% x0 - Nx1 vector, initial point.
+%
+% A - Either a handle to a function that takes a N vector and returns a K 
+%     vector , or a KxN matrix.  If A is a function handle, the algorithm
+%     operates in "largescale" mode, solving the Newton systems via the
+%     Conjugate Gradients algorithm.
+%
+% At - Handle to a function that takes a K vector and returns an N vector.
+%      If A is a KxN matrix, At is ignored.
+%
+% b - Kx1 vector of observations.
+%
+% pdtol - Tolerance for primal-dual algorithm (algorithm terminates if
+%     the duality gap is less than pdtol).  
+%     Default = 1e-3.
+%
+% pdmaxiter - Maximum number of primal-dual iterations.  
+%     Default = 50.
+%
+% cgtol - Tolerance for Conjugate Gradients; ignored if A is a matrix.
+%     Default = 1e-8.
+%
+% cgmaxiter - Maximum number of iterations for Conjugate Gradients; ignored
+%     if A is a matrix.
+%     Default = 200.
+%
+% Written by: Justin Romberg, Caltech
+% Email: [email protected]
+% Created: October 2005
+%
+
+function xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)
+
+largescale = isa(A,'function_handle');
+
+if (nargin < 5), pdtol = 1e-3;  end
+if (nargin < 6), pdmaxiter = 50;  end
+if (nargin < 7), cgtol = 1e-8;  end
+if (nargin < 8), cgmaxiter = 200;  end
+
+N = length(x0);
+
+alpha = 0.01;
+beta = 0.5;
+mu = 10;
+
+gradf0 = [zeros(N,1); ones(N,1)];
+
+% starting point --- make sure that it is feasible
+if (largescale)
+  if (norm(A(x0)-b)/norm(b) > cgtol)
+    disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
+    AAt = @(z) A(At(z));
+    [w, cgres, cgiter] = cgsolve(AAt, b, cgtol, cgmaxiter, 0);
+    if (cgres > 1/2)
+      disp('A*At is ill-conditioned: cannot find starting point');
+      xp = x0;
+      return;
+    end
+    x0 = At(w);
+  end
+else
+  if (norm(A*x0-b)/norm(b) > cgtol)
+    disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
+    opts.POSDEF = true; opts.SYM = true;
+    [w, hcond] = linsolve(A*A', b, opts);
+    if (hcond < 1e-14)
+      disp('A*At is ill-conditioned: cannot find starting point');
+      xp = x0;
+      return;
+    end
+    x0 = A'*w;
+  end  
+end
+x = x0;
+u = (0.95)*abs(x0) + (0.10)*max(abs(x0));
+
+% set up for the first iteration
+fu1 = x - u;
+fu2 = -x - u;
+lamu1 = -1./fu1;
+lamu2 = -1./fu2;
+if (largescale)
+  v = -A(lamu1-lamu2);
+  Atv = At(v);
+  rpri = A(x) - b;
+else
+  v = -A*(lamu1-lamu2);
+  Atv = A'*v;
+  rpri = A*x - b;
+end
+
+sdg = -(fu1'*lamu1 + fu2'*lamu2);
+tau = mu*2*N/sdg;
+
+rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
+rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
+resnorm = norm([rdual; rcent; rpri]);
+
+pditer = 0;
+done = (sdg < pdtol) | (pditer >= pdmaxiter);
+while (~done)
+
+  pditer = pditer + 1;
+
+  w1 = -1/tau*(-1./fu1 + 1./fu2) - Atv;
+  w2 = -1 - 1/tau*(1./fu1 + 1./fu2);
+  w3 = -rpri;
+
+  sig1 = -lamu1./fu1 - lamu2./fu2;
+  sig2 = lamu1./fu1 - lamu2./fu2;
+  sigx = sig1 - sig2.^2./sig1;
+
+  if (largescale)
+    w1p = w3 - A(w1./sigx - w2.*sig2./(sigx.*sig1));
+    h11pfun = @(z) -A(1./sigx.*At(z));
+    [dv, cgres, cgiter] = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0);
+    if (cgres > 1/2)
+      disp('Cannot solve system.  Returning previous iterate.  (See Section 4 of notes for more information.)');
+      xp = x;
+      return
+    end
+    dx = (w1 - w2.*sig2./sig1 - At(dv))./sigx;
+    Adx = A(dx);
+    Atdv = At(dv);
+  else
+    w1p = -(w3 - A*(w1./sigx - w2.*sig2./(sigx.*sig1)));
+    H11p = A*(sparse(diag(1./sigx))*A');
+    opts.POSDEF = true; opts.SYM = true;
+    %[dv,hcond] = linsolve(H11p, w1p, opts);
+    [dv,hcond] = linsolve(H11p, w1p);
+    if (hcond < 1e-14)
+      disp('Matrix ill-conditioned.  Returning previous iterate.  (See Section 4 of notes for more information.)');
+      xp = x;
+      return
+    end
+    dx = (w1 - w2.*sig2./sig1 - A'*dv)./sigx;
+    Adx = A*dx;
+    Atdv = A'*dv;
+  end
+
+  du = (w2 - sig2.*dx)./sig1;
+
+  dlamu1 = (lamu1./fu1).*(-dx+du) - lamu1 - (1/tau)*1./fu1;
+  dlamu2 = (lamu2./fu2).*(dx+du) - lamu2 - 1/tau*1./fu2;
+
+  % make sure that the step is feasible: keeps lamu1,lamu2 > 0, fu1,fu2 < 0
+  indp = find(dlamu1 < 0);  indn = find(dlamu2 < 0);
+  s = min([1; -lamu1(indp)./dlamu1(indp); -lamu2(indn)./dlamu2(indn)]);
+  indp = find((dx-du) > 0);  indn = find((-dx-du) > 0);
+  s = (0.99)*min([s; -fu1(indp)./(dx(indp)-du(indp)); -fu2(indn)./(-dx(indn)-du(indn))]);
+
+  % backtracking line search
+  suffdec = 0;
+  backiter = 0;
+  while (~suffdec)
+    xp = x + s*dx;  up = u + s*du; 
+    vp = v + s*dv;  Atvp = Atv + s*Atdv; 
+    lamu1p = lamu1 + s*dlamu1;  lamu2p = lamu2 + s*dlamu2;
+    fu1p = xp - up;  fu2p = -xp - up;  
+    rdp = gradf0 + [lamu1p-lamu2p; -lamu1p-lamu2p] + [Atvp; zeros(N,1)];
+    rcp = [-lamu1p.*fu1p; -lamu2p.*fu2p] - (1/tau);
+    rpp = rpri + s*Adx;
+    suffdec = (norm([rdp; rcp; rpp]) <= (1-alpha*s)*resnorm);
+    s = beta*s;
+    backiter = backiter + 1;
+    if (backiter > 32)
+      disp('Stuck backtracking, returning last iterate.  (See Section 4 of notes for more information.)')
+      xp = x;
+      return
+    end
+  end
+
+
+  % next iteration
+  x = xp;  u = up;
+  v = vp;  Atv = Atvp; 
+  lamu1 = lamu1p;  lamu2 = lamu2p;
+  fu1 = fu1p;  fu2 = fu2p;
+
+  % surrogate duality gap
+  sdg = -(fu1'*lamu1 + fu2'*lamu2);
+  tau = mu*2*N/sdg;
+  rpri = rpp;
+  rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
+  rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
+  resnorm = norm([rdual; rcent; rpri]);
+
+  done = (sdg < pdtol) | (pditer >= pdmaxiter);
+
+  disp(sprintf('Iteration = %d, tau = %8.3e, Primal = %8.3e, PDGap = %8.3e, Dual res = %8.3e, Primal res = %8.3e',...
+    pditer, tau, sum(u), sdg, norm(rdual), norm(rpri)));
+  if (largescale)
+    disp(sprintf('                  CG Res = %8.3e, CG Iter = %d', cgres, cgiter));
+  else
+    disp(sprintf('                  H11p condition number = %8.3e', hcond));
+  end
+
+end
diff --git a/process.m b/process.m
@@ -0,0 +1,46 @@
+%Import our original data and divide it into sub block. For each block, rebuild it by recover.m
+
+%{
+    input£ºk£ºAssumed sparsity level of the original data. Used to determine pool numbers.
+           pcell£ºApproximately equal to the number of cells used in each pool.
+           p£ºDetermining the columns of M together with pcell.
+           alpha£ºNoise. When the value is 1, there is no noise. 
+    output£ºX£ºThe original data of each sub block
+            M£ºThe measurement matrix for each sub block
+            R£ºThe inference of each sub block
+%}
+
+function [X,M,R] = process(k,pcell,p,alpha)
+
+original = importdata('count4070.txt');
+XX = [original.data];
+XX = XX';
+
+ncolM = floor(pcell/p);	% the number of columns per M
+
+C = {};
+
+% If the column size of the last block is bigger than half of the previous one, 
+% it will become a new seperate block, else merge it into previous one.
+ni = floor(size(XX,1)/ncolM);
+for i = 1:ni
+    C{i} = XX((ncolM*(i-1)+1):ncolM*i,:);
+end
+if size(XX,1)-ncolM*ni>ncolM/2
+    C{ni+1} = XX(ncolM*ni+1:size(XX,1),:);
+else
+    C{ni} = [C{ni};XX(ncolM*ni+1:size(XX,1),:)];
+end
+
+
+M = {};% Used to save the measurement matrix of each block
+R = {};% Used to save the inference of each block
+X = {};% Used to save the original data of each sub block
+for i = 1:length(C)
+    XX = C{i};
+    X = [X XX];
+    [MM,recoverXX] = recover(floor(k*size(XX,1))*2,size(XX,1),p,XX,alpha);
+    M = [M MM];
+    R = [R recoverXX];
+end
+
diff --git a/recover.m b/recover.m
@@ -0,0 +1,48 @@
+%For each sub block, we generate a measurement matrix M and add noise e. 
+%Using the noisy measurement matrix eM together observed value y to inference recoverXX.
+
+%{
+    input：m：The number of rows for each sub block.
+           n：The number of columns of each sub block.
+           p：Determine the proportion of 1 in the M matrix.
+           XX：Original data. Is used to generate the observed value y.
+           alpha：Noise. When the value is 1, there is no noise.
+    output：M：the measurement matrix of each sub block.
+            recoverXX：inference sub X.
+%}
+
+function [M,recoverXX] = recover(m,n,p,XX,alpha)
+recoverXX = [];
+M = double(rand(m,n)<p);  %Use uniform distribution of [0,1] to randomly generate M (m×n).
+                          %For each element in M, if value is less than p, we set to 1, otherwise 0.
+
+
+%The code below is used to generate the full observation matrix.
+%{ 
+a(1:round(n/4))=1;
+a(round(n/4)+1:round(2*n/4))=0.1;
+a(round(2*n/4)+1:round(3*n/4))=0.01;
+a(round(3*n/4)+1:n)=0.001;
+M=[];
+for i=1:m
+    a = a(randperm(length(a)));
+    M = [M;a];
+end
+%}
+
+e = unifrnd(alpha,2-alpha,m,n); %Use uniform distribution of [alpha,2-alpha] to randomly generate erorr(m×n).
+eM = e.*M;                                  
+for i = 1:size(XX,2)
+	y = eM*XX(:,i);            
+	if sum(y) == 0              
+		xp = 0*XX(:,i);         
+		recoverXX = [recoverXX xp];
+	else
+		[w, hcond] = linsolve(M*M',y);
+		x0 = M'*w;
+		xp = l1eq_pd(x0, M, [], y);
+		recoverXX = [recoverXX xp];
+	end
+end
+end
+
diff --git a/star.m b/star.m
@@ -0,0 +1,52 @@
+%{
+    input£º
+		   k£ºAssumed sparsity level of the original data. Used to determine pool numbers.
+           pcell£ºApproximately equal to the number of cells used in each pool.
+           p£ºDetermining the columns of M together with pcell.
+		   
+		   
+	out£º  A mat file include:	
+			   X(The original data of each sub block)
+			   M(The measurement matrix for each sub block)
+			   R(The inference of each sub block)
+			   median correlation of all genes 
+%}
+
+function star(k,pcell,p,alpha)
+k = str2num(k);
+pcell = str2num(pcell);
+p = str2num(p);
+alpha = str2num(alpha);
+
+[X,M,R] = process(k,pcell,p,alpha);
+
+
+%Correlation analysis
+XX = [];
+recoverXX = [];	% Combine each sub block to original dimension
+for i = 1:length(R)
+    XX = [XX;X{i}];
+    recoverXX = [recoverXX;R{i}];
+end
+
+recoverXX(recoverXX<0) = 0;	% Cleaning our reconstructed data
+recoverXX = round(recoverXX);
+
+CC=[];
+for i = 1:size(XX,2)
+    C = corrcoef(XX(:,i),recoverXX(:,i));	
+    C = C(1,2);
+    CC = [CC;C];
+end
+
+data = CC;
+data(isnan(data)) = 1;
+
+
+an = mean(data);
+dian = median(data);
+
+
+fname = ['k_',num2str(k),'_pcell_',num2str(pcell),'_p_',num2str(p),'_a_',num2str(alpha),'_pj_',num2str(an),'_zw_',num2str(dian),'.mat'];
+save(fname,'X','M','R','an','dian','-mat');
+%end