Additional decomposition on fit_fast

microsoft · May 6, 2020 · e54963d · e54963d
1 parent dc742e8
commit e54963d
Showing 1 changed file with 77 additions and 21 deletions.
diff --git a/src/SparseSC/fit_fast.py b/src/SparseSC/fit_fast.py
@@ -18,12 +18,13 @@ def fit_fast(  # pylint: disable=unused-argument, missing-raises-doc
     targets,
     model_type="restrospective",
     treated_units=None,
-    w_pens = np.logspace(start=-5, stop=5, num=40),
+    w_pens = None,
     custom_donor_pool=None,  
     match_space_maker = None,
     w_pen_inner=True,
     avoid_NxN_mats=False,
     verbose=0,
+    targets_aux=None,
     **kwargs #keep so that calls can switch easily between fit() and fit_fast()
 ):
     r"""
@@ -44,7 +45,7 @@ def fit_fast(  # pylint: disable=unused-argument, missing-raises-doc
     :type treated_units: int[], Optional
     
     :param w_pens:  Penalization values to try when searching for unit weights.
-    :type w_pens: float[], default=None
+    :type w_pens: float[], default=None (resolved to np.logspace(start=-5, stop=5, num=40))
     
     :param treated_units:  An iterable indicating the rows
         of `X` and `Y` which contain data from treated units.
@@ -92,6 +93,7 @@ def fit_fast(  # pylint: disable=unused-argument, missing-raises-doc
         tracemalloc.start()
     X = features
     Y = targets
+    w_pens = np.logspace(start=-5, stop=5, num=40) if w_pens is None else w_pens
     try:
         X = np.float64(X)
     except ValueError:
@@ -132,7 +134,7 @@ def _fit_fast_wrapper(MatchSpace, V):
 
     return _fit_fast_inner(X, M, Y, V, model_type, treated_units, best_v_pen, w_pens, custom_donor_pool, 
                            MatchSpace, MatchSpaceDesc, w_pen_inner=w_pen_inner, avoid_NxN_mats=avoid_NxN_mats, 
-                           verbose=verbose)
+                           verbose=verbose, Y_aux=targets_aux)
 
 
 def _weights(V , X_treated, X_control, w_pen):
@@ -160,11 +162,13 @@ def _sc_weights_trad(M, M_c, V, N, N0, custom_donor_pool, best_w_pen, verbose=0)
     weight_log_inc = max(int(N/100), 1)
     for i in range(N):
         if ((i % weight_log_inc) == 0 and verbose>0):
-            print_progress(i, N)
+            print_progress(i+1, N)
             if verbose > 1:
                 print_memory_snapshot(extra_str="Loop " + str(i))
         allowed = custom_donor_pool[i,:]
         sc_weights[i,allowed] = _weights(V, M[i,:], M_c[allowed,:], best_w_pen)
+    if ((N-1) % weight_log_inc) != 0 and verbose > 0:
+        print_progress(N, N)
     return sc_weights
 
 def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=True, Y_c=None, verbose=0):
@@ -182,7 +186,7 @@ def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=Tr
         Y_sc = np.full((N, Y_c.shape[1]), 0.)
     for i in range(N):
         if ((i % weight_log_inc) == 0 and verbose > 0):
-            print_progress(i, N)
+            print_progress(i+1, N)
             if verbose > 1:
                 print_memory_snapshot(extra_str="Loop " + str(i))
         if i in control_units:
@@ -204,6 +208,8 @@ def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=Tr
         if Y_c is not None:
             Y_sc[i,:] = weights_i.dot(Y_c)
 
+    if ((N-1) % weight_log_inc) != 0 and verbose > 0:
+        print_progress(N, N)
     ret = ()
     if ret_weights:
         ret = (*ret, weights)
@@ -225,7 +231,8 @@ def _fit_fast_inner(
     match_space_desc = None,
     w_pen_inner=True,
     avoid_NxN_mats=False,
-    verbose=0
+    verbose=0,
+    Y_aux=None,
 ):
     #returns in-sample score
     if treated_units is not None:
@@ -245,18 +252,6 @@ def _fit_fast_inner(
 
     if len(V) == 0 or M.shape[1]==0:
         best_v_pen, best_w_pen, M = None, None, None
-        log_if_necessary("Completed calculation of best_w_pen", verbose)
-        sc_weights = None if avoid_NxN_mats else np.full((N,N0), 0.)
-        Y_c = Y[control_units, :]
-        Y_sc = np.full((N, Y_c.shape[1]), 0.)
-        for i in range(N):
-            weights_i = np.full((1,N0), 0.)
-            allowed = custom_donor_pool[i,:]
-            weights_i[0,allowed] = 1/np.sum(allowed)
-            if not avoid_NxN_mats:
-                sc_weights[i,:] = weights_i
-            Y_sc[i,:] = weights_i.dot(Y_c)
-        log_if_necessary("Completed calculation of sc_weights", verbose)
     else:
         M_c = M[control_units,:]
         separate_calcs = True if avoid_NxN_mats else None
@@ -279,8 +274,64 @@ def _fit_fast_inner(
                 if mscore<best_w_pen_score:
                     best_w_pen = w_pen
                     best_w_pen_score = mscore
-        log_if_necessary("Completed calculation of best_w_pen", verbose)
+    log_if_necessary("Completed calculation of best_w_pen", verbose)
 
+    return _fit_fast_match(X, M, Y, V, model_type, treated_units, best_v_pen, best_w_pen, custom_donor_pool, 
+                           match_space_trans, match_space_desc, w_pen_inner=w_pen_inner, avoid_NxN_mats=avoid_NxN_mats, 
+                           verbose=verbose, Y_aux=Y_aux)
+
+def _fit_fast_match(
+    X, 
+    M,
+    Y,
+    V,
+    model_type="restrospective",
+    treated_units=None,
+    best_v_pen = None,
+    best_w_pen = None,
+    custom_donor_pool=None,
+    match_space_trans = None,
+    match_space_desc = None,
+    w_pen_inner=True,
+    avoid_NxN_mats=False,
+    verbose=0,
+    Y_aux=None,
+):
+    if treated_units is not None:
+        control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
+        N0, N1 = len(control_units), len(treated_units)
+    else:
+        control_units = [u for u in range(Y.shape[0])]
+        N0, N1 = Y.shape[0], 0
+    N = N0 + N1
+    fit_units = _get_fit_units(model_type, control_units, treated_units, N)
+    if custom_donor_pool is not None:
+        assert custom_donor_pool.shape == (N,N0)
+    else:
+        custom_donor_pool = np.full((N,N0), True)
+    custom_donor_pool = _ensure_good_donor_pool(custom_donor_pool, control_units)
+
+    Y_aux_sc = None
+
+    if len(V) == 0 or M.shape[1]==0:
+        sc_weights = None if avoid_NxN_mats else np.full((N,N0), 0.)
+        Y_c = Y[control_units, :]
+        Y_sc = np.full((N, Y_c.shape[1]), 0.)
+        if Y_aux is not None:
+            Y_aux_sc = np.full(Y_aux.shape, 0.)
+            Y_aux_c = Y_aux[control_units,:]
+        for i in range(N):
+            weights_i = np.full((1,N0), 0.)
+            allowed = custom_donor_pool[i,:]
+            weights_i[0,allowed] = 1/np.sum(allowed)
+            if not avoid_NxN_mats:
+                sc_weights[i,:] = weights_i
+            Y_sc[i,:] = weights_i.dot(Y_c)
+            if Y_aux is not None:
+                Y_aux_sc[i,:] = weights_i.dot(Y_aux_c)
+        log_if_necessary("Completed calculation of sc_weights", verbose)
+    else:
+        M_c = M[control_units,:]
         Y_c = Y[control_units, :]
         if not avoid_NxN_mats:
             sc_weights = _sc_weights_trad(M, M_c, V, N, N0, custom_donor_pool, best_w_pen, verbose=verbose)
@@ -290,9 +341,12 @@ def _fit_fast_inner(
             sc_weights = None
             Y_sc = _RidgeSolution(M, control_units, V, best_w_pen, custom_donor_pool, Y_c=Y_c, ret_weights=False, 
                                   verbose=verbose)[0]
+            if Y_aux is not None:
+                Y_aux_sc = _RidgeSolution(M, control_units, V, best_w_pen, custom_donor_pool, Y_c=Y_aux[control_units, :], ret_weights=False, 
+                                      verbose=verbose)[0]
             log_if_necessary("Completed calculation of (temp.) sc_weights", verbose)
-        
-        
+
+
     log_if_necessary("Completed calculation of synthetic controls", verbose)
     mscore = np.sum(np.square(Y[fit_units,:] - Y_sc[fit_units,:]))
 
@@ -312,5 +366,7 @@ def _fit_fast_inner(
         match_space = M,
         match_space_desc = match_space_desc
     )
+    if Y_aux is not None:
+        fit_obj.Y_aux_sc = Y_aux_sc
 
     return fit_obj