Skip to content

Commit

Permalink
Additional decomposition on fit_fast
Browse files (browse the repository at this point in the history)
  • Loading branch information
bquistorff committed May 6, 2020
1 parent dc742e8 commit e54963d
Showing 1 changed file with 77 additions and 21 deletions.
98 changes: 77 additions & 21 deletions src/SparseSC/fit_fast.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,12 +18,13 @@ def fit_fast( # pylint: disable=unused-argument, missing-raises-doc
targets,
model_type="restrospective",
treated_units=None,
w_pens = np.logspace(start=-5, stop=5, num=40),
w_pens = None,
custom_donor_pool=None,
match_space_maker = None,
w_pen_inner=True,
avoid_NxN_mats=False,
verbose=0,
targets_aux=None,
**kwargs #keep so that calls can switch easily between fit() and fit_fast()
):
r"""
Expand All @@ -44,7 +45,7 @@ def fit_fast( # pylint: disable=unused-argument, missing-raises-doc
:type treated_units: int[], Optional
:param w_pens: Penalization values to try when searching for unit weights.
:type w_pens: float[], default=None
:type w_pens: float[], default=np.logspace(start=-5, stop=5, num=40)
:param treated_units: An iterable indicating the rows
of `X` and `Y` which contain data from treated units.
Expand Down Expand Up @@ -92,6 +93,7 @@ def fit_fast( # pylint: disable=unused-argument, missing-raises-doc
tracemalloc.start()
X = features
Y = targets
w_pens = np.logspace(start=-5, stop=5, num=40) if w_pens is None else w_pens
try:
X = np.float64(X)
except ValueError:
Expand Down Expand Up @@ -132,7 +134,7 @@ def _fit_fast_wrapper(MatchSpace, V):

return _fit_fast_inner(X, M, Y, V, model_type, treated_units, best_v_pen, w_pens, custom_donor_pool,
MatchSpace, MatchSpaceDesc, w_pen_inner=w_pen_inner, avoid_NxN_mats=avoid_NxN_mats,
verbose=verbose)
verbose=verbose, Y_aux=targets_aux)


def _weights(V , X_treated, X_control, w_pen):
Expand Down Expand Up @@ -160,11 +162,13 @@ def _sc_weights_trad(M, M_c, V, N, N0, custom_donor_pool, best_w_pen, verbose=0)
weight_log_inc = max(int(N/100), 1)
for i in range(N):
if ((i % weight_log_inc) == 0 and verbose>0):
print_progress(i, N)
print_progress(i+1, N)
if verbose > 1:
print_memory_snapshot(extra_str="Loop " + str(i))
allowed = custom_donor_pool[i,:]
sc_weights[i,allowed] = _weights(V, M[i,:], M_c[allowed,:], best_w_pen)
if ((N-1) % weight_log_inc) != 0 and verbose > 0:
print_progress(N, N)
return sc_weights

def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=True, Y_c=None, verbose=0):
Expand All @@ -182,7 +186,7 @@ def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=Tr
Y_sc = np.full((N, Y_c.shape[1]), 0.)
for i in range(N):
if ((i % weight_log_inc) == 0 and verbose > 0):
print_progress(i, N)
print_progress(i+1, N)
if verbose > 1:
print_memory_snapshot(extra_str="Loop " + str(i))
if i in control_units:
Expand All @@ -204,6 +208,8 @@ def _RidgeSolution(M, control_units, V, w_pen, custom_donor_pool, ret_weights=Tr
if Y_c is not None:
Y_sc[i,:] = weights_i.dot(Y_c)

if ((N-1) % weight_log_inc) != 0 and verbose > 0:
print_progress(N, N)
ret = ()
if ret_weights:
ret = (*ret, weights)
Expand All @@ -225,7 +231,8 @@ def _fit_fast_inner(
match_space_desc = None,
w_pen_inner=True,
avoid_NxN_mats=False,
verbose=0
verbose=0,
Y_aux=None,
):
#returns in-sample score
if treated_units is not None:
Expand All @@ -245,18 +252,6 @@ def _fit_fast_inner(

if len(V) == 0 or M.shape[1]==0:
best_v_pen, best_w_pen, M = None, None, None
log_if_necessary("Completed calculation of best_w_pen", verbose)
sc_weights = None if avoid_NxN_mats else np.full((N,N0), 0.)
Y_c = Y[control_units, :]
Y_sc = np.full((N, Y_c.shape[1]), 0.)
for i in range(N):
weights_i = np.full((1,N0), 0.)
allowed = custom_donor_pool[i,:]
weights_i[0,allowed] = 1/np.sum(allowed)
if not avoid_NxN_mats:
sc_weights[i,:] = weights_i
Y_sc[i,:] = weights_i.dot(Y_c)
log_if_necessary("Completed calculation of sc_weights", verbose)
else:
M_c = M[control_units,:]
separate_calcs = True if avoid_NxN_mats else None
Expand All @@ -279,8 +274,64 @@ def _fit_fast_inner(
if mscore<best_w_pen_score:
best_w_pen = w_pen
best_w_pen_score = mscore
log_if_necessary("Completed calculation of best_w_pen", verbose)
log_if_necessary("Completed calculation of best_w_pen", verbose)

return _fit_fast_match(X, M, Y, V, model_type, treated_units, best_v_pen, best_w_pen, custom_donor_pool,
match_space_trans, match_space_desc, w_pen_inner=w_pen_inner, avoid_NxN_mats=avoid_NxN_mats,
verbose=verbose, Y_aux=Y_aux)

def _fit_fast_match(
X,
M,
Y,
V,
model_type="restrospective",
treated_units=None,
best_v_pen = None,
best_w_pen = None,
custom_donor_pool=None,
match_space_trans = None,
match_space_desc = None,
w_pen_inner=True,
avoid_NxN_mats=False,
verbose=0,
Y_aux=None,
):
if treated_units is not None:
control_units = [u for u in range(Y.shape[0]) if u not in treated_units]
N0, N1 = len(control_units), len(treated_units)
else:
control_units = [u for u in range(Y.shape[0])]
N0, N1 = Y.shape[0], 0
N = N0 + N1
fit_units = _get_fit_units(model_type, control_units, treated_units, N)
if custom_donor_pool is not None:
assert custom_donor_pool.shape == (N,N0)
else:
custom_donor_pool = np.full((N,N0), True)
custom_donor_pool = _ensure_good_donor_pool(custom_donor_pool, control_units)

Y_aux_sc = None

if len(V) == 0 or M.shape[1]==0:
sc_weights = None if avoid_NxN_mats else np.full((N,N0), 0.)
Y_c = Y[control_units, :]
Y_sc = np.full((N, Y_c.shape[1]), 0.)
if Y_aux is not None:
Y_aux_sc = np.full(Y_aux.shape, 0.)
Y_aux_c = Y_aux[control_units,:]
for i in range(N):
weights_i = np.full((1,N0), 0.)
allowed = custom_donor_pool[i,:]
weights_i[0,allowed] = 1/np.sum(allowed)
if not avoid_NxN_mats:
sc_weights[i,:] = weights_i
Y_sc[i,:] = weights_i.dot(Y_c)
if Y_aux is not None:
Y_aux_sc[i,:] = weights_i.dot(Y_aux_c)
log_if_necessary("Completed calculation of sc_weights", verbose)
else:
M_c = M[control_units,:]
Y_c = Y[control_units, :]
if not avoid_NxN_mats:
sc_weights = _sc_weights_trad(M, M_c, V, N, N0, custom_donor_pool, best_w_pen, verbose=verbose)
Expand All @@ -290,9 +341,12 @@ def _fit_fast_inner(
sc_weights = None
Y_sc = _RidgeSolution(M, control_units, V, best_w_pen, custom_donor_pool, Y_c=Y_c, ret_weights=False,
verbose=verbose)[0]
if Y_aux is not None:
Y_aux_sc = _RidgeSolution(M, control_units, V, best_w_pen, custom_donor_pool, Y_c=Y_aux[control_units, :], ret_weights=False,
verbose=verbose)[0]
log_if_necessary("Completed calculation of (temp.) sc_weights", verbose)


log_if_necessary("Completed calculation of synthetic controls", verbose)
mscore = np.sum(np.square(Y[fit_units,:] - Y_sc[fit_units,:]))

Expand All @@ -312,5 +366,7 @@ def _fit_fast_inner(
match_space = M,
match_space_desc = match_space_desc
)
if Y_aux is not None:
fit_obj.Y_aux_sc = Y_aux_sc

return fit_obj

0 comments on commit e54963d

Please sign in to comment.