Skip to content

Commit

Permalink
Merge pull request #236 from knaaptime/builddocs
Browse files Browse the repository at this point in the history
use ax instead of plt
  • Loading branch information
knaaptime authored Feb 4, 2025
2 parents bd169f1 + a467c81 commit 3b3ee00
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 46 deletions.
33 changes: 16 additions & 17 deletions segregation/inference/inference_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
from tqdm.auto import tqdm

from .._base import MultiGroupIndex
from .comparative import (DUAL_SIMULATORS, _estimate_counterfac_difference,
_estimate_random_label_difference,
_generate_counterfactual, _prepare_random_label)
from .comparative import (
DUAL_SIMULATORS,
_estimate_counterfac_difference,
_estimate_random_label_difference,
_generate_counterfactual,
_prepare_random_label,
)
from .randomization import SIMULATORS, simulate_null


Expand Down Expand Up @@ -45,12 +49,12 @@ def _infer_segregation(
* ``bootstrap``:
generates bootstrap replications of the units with replacement of the same size of the
original data. This procedure creates a confidence interval for the index statistic to test
original data. This procedure creates a confidence interval for the index statistic to test
whether the null value lies within.
* ``evenness``:
assumes that each spatial unit has the same global probability of drawing elements from the
minority group of the fixed total unit population (binomial distribution).
minority group of the fixed total unit population (binomial distribution).
* ``person_permutation``:
randomly allocates individuals into units keeping the total population of each
Expand Down Expand Up @@ -224,7 +228,6 @@ def __init__(
n_jobs=-1,
**kwargs,
):

aux = _infer_segregation(
seg_class,
iterations_under_null,
Expand Down Expand Up @@ -433,7 +436,6 @@ def _compare_segregation(
"share",
"dual_composition",
]:

if isinstance(seg_class_1, MultiGroupIndex):
raise ValueError("Not implemented for MultiGroup indexes.")

Expand Down Expand Up @@ -575,7 +577,6 @@ def __init__(
index_kwargs_2=None,
**kwargs,
):

aux = _compare_segregation(
seg_class_1,
seg_class_2,
Expand Down Expand Up @@ -620,21 +621,19 @@ def plot(self, color="darkblue", color2="darkred", kde=True, ax=None, **kwargs):
import seaborn as sns
except ImportError:
warnings.warn("This method relies on importing `matplotlib` and `seaborn`")
if ax is None:
_, ax = plt.subplots()

if self._null_approach == "bootstrap":
ax = sns.histplot(self.est_sim[0], color=color, kde=kde, ax=ax, **kwargs)
ax = sns.histplot(self.est_sim[1], color=color2, kde=kde, ax=ax, **kwargs)
plt.title(
"{} (Diff. value = {})".format(
self._class_name, round(self.est_point_diff, 3)
)
ax.set_title(
f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
)
else:
ax = sns.histplot(self.est_sim, color=color, kde=kde, ax=ax, **kwargs)
plt.axvline(self.est_point_diff, color="red")
plt.title(
"{} (Diff. value = {})".format(
self._class_name, round(self.est_point_diff, 3)
)
ax.vlines(self.est_point_diff, 0, ax.get_ylim()[1], color="red")
ax.set_title(
f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
)
return ax
51 changes: 33 additions & 18 deletions segregation/inference/randomization.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@ def _generate_estimate(input):
else:
df = input[0].data.copy()
if input[0].index_type == "singlegroup":
df = input[1](df, group=input[0].group_pop_var, total=input[0].total_pop_var,)
df = input[1](
df,
group=input[0].group_pop_var,
total=input[0].total_pop_var,
)
estimate = (
input[0]
.__class__(df, input[0].group_pop_var, input[0].total_pop_var, **input[2])
.statistic
)
else:
df = input[1](df, groups=input[0].groups)
df = input[1](df, groups=input[0].groups, verbose=input[3])
estimate = input[0].__class__(df, input[0].groups, **input[2]).statistic
return estimate

Expand All @@ -36,6 +40,7 @@ def simulate_null(
n_jobs=-1,
backend="loky",
index_kwargs=None,
verbose=False,
):
"""Simulate a series of index values in parallel to serve as a null distribution.
Expand All @@ -56,6 +61,8 @@ def simulate_null(
index_kwargs : dict, optional
additional keyword arguments used to fit the index, such as distance or network
if estimating a spatial index; by default None
verbose : bool, optional
whether the simulation function should emit warnings (forwarded to it for multigroup indices); by default False
Returns
-------
Expand All @@ -67,13 +74,13 @@ def simulate_null(
if n_jobs == -1:
n_jobs = multiprocessing.cpu_count()
estimates = Parallel(n_jobs=n_jobs, backend=backend)(
delayed(_generate_estimate)((seg_class, sim_func, index_kwargs))
delayed(_generate_estimate)((seg_class, sim_func, index_kwargs, verbose))
for i in tqdm(range(iterations))
)
return pd.Series(estimates)


def simulate_person_permutation(df, group=None, total=None, groups=None):
def simulate_person_permutation(df, group=None, total=None, groups=None, verbose=False):
"""Simulate the permutation of individuals across spatial units.
Parameters
Expand Down Expand Up @@ -145,7 +152,7 @@ def simulate_person_permutation(df, group=None, total=None, groups=None):
return gpd.GeoDataFrame(df, geometry=geoms.geometry.name)


def simulate_evenness(df, group=None, total=None, groups=None):
def simulate_evenness(df, group=None, total=None, groups=None, verbose=True):
"""Simulate even redistribution of population groups across spatial units.
Parameters
Expand Down Expand Up @@ -192,17 +199,17 @@ def simulate_evenness(df, group=None, total=None, groups=None):
global_prob_vector = df.sum(axis=0) / df.sum().sum()
t = df[groups].sum(axis=1).astype(int)

simul = list(
map(lambda i: list(np.random.multinomial(i, global_prob_vector)), t)
)
simul = [list(np.random.multinomial(i, global_prob_vector)) for i in t]
output = pd.DataFrame(simul, columns=groups)
if geoms:
return gpd.GeoDataFrame(output, geometry=geoms, crs=crs)

return output


def simulate_systematic_randomization(df, group=None, total=None, groups=None):
def simulate_systematic_randomization(
df, group=None, total=None, groups=None, verbose=True
):
"""Simulate systematic redistribution of population groups across spatial units.
Parameters
Expand All @@ -211,7 +218,7 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
geodataframe with population data to be randomized
group : str, optional
name of column on geodataframe that holds the group total
(for use with singlegroup indices).
(for use with singlegroup indices).
total : str, optional
name of column on geodataframe that holds the total population for
each unit. For singlegroup indices, this parameter is required. For
Expand Down Expand Up @@ -242,16 +249,18 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
Reference: :cite:`allen2015more`
"""
if groups:
if not total:
if not total and verbose:
warn(
"No `total` argument passed. Assuming population groups are exhaustive"
"No `total` argument passed. Assuming population groups are exhaustive",
stacklevel=2,
)
total = "total"
df[total] = df[groups].sum(axis=1)
if group:
assert (
total
), "If simulating a single group, you must also supply a total population column"
if not total:
raise ValueError(
"If simulating a single group, you must also supply a total population column"
)
df["other_group_pop"] = df[total] - df[group]
groups = [group, "other_group_pop"]

Expand Down Expand Up @@ -320,7 +329,9 @@ def simulate_geo_permutation(df, **kwargs):
return data


def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None):
def simulate_systematic_geo_permutation(
df, group=None, total=None, groups=None, verbose=True
):
"""Simulate systematic redistribution followed by random permutation of geographic units.
Parameters
Expand All @@ -342,12 +353,16 @@ def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None)
geopandas.GeoDataFrame
geodataframe with systematically randomized population groups
"""
df = simulate_systematic_randomization(df, group=group, total=total, groups=groups)
df = simulate_systematic_randomization(
df, group=group, total=total, groups=groups, verbose=verbose
)
df = simulate_geo_permutation(df)
return df


def simulate_evenness_geo_permutation(df, group=None, total=None, groups=None):
def simulate_evenness_geo_permutation(
df, group=None, total=None, groups=None, verbose=True
):
"""Simulate evenness followed by random permutation of geographic units.
Parameters
Expand Down
30 changes: 19 additions & 11 deletions segregation/singlegroup/gini.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,20 @@

__author__ = "Renan X. Cortes <[email protected]>, Sergio J. Rey <[email protected]> and Elijah Knaap <[email protected]>"

import os

import geopandas as gpd
import numpy as np

from .._base import SingleGroupIndex, SpatialImplicitIndex
# must be set prior to importing numpy
# <https://github.com/numba/numba/issues/5275>
os.environ["KMP_WARNINGS"] = "off"

import numpy as np

from .._base import SingleGroupIndex, SpatialImplicitIndex

try:
from numba import njit, jit, prange, boolean
from numba import boolean, jit, njit, prange
except (ImportError, ModuleNotFoundError):

def jit(*dec_args, **dec_kwargs):
Expand All @@ -28,7 +33,11 @@ def intercepted_function(f, *f_args, **f_kwargs):
prange = range
boolean = bool

@njit(parallel=True, fastmath=True,)

@njit(
parallel=True,
fastmath=True,
)
def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
"""Memory efficient calculation of Gini
Expand All @@ -41,25 +50,23 @@ def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
Returns
----------
implicit: float
Gini coefficient
"""


n = ti.shape[0]
num = np.zeros(1)
T = ti.sum()
P = pi.sum() / T
pi = np.where(ti == 0, 0, pi / ti)
T = ti.sum()
for i in prange(n-1):
num += (ti[i] * ti[i+1:] * np.abs(pi[i] - pi[i+1:])).sum()
for i in prange(n - 1):
num += (ti[i] * ti[i + 1 :] * np.abs(pi[i] - pi[i + 1 :])).sum()
num *= 2
den = (2 * T * T * P * (1-P))
den = 2 * T * T * P * (1 - P)
return (num / den)[0]



def _gini_seg(data, group_pop_var, total_pop_var):
"""Calculate Gini segregation index.
Expand Down Expand Up @@ -107,6 +114,7 @@ def _gini_seg(data, group_pop_var, total_pop_var):

return G, data


class Gini(SingleGroupIndex, SpatialImplicitIndex):
"""Gini Index.
Expand Down Expand Up @@ -154,7 +162,7 @@ def __init__(
decay=None,
function="triangular",
precompute=None,
**kwargs
**kwargs,
):
"""Init."""
SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)
Expand Down

0 comments on commit 3b3ee00

Please sign in to comment.