-
Notifications
You must be signed in to change notification settings - Fork 142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Expose configuration for handling coded values like -666666666 #75
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,42 @@ def list_or_str(v): | |
return v | ||
return [v] | ||
|
||
def cast_nulls(func): | ||
""" | ||
Decorator to format null values in API result casting functions. | ||
""" | ||
def null_wrapper(v, cast_nulls): | ||
# This value indicates that there were too few observations to compute | ||
# an estimate. See: | ||
# https://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html | ||
if str(v) == '-666666666': | ||
if cast_nulls is True: | ||
return None | ||
else: | ||
raise NullValueException('Unhandled coded value: ', str(v)) | ||
else: | ||
return func(v) | ||
return null_wrapper | ||
|
||
@cast_nulls | ||
def to_str(v): | ||
""" | ||
Cast an API result to a string. | ||
""" | ||
return str(v) | ||
|
||
@cast_nulls | ||
def to_float(v): | ||
""" | ||
Cast an API result to a float. | ||
""" | ||
return float(v) | ||
|
||
@cast_nulls | ||
def float_or_str(v): | ||
""" | ||
Try casting an API result to a float, and fall back to a string. | ||
""" | ||
try: | ||
return float(v) | ||
except ValueError: | ||
|
@@ -92,6 +127,10 @@ class UnsupportedYearException(CensusException): | |
pass | ||
|
||
|
||
class NullValueException(Exception): | ||
pass | ||
|
||
|
||
class Client(object): | ||
endpoint_url = 'https://api.census.gov/data/%s/%s' | ||
definitions_url = 'https://api.census.gov/data/%s/%s/variables.json' | ||
|
@@ -158,6 +197,10 @@ def get(self, fields, geo, year=None, **kwargs): | |
|
||
@retry_on_transient_error | ||
def query(self, fields, geo, year=None, **kwargs): | ||
cast_nulls = kwargs.get('cast_nulls', True) | ||
if cast_nulls not in [True, False]: | ||
raise CensusException('cast_nulls argument must be True or False') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not totally confident in |
||
|
||
if year is None: | ||
year = self.default_year | ||
|
||
|
@@ -187,10 +230,30 @@ def query(self, fields, geo, year=None, **kwargs): | |
|
||
headers = data.pop(0) | ||
types = [self._field_type(header, year) for header in headers] | ||
results = [{header : (cast(item) if item is not None else None) | ||
for header, cast, item | ||
in zip(headers, types, d)} | ||
for d in data] | ||
results = [] | ||
error = False | ||
for d in data: | ||
result = {} | ||
for header, cast, item in zip(headers, types, d): | ||
if item is not None: | ||
try: | ||
var_value = cast(item, cast_nulls) | ||
except NullValueException: | ||
# This value needs to raise an error, but we need the | ||
# rest of the result values for context, so flag the | ||
# error and continue the iteration | ||
error = True | ||
var_value = item | ||
else: | ||
var_value = None | ||
result[header] = var_value | ||
if error: | ||
msg = 'Null estimate code found: ' + str(result) | ||
msg += '\nSee the Census documentation for more information:' | ||
msg += '\nhttps://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html' | ||
raise CensusException(msg) | ||
else: | ||
results.append(result) | ||
return results | ||
|
||
elif resp.status_code == 204: | ||
|
@@ -204,17 +267,17 @@ def _field_type(self, field, year): | |
url = self.definition_url % (year, self.dataset, field) | ||
resp = self.session.get(url) | ||
|
||
types = {"fips-for" : str, | ||
"fips-in" : str, | ||
types = {"fips-for" : to_str, | ||
"fips-in" : to_str, | ||
"int" : float_or_str, | ||
"float": float, | ||
"string": str} | ||
"float": to_float, | ||
"string": to_str} | ||
|
||
if resp.status_code == 200: | ||
predicate_type = resp.json().get("predicateType", "string") | ||
return types[predicate_type] | ||
else: | ||
return str | ||
return to_str | ||
|
||
@supported_years() | ||
def us(self, fields, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
import requests | ||
|
||
from census.core import ( | ||
Census, UnsupportedYearException) | ||
Census, UnsupportedYearException, CensusException) | ||
|
||
KEY = os.environ.get('CENSUS_KEY', '') | ||
|
||
|
@@ -121,6 +121,67 @@ def test_la_canada_2015(self): | |
) | ||
|
||
|
||
class TestCodedValues(CensusTestCase): | ||
""" | ||
Tests for handling coded values, like -666666666 and -999999999. | ||
""" | ||
def test_handle_666666666(self): | ||
""" | ||
Test the default behavior of handling -666666666 values, which is to | ||
cast them to null. | ||
""" | ||
# This call should return a value of -666666666 | ||
return_val = self._client.acs5.state_county_tract('B19081_001E', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want this to be the default behavior? I think the default should be the behavior of cast_null=True, but I'm persuadable. |
||
42, | ||
101, | ||
'989100', | ||
year=2016) | ||
self.assertEqual(return_val[0]['B19081_001E'], None) | ||
|
||
def test_handle_666666666_as_error(self): | ||
""" | ||
Test raising an error for -666666666 values. | ||
""" | ||
with self.assertRaises(CensusException): | ||
return_val = self._client.acs5.state_county_tract('B19081_001E', | ||
42, | ||
101, | ||
'989100', | ||
year=2016, | ||
cast_nulls=False) | ||
|
||
def test_handle_666666666_as_null(self): | ||
""" | ||
Test casting -666666666 values to null. | ||
""" | ||
return_val = self._client.acs5.state_county_tract('B19081_001E', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also test that a warning is emitted that a value was cast to a null. Particularly if we decide to make this the default behavior. |
||
42, | ||
101, | ||
'989100', | ||
year=2016, | ||
cast_nulls=True) | ||
self.assertEqual(return_val[0]['B19081_001E'], None) | ||
|
||
def test_bad_cast_nulls_argument(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. great to have these tests! |
||
""" | ||
Test that an error gets raised for poorly-formated cast_nulls argument. | ||
""" | ||
with self.assertRaises(CensusException): | ||
return_val = self._client.acs5.state('NAME', | ||
Census.ALL, | ||
cast_nulls='foobar') | ||
|
||
with self.assertRaises(CensusException): | ||
return_val = self._client.acs5.state('NAME', | ||
Census.ALL, | ||
cast_nulls=None) | ||
|
||
with self.assertRaises(CensusException): | ||
return_val = self._client.acs5.state('NAME', | ||
Census.ALL, | ||
cast_nulls=10) | ||
|
||
|
||
class TestEndpoints(CensusTestCase): | ||
|
||
def check_endpoints(self, client_name, tests, **kwargs): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another (maybe simpler) way to do this would be to avoid the decorator abstraction and use a "checking" function instead. I liked the decorator since it felt to me like a semantic way of indicating that this checking logic is a high-level modification that can be applied somewhat abstractly to the other type casting functions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd be a bit more comfortable with this architecture if we had more cases than just the
-666666666
. That said I'm comfortable to go with this approach until we accumulate more cases, and then we can potentially refactor if appropriate.