Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Morningstar Retry Fix and Google API Patch #515

Merged
merged 15 commits into from
Apr 13, 2018
Merged
2 changes: 2 additions & 0 deletions docs/source/whatsnew/v0.7.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@ Bug Fixes
setting the environmental variable QUANDL_API_KEY (:issue:`485`).
- Added back support for Yahoo! price data
- Handle Morningstar index volume data properly (:issue:`486`).
- Fixed Morningstar retry count handling when re-requesting failed symbols (:issue:`513`)
- Updated the Google daily price reader to use a working URL (:issue:`502`)
9 changes: 6 additions & 3 deletions pandas_datareader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _sanitize_response(response):
"""
return response.content

def _get_response(self, url, params=None, headers=None):
def _get_response(self, url, params=None):
""" send raw HTTP request to get requests.Response from the specified url
Parameters
----------
Expand All @@ -128,8 +128,7 @@ def _get_response(self, url, params=None, headers=None):
last_response_text = ''
for i in range(self.retry_count + 1):
response = self.session.get(url,
params=params,
headers=headers)
params=params)
if response.status_code == requests.codes.ok:
return response

Expand Down Expand Up @@ -170,6 +169,9 @@ def _output_error(self, out):
def _read_lines(self, out):
rs = read_csv(out, index_col=0, parse_dates=True,
na_values=('-', 'null'))[::-1]
# Strip leading/trailing whitespace from the column header names
rs.columns = list(map(lambda x: x.strip(), rs.columns.values.tolist()))

# Yahoo! Finance sometimes does this awesome thing where they
# return 2 rows for the most recent business day
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
Expand All @@ -181,6 +183,7 @@ def _read_lines(self, out):
except AttributeError:
# Python 3 string has no decode method.
rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()

return rs


Expand Down
3 changes: 2 additions & 1 deletion pandas_datareader/google/daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3,
@property
def url(self):
"""API URL"""
return 'https://finance.google.com/finance/historical'
# Thanks user:vnmabus for pointing this out.
return 'https://finance.google.co.uk/bctzjpnsun/historical'

def _get_params(self, symbol):
params = {
Expand Down
34 changes: 15 additions & 19 deletions pandas_datareader/mstar/daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ def __init__(self, symbols, start=None, end=None, retry_count=3,
self.currency = currency
self.interval = interval

self._symbol_data_cache = []

def _url_params(self):
if self.interval not in ['d', 'wk', 'mo', 'm', 'w']:
raise ValueError("Invalid interval: valid values are 'd', 'wk' "
Expand Down Expand Up @@ -98,17 +96,18 @@ def _get_crumb(self, *args):
"""Not required """
pass

def _dl_mult_symbols(self, symbols):
def _dl_mult_symbols(self, symbols, symbol_data=None):
failed = []
symbol_data = []
if symbol_data is None:
symbol_data = []
for symbol in symbols:

params = self._url_params()
params.update({"ticker": symbol})

try:
resp = requests.get(self.url, params=params)
except Exception:
except (requests.HTTPError, requests.ConnectionError,
requests.RequestException):
if symbol not in failed:
if self.retry_count == 0:
warn("skipping symbol %s: number of retries "
Expand All @@ -122,25 +121,24 @@ def _dl_mult_symbols(self, symbols):
jsondata = resp.json()
if jsondata is None:
failed.append(symbol)
continue
jsdata = self._restruct_json(symbol=symbol,
jsondata=jsondata)
symbol_data.extend(jsdata)
pass
else:
jsdata = self._restruct_json(symbol=symbol,
jsondata=jsondata)
symbol_data.extend(jsdata)
else:
raise Exception("Request Error!: %s : %s" % (
resp.status_code, resp.reason))

time.sleep(self.pause)

if len(failed) > 0 and self.retry_count > 0:
# TODO: This appears to do nothing since
# TODO: successful symbols are not added to
self._dl_mult_symbols(symbols=failed)
self.retry_count -= 1
self._dl_mult_symbols(symbols=failed, symbol_data=symbol_data)
else:
self.retry_count = 0

if not symbol_data:
if len(symbol_data) == 0 and self.retry_count == 0:
raise ValueError('All symbols were invalid')
elif self.retry_count == 0 and len(failed) > 0:
warn("The following symbols were excluded do to http "
Expand All @@ -156,8 +154,7 @@ def _convert_index2date(indexvals):
return [base + pd.to_timedelta(iv, unit='d') for iv in indexvals]

def _restruct_json(self, symbol, jsondata):
if jsondata is None:
return

divdata = jsondata["DividendData"]

pricedata = jsondata["PriceDataList"][0]["Datapoints"]
Expand All @@ -174,8 +171,7 @@ def _restruct_json(self, symbol, jsondata):
d = dates[p]
bardict = {
"Symbol": symbol, "Date": d, "Close": bar[0], "High": bar[1],
"Low": bar[2], "Open": bar[3]
}
"Low": bar[2], "Open": bar[3]}
if len(divdata) == 0:
pass
else:
Expand Down Expand Up @@ -216,7 +212,7 @@ def read(self):
is_str = False
try:
is_str = all(map(lambda v: isinstance(v, str), symbols))
except Exception:
except ValueError:
pass

if not is_str:
Expand Down
14 changes: 11 additions & 3 deletions pandas_datareader/stooq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ class StooqDailyReader(_DailyBaseReader):

"""
Returns DataFrame/Panel of historical stock prices from symbols, over date
range, start to end. To avoid being penalized by Google Finance servers,
pauses between downloading 'chunks' of symbols can be specified.
range, start to end.

Parameters
----------
Expand All @@ -23,6 +22,7 @@ class StooqDailyReader(_DailyBaseReader):
session : Session, default None
requests.sessions.Session instance to be used


Notes
-----
See `Stooq <https://stooq.com>`__
Expand All @@ -33,7 +33,15 @@ def url(self):
"""API URL"""
return 'https://stooq.com/q/d/l/'

def _get_params(self, symbol):
def _get_params(self, symbol, country="US"):
symbol_parts = symbol.split(".")
if len(symbol_parts) == 1:
symbol = ".".join([symbol, country])
else:
if symbol_parts[1].lower() not in ['de', 'hk', 'hu', 'jp',
'pl', 'uk', 'us']:
symbol = ".".join([symbol, "US"])

params = {
's': symbol,
'i': "d"
Expand Down
28 changes: 15 additions & 13 deletions pandas_datareader/tests/google/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,20 @@ def teardown_class(cls):

@skip_on_exception(RemoteDataError)
def test_google(self):

# asserts that google is minimally working and that it throws
# an exception when DataReader can't get a 200 response from
# google

start = datetime(2010, 1, 1)
end = datetime(2013, 1, 27)

for locale in self.locales:
with tm.set_locale(locale):
panel = web.DataReader("NYSE:F", 'google', start, end)
assert panel.Close[-1] == 13.68
panel = web.DataReader("NYSE:F", 'google', start, end)
assert panel.Close[-1] == 13.68

with pytest.raises(Exception):
web.DataReader('NON EXISTENT TICKER', 'google', start, end)
with pytest.raises(Exception):
web.DataReader('NON EXISTENT TICKER', 'google', start, end)

def assert_option_result(self, df):
"""
Expand Down Expand Up @@ -99,6 +100,7 @@ def test_get_quote_stringlist(self):

@skip_on_exception(RemoteDataError)
def test_get_goog_volume(self):

for locale in self.locales:
with tm.set_locale(locale):
df = web.get_data_google('GOOG').sort_index()
Expand All @@ -120,16 +122,14 @@ def test_get_multi1(self):

@skip_on_exception(RemoteDataError)
def test_get_multi_invalid(self):
with warnings.catch_warnings(record=True):
sl = ['AAPL', 'AMZN', 'INVALID']
pan = web.get_data_google(sl, '2012')
assert 'INVALID' in pan.minor_axis
sl = ['AAPL', 'AMZN', 'INVALID']
pan = web.get_data_google(sl, '2012')
assert 'INVALID' in pan.minor_axis

def test_get_multi_all_invalid(self):
with warnings.catch_warnings(record=True):
sl = ['INVALID', 'INVALID2', 'INVALID3']
with pytest.raises(RemoteDataError):
web.get_data_google(sl, '2012')
sl = ['INVALID', 'INVALID2', 'INVALID3']
with pytest.raises(RemoteDataError):
web.get_data_google(sl, '2012')

@skip_on_exception(RemoteDataError)
def test_get_multi2(self):
Expand Down Expand Up @@ -165,6 +165,7 @@ def test_dtypes(self):
@skip_on_exception(RemoteDataError)
def test_unicode_date(self):
# see gh-8967

data = web.get_data_google(
'NYSE:F',
start='JAN-01-10',
Expand All @@ -173,6 +174,7 @@ def test_unicode_date(self):

@skip_on_exception(RemoteDataError)
def test_google_reader_class(self):

r = GoogleDailyReader('GOOG')
df = r.read()
assert df.Volume.loc['JAN-02-2015'] == 1446662
Expand Down
7 changes: 5 additions & 2 deletions pandas_datareader/tests/mstar/test_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas_datareader._testing import skip_on_exception
from pandas_datareader._utils import RemoteDataError
from pandas_datareader.data import MorningstarDailyReader
from pandas_datareader._utils import SymbolWarning


class TestMorningstarDaily(object):
Expand All @@ -22,8 +23,10 @@ def test_invalid_date(self):
end="1999-03-03")

def test_invalid_partial_multi_symbols(self):
df = web.DataReader(['MSFT', "21##", ""], "morningstar", retry_count=0)
assert (len(df.index.levels[0]) == 1)
with pytest.warns(SymbolWarning):
df = web.DataReader(['MSFT', "21##", ""],
"morningstar", retry_count=0)
assert (len(df.index.levels[0]) == 1)

def test_invalid_multi_symbols(self):
with pytest.raises(ValueError):
Expand Down
10 changes: 8 additions & 2 deletions pandas_datareader/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pandas import DataFrame
from pandas_datareader.data import DataReader
from pandas_datareader.exceptions import UnstableAPIWarning
from pandas_datareader._utils import RemoteDataError
from pandas_datareader._testing import skip_on_exception

Expand All @@ -10,8 +11,9 @@ class TestDataReader(object):

@skip_on_exception(RemoteDataError)
def test_read_google(self):
gs = DataReader("GS", "google")
assert isinstance(gs, DataFrame)
with pytest.warns(UnstableAPIWarning):
gs = DataReader("GS", "google")
assert isinstance(gs, DataFrame)

def test_read_iex(self):
gs = DataReader("GS", "iex-last")
Expand All @@ -21,6 +23,10 @@ def test_read_fred(self):
vix = DataReader("VIXCLS", "fred")
assert isinstance(vix, DataFrame)

def test_read_mstar(self):
gs = DataReader("GS", data_source="morningstar")
assert isinstance(gs, DataFrame)

def test_not_implemented(self):
with pytest.raises(NotImplementedError):
DataReader("NA", "NA")
2 changes: 2 additions & 0 deletions pandas_datareader/tests/test_fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@


class TestFred(object):

def test_fred(self):

# Raises an exception when DataReader can't
Expand Down Expand Up @@ -70,6 +71,7 @@ def test_fred_multi(self): # pragma: no cover
end = datetime(2013, 1, 27)

received = web.DataReader(names, "fred", start, end).head(1)

expected = DataFrame([[217.488, 99.68746, 220.633]], columns=names,
index=[pd.tslib.Timestamp('2010-01-01 00:00:00')])
expected.index.rename('DATE', inplace=True)
Expand Down
8 changes: 5 additions & 3 deletions pandas_datareader/tests/test_iex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pandas_datareader.data import (DataReader, get_summary_iex, get_last_iex,
get_dailysummary_iex, get_iex_symbols,
get_iex_book)
from pandas_datareader.exceptions import UnstableAPIWarning


class TestIEX(object):
Expand All @@ -29,9 +30,10 @@ def test_false_ticker(self):
@pytest.mark.xfail(reason='IEX daily history API is returning 500 as of '
'Jan 2018')
def test_daily(self):
df = get_dailysummary_iex(start=datetime(2017, 5, 5),
end=datetime(2017, 5, 6))
assert df['routedVolume'].iloc[0] == 39974788
with pytest.warns(UnstableAPIWarning):
df = get_dailysummary_iex(start=datetime(2017, 5, 5),
end=datetime(2017, 5, 6))
assert df['routedVolume'].iloc[0] == 39974788

def test_symbols(self):
df = get_iex_symbols()
Expand Down
3 changes: 2 additions & 1 deletion pandas_datareader/tests/test_iex_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def test_iex_bad_symbol(self):

def test_iex_bad_symbol_list(self):
with pytest.raises(Exception):
web.DataReader(["AAPL", "BADTICKER"], "iex", self.start, self.end)
web.DataReader(["AAPL", "BADTICKER"], "iex",
self.start, self.end)

def test_daily_invalid_date(self):
start = datetime(2010, 1, 5)
Expand Down
4 changes: 2 additions & 2 deletions pandas_datareader/tests/test_stooq.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@


def test_stooq_dji():
f = web.DataReader('^DJI', 'stooq')
f = web.DataReader('GS', 'stooq')
assert f.shape[0] > 0


def test_get_data_stooq_dji():
f = get_data_stooq('^DAX')
f = get_data_stooq('AMZN')
assert f.shape[0] > 0