From 15983dbf41bf3e022026467181ae2ff771139737 Mon Sep 17 00:00:00 2001 From: Phil Weir Date: Sat, 21 Sep 2019 20:07:58 +0100 Subject: [PATCH] add arithmetic functions --- README.md | 37 ++++++------- examples/three_currency.py | 22 ++++++++ moneypandas/base.py | 7 ++- moneypandas/money_array.py | 107 ++++++++++++++++++++++++++++++++++++- 4 files changed, 147 insertions(+), 26 deletions(-) create mode 100644 examples/three_currency.py diff --git a/README.md b/README.md index 63b18f5..527c15a 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,33 @@ # Moneypandas -Moneypandas is a prototype fork of Cyberpandas for currency, using the `money` library. +Moneypandas is a prototype fork of Cyberpandas for currency, using the `money` library. Even this README is shamelessly purloigned, with thanks to Tom Augspurger and the ContinuumIO team. ----- - -# Cyberpandas - -[![Build Status](https://travis-ci.org/ContinuumIO/cyberpandas.svg?branch=master)](https://travis-ci.org/ContinuumIO/cyberpandas) -[![Documentation Status](https://readthedocs.org/projects/cyberpandas/badge/?version=latest)](http://cyberpandas.readthedocs.io/en/latest/?badge=latest) - -Cyberpandas provides support for storing IP and MAC address data inside a pandas DataFrame using pandas' [Extension Array Interface](http://pandas-docs.github.io/pandas-docs-travis/extending.html#extension-types) +This package provides support for storing currency data inside a pandas DataFrame using pandas' [Extension Array Interface](http://pandas-docs.github.io/pandas-docs-travis/extending.html#extension-types) ```python -In [1]: from cyberpandas import IPArray +In [1]: from moneypandas import MoneyArray In [2]: import pandas as pd -In [3]: df = pd.DataFrame({"address": IPArray(['192.168.1.1', '192.168.1.10'])}) +In [3]: df = pd.DataFrame({"money": MoneyArray(['1284 EUR', '121 EUR', '€14'])}) In [4]: df Out[4]: - address -0 192.168.1.1 -1 192.168.1.10 + money +0 EUR 1,284.00 +1 EUR 121.00 +2 EUR 14.00 ``` -See the 
[documentation](https://cyberpandas.readthedocs.io/en/latest/) for more. - -## Installation - -With Conda: +For more examples, including summing and converting mixed-currency columns, see the `examples` folder. - conda install -c conda-forge cyberpandas +(note: not yet tested with Conda, only setuptools/pipenv) -Or from PyPI +To perform operations efficiently, aggregation is done per currency first, and then XMoney is used to perform the necessary operations on the resulting aggregates. - pip install cyberpandas +Currency conversion of a Series only invokes XMoney (and an actual conversion) for entries whose currency differs from the target, so converting a column that is mostly BBB, with a few AAA entries, to BBB should scale with the number of AAA entries. +## TODO +* implement more reduce functions +* testing for arithmetic diff --git a/examples/three_currency.py b/examples/three_currency.py new file mode 100644 index 0000000..49543e3 --- /dev/null +++ b/examples/three_currency.py @@ -0,0 +1,22 @@ +from moneypandas import MoneyArray +from money import xrates +import decimal +import pandas as pd + + +xrates.install('money.exchange.SimpleBackend') +xrates.base = 'USD' +xrates.setrate('EUR', decimal.Decimal('0.9')) +xrates.setrate('GBP', decimal.Decimal('0.8')) + +df = pd.DataFrame({"money": MoneyArray(['1284 EUR', '121 EUR', '€14', '£12'], 'USD')}) +total = df['money'].sum() +print("Total: ", total) +print("Total (EUR): ", total.to('EUR')) + +df['money'] = df['money'].money.to_currency('EUR') +mean = df['money'].mean() +print("Mean: ", mean) + +df['money'].money.to_currency('GBP', shallow=False, in_place=True) +print('All converted to GBP', df) diff --git a/moneypandas/base.py b/moneypandas/base.py index 9569334..2998f3b 100644 --- a/moneypandas/base.py +++ b/moneypandas/base.py @@ -2,10 +2,10 @@ import numpy as np -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin -class NumPyBackedExtensionArrayMixin(ExtensionArray): +class 
NumPyBackedExtensionArrayMixin(ExtensionArray, ExtensionScalarOpsMixin):
     @property
     def dtype(self):
         """The dtype for this extension array, MoneyType"""
@@ -69,3 +69,6 @@ def unique(self):
         _, indices = np.unique(self.data, return_index=True)
         data = self.data.take(np.sort(indices))
         return self._from_ndarray(data)
+
+NumPyBackedExtensionArrayMixin._add_arithmetic_ops()
+NumPyBackedExtensionArrayMixin._add_comparison_ops()
diff --git a/moneypandas/money_array.py b/moneypandas/money_array.py
index a03bac3..8ed02c2 100644
--- a/moneypandas/money_array.py
+++ b/moneypandas/money_array.py
@@ -2,9 +2,10 @@
 import decimal
 import collections
 
-import six
 import numpy as np
+from pandas.compat.numpy import function as nv
 import pandas as pd
+from pandas.core import nanops
 import money
 
 from pandas.api.extensions import ExtensionDtype
@@ -124,10 +125,105 @@ def to_decimals(self, money_code=None):
         result = decimalize(self.data['va'])
         for i, ceq in enumerate(same):
             if not ceq:
-                result[i] = money.XMoney(*self.data[i]).to(money_code)
+                result[i] = money.XMoney(*self.data[i]).to(money_code).amount
 
         return result
 
+    # Operations thanks to pandas.core.arrays.base.numpy_
+    def _min(self, ndarray, axis=None, out=None, keepdims=False, skipna=True):
+        """Return ``nanops.nanmin`` of ``ndarray`` after validating numpy kwargs."""
+        nv.validate_min((), dict(out=out, keepdims=keepdims))
+        return nanops.nanmin(ndarray, axis=axis, skipna=skipna)
+
+    def _max(self, ndarray, axis=None, out=None, keepdims=False, skipna=True):
+        """Return ``nanops.nanmax`` of ``ndarray`` after validating numpy kwargs."""
+        nv.validate_max((), dict(out=out, keepdims=keepdims))
+        return nanops.nanmax(ndarray, axis=axis, skipna=skipna)
+
+    def _sum(
+        self,
+        ndarray,
+        axis=None,
+        dtype=None,
+        out=None,
+        keepdims=False,
+        initial=None,
+        skipna=True,
+        min_count=0,
+    ):
+        """Return ``nanops.nansum`` of ``ndarray`` after validating numpy kwargs."""
+        nv.validate_sum(
+            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
+        )
+        return nanops.nansum(
+            ndarray, axis=axis, skipna=skipna, min_count=min_count
+        )
+
+    def _reduce(self, name, skipna=True, **kwargs):
+        """Reduce the array to a single ``money.XMoney``.
+
+        Amounts are aggregated per currency first. If more than one
+        currency is present, each per-currency aggregate is converted to
+        ``default_money_code`` (or, when that is unset, to the first
+        currency in sorted order) and the results are aggregated again.
+
+        Parameters
+        ----------
+        name : str
+            Reduction to apply. ``'mean'`` is the sum divided by
+            ``len(self.data)``; any other name dispatches to the method
+            called ``'_' + name``.
+        skipna : bool, default True
+            Forwarded to the underlying reduction.
+        **kwargs
+            Forwarded to the underlying reduction.
+
+        Returns
+        -------
+        money.XMoney
+
+        Raises
+        ------
+        TypeError
+            If no method implements the requested reduction.
+        """
+        currencies = [cu for cu in np.unique(self.data['cu']) if cu]
+        totals = {}
+
+        if name == 'mean':
+            meth = getattr(self, '_sum', None)
+        else:
+            meth = getattr(self, '_' + name, None)
+
+        if meth:
+            if len(currencies) > 1:
+                money_code = self.default_money_code if self.default_money_code else currencies[0]
+                for currency in currencies:
+                    totals[currency] = money.XMoney(
+                        meth(self.data['va'][self.data['cu'] == currency], skipna=skipna, **kwargs),
+                        currency
+                    )
+                total = meth(
+                    np.array([subtotal.to(money_code).amount for subtotal in totals.values()]),
+                    skipna=skipna,
+                    **kwargs
+                )
+            else:
+                money_code = currencies[0] if currencies else self.default_money_code
+                total = meth(self.data['va'], skipna=skipna, **kwargs)
+
+            # 'mean' reuses _sum, so the division has to happen on both
+            # the mixed-currency and the single-currency path.
+            # NOTE(review): the denominator counts every entry, including
+            # missing ones, even when skipna=True -- confirm intended.
+            if name == 'mean':
+                total = total / len(self.data)
+
+            return money.XMoney(amount=total, currency=money_code)
+        else:
+            msg = "'{}' does not implement reduction '{}'"
+            raise TypeError(msg.format(type(self).__name__, name))
+
     @classmethod
     def from_bytes(cls, bytestring):
         r"""Create a MoneyArray from a bytestring.
@@ -398,6 +460,57 @@ def fmt(x):
     def _values_for_factorize(self):
         return self.astype(object), (0, '')
 
+    def to_currency(self, money_code, shallow=True, in_place=False):
+        """Express the array in the currency ``money_code``.
+
+        Parameters
+        ----------
+        money_code : str
+            Target currency code, e.g. ``'EUR'``.
+        shallow : bool, default True
+            If True, only ``default_money_code`` is changed and the
+            stored entries are left untouched. If False, each entry
+            that is neither missing nor already in ``money_code`` is
+            converted with ``money.XMoney``.
+        in_place : bool, default False
+            If True, operate on this array's own data instead of a
+            copy.
+
+        Returns
+        -------
+        same type as ``self``
+            ``self`` for a shallow in-place call, otherwise a newly
+            created array.
+        """
+        if shallow:
+            if in_place:
+                copy = self
+            else:
+                copy = self.copy()
+            copy.default_money_code = money_code
+        else:
+            mask = self.isna()
+            same = (self.data['cu'] == money_code) | mask
+
+            result = self.data
+            if not in_place:
+                result = result.copy()
+
+            for i, ceq in enumerate(same):
+                if not ceq:
+                    va = money.XMoney(self.data[i]['va'], self.data[i]['cu']) \
+                        .to(money_code).amount
+                    result[i] = (va, money_code)
+
+            if in_place:
+                self.data = result
+            copy = self.__class__(
+                result,
+                default_money_code=money_code,
+                dtype=self.dtype
+            )
+
+        return copy
 
 # -----------------------------------------------------------------------------
 # Accessor
@@ -421,6 +534,24 @@ def _validate(obj):
             raise AttributeError("Cannot use 'money' accessor on objects of "
                                  "dtype '{}'.".format(obj.dtype))
 
+    def to_currency(self, money_code, shallow=True, in_place=True):
+        """Delegate to ``to_currency`` of the underlying array.
+
+        The arguments are forwarded positionally through
+        ``delegated_method`` together with the index and name.
+
+        NOTE(review): ``in_place`` defaults to True here but to False
+        on the array method -- confirm this difference is intended.
+        """
+        return delegated_method(
+            self._data.to_currency,
+            self._index,
+            self._name,
+            money_code,
+            shallow,
+            in_place
+        )
+
 
 def is_money_type(obj):
     t = getattr(obj, 'dtype', obj)