From 046768764f69d0738f6922efcc6b13befa61c0b3 Mon Sep 17 00:00:00 2001 From: Phil Weir Date: Sat, 21 Sep 2019 18:38:53 +0100 Subject: [PATCH] first attempt at converting IPArray to a money.XMoney extension type --- LICENSE | 3 +- Makefile | 8 +- Pipfile | 11 + README.md | 6 + ci/build.sh | 4 +- ci/environment.yml | 2 +- ci/install-travis.sh | 2 +- ci/upload-anaconda.sh | 4 +- .../{cyberpandas => moneypandas}/meta.yaml | 8 +- cyberpandas/_utils.py | 30 - cyberpandas/common.py | 3 - cyberpandas/dtypes.py | 17 - cyberpandas/ip_array.py | 718 ------------------ cyberpandas/ip_methods.py | 69 -- cyberpandas/mac_array.py | 160 ---- cyberpandas/parser.py | 110 --- {cyberpandas => moneypandas}/__init__.py | 23 +- {cyberpandas => moneypandas}/_accessor.py | 0 {cyberpandas => moneypandas}/base.py | 2 +- moneypandas/dtypes.py | 35 + moneypandas/money_array.py | 430 +++++++++++ moneypandas/parser.py | 104 +++ setup.py | 19 +- tests/Pipfile | 11 + tests/ip/__init__.py | 0 tests/ip/test_dtypes.py | 20 - tests/ip/test_ip.py | 396 ---------- tests/ip/test_parser.py | 58 -- tests/mac/__init__.py | 0 tests/mac/test_interface.py | 122 --- tests/{ip => }/test_interface.py | 43 +- tests/test_ip.py | 230 ++++++ ...test_ip_pandas.py => test_money_pandas.py} | 74 +- tests/{ip => }/test_pandas_methods.py | 22 +- tests/test_parser.py | 39 + 35 files changed, 977 insertions(+), 1806 deletions(-) create mode 100644 Pipfile rename conda-recipes/{cyberpandas => moneypandas}/meta.yaml (84%) delete mode 100644 cyberpandas/_utils.py delete mode 100644 cyberpandas/common.py delete mode 100644 cyberpandas/dtypes.py delete mode 100644 cyberpandas/ip_array.py delete mode 100644 cyberpandas/ip_methods.py delete mode 100644 cyberpandas/mac_array.py delete mode 100644 cyberpandas/parser.py rename {cyberpandas => moneypandas}/__init__.py (53%) rename {cyberpandas => moneypandas}/_accessor.py (100%) rename {cyberpandas => moneypandas}/base.py (96%) create mode 100644 moneypandas/dtypes.py create mode 
100644 moneypandas/money_array.py create mode 100644 moneypandas/parser.py create mode 100644 tests/Pipfile delete mode 100644 tests/ip/__init__.py delete mode 100644 tests/ip/test_dtypes.py delete mode 100644 tests/ip/test_ip.py delete mode 100644 tests/ip/test_parser.py delete mode 100644 tests/mac/__init__.py delete mode 100644 tests/mac/test_interface.py rename tests/{ip => }/test_interface.py (64%) create mode 100644 tests/test_ip.py rename tests/{ip/test_ip_pandas.py => test_money_pandas.py} (62%) rename tests/{ip => }/test_pandas_methods.py (81%) create mode 100644 tests/test_parser.py diff --git a/LICENSE b/LICENSE index 06b5062..4d443f0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ BSD 3-Clause License -Copyright (c) 2018, Anaconda, Inc. +Cyberpandas Copyright (c) 2018, Anaconda, Inc. +Moneypandas Copyright (c) 2019-, Flax & Teal Limited All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/Makefile b/Makefile index c6e90a4..d9413cb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -.PHONY: build-cyberpandas all +.PHONY: build-moneypandas all -all: build-cyberpandas +all: build-moneypandas -build-cyberpandas-%: - LDFLAGS="-headerpad_max_install_name" conda build conda-recipes/cyberpandas $(patsubst build-cyberpandas-%,--python=%,$@) +build-moneypandas-%: + LDFLAGS="-headerpad_max_install_name" conda build conda-recipes/moneypandas $(patsubst build-moneypandas-%,--python=%,$@) diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..7a9e19a --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.6" diff --git a/README.md b/README.md index d761799..63b18f5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# Moneypandas + +Moneypandas is a prototype fork of Cyberpandas for currency, using the `money` library. 
+ +---- + # Cyberpandas [![Build Status](https://travis-ci.org/ContinuumIO/cyberpandas.svg?branch=master)](https://travis-ci.org/ContinuumIO/cyberpandas) diff --git a/ci/build.sh b/ci/build.sh index 751dd1d..2abcc00 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash set -e -echo "Building cyberpandas" +echo "Building moneypandas" -conda build -c defaults -c conda-forge conda-recipes/cyberpandas --python=${PYTHON} +conda build -c defaults -c conda-forge conda-recipes/moneypandas --python=${PYTHON} diff --git a/ci/environment.yml b/ci/environment.yml index 38c6121..8776dff 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -1,4 +1,4 @@ -name: cyberpandas-docs +name: moneypandas-docs channels: - defaults - conda-forge diff --git a/ci/install-travis.sh b/ci/install-travis.sh index 1921ade..ef5d0ac 100755 --- a/ci/install-travis.sh +++ b/ci/install-travis.sh @@ -61,7 +61,7 @@ conda list test-environment python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' echo -echo "[install cyberpandas]" +echo "[install moneypandas]" pip install --no-deps -e . 
echo "[finished install]" diff --git a/ci/upload-anaconda.sh b/ci/upload-anaconda.sh index ff8e87b..39f408e 100644 --- a/ci/upload-anaconda.sh +++ b/ci/upload-anaconda.sh @@ -16,9 +16,9 @@ if [ -z "$UPLOAD_KEY" ]; then return 0 fi -export UPLOADFILE=`conda build conda-recipes/cyberpandas --python=${PYTHON} --output` +export UPLOADFILE=`conda build conda-recipes/moneypandas --python=${PYTHON} --output` echo "UPLOADFILE = ${UPLOADFILE}" -echo "[Upload cyberpandas]" +echo "[Upload moneypandas]" echo ${UPLOADFILE} anaconda -t ${UPLOAD_KEY} upload -u intake --force ${UPLOADFILE} diff --git a/conda-recipes/cyberpandas/meta.yaml b/conda-recipes/moneypandas/meta.yaml similarity index 84% rename from conda-recipes/cyberpandas/meta.yaml rename to conda-recipes/moneypandas/meta.yaml index ede1eee..a56c729 100644 --- a/conda-recipes/cyberpandas/meta.yaml +++ b/conda-recipes/moneypandas/meta.yaml @@ -1,5 +1,5 @@ package: - name: cyberpandas + name: moneypandas version: {{ environ.get('GIT_DESCRIBE_TAG', '') }} build: @@ -18,15 +18,15 @@ requirements: - setuptools >=3.3 run: - - ipaddress # [py27] + - money - pandas >=0.23.0 - python - setuptools >=3.3 test: imports: - - cyberpandas + - moneypandas about: - home: https://github.com/continuumio/cyberpandas + home: https://github.com/flaxandteal/moneypandas license: BSD diff --git a/cyberpandas/_utils.py b/cyberpandas/_utils.py deleted file mode 100644 index 8c36be8..0000000 --- a/cyberpandas/_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Utilities for working with IP address data.""" -import struct - -import six - - -def to_bytes(n, length, byteorder='big'): - # https://stackoverflow.com/a/20793663/1889400 - h = '%x' % n - s = ('0' * (len(h) % 2) + h).zfill(length * 2).decode('hex') - return s if byteorder == 'big' else s[::-1] - - -def pack(ip): - if six.PY2: - return to_bytes(ip, length=16, byteorder='big') - else: - return ip.to_bytes(16, byteorder='big') - - -def unpack(ip): - # Recipe 3.5 from Python Cookbook 3rd ed. (p. 
90) - # int.from_bytes(data, 'big') for Py3+ - hi, lo = struct.unpack(">QQ", ip) - return hi, lo - - -def combine(hi, lo): - """Combine the hi and lo bytes into the final ip address.""" - return (hi << 64) + lo diff --git a/cyberpandas/common.py b/cyberpandas/common.py deleted file mode 100644 index f1f9353..0000000 --- a/cyberpandas/common.py +++ /dev/null @@ -1,3 +0,0 @@ -_IPv4_MAX = 2 ** 32 - 1 -_IPv6_MAX = 2 ** 128 - 1 -_U8_MAX = 2 ** 64 - 1 diff --git a/cyberpandas/dtypes.py b/cyberpandas/dtypes.py deleted file mode 100644 index 2803f14..0000000 --- a/cyberpandas/dtypes.py +++ /dev/null @@ -1,17 +0,0 @@ -from .common import _IPv4_MAX - - -def is_ipv4(value): - if isinstance(value, str): - return value.count(".") == 3 - elif isinstance(value, bytes): - pass - elif isinstance(value, int): - return value < _IPv4_MAX - else: - return False - - -def is_ipv6(value): - if isinstance(value, str): - return value.count(":") == 7 diff --git a/cyberpandas/ip_array.py b/cyberpandas/ip_array.py deleted file mode 100644 index e2cc67f..0000000 --- a/cyberpandas/ip_array.py +++ /dev/null @@ -1,718 +0,0 @@ -import abc -import collections -import ipaddress - -import six -import numpy as np -import pandas as pd -from pandas.api.extensions import ExtensionDtype - -from ._accessor import (DelegatedMethod, DelegatedProperty, - delegated_method) -from ._utils import combine, pack, unpack -from .base import NumPyBackedExtensionArrayMixin -from .common import _U8_MAX, _IPv4_MAX -from .parser import _to_ipaddress_pyint, _as_ip_object - -# ----------------------------------------------------------------------------- -# Extension Type -# ----------------------------------------------------------------------------- - - -@six.add_metaclass(abc.ABCMeta) -class IPv4v6Base(object): - """Metaclass providing a common base class for the two scalar IP types.""" - pass - - -IPv4v6Base.register(ipaddress.IPv4Address) -IPv4v6Base.register(ipaddress.IPv6Address) - - 
-@pd.api.extensions.register_extension_dtype -class IPType(ExtensionDtype): - name = 'ip' - type = IPv4v6Base - kind = 'O' - _record_type = np.dtype([('hi', '>u8'), ('lo', '>u8')]) - na_value = ipaddress.IPv4Address(0) - - @classmethod - def construct_from_string(cls, string): - if string == cls.name: - return cls() - else: - raise TypeError("Cannot construct a '{}' from " - "'{}'".format(cls, string)) - - @classmethod - def construct_array_type(cls): - return IPArray - - -# ----------------------------------------------------------------------------- -# Extension Container -# ----------------------------------------------------------------------------- - - -class IPArray(NumPyBackedExtensionArrayMixin): - """Holder for IP Addresses. - - IPArray is a container for IPv4 or IPv6 addresses. It satisfies pandas' - extension array interface, and so can be stored inside - :class:`pandas.Series` and :class:`pandas.DataFrame`. - - See :ref:`usage` for more. - """ - # A note on the internal data layout. IPv6 addresses require 128 bits, - # which is more than a uint64 can store. So we use a NumPy structured array - # with two fields, 'hi', 'lo' to store the data. Each field is a uint64. - # The 'hi' field contains upper 64 bits. The think this is correct since - # all IP traffic is big-endian. - __array_priority__ = 1000 - _dtype = IPType() - _itemsize = 16 - ndim = 1 - can_hold_na = True - - def __init__(self, values, dtype=None, copy=False): - from .parser import _to_ip_array - - values = _to_ip_array(values) # TODO: avoid potential copy - # TODO: dtype? - if copy: - values = values.copy() - self.data = values - - @classmethod - def from_pyints(cls, values): - """Construct an IPArray from a sequence of Python integers. - - This can be useful for representing IPv6 addresses, which may - be larger than 2**64. - - Parameters - ---------- - values : Sequence - Sequence of Python integers. 
- - Examples - -------- - >>> IPArray.from_pyints([0, 10, 2 ** 64 + 1]) - IPArray(['0.0.0.1', '0.0.0.2', '0.0.0.3', '0:0:0:1::']) - """ - return cls(_to_ipaddress_pyint(values)) - - @classmethod - def from_bytes(cls, bytestring): - r"""Create an IPArray from a bytestring. - - Parameters - ---------- - bytestring : bytes - Note that bytestring is a Python 3-style string of bytes, - not a sequences of bytes where each element represents an - IPAddress. - - Returns - ------- - IPArray - - Examples - -------- - >>> arr = IPArray([10, 20]) - >>> buf = arr.to_bytes() - >>> buf - b'\x00\x00\...x00\x02' - >>> IPArray.from_bytes(buf) - IPArray(['0.0.0.10', '0.0.0.20']) - - See Also - -------- - to_bytes - from_pyints - """ - data = np.frombuffer(bytestring, dtype=IPType._record_type) - return cls._from_ndarray(data) - - @classmethod - def _from_ndarray(cls, data, copy=False): - """Zero-copy construction of an IPArray from an ndarray. - - Parameters - ---------- - data : ndarray - This should have IPType._record_type dtype - copy : bool, default False - Whether to copy the data. - - Returns - ------- - ExtensionArray - """ - if copy: - data = data.copy() - new = IPArray([]) - new.data = data - return new - - @property - def _as_u8(self): - """A 2-D view on our underlying data, for bit-level manipulation.""" - return self.data.view(">> IPArray([]).na_value - IPv4Address('0.0.0.0') - """ - return self.dtype.na_value - - def take(self, indices, allow_fill=False, fill_value=None): - # Can't use pandas' take yet - # 1. axis - # 2. I don't know how to do the reshaping correctly. - indices = np.asarray(indices, dtype='int') - - if allow_fill and fill_value is None: - fill_value = unpack(pack(int(self.na_value))) - elif allow_fill and not isinstance(fill_value, tuple): - fill_value = unpack(pack(int(fill_value))) - - if allow_fill: - mask = (indices == -1) - if not len(self): - if not (indices == -1).all(): - msg = "Invalid take for empty array. Must be all -1." 
- raise IndexError(msg) - else: - # all NA take from and empty array - took = (np.full((len(indices), 2), fill_value, dtype='>u8') - .reshape(-1).astype(self.dtype._record_type)) - return self._from_ndarray(took) - if (indices < -1).any(): - msg = ("Invalid value in 'indicies'. Must be all >= -1 " - "for 'allow_fill=True'") - raise ValueError(msg) - - took = self.data.take(indices) - if allow_fill: - took[mask] = fill_value - - return self._from_ndarray(took) - - # ------------------------------------------------------------------------- - # Interfaces - # ------------------------------------------------------------------------- - - def __repr__(self): - formatted = self._format_values() - return "IPArray({!r})".format(formatted) - - def _format_values(self): - formatted = [] - # TODO: perf - for i in range(len(self)): - hi, lo = self.data[i] - if lo == -1: - formatted.append("NA") - elif hi == 0 and lo <= _IPv4_MAX: - formatted.append(ipaddress.IPv4Address._string_from_ip_int( - int(lo))) - elif hi == 0: - formatted.append(ipaddress.IPv6Address._string_from_ip_int( - int(lo))) - else: - # TODO: - formatted.append(ipaddress.IPv6Address._string_from_ip_int( - (int(hi) << 64) + int(lo))) - return formatted - - @staticmethod - def _box_scalar(scalar): - return ipaddress.ip_address(combine(*scalar)) - - @property - def _parser(self): - from .parser import to_ipaddress - return to_ipaddress - - def __setitem__(self, key, value): - from .parser import to_ipaddress - - value = to_ipaddress(value).data - self.data[key] = value - - def __iter__(self): - return iter(self.to_pyipaddress()) - - # ------------------------------------------------------------------------ - # Serializaiton / Export - # ------------------------------------------------------------------------ - - def to_pyipaddress(self): - """Convert the array to a list of scalar IP Adress objects. 
- - Returns - ------- - addresses : List - Each element of the list will be an :class:`ipaddress.IPv4Address` - or :class:`ipaddress.IPv6Address`, depending on the size of that - element. - - See Also - -------- - IPArray.to_pyints - - Examples - --------- - >>> IPArray(['192.168.1.1', '2001:db8::1000']).to_pyipaddress() - [IPv4Address('192.168.1.1'), IPv6Address('2001:db8::1000')] - """ - import ipaddress - return [ipaddress.ip_address(x) for x in self._format_values()] - - def to_pyints(self): - """Convert the array to a list of Python integers. - - Returns - ------- - addresses : List[int] - These will be Python integers (not NumPy), which are unbounded in - size. - - See Also - -------- - IPArray.to_pyipaddresses - IPArray.from_pyints - - Examples - -------- - >>> IPArray(['192.168.1.1', '2001:db8::1000']).to_pyints() - [3232235777, 42540766411282592856903984951653830656] - """ - return [combine(*map(int, x)) for x in self.data] - - def to_bytes(self): - r"""Serialize the IPArray as a Python bytestring. - - This and :meth:IPArray.from_bytes is the fastest way to roundtrip - serialize and de-serialize an IPArray. 
- - See Also - -------- - IPArray.from_bytes - - Examples - -------- - >>> arr = IPArray([10, 20]) - >>> arr.to_bytes() - b'\x00\x00\...x00\x02' - """ - return self.data.tobytes() - - def astype(self, dtype, copy=True): - if isinstance(dtype, IPType): - if copy: - self = self.copy() - return self - return super(IPArray, self).astype(dtype) - - # ------------------------------------------------------------------------ - # Ops - # ------------------------------------------------------------------------ - - def __eq__(self, other): - # TDOO: scalar ipaddress - if not isinstance(other, IPArray): - return NotImplemented - mask = self.isna() | other.isna() - result = self.data == other.data - result[mask] = False - return result - - def __lt__(self, other): - # TDOO: scalar ipaddress - if not isinstance(other, IPArray): - return NotImplemented - mask = self.isna() | other.isna() - result = ((self.data['hi'] <= other.data['hi']) & - (self.data['lo'] < other.data['lo'])) - result[mask] = False - return result - - def __le__(self, other): - if not isinstance(other, IPArray): - return NotImplemented - mask = self.isna() | other.isna() - result = ((self.data['hi'] <= other.data['hi']) & - (self.data['lo'] <= other.data['lo'])) - result[mask] = False - return result - - def __gt__(self, other): - if not isinstance(other, IPArray): - return NotImplemented - return other < self - - def __ge__(self, other): - if not isinstance(other, IPArray): - return NotImplemented - return other <= self - - def equals(self, other): - if not isinstance(other, IPArray): - raise TypeError("Cannot compare 'IPArray' " - "to type '{}'".format(type(other))) - # TODO: missing - return (self.data == other.data).all() - - def _values_for_factorize(self): - return self.astype(object), ipaddress.IPv4Address(0) - - def isna(self): - """Indicator for whether each element is missing. - - The IPAddress 0 is used to indecate missing values. 
- - Examples - -------- - >>> IPArray(['0.0.0.0', '192.168.1.1']).isna() - array([ True, False]) - """ - ips = self.data - return (ips['lo'] == 0) & (ips['hi'] == 0) - - def isin(self, other): - """Check whether elements of `self` are in `other`. - - Comparison is done elementwise. - - Parameters - ---------- - other : str or sequences - For ``str`` `other`, the argument is attempted to - be converted to an :class:`ipaddress.IPv4Network` or - a :class:`ipaddress.IPv6Network` or an :class:`IPArray`. - If all those conversions fail, a TypeError is raised. - - For a sequence of strings, the same conversion is attempted. - You should not mix networks with addresses. - - Finally, other may be an ``IPArray`` of addresses to compare to. - - Returns - ------- - contained : ndarray - A 1-D boolean ndarray with the same length as self. - - Examples - -------- - Comparison to a single network - - >>> s = IPArray(['192.168.1.1', '255.255.255.255']) - >>> s.isin('192.168.1.0/24') - array([ True, False]) - - Comparison to many networks - >>> s.isin(['192.168.1.0/24', '192.168.2.0/24']) - array([ True, False]) - - Comparison to many IP Addresses - - >>> s.isin(['192.168.1.1', '192.168.1.2', '255.255.255.1']]) - array([ True, False]) - """ - box = (isinstance(other, str) or - not isinstance(other, (IPArray, collections.Sequence))) - if box: - other = [other] - - networks = [] - addresses = [] - - if not isinstance(other, IPArray): - for net in other: - net = _as_ip_object(net) - if isinstance(net, (ipaddress.IPv4Network, - ipaddress.IPv6Network)): - networks.append(net) - if isinstance(net, (ipaddress.IPv4Address, - ipaddress.IPv6Address)): - addresses.append(ipaddress.IPv6Network(net)) - else: - addresses = other - - # Flatten all the addresses - addresses = IPArray(addresses) # TODO: think about copy=False - - mask = np.zeros(len(self), dtype='bool') - for network in networks: - mask |= self._isin_network(network) - - # no... we should flatten this. 
- mask |= self._isin_addresses(addresses) - return mask - - def _isin_network(self, other): - """Check whether an array of addresses is contained in a network.""" - # A network is bounded below by 'network_address' and - # above by 'broadcast_address'. - # IPArray handles comparisons between arrays of addresses, and NumPy - # handles broadcasting. - net_lo = type(self)([other.network_address]) - net_hi = type(self)([other.broadcast_address]) - - return (net_lo <= self) & (self <= net_hi) - - def _isin_addresses(self, other): - """Check whether elements of self are present in other.""" - from pandas.core.algorithms import isin - # TODO(factorize): replace this - return isin(self, other) - - # ------------------------------------------------------------------------ - # IP Specific - # ------------------------------------------------------------------------ - - @property - def is_ipv4(self): - """Indicator for whether each address fits in the IPv4 space.""" - # TODO: NA should be NA - ips = self.data - return (ips['hi'] == 0) & (ips['lo'] < _U8_MAX) - - @property - def is_ipv6(self): - """Indicator for whether each address requires IPv6.""" - ips = self.data - return (ips['hi'] > 0) | (ips['lo'] > _U8_MAX) - - @property - def version(self): - """IP version (4 or 6).""" - return np.where(self.is_ipv4, 4, 6) - - @property - def is_multicast(self): - """Indiciator for whether each address is multicast.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_multicast for ip in pyips]) - - @property - def is_private(self): - """Indiciator for whether each address is private.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_private for ip in pyips]) - - @property - def is_global(self): - """Indiciator for whether each address is global.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_global for ip in pyips]) - - @property - def is_unspecified(self): - """Indiciator for whether each address is unspecified.""" - pyips = self.to_pyipaddress() - 
return np.array([ip.is_unspecified for ip in pyips]) - - @property - def is_reserved(self): - """Indiciator for whether each address is reserved.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_reserved for ip in pyips]) - - @property - def is_loopback(self): - """Indiciator for whether each address is loopback.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_loopback for ip in pyips]) - - @property - def is_link_local(self): - """Indiciator for whether each address is link local.""" - pyips = self.to_pyipaddress() - return np.array([ip.is_link_local for ip in pyips]) - - @property - def packed(self): - """Bytestring of the IP addresses - - Each address takes 16 bytes. IPv4 addresses are prefixed - by zeros. - """ - # TODO: I wonder if that should be post-fixed by 0s. - return self.data.tobytes() - - def _apply_mask(self, op, v4_prefixlen, v6_prefixlen): - """Apply a netmask or hostmask""" - self = self.copy() - is_v4 = self.is_ipv4 - v4_net = getattr( - ipaddress.ip_network(u'0.0.0.0/{}'.format(v4_prefixlen)), - op) - v4_mask = IPArray([v4_net]) - self.data[is_v4] = v4_mask.data - - v6_net = getattr( - ipaddress.ip_network(u'0::0/{}'.format(v6_prefixlen)), - op) - v6_mask = IPArray([v6_net]) - self.data[~is_v4] = v6_mask.data - return self - - def netmask(self, v4_prefixlen=32, v6_prefixlen=128): - """Compute an array of netmasks for an array of IP addresses. - - Note that this is a method, rather than a property, to support - taking `v4_prefixlen` and `v6_prefixlen` as arguments. 
- - Parameters - ---------- - v4_prefixlen : int, default 32 - Length of the network prefix, in bits, for IPv4 addresses - v6_prefixlen : int, default 128 - Lnegth of the network prefix, in bits, for IPv6 addresses - - Returns - ------- - IPArray - - See Also - -------- - IPArray.hostmask - - Examples - -------- - >>> arr = ip.IPArray(['192.0.0.0', '1:1::']) - >>> arr.netmask(v4_prefixlen=16, v6_prefixlen=32) - IPArray(['255.255.0.0', 'ffff:ffff::']) - """ - return self._apply_mask('netmask', v4_prefixlen, v6_prefixlen) - - def hostmask(self, v4_prefixlen=32, v6_prefixlen=128): - """Compute an array of hostmasks for an array of IP addresses. - - Parameters - ---------- - v4_prefixlen : int, default 32 - Length of the network prefix, in bits, for IPv4 addresses - v6_prefixlen : int, default 128 - Lnegth of the network prefix, in bits, for IPv6 addresses - - Returns - ------- - IPArray - - See Also - -------- - IPArray.netmask - - Examples - -------- - >>> arr = ip.IPArray(['192.0.0.0', '1:1::']) - >>> arr.netmask(v4_prefixlen=16, v6_prefixlen=32) - IPArray(['0.0.255.255', '::ffff:ffff:ffff:ffff:ffff:ffff']) - """ - return self._apply_mask('hostmask', v4_prefixlen, v6_prefixlen) - - def mask(self, mask): - """Apply a host or subnet mask. 
- - Parameters - ---------- - mask : IPArray - The host or subnet mask to be applied - - Returns - ------- - masked : IPArray - - See Also - -------- - netmask - hostmask - - Examples - -------- - >>> arr = IPArray(['216.003.128.12', '192.168.100.1']) - >>> mask = arr.netmask(v4_prefixlen=24) - >>> mask - IPArray(['255.255.255.0', '255.255.255.0']) - >>> arr.mask(mask) - IPArray(['216.3.128.0', '192.168.100.0']) - """ - mask = type(self)(mask) - a = self._as_u8 - b = mask._as_u8 - masked = np.bitwise_and(a, b).ravel().view(self.dtype._record_type) - return type(self)(masked) - - -# ----------------------------------------------------------------------------- -# Accessor -# ----------------------------------------------------------------------------- - - -@pd.api.extensions.register_series_accessor("ip") -class IPAccessor: - - is_ipv4 = DelegatedProperty("is_ipv4") - is_ipv6 = DelegatedProperty("is_ipv6") - version = DelegatedProperty("version") - is_multicast = DelegatedProperty("is_multicast") - is_private = DelegatedProperty("is_private") - is_global = DelegatedProperty("is_global") - is_unspecified = DelegatedProperty("is_unspecified") - is_reserved = DelegatedProperty("is_reserved") - is_loopback = DelegatedProperty("is_loopback") - is_link_local = DelegatedProperty("is_link_local") - - isna = DelegatedMethod("isna") - to_pyints = DelegatedMethod("to_pyints") - - def __init__(self, obj): - self._validate(obj) - self._data = obj.values - self._index = obj.index - self._name = obj.name - - @staticmethod - def _validate(obj): - if not is_ipaddress_type(obj): - raise AttributeError("Cannot use 'ip' accessor on objects of " - "dtype '{}'.".format(obj.dtype)) - - def isin(self, other): - return delegated_method(self._data.isin, self._index, - self._name, other) - - def netmask(self, v4_prefixlen=32, v6_prefixlen=128): - return delegated_method(self._data.netmask, self._index, - self._name, v4_prefixlen, v6_prefixlen) - - def hostmask(self, v4_prefixlen=32, 
v6_prefixlen=128): - return delegated_method(self._data.hostmask, self._index, - self._name, v4_prefixlen, v6_prefixlen) - - def mask(self, other): - return delegated_method(self._data.mask, self._index, self._name, - other) - - -def is_ipaddress_type(obj): - t = getattr(obj, 'dtype', obj) - try: - return isinstance(t, IPType) or issubclass(t, IPType) - except Exception: - return False diff --git a/cyberpandas/ip_methods.py b/cyberpandas/ip_methods.py deleted file mode 100644 index 98644e5..0000000 --- a/cyberpandas/ip_methods.py +++ /dev/null @@ -1,69 +0,0 @@ -import ipaddress - -import numpy as np -import six - -from .ip_array import IPArray -from .common import _U8_MAX - - -def _as_int(ip): - if isinstance(ip, six.string_types): - ip = ipaddress.ip_address(ip) - return int(ip) - - -def _crosses_boundary(lo, hi): - return (lo <= _U8_MAX) == (hi <= _U8_MAX) - - -def ip_range(start=None, stop=None, step=None): - """Generate a range of IP Addresses - - Parameters - ---------- - start : int, str, IPv4Address, or IPv6Address, optional - Start of interval. The interval includes this value. The default - start value is 0. - start : int, str, IPv4Address, or IPv6Address, optional - End of interval. The interval does not include this value. - step : int, optional - Spacing between values. For any output `out`, this is the distance - between two adjacent values, ``out[i+1] - out[i]``. The default - step size is 1. If `step` is specified as a position argument, - `start` must also be given. - - Returns - ------- - IPArray - - Notes - ----- - Performance will worsen if either of `start` or `stop` are larger than - 2**64. 
- - Examples - -------- - From integers - - >>> ip_range(1, 5) - IPArray(['0.0.0.1', '0.0.0.2', '0.0.0.3', '0.0.0.4']) - - Or strings - - >>> ip_range('0.0.0.1', '0.0.0.5') - IPArray(['0.0.0.1', '0.0.0.2', '0.0.0.3', '0.0.0.4']) - - Or `ipaddress` objects - - >>> ip_range(ipaddress.IPv4Address(1), ipaddress.IPv4Address(5)) - IPArray(['0.0.0.1', '0.0.0.2', '0.0.0.3', '0.0.0.4']) - """ - if start is not None: - start = _as_int(start) - if stop is not None: - stop = _as_int(stop) - if step is not None: - step = _as_int(step) - arr = IPArray(np.arange(start, stop, step)) - return arr diff --git a/cyberpandas/mac_array.py b/cyberpandas/mac_array.py deleted file mode 100644 index 183b4a5..0000000 --- a/cyberpandas/mac_array.py +++ /dev/null @@ -1,160 +0,0 @@ -from collections import Iterable - -import numpy as np -import six - -from pandas.api.extensions import ( - ExtensionDtype, take, register_extension_dtype) - -from .base import NumPyBackedExtensionArrayMixin - - -@register_extension_dtype -class MACType(ExtensionDtype): - """Dtype for MAC Address Data.""" - name = 'mac' - # type is long for Py2 and int for py3 - type = six.integer_types[-1] - kind = 'u' - na_value = 0 # TODO: Check this. - - @classmethod - def construct_from_string(cls, string): - if string == cls.name: - return cls() - else: - raise TypeError("Cannot construct a '{}' from " - "'{}'".format(cls, string)) - - @classmethod - def construct_array_type(cls): - return MACArray - - -class MACArray(NumPyBackedExtensionArrayMixin): - """Array for MAC Address data. - - * https://en.wikipedia.org/wiki/MAC_address - * https://tools.ietf.org/html/rfc5342 - """ - # What type(s) do we support? - # MAC-48 or EUI-64? 
- _dtype = MACType() - _itemsize = 8 - ndim = 1 - can_hold_na = True - - def __init__(self, values, copy=True, dtype=None): - # TODO: parse hex / strings - self.data = np.array(values, dtype='uint64', copy=copy) - if isinstance(dtype, str): - MACType.construct_array_type(dtype) - elif dtype: - assert isinstance(dtype, MACType) - - @classmethod - def _from_ndarray(cls, data, copy=False): - return cls(data, copy=copy) - - @property - def na_value(self): - return self.dtype.na_value - - def __repr__(self): - formatted = self._format_values() - return "MACArray({!r})".format(formatted) - - def _format_values(self): - return [_format(x) for x in self.data] - - @staticmethod - def _box_scalar(scalar): - return scalar - - def __setitem__(self, key, value): - value = to_macaddress(value) - self.data[key] = value - - def __iter__(self): - return iter(self.data.tolist()) - - def __lt__(self, other): - return self.data < other - - def __le__(self, other): - return self.data <= other - - def __eq__(self, other): - return self.data == other - - def __ge__(self, other): - return other <= self - - def __gt__(self, other): - return other < self - - def equals(self, other): - if not isinstance(other, type(self)): - raise TypeError - return (self.data == other.data).all() - - def _values_for_factorize(self): - # Should hit pandas' UInt64Hashtable - return self, 0 - - def isna(self): - return (self.data == 0) - - @property - def _parser(self): - return lambda x: x - - def take(self, indexer, allow_fill=False, fill_value=None): - if fill_value is None: - fill_value = 0 - took = take(self.data, indexer, allow_fill=allow_fill, - fill_value=fill_value) - return type(self)(took) - - def _formatting_values(self): - return np.array(self._format_values(), dtype='object') - - @classmethod - def _concat_same_type(cls, to_concat): - return cls(np.concatenate([array.data for array in to_concat])) - - def take_nd(self, indexer, allow_fill=True, fill_value=None): - return self.take(indexer, 
allow_fill=allow_fill, fill_value=fill_value) - - def copy(self, deep=False): - return type(self)(self.data.copy()) - - def astype(self, dtype, copy=True): - if isinstance(dtype, type(self.dtype)): - if copy: - self = self.copy() - return self - return super().astype(dtype, copy) - - -def _format(mac): - # https://stackoverflow.com/a/36883363/1889400 - mac_hex = "{:012x}".format(mac) - mac_str = ":".join(mac_hex[i:i+2] for i in range(0, len(mac_hex), 2)) - return mac_str - - -def _parse(mac): - # https://stackoverflow.com/a/36883363/1889400 - mac_int = int(mac.replace(":", "").replace("-", ""), 16) - return mac_int - - -def to_macaddress(addresses): - if (isinstance(addresses, six.string_types) or - not isinstance(addresses, Iterable)): - addresses = [addresses] - - addresses = [_parse(mac) if isinstance(mac, six.string_types) else mac - for mac in addresses] - return np.array(addresses, dtype='u8') diff --git a/cyberpandas/parser.py b/cyberpandas/parser.py deleted file mode 100644 index 215381b..0000000 --- a/cyberpandas/parser.py +++ /dev/null @@ -1,110 +0,0 @@ -import ipaddress - -import numpy as np -from pandas.api.types import is_list_like - -from ._utils import pack, unpack - - -def to_ipaddress(values): - """Convert values to IPArray - - Parameters - ---------- - values : int, str, bytes, or sequence of those - - Returns - ------- - addresses : IPArray - - Examples - -------- - Parse strings - >>> to_ipaddress(['192.168.1.1', - ... '2001:0db8:85a3:0000:0000:8a2e:0370:7334']) - - - Or integers - >>> to_ipaddress([3232235777, - 42540766452641154071740215577757643572]) - - - Or packed binary representations - >>> to_ipaddress([b'\xc0\xa8\x01\x01', - b' \x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4']) - - """ - from . 
import IPArray - - if not is_list_like(values): - values = [values] - - return IPArray(_to_ip_array(values)) - - -def _to_ip_array(values): - from .ip_array import IPType, IPArray - - if isinstance(values, IPArray): - return values.data - - if (isinstance(values, np.ndarray) and - values.ndim == 1 and - np.issubdtype(values.dtype, np.integer)): - # We assume we're given the low bits here. - values = values.astype("u8") - values = np.asarray(values, dtype=IPType._record_type) - values['hi'] = 0 - - elif not (isinstance(values, np.ndarray) and - values.dtype == IPType._record_type): - values = _to_int_pairs(values) - return np.atleast_1d(np.asarray(values, dtype=IPType._record_type)) - - -def _to_int_pairs(values): - if isinstance(values, (str, bytes, int)): - values = ipaddress.ip_address(values)._ip - return unpack(pack(values)) - elif isinstance(values, np.ndarray) and values.dtype != object: - if values.ndim != 2: - raise ValueError("'values' should be a 2-D when passing a " - "NumPy array.") - elif isinstance(values, tuple) and len(values) == 2: - # like IPArray((0, 0)) - # which isn't IPArray([0, 0]) - pass - elif all(isinstance(x, tuple) for x in values): - # TODO: not great - pass - else: - values = [ipaddress.ip_address(v)._ip for v in values] - values = [unpack(pack(v)) for v in values] - return values - - -def _to_ipaddress_pyint(values): - from .ip_array import IPType - - values2 = [unpack(pack(x)) for x in values] - return np.atleast_1d(np.asarray(values2, dtype=IPType._record_type)) - - -def _as_ip_object(val): - """Attempt to parse 'val' as any IP object. 
- - Attempts to parse as these in order: - - - IP Address (v4 or v6) - - IP Network (v4 or v6) - """ - try: - return ipaddress.ip_address(val) - except ValueError: - pass - - try: - return ipaddress.ip_network(val) - except ValueError: - raise ValueError("Could not parse {} is an address or " - "network".format(val)) diff --git a/cyberpandas/__init__.py b/moneypandas/__init__.py similarity index 53% rename from cyberpandas/__init__.py rename to moneypandas/__init__.py index 710b1b2..ee05c55 100644 --- a/cyberpandas/__init__.py +++ b/moneypandas/__init__.py @@ -1,13 +1,11 @@ """Custom IP address dtype / block for pandas""" -from .ip_array import ( - IPType, - IPArray, - IPAccessor, +from .money_array import ( + MoneyType, + MoneyArray, + MoneyAccessor, ) -from .ip_methods import ip_range -from .parser import to_ipaddress -from .mac_array import MACType, MACArray +from .parser import to_money from pkg_resources import get_distribution, DistributionNotFound try: @@ -22,11 +20,8 @@ __all__ = [ '__version__', - 'IPAccessor', - 'IPArray', - 'IPType', - 'MACArray', - 'MACType', - 'ip_range', - 'to_ipaddress', + 'MoneyAccessor', + 'MoneyArray', + 'MoneyType', + 'to_money', ] diff --git a/cyberpandas/_accessor.py b/moneypandas/_accessor.py similarity index 100% rename from cyberpandas/_accessor.py rename to moneypandas/_accessor.py diff --git a/cyberpandas/base.py b/moneypandas/base.py similarity index 96% rename from cyberpandas/base.py rename to moneypandas/base.py index ffe512e..9569334 100644 --- a/cyberpandas/base.py +++ b/moneypandas/base.py @@ -8,7 +8,7 @@ class NumPyBackedExtensionArrayMixin(ExtensionArray): @property def dtype(self): - """The dtype for this extension array, IPType""" + """The dtype for this extension array, MoneyType""" return self._dtype @classmethod diff --git a/moneypandas/dtypes.py b/moneypandas/dtypes.py new file mode 100644 index 0000000..68518b4 --- /dev/null +++ b/moneypandas/dtypes.py @@ -0,0 +1,35 @@ +import re +import numpy as np + 
+symbols = { + '£': 'GBP', + '$': 'USD', + '€': 'EUR', + '¥': 'JPY', + '₹': 'INR' +} +money_patterns = [(re.compile(r[0]), r[1]) for r in [ + ( + r'(-?)([' + ''.join(symbols) + r'])(\d*\.?\d*\d)', # -£123.00 + lambda m: (np.float64(m.group(1) + m.group(3)), symbols[m.group(2)]) + ), + ( + r'([A-Z]{3})\s*(-?\d*\.?\d*\d)', # EUR 123 + lambda m: (np.float64(m.group(2)), m.group(1)) + ), + ( + r'(-?\d*\.?\d*\d)\s*([A-Z]{3})', # 97GBP + lambda m: (np.float64(m.group(1)), m.group(2)) + ), +]] + +def is_money(value): + # TODO: Better detection + if isinstance(value, str): + return any([r[0].match(value) for r in money_patterns]) + elif isinstance(value, bytes): + pass + elif isinstance(value, int): + return True + else: + return False diff --git a/moneypandas/money_array.py b/moneypandas/money_array.py new file mode 100644 index 0000000..a03bac3 --- /dev/null +++ b/moneypandas/money_array.py @@ -0,0 +1,430 @@ +import abc +import decimal +import collections + +import six +import numpy as np +import pandas as pd +import money +from pandas.api.extensions import ExtensionDtype + +from ._accessor import (DelegatedMethod, DelegatedProperty, + delegated_method) +from .base import NumPyBackedExtensionArrayMixin +from .parser import _as_money_object +import re + +# ----------------------------------------------------------------------------- +# Extension Type +# ----------------------------------------------------------------------------- + +@pd.api.extensions.register_extension_dtype +class MoneyType(ExtensionDtype): + name = 'money' + na_value = np.nan + type = money.XMoney + kind = 'O' + default_money_code = None + _record_type = np.dtype([('va', np.float64), ('cu', 'U3')]) + _record_na_value = (0, '') + + def __init__(self, *args, default_money_code=None, **kwargs): + self.default_money_code = default_money_code + + super(MoneyType, self).__init__(*args, **kwargs) + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + match 
= re.match(cls.name + r'\[([A-Z]{3})\]', string) + + if match: + default_money_code = match.group(0) + return cls(default_money_code=default_money_code) + + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def construct_array_type(cls): + return MoneyArray + + +# ----------------------------------------------------------------------------- +# Extension Container +# ----------------------------------------------------------------------------- + + +class MoneyArray(NumPyBackedExtensionArrayMixin): + """Holder for Money Amounts. + + MoneyArray is a container for Money Amounts. It satisfies pandas' + extension array interface, and so can be stored inside + :class:`pandas.Series` and :class:`pandas.DataFrame`. + + See :ref:`usage` for more. + """ + __array_priority__ = 1000 + _dtype = MoneyType() + _itemsize = 20 + ndim = 1 + can_hold_na = True + default_money_code = None + + def __init__(self, values, default_money_code=None, dtype=None, copy=False): + from .parser import _to_money_array + + # TODO: copy + if dtype and dtype != self.dtype: + raise TypeError("Can only construct MoneyArray with underlying (f64, U3) not {}".format(dtype)) + + values, self.default_money_code = _to_money_array(values, default_money_code=default_money_code) # TODO: avoid potential copy + # TODO: dtype? + if copy: + values = values.copy() + self.data = values + + def to_decimals(self, money_code=None): + r"""Create a list of decimals from an ISO4712 code, attempting conversion with XMoney where necessary. 
+ + Parameters + ---------- + money_code : ISO4712 3-letter currency code + + Returns + ------- + list of decimals + + Examples + -------- + >>> arr = MoneyArray([10, 20], 'GBP') + >>> values = arr.to_decimals('GBP') + >>> values + [10, 20] + + See Also + -------- + to_bytes + """ + + if not money_code: + money_code = self.default_money_code + if not money_code: + codes = {c['cu'] for c in self.data if c['cu']} + if len(codes) != 1: + raise TypeError("Cannot output mixed-currency monies as decimal " + "without either a target or default currency") + money_code = codes[0] + + mask = self.isna() + same = (self.data['cu'] == money_code) | mask + decimalize = np.vectorize(decimal.Decimal) + result = decimalize(self.data['va']) + for i, ceq in enumerate(same): + if not ceq: + result[i] = money.XMoney(*self.data[i]).to(money_code) + + return result + + @classmethod + def from_bytes(cls, bytestring): + r"""Create a MoneyArray from a bytestring. + + Parameters + ---------- + bytestring : bytes + Note that bytestring is a Python 3-style string of bytes + + Returns + ------- + MoneyArray + + Examples + -------- + >>> arr = MoneyArray([10, 20]) + >>> buf = arr.to_bytes() + >>> buf + b'\x00\x00\...x00\x02' + >>> MoneyArray.from_bytes(buf) + MoneyArray(['10GBP', '10GBP']) + + See Also + -------- + to_bytes + """ + data = np.frombuffer(bytestring, dtype=MoneyType._record_type) + return cls._from_ndarray(data) + + @classmethod + def _from_ndarray(cls, data, copy=False): + """Zero-copy construction of an MoneyArray from an ndarray. + + Parameters + ---------- + data : ndarray + This should have MoneyType._record_type dtype + copy : bool, default False + Whether to copy the data. 
+ + Returns + ------- + ExtensionArray + """ + if copy: + data = data.copy() + new = MoneyArray([]) + new.data = data + return new + + # ------------------------------------------------------------------------- + # Properties + # ------------------------------------------------------------------------- + # With thanks to the pandas docs + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + if fill_value is self.dtype.na_value: + fill_value = self.dtype._record_na_value + + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + + indices = np.asarray(indices) + if allow_fill: + mask = (indices == -1) + if not len(self): + if not (indices == -1).all(): + msg = "Invalid take for empty array. Must be all -1." + raise IndexError(msg) + else: + # all NA take from and empty array + result = np.zeros(len(indices), dtype=self.dtype._record_type) + result.fill(fill_value) + return self._from_ndarray(result) + if (np.asarray(indices) < -1).any(): + msg = ("Invalid value in 'indices'. 
Must be all >= -1 " + "for 'allow_fill=True'") + raise ValueError(msg) + + result = take(self.data, indices, allow_fill=False) + + if allow_fill: + result[mask] = fill_value + + return self._from_sequence(result, dtype=self.dtype, default_money_code=self.default_money_code) + + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False, default_money_code=None): + return cls(scalars, dtype=dtype, copy=copy, default_money_code=default_money_code) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False, default_money_code=None): + return cls(strings, dtype=dtype, copy=copy, default_money_code=default_money_code) + + def isna(self): + return self.data['cu'] == '' + + # ------------------------------------------------------------------------- + # Interfaces + # ------------------------------------------------------------------------- + + def __repr__(self): + rep = super(MoneyArray, self).__repr__() + + money_code = self.default_money_code + if money_code: + class_name = self.__class__.__name__ + rep = rep.replace(class_name, f"{class_name}[{money_code}]") + + return rep + + @staticmethod + def _box_scalar(scalar): + if scalar == (0, ''): + return np.nan + elif type(scalar) is tuple: + return money.XMoney(scalar[0], scalar[1]) + return money.XMoney(scalar['va'], scalar['cu']) + + @property + def _parser(self): + from .parser import to_money + return lambda val: to_money(val, default_money_code=self.default_money_code) + + def __setitem__(self, key, value): + from .parser import to_money + + value = to_money(value, default_money_code=self.default_money_code).data + self.data[key] = value + + def __iter__(self): + return iter(self.to_pymoney()) + + # ------------------------------------------------------------------------ + # Serializaiton / Export + # ------------------------------------------------------------------------ + + def to_pymoney(self): + """Convert the array to a list of scalar Money objects. 
+ + Returns + ------- + addresses : List + Each element of the list will be a :class:`money.XMoney` or np.nan + + See Also + -------- + + Examples + --------- + >>> MoneyArray(['120 EUR', '127 USD']).to_pymoney() + [XMoney('120', 'EUR'), XMoney('127', 'USD')] + """ + return [money.XMoney(x['va'], x['cu']) if x['cu'] else np.nan for x in self.data] + + def to_bytes(self): + r"""Serialize the MoneyArray as a Python bytestring. + + This and :meth:MoneyArray.from_bytes is the fastest way to roundtrip + serialize and de-serialize a MoneyArray. + + See Also + -------- + MoneyArray.from_bytes + + Examples + -------- + >>> arr = MoneyArray('GBP', [10, 20]) + >>> arr.to_bytes() + b'\x00\x00\...x00\x02' + """ + return self.data.tobytes() + + def astype(self, dtype, copy=True): + if isinstance(dtype, MoneyType): + if copy: + self = self.copy() + return self + return super(MoneyArray, self).astype(dtype) + + # ------------------------------------------------------------------------ + # Ops + # ------------------------------------------------------------------------ + + def __eq__(self, other): + # Currently, this does not account for exchange, unlike other comparators + if not isinstance(other, MoneyArray): + return NotImplemented + mask = self.isna() | other.isna() + result = self.data == other.data + result[mask] = False + return result + + def __lt__(self, other): + if not isinstance(other, MoneyArray): + return NotImplemented + mask = self.isna() | other.isna() + same = (self.data['cu'] == other.data['cu']) | mask + result = (self.data['va'] < other.data['va']) + for i, ceq in enumerate(same): + if not ceq: + result[i] = money.XMoney(*self.data[i]) < money.XMoney(*self.other[i]) + + result[mask] = False + return result + + def __le__(self, other): + if not isinstance(other, MoneyArray): + return NotImplemented + mask = self.isna() | other.isna() + same = (self.data['cu'] == other.data['cu']) | mask + result = (self.data['va'] < other.data['va']) + for i, ceq in 
enumerate(same): + if not ceq: + result[i] = money.XMoney(*self.data[i]) < money.XMoney(*self.other[i]) + + result[mask] = False + return result + + def __gt__(self, other): + if not isinstance(other, MoneyArray): + return NotImplemented + mask = self.isna() | other.isna() + same = (self.data['cu'] == other.data['cu']) | mask + result = (self.data['va'] > other.data['va']) + for i, ceq in enumerate(same): + if not ceq: + result[i] = money.XMoney(*self.data[i]) > money.XMoney(*self.other[i]) + + result[mask] = False + return result + + def __ge__(self, other): + if not isinstance(other, MoneyArray): + return NotImplemented + mask = self.isna() | other.isna() + same = (self.data['cu'] == other.data['cu']) | mask + result = (self.data['va'] >= other.data['va']) + for i, ceq in enumerate(same): + if not ceq: + result[i] = money.XMoney(*self.data[i]) < money.XMoney(*self.other[i]) + + result[mask] = False + return result + + def equals(self, other): + if not isinstance(other, MoneyArray): + raise TypeError("Cannot compare 'MoneyArray' " + "to type '{}'".format(type(other))) + # TODO: missing + return (self.data == other.data).all() + + _formatting_values = None + def _formatter(self, boxed=False): + def fmt(x): + if isinstance(x, money.XMoney): + return str(x) + elif not x: + return "NA" + return fmt + + def _values_for_factorize(self): + return self.astype(object), (0, '') + + +# ----------------------------------------------------------------------------- +# Accessor +# ----------------------------------------------------------------------------- + + +@pd.api.extensions.register_series_accessor("money") +class MoneyAccessor: + + isna = DelegatedMethod("isna") + + def __init__(self, obj): + self._validate(obj) + self._data = obj.values + self._index = obj.index + self._name = obj.name + + @staticmethod + def _validate(obj): + if not is_money_type(obj): + raise AttributeError("Cannot use 'money' accessor on objects of " + "dtype '{}'.".format(obj.dtype)) + + +def 
is_money_type(obj): + t = getattr(obj, 'dtype', obj) + try: + return isinstance(t, MoneyType) or issubclass(t, MoneyType) + except Exception: + return False diff --git a/moneypandas/parser.py b/moneypandas/parser.py new file mode 100644 index 0000000..fb3ed98 --- /dev/null +++ b/moneypandas/parser.py @@ -0,0 +1,104 @@ +import money + +import numpy as np +from pandas.api.types import is_list_like +from .dtypes import money_patterns + + +def to_money(values, default_money_code=None): + """Convert values to MoneyArray + + Parameters + ---------- + values : int, str, bytes, or sequence of those + + Returns + ------- + addresses : MoneyArray + + Examples + -------- + Parse strings + >>> to_money(['£128', + ... '129 EUR']) + + + Or integers + >>> to_money([128, 131], default_money_code='GBP') + + """ + from . import MoneyArray + + if not is_list_like(values): + values = [values] + + values, default_money_code = _to_money_array(values, default_money_code=default_money_code) + return MoneyArray( + values, + default_money_code=default_money_code + ) + + +def _to_money_array(values, default_money_code=None): + from .money_array import MoneyType, MoneyArray + + if isinstance(values, MoneyArray): + if values.default_money_code: + default_money_code = default_money_code + return values.data, default_money_code + + values = [_as_money_object(v, default_money_code) for v in values] + + return np.atleast_1d(np.asarray(values, dtype=MoneyType._record_type)), default_money_code + + + + +def _as_money_object(val, default_money_code=None): + """Attempt to parse 'val' as any Money object. 
+ + """ + + from .money_array import MoneyType + + cu, va = None, None + + if isinstance(val, np.void): + cu = val['cu'] + va = val['va'] + elif val in (None, '', np.nan): + cu = '' + va = 0 + elif isinstance(val, money.Money): + cu = val.currency + va = np.float64(val.amount) + elif isinstance(val, str): + for r, extract in money_patterns: + m = r.match(val) + if m: + va, cu = extract(m) + elif is_list_like(val) and len(val) == 2: + try: + va = np.float64(val[0]) + cu = str(val[1]) + except TypeError: + pass + elif default_money_code: + cu = default_money_code + try: + va = np.float64(val) + except: + pass + + if cu is not None and va is not None: + return va, cu + + if cu is None: + try: + va = np.float64(val) + except TypeError: + pass + else: + raise ValueError("Currency code is not available, so cannot convert {} - have you set a default?".format(val)) + + raise ValueError("Could not parse {} as money".format(val)) diff --git a/setup.py b/setup.py index d18439e..1970012 100644 --- a/setup.py +++ b/setup.py @@ -10,28 +10,31 @@ install_requires = [ "pandas>=0.23.0", "six", - "ipaddress; python_version < '3'" + "money" +] +tests_require = install_requires + [ + "pytest", + "hypothesis" ] setup( - name='cyberpandas', + name='moneypandas', use_scm_version=True, setup_requires=['setuptools_scm'], - description='IP Address type for pandas', + description='Money type for pandas', long_description=long_description, long_description_content_type="text/markdown", - url='https://github.com/ContinuumIO/cyberpandas', - author='Tom Augspurger', - author_email='tom.w.augspurger@gmail.com', + url='https://github.com/flaxandteal/moneypandas', + author='Phil Weir (moneypandas tweaks), Tom Augspurger (cyberpandas)', + author_email='phil.weir@flaxandteal.co.uk', license="BSD", classifiers=[ # Optional 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming 
Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], packages=find_packages(), install_requires=install_requires, + tests_require=tests_require ) diff --git a/tests/Pipfile b/tests/Pipfile new file mode 100644 index 0000000..7a9e19a --- /dev/null +++ b/tests/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.6" diff --git a/tests/ip/__init__.py b/tests/ip/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/ip/test_dtypes.py b/tests/ip/test_dtypes.py deleted file mode 100644 index 6ad15ed..0000000 --- a/tests/ip/test_dtypes.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest - -from cyberpandas import dtypes - - -@pytest.mark.parametrize('value', [ - "192.168.0.1", - 3232235521, -]) -@pytest.mark.xfail -def test_is_ipv4(value): - assert dtypes.is_ipv4(value) - - -@pytest.mark.parametrize('value', [ - '123.123.1', - 2 ** 32, -]) -def test_is_not_ipv4(value): - assert not dtypes.is_ipv4(value) diff --git a/tests/ip/test_ip.py b/tests/ip/test_ip.py deleted file mode 100644 index 9a06a36..0000000 --- a/tests/ip/test_ip.py +++ /dev/null @@ -1,396 +0,0 @@ -import ipaddress -import operator - -import pytest -import six -from hypothesis.strategies import integers, lists, tuples -from hypothesis import given, example - -import numpy as np -import numpy.testing as npt -import pandas as pd -import cyberpandas as ip -import pandas.util.testing as tm -from cyberpandas.common import _U8_MAX - - -def test_make_container(): - values = ip.IPArray.from_pyints([1, 2, 3]) - npt.assert_array_equal( - values.data, - np.array([(0, 1), - (0, 2), - (0, 3)], dtype=values.dtype._record_type) - ) - - -def test_repr_works(): - values = ip.IPArray.from_pyints([0, 1, 2, 3, 2**32, 2**64 + 1]) - result = repr(values) - if six.PY2: - expected = ("IPArray([u'0.0.0.0', u'0.0.0.1', u'0.0.0.2', u'0.0.0.3', " - "u'::1:0:0', u'::1:0:0:0:1'])") - 
else: - expected = ("IPArray(['0.0.0.0', '0.0.0.1', '0.0.0.2', '0.0.0.3', " - "'::1:0:0', '::1:0:0:0:1'])") - assert result == expected - - -def test_isna(): - v = ip.IPArray.from_pyints([0, 2, 2 ** 64, 2 ** 64 + 1, 2 ** 64 + 2]) - r1 = v.isna() - r2 = pd.isna(v) - expected = np.array([True, False, False, False, False]) - - np.testing.assert_array_equal(r1, expected) - np.testing.assert_array_equal(r2, expected) - - -def test_array(): - v = ip.IPArray.from_pyints([1, 2, 3]) - result = np.array(v) - expected = np.array([ - ipaddress.IPv4Address(1), - ipaddress.IPv4Address(2), - ipaddress.IPv4Address(3), - ]) - tm.assert_numpy_array_equal(result, expected) - - -def test_tolist(): - v = ip.IPArray.from_pyints([1, 2, 3]) - result = v.tolist() - expected = [(0, 1), (0, 2), (0, 3)] - assert result == expected - - -def test_to_pyipaddress(): - v = ip.IPArray.from_pyints([1, 2, 3]) - result = v.to_pyipaddress() - expected = [ - ipaddress.ip_address(1), - ipaddress.ip_address(2), - ipaddress.ip_address(3), - ] - assert result == expected - - -def test_isip(): - v = ip.to_ipaddress([ - u'192.168.1.1', - u'2001:0db8:85a3:0000:0000:8a2e:0370:7334', - ]) - result = v.is_ipv4 - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = v.is_ipv6 - expected = np.array([False, True]) - tm.assert_numpy_array_equal(result, expected) - - -def test_equality(): - v1 = ip.to_ipaddress([ - u'192.168.1.1', - u'2001:0db8:85a3:0000:0000:8a2e:0370:7334', - ]) - assert np.all(v1 == v1) - assert v1.equals(v1) - - v2 = ip.to_ipaddress([ - u'192.168.1.2', - u'2001:0db8:85a3:0000:0000:8a2e:0370:7334', - ]) - result = v1 == v2 - expected = np.array([False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = bool(v1.equals(v2)) - assert result is False - - with pytest.raises(TypeError): - v1.equals("a") - - -@pytest.mark.parametrize('op', [ - operator.lt, - operator.le, - operator.ge, - operator.gt, -]) -@pytest.mark.skipif(six.PY2, 
reason="Flexible comparisons") -def test_comparison_raises(op): - arr = ip.IPArray([0, 1, 2]) - with pytest.raises(TypeError): - op(arr, 'a') - - with pytest.raises(TypeError): - op('a', arr) - - -@given( - tuples( - lists(integers(min_value=0, max_value=2**128 - 1)), - lists(integers(min_value=0, max_value=2**128 - 1)) - ).filter(lambda x: len(x[0]) == len(x[1])) -) -@example((1, 1)) -@example((0, 0)) -@example((0, 1)) -@example((1, 0)) -@example((1, 2)) -@example((2, 1)) -@pytest.mark.skip(reason="Flaky") -def test_ops(tup): - a, b = tup - v1 = ip.IPArray(a) - v2 = ip.IPArray(b) - - r1 = v1 <= v2 - r2 = v2 >= v1 - tm.assert_numpy_array_equal(r1, r2) - - -@pytest.mark.xfail(reason='upstream') -def test_value_counts(): - x = ip.IPArray([0, 0, 1]) - result = x.value_counts() - assert len(result) - - -def test_iter_works(): - x = ip.IPArray([0, 1, 2]) - result = list(x) - expected = [ - ipaddress.IPv4Address(0), - ipaddress.IPv4Address(1), - ipaddress.IPv4Address(2), - ] - assert result == expected - - -def test_topyints(): - values = [0, 1, 2**32] - arr = ip.IPArray(values) - result = arr.to_pyints() - assert result == values - - -@pytest.mark.parametrize('prop', [ - 'version', - 'is_multicast', - 'is_private', - 'is_global', - 'is_unspecified', - 'is_reserved', - 'is_loopback', - 'is_link_local', -]) -def test_attributes(prop): - addrs = [ipaddress.ip_address(0), - ipaddress.ip_address(1)] - arr = ip.IPArray(addrs) - result = getattr(arr, prop) - expected = np.array([getattr(addr, prop) - for addr in addrs]) - tm.assert_numpy_array_equal(result, expected) - - -def test_isin_all4(): - s = ip.IPArray([u'192.168.1.1', u'255.255.255.255']) - result = s.isin([u'192.168.1.0/24']) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - -def test_isin_all6(): - s = ip.IPArray([u'2001:db8::1', u'2001:db9::1']) - result = s.isin([u'2001:db8::0/96']) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - -def 
test_isin_mix(): - s = ip.IPArray([u'192.168.1.1', u'255.255.255.255', - u'2001:db8::1', u'2001:db9::1']) - - result = s.isin([u'192.168.1.0/24']) - expected = np.array([True, False, False, False]) - tm.assert_numpy_array_equal(result, expected) - - result = s.isin([u'2001:db8::0/96']) - expected = np.array([False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = s.isin([u'192.168.1.0/24', u'2001:db8::0/96']) - expected = np.array([True, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - s = ip.IPArray([u'192.168.1.1', u'192.168.1.2', - u'255.255.255.255']) - result = s.isin([u'192.168.1.0/24']) - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - -def test_isin_iparray(): - s = ip.IPArray([10, 20, 20, 30]) - result = s.isin(ip.IPArray([30, 20])) - expected = np.array([False, True, True, True]) - tm.assert_numpy_array_equal(result, expected) - - -def test_getitem_scalar(): - ser = ip.IPArray([0, 1, 2]) - result = ser[1] - assert result == ipaddress.ip_address(1) - - -def test_getitem_slice(): - ser = ip.IPArray([0, 1, 2]) - result = ser[1:] - expected = ip.IPArray([1, 2]) - assert result.equals(expected) - - -@pytest.mark.parametrize('value', [ - u'0.0.0.10', - 10, - ipaddress.ip_address(10), -]) -def test_setitem_scalar(value): - ser = ip.IPArray([0, 1, 2]) - ser[1] = ipaddress.ip_address(value) - expected = ip.IPArray([0, 10, 2]) - assert ser.equals(expected) - - -def test_setitem_array(): - ser = ip.IPArray([0, 1, 2]) - ser[[1, 2]] = [10, 20] - expected = ip.IPArray([0, 10, 20]) - assert ser.equals(expected) - - -def test_bytes_roundtrip(): - arr = ip.IPArray([1, 2, 3, _U8_MAX + 10]) - bytestring = arr.to_bytes() - assert isinstance(bytestring, bytes) - - result = ip.IPArray.from_bytes(bytestring) - assert result.equals(arr) - - -def test_unique(): - arr = ip.IPArray([3, 3, 1, 2, 3, _U8_MAX + 1]) - result = arr.unique() - assert isinstance(result, ip.IPArray) - - 
result = result.astype(object) - expected = pd.unique(arr.astype(object)) - tm.assert_numpy_array_equal(result, expected) - - -def test_factorize(): - arr = ip.IPArray([3, 3, 1, 2, 3, _U8_MAX + 1]) - labels, uniques = arr.factorize() - expected_labels, expected_uniques = pd.factorize(arr.astype(object)) - - assert isinstance(uniques, ip.IPArray) - - uniques = uniques.astype(object) - tm.assert_numpy_array_equal(labels, expected_labels) - tm.assert_numpy_array_equal(uniques, expected_uniques) - - -@pytest.mark.parametrize('values', [ - [0, 1, 2], -]) -def test_from_ndarray(values): - result = ip.IPArray(np.asarray(values)) - expected = ip.IPArray(values) - assert result.equals(expected) - - -@pytest.mark.parametrize('start, stop, step, expected', [ - (1, 3, None, [1, 2]), - (u'0.0.0.1', u'0.0.0.3', None, [1, 2]), - (2**64 + 1, 2**64 + 3, None, [2**64 + 1, 2**64 + 2]), - (u'::1:0:0:0:1', u'::1:0:0:0:3', None, [2**64 + 1, 2**64 + 2]), - (2**64 - 1, 2**64 + 2, None, [2**64 - 1, 2**64, 2**64 + 1]), - (u'::ffff:ffff:ffff:ffff', u'::1:0:0:0:2', None, - [2**64 - 1, 2**64, 2**64 + 1]), - (1, 6, 2, [1, 3, 5]), - (u'0.0.0.1', u'0.0.0.6', u'0.0.0.2', [1, 3, 5]), -]) -def test_ip_range(start, stop, step, expected): - result = ip.ip_range(start, stop, step) - expected = ip.IPArray(expected) - assert result.equals(expected) - - -@pytest.mark.parametrize('addresses', [ - [u'0.0.0.0', u'192.168.1.1', u'::1:1:0:0:0:1'] -]) -@pytest.mark.parametrize('v4_prefixlen, v6_prefixlen', [ - (32, 128), - (24, 96), - (16, 64), - (8, 8), -]) -@pytest.mark.parametrize('op', ['netmask', 'hostmask']) -def test_mask(op, v4_prefixlen, v6_prefixlen, addresses): - is_v6 = [':' in x for x in addresses] - prefixes = [v6_prefixlen if v6 else v4_prefixlen for v6 in is_v6] - networks = [ - ipaddress.ip_network(u"{}/{}".format(addr, prefix), strict=False) - for addr, prefix in zip(addresses, prefixes) - ] - expected = [getattr(net, op) for net in networks] - call = operator.methodcaller(op, 
v4_prefixlen=v4_prefixlen, - v6_prefixlen=v6_prefixlen) - result = list(call(ip.IPArray(addresses))) - if op == 'hostmask': - # ipaddress will return an IPv6(0), which doesn't compare equal - # to an IPv4(0), our result. - expected = [int(x) for x in expected] - result = [int(x) for x in result] - - assert result == expected - - -def test_netmask_basic(): - arr = ip.IPArray([u'192.0.0.0', u'1:1::']) - result = arr.netmask(v4_prefixlen=16, v6_prefixlen=32) - expected = ip.IPArray([u'255.255.0.0', u'ffff:ffff::']) - assert result.equals(expected) - - result = pd.Series(arr, name='foo').ip.netmask(v4_prefixlen=16, - v6_prefixlen=32) - assert result.name == 'foo' - assert result.values.equals(expected) - - -def test_hostmask_basic(): - arr = ip.IPArray([u'192.0.0.0', u'1:1::']) - result = arr.hostmask(v4_prefixlen=16, v6_prefixlen=32) - expected = ip.IPArray([u'0.0.255.255', - u'::ffff:ffff:ffff:ffff:ffff:ffff']) - assert result.equals(expected) - - result = pd.Series(arr, name='foo').ip.hostmask(v4_prefixlen=16, - v6_prefixlen=32) - assert result.name == 'foo' - assert result.values.equals(expected) - - -def test_apply_mask(): - arr = ip.IPArray([u'216.3.128.0', u'192.168.100.0', u'1::1:12']) - mask = arr.netmask(v4_prefixlen=24, v6_prefixlen=112) - result = arr.mask(mask) - expected = ip.IPArray([u'216.3.128.0', u'192.168.100.0', u'1::1:0']) - assert result.equals(expected) - - result = pd.Series(arr, name='test').ip.mask(mask) - expected = pd.Series(expected, name='test') - tm.assert_series_equal(result, expected) diff --git a/tests/ip/test_parser.py b/tests/ip/test_parser.py deleted file mode 100644 index 40826aa..0000000 --- a/tests/ip/test_parser.py +++ /dev/null @@ -1,58 +0,0 @@ -import ipaddress - -import pytest - -from cyberpandas import parser, IPArray - - -@pytest.mark.parametrize('values', [ - [u'192.168.1.1', - u'2001:0db8:85a3:0000:0000:8a2e:0370:7334'], - [3232235777, - 42540766452641154071740215577757643572], - [b'\xc0\xa8\x01\x01', - b' 
\x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4'], -]) -def test_to_ipaddress(values): - result = parser.to_ipaddress(values) - expected = IPArray.from_pyints([ - 3232235777, - 42540766452641154071740215577757643572 - ]) - assert result.equals(expected) - - -def test_to_ipaddress_edge(): - ip_int = 2 ** 64 - result = parser.to_ipaddress([ip_int]).to_pyipaddress()[0] - assert int(result) == ip_int - - -def test_to_ipaddress_scalar(): - result = parser.to_ipaddress(1) - expected = parser.to_ipaddress([1]) - assert len(result) == 1 - assert all(result == expected) - - -@pytest.mark.parametrize('val, expected', [ - (u'192.168.1.1', ipaddress.IPv4Address(u'192.168.1.1')), - (100, ipaddress.IPv4Address(100)), - (ipaddress.IPv4Address(100), ipaddress.IPv4Address(100)), - (2**64, ipaddress.IPv6Address(2**64)), - (u'192.168.0.0/28', ipaddress.IPv4Network(u'192.168.0.0/28')), - (ipaddress.IPv4Network(u'192.168.0.0/28'), - ipaddress.IPv4Network(u'192.168.0.0/28')), - (u'2001:db00::0/24', ipaddress.IPv6Network(u'2001:db00::0/24')), -]) -def test_as_ip_object(val, expected): - result = parser._as_ip_object(val) - assert result == expected - - -@pytest.mark.parametrize("val", [ - u"129", -1 -]) -def test_as_ip_object_raises(val): - with pytest.raises(ValueError): - parser._as_ip_object(val) diff --git a/tests/mac/__init__.py b/tests/mac/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/mac/test_interface.py b/tests/mac/test_interface.py deleted file mode 100644 index dd84bbd..0000000 --- a/tests/mac/test_interface.py +++ /dev/null @@ -1,122 +0,0 @@ -import pytest - -from pandas.tests.extension import base - -from cyberpandas.mac_array import MACArray, MACType - - -@pytest.fixture -def dtype(): - return MACType() - - -@pytest.fixture -def data(): - return MACArray(list(range(100))) - - -@pytest.fixture -def data_missing(): - return MACArray([0, 1]) - - -@pytest.fixture(params=['data', 'data_missing']) -def all_data(request, data, data_missing): - 
"""Parametrized fixture giving 'data' and 'data_missing'""" - if request.param == 'data': - return data - elif request.param == 'data_missing': - return data_missing - - -@pytest.fixture -def data_for_sorting(): - return MACArray([10, 2 ** 64 - 1, 1]) - - -@pytest.fixture -def data_missing_for_sorting(): - return MACArray([2 ** 64 - 1, 0, 1]) - - -@pytest.fixture -def data_for_grouping(): - b = 1 - a = 2 ** 32 + 1 - c = 2 ** 32 + 10 - return MACArray([ - b, b, 0, 0, a, a, b, c - ]) - - -@pytest.fixture -def data_repeated(data): - def gen(count): - for _ in range(count): - yield data - return gen - - -@pytest.fixture -def na_cmp(): - """Binary operator for comparing NA values. - - Should return a function of two arguments that returns - True if both arguments are (scalar) NA for your type. - - By defult, uses ``operator.or`` - """ - return lambda x, y: int(x) == int(y) == 0 - - -@pytest.fixture -def na_value(): - return MACType.na_value - - -class TestDtype(base.BaseDtypeTests): - pass - - -class TestInterface(base.BaseInterfaceTests): - pass - - -class TestConstructors(base.BaseConstructorsTests): - pass - - -class TestReshaping(base.BaseReshapingTests): - @pytest.mark.skip(reason='Pandas inferrs us as int64.') - def test_concat_mixed_dtypes(self): - pass - - @pytest.mark.skip(reason="0 for null") - def test_stack(self): - pass - - @pytest.mark.skip(reason="0 for null") - def test_unstack(self): - pass - - -class TestGetitem(base.BaseGetitemTests): - pass - - -class TestMissing(base.BaseMissingTests): - pass - - -class TestMethods(base.BaseMethodsTests): - @pytest.mark.xfail(reason='upstream') - def test_value_counts(data, dropna): - pass - - @pytest.mark.skip(reason="buggy comparison") - def test_combine_le(self, data_repeated): - super().test_combine_le(data_repeated) - - @pytest.mark.skip(reason="TODO") - def test_hash_pandas_object_works(self): - pass diff --git a/tests/ip/test_interface.py b/tests/test_interface.py similarity index 64% rename from 
tests/ip/test_interface.py rename to tests/test_interface.py index a38e55f..bf8ca66 100644 --- a/tests/ip/test_interface.py +++ b/tests/test_interface.py @@ -1,22 +1,27 @@ import pytest +import pandas.util.testing as tm +import pandas as pd from pandas.tests.extension import base +from pandas.tests.extension.conftest import * +import numpy as np -import cyberpandas as ip +import moneypandas as mpd @pytest.fixture def dtype(): - return ip.IPType() + return mpd.MoneyType() @pytest.fixture def data(): - return ip.IPArray(list(range(100))) + ma = mpd.MoneyArray(list(range(1, 101)), 'USD') + return ma @pytest.fixture def data_missing(): - return ip.IPArray([0, 1]) + return mpd.MoneyArray([np.nan, 1], 'USD') @pytest.fixture(params=['data', 'data_missing']) @@ -30,22 +35,22 @@ def all_data(request, data, data_missing): @pytest.fixture def data_for_sorting(): - return ip.IPArray([10, 2 ** 64 + 1, 1]) + return mpd.MoneyArray([10, 123, 1], default_money_code='GBP') @pytest.fixture def data_missing_for_sorting(): - return ip.IPArray([2 ** 64 + 1, 0, 1]) + return mpd.MoneyArray([2, None, 1], default_money_code='GBP') @pytest.fixture def data_for_grouping(): b = 1 - a = 2 ** 32 + 1 - c = 2 ** 32 + 10 - return ip.IPArray([ - b, b, 0, 0, a, a, b, c - ]) + a = 233 + c = 242 + return mpd.MoneyArray([ + b, b, np.nan, None, a, a, b, c + ], 'USD') @pytest.fixture @@ -63,14 +68,14 @@ def na_cmp(): Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. 
- By defult, uses ``operator.or`` + By default, uses ``operator.or`` """ - return lambda x, y: int(x) == int(y) == 0 + return lambda x, y: pd.isna(x) and pd.isna(y) @pytest.fixture def na_value(): - return ip.IPType.na_value + return mpd.MoneyType.na_value class TestDtype(base.BaseDtypeTests): @@ -117,6 +122,10 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated): super().test_combine_add(data_repeated) - @pytest.mark.xfail(reason="buggy comparison of v4 and v6") - def test_searchsorted(self, data_for_sorting, as_series): - return super().test_searchsorted(data_for_sorting, as_series) + def test_argsort_missing_array(self, data_missing_for_sorting): + result = data_missing_for_sorting.argsort() + expected = np.array([1, 2, 0], dtype=np.dtype("int")) + # we don't care whether it's int32 or int64 + result = result.astype("int64", casting="safe") + expected = expected.astype("int64", casting="safe") + tm.assert_numpy_array_equal(result, expected) diff --git a/tests/test_ip.py b/tests/test_ip.py new file mode 100644 index 0000000..eb0a208 --- /dev/null +++ b/tests/test_ip.py @@ -0,0 +1,230 @@ +import money +import decimal +import operator + +import pytest +import six +from hypothesis.strategies import integers, lists, tuples +from hypothesis import given, example + +import numpy as np +import numpy.testing as npt +import pandas as pd +import moneypandas as mpd +import pandas.util.testing as tm + + +def test_make_container(): + values = mpd.MoneyArray([1, 2, 3], 'GBP') + npt.assert_array_equal( + values.data, + np.array([(1, 'GBP'), + (2, 'GBP'), + (3, 'GBP')], dtype=values.dtype._record_type) + ) + + +def test_repr_works(): + values = mpd.MoneyArray([0, 1, 2, 3], 'GBP') + result = repr(values) + expected = ("<MoneyArray>\n[GBP 0.00, GBP 1.00, GBP 2.00, GBP 3.00]\nLength: 4, dtype: money") + assert result == expected + + +def test_isna(): + v = mpd.MoneyArray([None, 2], 'GBP') + r1 = v.isna() + r2 = pd.isna(v) + expected = np.array([True, False])
+ np.testing.assert_array_equal(r1, expected) + np.testing.assert_array_equal(r2, expected) + + +def test_array(): + v = mpd.MoneyArray([1, 2, 3], 'GBP') + result = np.array(v) + expected = np.array([ + money.XMoney(1, 'GBP'), + money.XMoney(2, 'GBP'), + money.XMoney(3, 'GBP'), + ]) + tm.assert_numpy_array_equal(result, expected) + + +def test_tolist(): + v = mpd.MoneyArray([1, 2, 3], 'USD') + result = v.tolist() + expected = [(1, 'USD'), (2, 'USD'), (3, 'USD')] + assert result == expected + + +def test_to_pymoney(): + v = mpd.MoneyArray([1, 2, 3], 'USD') + result = v.to_pymoney() + expected = [ + money.XMoney(1, 'USD'), + money.XMoney(2, 'USD'), + money.XMoney(3, 'USD'), + ] + assert result == expected + + + +def test_equality(): + v1 = mpd.to_money([ + u'123 EUR', + u'345 GBP', + ]) + assert np.all(v1 == v1) + assert v1.equals(v1) + + v2 = mpd.to_money([ + u'124 EUR', + u'345 GBP', + ]) + result = v1 == v2 + expected = np.array([False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = bool(v1.equals(v2)) + assert result is False + + with pytest.raises(TypeError): + v1.equals("a") + + +@pytest.mark.parametrize('op', [ + operator.lt, + operator.le, + operator.ge, + operator.gt, +]) +@pytest.mark.skipif(six.PY2, reason="Flexible comparisons") +def test_comparison_raises(op): + arr = mpd.MoneyArray([0, 1, 2], 'JPY') + with pytest.raises(TypeError): + op(arr, 'a') + + with pytest.raises(TypeError): + op('a', arr) + + +@given( + tuples( + lists(integers(min_value=0, max_value=99999)), + lists(integers(min_value=0, max_value=99999)) + ).filter(lambda x: len(x[0]) == len(x[1])) +) +@example((1, 1)) +@example((0, 0)) +@example((0, 1)) +@example((1, 0)) +@example((1, 2)) +@example((2, 1)) +@pytest.mark.skip(reason="Flaky") +def test_ops(tup): + a, b = tup + v1 = mpd.MoneyArray(a, 'GBP') + v2 = mpd.MoneyArray(b, 'GBP') + + r1 = v1 <= v2 + r2 = v2 >= v1 + tm.assert_numpy_array_equal(r1, r2) + + +@pytest.mark.xfail(reason='upstream') +def 
test_value_counts(): + x = mpd.MoneyArray([0, 0, 1], 'USD') + result = x.value_counts() + assert len(result) + + +def test_iter_works(): + x = mpd.MoneyArray([0, 1, 2], 'GBP') + result = list(x) + expected = [ + money.XMoney(0, 'GBP'), + money.XMoney(1, 'GBP'), + money.XMoney(2, 'GBP'), + ] + assert result == expected + + +def test_todecimal(): + values = [0, 1, 2] + arr = mpd.MoneyArray(values, 'EUR') + result = arr.to_decimals() + assert all([r == decimal.Decimal(v) for r, v in zip(result, values)]) + + +def test_getitem_scalar(): + ser = mpd.MoneyArray([0, 1, 2], 'USD') + result = ser[1] + assert result == money.XMoney(1, 'USD') + + +def test_getitem_slice(): + ser = mpd.MoneyArray([0, 1, 2], 'USD') + result = ser[1:] + expected = mpd.MoneyArray([1, 2], 'USD') + assert result.equals(expected) + + +@pytest.mark.parametrize('value', [ + u'123 USD', + 123, + money.XMoney(123, 'USD'), +]) +def test_setitem_scalar(value): + ser = mpd.MoneyArray([0, 1, 2], 'USD') + ser[1] = value + expected = mpd.MoneyArray([0, 123, 2], 'USD') + assert ser.equals(expected) + + +def test_setitem_array(): + ser = mpd.MoneyArray([0, 1, 2], 'USD') + ser[[1, 2]] = ['10 USD', '20 USD'] + expected = mpd.MoneyArray([0, 10, 20], 'USD') + assert ser.equals(expected) + + +def test_bytes_roundtrip(): + arr = mpd.MoneyArray([1, 2, 3], 'USD') + bytestring = arr.to_bytes() + assert isinstance(bytestring, bytes) + + result = mpd.MoneyArray.from_bytes(bytestring) + assert result.equals(arr) + + +def test_unique(): + arr = mpd.MoneyArray([3, 3, 1, 2, 3], 'USD') + result = arr.unique() + assert isinstance(result, mpd.MoneyArray) + + result = result.astype(object) + expected = pd.unique(arr.astype(object)) + tm.assert_numpy_array_equal(result, expected) + + +def test_factorize(): + arr = mpd.MoneyArray([3, 3, 1, 2, 3], 'USD') + labels, uniques = arr.factorize() + expected_labels, expected_uniques = pd.factorize(arr.astype(object)) + + assert isinstance(uniques, mpd.MoneyArray) + + uniques = 
uniques.astype(object) + tm.assert_numpy_array_equal(labels, expected_labels) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + +@pytest.mark.parametrize('values', [ + [0, 1, 2], +]) +def test_from_ndarray(values): + result = mpd.MoneyArray(np.asarray(values), 'USD') + expected = mpd.MoneyArray(values, 'USD') + assert result.equals(expected) diff --git a/tests/ip/test_ip_pandas.py b/tests/test_money_pandas.py similarity index 62% rename from tests/ip/test_ip_pandas.py rename to tests/test_money_pandas.py index 16bbb87..62db308 100644 --- a/tests/ip/test_ip_pandas.py +++ b/tests/test_money_pandas.py @@ -1,6 +1,6 @@ """Tests involving pandas, not just the new array. """ -import ipaddress +import money import pytest import numpy as np @@ -10,7 +10,7 @@ from pandas.core.internals import ExtensionBlock import pandas.util.testing as tm -import cyberpandas as ip +import moneypandas as mpd # ---------------------------------------------------------------------------- @@ -19,10 +19,10 @@ def test_concatenate_blocks(): - v1 = ip.IPArray.from_pyints([1, 2, 3]) + v1 = mpd.MoneyArray([1, 2, 3], 'GBP') s = pd.Series(v1, index=pd.RangeIndex(3), fastpath=True) result = pd.concat([s, s], ignore_index=True) - expected = pd.Series(ip.IPArray.from_pyints([1, 2, 3, 1, 2, 3])) + expected = pd.Series(mpd.MoneyArray([1, 2, 3, 1, 2, 3], 'GBP')) tm.assert_series_equal(result, expected) @@ -32,56 +32,56 @@ def test_concatenate_blocks(): def test_series_constructor(): - v = ip.IPArray.from_pyints([1, 2, 3]) + v = mpd.MoneyArray([1, 2, 3], 'USD') result = pd.Series(v) assert result.dtype == v.dtype assert isinstance(result._data.blocks[0], ExtensionBlock) def test_dataframe_constructor(): - v = ip.IPArray.from_pyints([1, 2, 3]) + v = mpd.MoneyArray([1, 2, 3], 'USD') df = pd.DataFrame({"A": v}) - assert isinstance(df.dtypes['A'], ip.IPType) + assert isinstance(df.dtypes['A'], mpd.MoneyType) assert df.shape == (3, 1) str(df) def test_dataframe_from_series_no_dict(): - s = 
pd.Series(ip.IPArray([1, 2, 3])) + s = pd.Series(mpd.MoneyArray([1, 2, 3], 'INR')) result = pd.DataFrame(s) expected = pd.DataFrame({0: s}) tm.assert_frame_equal(result, expected) - s = pd.Series(ip.IPArray([1, 2, 3]), name='A') + s = pd.Series(mpd.MoneyArray([1, 2, 3], 'INR'), name='A') result = pd.DataFrame(s) expected = pd.DataFrame({'A': s}) tm.assert_frame_equal(result, expected) def test_dataframe_from_series(): - s = pd.Series(ip.IPArray([0, 1, 2])) + s = pd.Series(mpd.MoneyArray([0, 1, 2], 'EUR')) c = pd.Series(pd.Categorical(['a', 'b'])) result = pd.DataFrame({"A": s, 'B': c}) - assert isinstance(result.dtypes['A'], ip.IPType) + assert isinstance(result.dtypes['A'], mpd.MoneyType) def test_getitem_scalar(): - ser = pd.Series(ip.IPArray([0, 1, 2])) + ser = pd.Series(mpd.MoneyArray([None, 1, 2], 'USD')) result = ser[1] - assert result == ipaddress.ip_address(1) + assert result == money.XMoney(1, 'USD') def test_getitem_slice(): - ser = pd.Series(ip.IPArray([0, 1, 2])) + ser = pd.Series(mpd.MoneyArray([0, 1, 2], 'EUR')) result = ser[1:] - expected = pd.Series(ip.IPArray([1, 2]), index=range(1, 3)) + expected = pd.Series(mpd.MoneyArray([1, 2], 'EUR'), index=range(1, 3)) tm.assert_series_equal(result, expected) def test_setitem_scalar(): - ser = pd.Series(ip.IPArray([0, 1, 2])) - ser[1] = ipaddress.ip_address(10) - expected = pd.Series(ip.IPArray([0, 10, 2])) + ser = pd.Series(mpd.MoneyArray([0, 1, 2], 'EUR')) + ser[1] = money.XMoney(10, 'EUR') + expected = pd.Series(mpd.MoneyArray([0, 10, 2], 'EUR')) tm.assert_series_equal(ser, expected) @@ -93,32 +93,32 @@ def test_setitem_scalar(): @given(lists(integers(min_value=1, max_value=2**128 - 1))) def test_argsort(ints): pass - # result = pd.Series(ip.IPArray(ints)).argsort() + # result = pd.Series(mpd.MoneyArray(ints)).argsort() # expected = pd.Series(ints).argsort() - # tm.assert_series_equal(result.ip.to_pyints(), expected) + # tm.assert_series_equal(result.mpd.to_decimals('GBP'), expected) # -------- # Accessor 
# -------- -def test_non_ip_raises(): - s = pd.Series([1, 2]) +#def test_non_money_raises(): +# s = pd.Series([1, 2]) +# +# with pytest.raises(AttributeError) as m: +# s.money.is_currency('EUR') +# +# assert m.match("Cannot use 'money' accessor on objects of dtype 'int.*") - with pytest.raises(AttributeError) as m: - s.ip.is_ipv4 - assert m.match("Cannot use 'ip' accessor on objects of dtype 'int.*") +#def test_accessor_works(): +# s = pd.Series(mpd.MoneyArray([0, 1, 2, 3], 'USD')) +# s.money.is_currency('USD') -def test_accessor_works(): - s = pd.Series(ip.IPArray([0, 1, 2, 3])) - s.ip.is_ipv4 - - -def test_accessor_frame(): - s = pd.DataFrame({"A": ip.IPArray([0, 1, 2, 3])}) - s['A'].ip.is_ipv4 +#def test_accessor_frame(): +# s = pd.DataFrame({"A": mpd.MoneyArray([0, 1, 2, 3], 'EUR')}) +# s['A'].money.is_currency('USD') # --------- @@ -128,20 +128,20 @@ def test_accessor_frame(): @pytest.mark.xfail(reason="TODO") def test_factorize(): - arr = ip.IPArray([1, 1, 10, 10]) + arr = mpd.MoneyArray([1, 1, 10, 10], 'JPY') labels, uniques = pd.factorize(arr) expected_labels = np.array([0, 0, 1, 1]) tm.assert_numpy_array_equal(labels, expected_labels) - expected_uniques = ip.IPArray([1, 10]) + expected_uniques = mpd.MoneyArray([1, 10], 'JPY') assert uniques.equals(expected_uniques) @pytest.mark.xfail(reason="TODO") def test_groupby_make_grouper(): df = pd.DataFrame({"A": [1, 1, 2, 2], - "B": ip.IPArray([1, 1, 2, 2])}) + "B": mpd.MoneyArray([1, 1, 2, 2], 'EUR')}) gr = df.groupby("B") result = gr.grouper.groupings[0].grouper assert result.equals(df.B.values) @@ -150,7 +150,7 @@ def test_groupby_make_grouper(): @pytest.mark.xfail(reason="TODO") def test_groupby_make_grouper_groupings(): df = pd.DataFrame({"A": [1, 1, 2, 2], - "B": ip.IPArray([1, 1, 2, 2])}) + "B": mpd.MoneyArray([1, 1, 2, 2], 'EUR')}) p1 = df.groupby("A").grouper.groupings[0] p2 = df.groupby("B").grouper.groupings[0] diff --git a/tests/ip/test_pandas_methods.py b/tests/test_pandas_methods.py similarity index 
81% rename from tests/ip/test_pandas_methods.py rename to tests/test_pandas_methods.py index 2f5f20a..58c54ee 100644 --- a/tests/ip/test_pandas_methods.py +++ b/tests/test_pandas_methods.py @@ -5,19 +5,19 @@ import pandas.util.testing as tm import pytest -import cyberpandas as ip +import moneypandas as mpd @pytest.fixture def series(): - return pd.Series(ip.IPArray.from_pyints([0, 1, 2])) + return pd.Series(mpd.MoneyArray([None, 1, 2], 'USD')) @pytest.fixture def frame(): - return pd.DataFrame({"A": ip.IPArray.from_pyints([0, 1, 2]), + return pd.DataFrame({"A": mpd.MoneyArray([None, 1, 2], 'GBP'), "B": [0, 1, 2], - "C": ip.IPArray.from_pyints([0, 1, 2])}) + "C": mpd.MoneyArray([np.nan, 1, 2], 'USD')}) @pytest.fixture(params=['series', 'frame']) @@ -47,7 +47,7 @@ def test_works_frame(frame, method): def test__take(frame): - return frame._take([0], axis=0) + return frame.take([0], axis=0) def test_iloc_series(series): @@ -100,9 +100,9 @@ def test_loc_frame(frame): def test_reindex(frame): result = frame.reindex([0, 10]) - expected = pd.DataFrame({"A": ip.IPArray.from_pyints([0, 0]), + expected = pd.DataFrame({"A": mpd.MoneyArray([None, np.nan], 'USD'), "B": [0, np.nan], - "C": ip.IPArray.from_pyints([0, 0])}, + "C": mpd.MoneyArray([None, np.nan], 'USD')}, index=[0, 10]) tm.assert_frame_equal(result, expected) @@ -127,16 +127,16 @@ def test_isna_frame(frame): @pytest.mark.xfail(reason="Not implemented") def test_fillna(): - result = pd.Series(ip.IPArray([1, 0])).fillna(method='ffill') - expected = pd.Series(ip.IPArray([1, 1])) + result = pd.Series(mpd.MoneyArray([1, 0], 'USD')).fillna(method='ffill') + expected = pd.Series(mpd.MoneyArray([1, 1], 'USD')) tm.assert_series_equal(result, expected) @pytest.mark.xfail(reason="Not implemented") def test_dropna(): - missing = pd.Series(ip.IPArray([1, 0])) + missing = pd.Series(mpd.MoneyArray([1, 0], 'USD')) result = missing.dropna() - expected = pd.Series(ip.IPArray([1])) + expected = pd.Series(mpd.MoneyArray([1], 'USD')) 
tm.assert_series_equal(result, expected) result = missing.to_frame().dropna() diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..4180f70 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,39 @@ +import money + +import pytest + +from moneypandas import parser, MoneyArray + + +@pytest.mark.parametrize('values', [ + [u'123 EUR', + u'234 EUR'] + # TODO: reinstate byte tests + #[b'\xc0\xa8\x01\x01', + # b' \x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4'], +]) +def test_to_money(values): + result = parser.to_money(values) + expected = MoneyArray([ + 123, + 234 + ], 'EUR') + assert result.equals(expected) + + +@pytest.mark.parametrize('val, expected, money_code', [ + (u'123 EUR', money.XMoney(123, 'EUR'), None), + (123, money.XMoney(123, 'EUR'), 'EUR'), + (money.XMoney(100, 'GBP'), money.XMoney(100, 'GBP'), None) +]) +def test_as_money_object(val, expected, money_code): + result = parser._as_money_object(val, money_code) + assert result == (expected.amount, expected.currency) + + +@pytest.mark.parametrize("val", [ + u"129", -1 +]) +def test_as_money_object_raises(val): + with pytest.raises(ValueError): + parser._as_money_object(val)