From 54d296fa9251b75e57cc5d9afde14501723e055d Mon Sep 17 00:00:00 2001 From: "Vanya A. Sergeev" Date: Wed, 26 Apr 2017 05:26:15 -0700 Subject: [PATCH] add support for msgpack timestamp format --- README.md | 54 +++++++++++++++++++------------ msgpack.org.md | 34 +++++++++----------- test_umsgpack.py | 28 +++++++++++++++- umsgpack.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 158 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 3dab4eb..9f11bce 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # u-msgpack-python [![Build Status](https://travis-ci.org/vsergeev/u-msgpack-python.svg?branch=master)](https://travis-ci.org/vsergeev/u-msgpack-python) [![GitHub release](https://img.shields.io/github/release/vsergeev/u-msgpack-python.svg?maxAge=7200)](https://github.com/vsergeev/u-msgpack-python) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/vsergeev/u-msgpack-python/blob/master/LICENSE) -u-msgpack-python is a lightweight [MessagePack](http://msgpack.org/) serializer and deserializer module written in pure Python, compatible with both Python 2 and 3, as well CPython and PyPy implementations of Python. u-msgpack-python is fully compliant with the latest [MessagePack specification](https://github.com/msgpack/msgpack/blob/master/spec.md). In particular, it supports the new binary, UTF-8 string, and application-defined ext types. +u-msgpack-python is a lightweight [MessagePack](http://msgpack.org/) serializer and deserializer module written in pure Python, compatible with both Python 2 and 3, as well as CPython and PyPy implementations of Python. u-msgpack-python is fully compliant with the latest [MessagePack specification](https://github.com/msgpack/msgpack/blob/master/spec.md). In particular, it supports the new binary, UTF-8 string, application-defined ext, and timestamp types. 
u-msgpack-python is currently distributed on [PyPI](https://pypi.python.org/pypi/u-msgpack-python) and as a single file: [umsgpack.py](https://raw.github.com/vsergeev/u-msgpack-python/master/umsgpack.py). @@ -81,18 +81,18 @@ b'\x01\x02\x03' Serializing and deserializing application-defined types with Ext handlers: ``` python ->>> umsgpack.packb([complex(1,2), datetime.datetime.now()], +>>> umsgpack.packb([complex(1,2), decimal.Decimal("0.31")], ... ext_handlers = { ... complex: lambda obj: umsgpack.Ext(0x30, struct.pack("ff", obj.real, obj.imag)), -... datetime.datetime: lambda obj: umsgpack.Ext(0x40, obj.strftime("%Y%m%dT%H:%M:%S.%f").encode()), -... }) -b'\x92\xd70\x00\x00\x80?\x00\x00\x00@\xc7\x18@20161017T00:12:53.719377' +... decimal.Decimal: lambda obj: umsgpack.Ext(0x40, str(obj).encode()), +... }) +b'\x92\xd70\x00\x00\x80?\x00\x00\x00@\xd6@0.31' >>> umsgpack.unpackb(_, ... ext_handlers = { ... 0x30: lambda ext: complex(*struct.unpack("ff", ext.data)), -... 0x40: lambda ext: datetime.datetime.strptime(ext.data.decode(), "%Y%m%dT%H:%M:%S.%f"), -... }) -[(1+2j), datetime.datetime(2016, 10, 17, 0, 12, 53, 719377)] +... 0x40: lambda ext: decimal.Decimal(ext.data.decode()), +... }) +[(1+2j), Decimal('0.31')] >>> ``` @@ -120,37 +120,35 @@ custom types to callables that pack the type into an Ext object. The callable should accept the custom type object as an argument and return a packed `umsgpack.Ext` object. -Example for packing `set`, `complex`, and `datetime.datetime` types into Ext +Example for packing `set`, `complex`, and `decimal.Decimal` types into Ext objects with type codes 0x20, 0x30, and 0x40, respectively: ``` python ->>> umsgpack.packb([1, True, {"foo", 2}, complex(3, 4), datetime.datetime.now()], +>>> umsgpack.packb([1, True, {"foo", 2}, complex(3, 4), decimal.Decimal("0.31")], ... ext_handlers = { ... set: lambda obj: umsgpack.Ext(0x20, umsgpack.packb(list(obj))), ... 
complex: lambda obj: umsgpack.Ext(0x30, struct.pack("ff", obj.real, obj.imag)), -... datetime.datetime: lambda obj: umsgpack.Ext(0x40, obj.strftime("%Y%m%dT%H:%M:%S.%f").encode()), -... }) -b'\x95\x01\xc3\xc7\x06 \x92\xa3foo\x02\xd70\x00\x00@@\x00\x00\x80@\xc7\x18@20161015T02:28:35.666425' +... decimal.Decimal: lambda obj: umsgpack.Ext(0x40, str(obj).encode()), +... }) +b'\x95\x01\xc3\xc7\x06 \x92\xa3foo\x02\xd70\x00\x00@@\x00\x00\x80@\xd6@0.31' >>> ``` - Similarly, the unpacking functions accept an optional `ext_handlers` dictionary that maps Ext type codes to callables that unpack the Ext into a custom object. The callable should accept a `umsgpack.Ext` object as an argument and return an unpacked custom type object. Example for unpacking Ext objects with type codes 0x20, 0x30, and 0x40, into -`set`, `complex`, and `datetime.datetime` typed objects, respectively: +`set`, `complex`, and `decimal.Decimal` typed objects, respectively: ``` python ->>> umsgpack.unpackb(b'\x95\x01\xc3\xc7\x06 \x92\xa3foo\x02\xd70\x00\x00@@\x00\x00\x80@' \ -... b'\xc7\x18@20161015T02:28:35.666425', +>>> umsgpack.unpackb(b'\x95\x01\xc3\xc7\x06 \x92\xa3foo\x02\xd70\x00\x00@@\x00\x00\x80@\xd6@0.31', ... ext_handlers = { ... 0x20: lambda ext: set(umsgpack.unpackb(ext.data)), ... 0x30: lambda ext: complex(*struct.unpack("ff", ext.data)), -... 0x40: lambda ext: datetime.datetime.strptime(ext.data.decode(), "%Y%m%dT%H:%M:%S.%f"), -... }) -[1, True, {'foo', 2}, (3+4j), datetime.datetime(2016, 10, 15, 2, 28, 35, 666425)] +... 0x40: lambda ext: decimal.Decimal(ext.data.decode()), +... }) +[1, True, {'foo', 2}, (3+4j), Decimal('0.31')] >>> ``` @@ -341,6 +339,20 @@ If a non-byte-string argument is passed to `umsgpack.unpackb()`, it will raise a >>> ``` +* `UnsupportedTimestampException`: Unsupported timestamp encountered during unpacking. + + The official timestamp extension type supports 32-bit, 64-bit and 96-bit + formats. 
This exception is thrown if a timestamp extension type with an + unsupported format is encountered. + + ``` python + # Attempt to unpack invalid timestamp + >>> umsgpack.unpackb(b"\xd5\xff\x01\x02") + ... + umsgpack.UnsupportedTimestampException: unsupported timestamp with data length 2 + >>> + ``` + * `ReservedCodeException`: Reserved code encountered during unpacking. ``` python @@ -387,6 +399,8 @@ If a non-byte-string argument is passed to `umsgpack.unpackb()`, it will raise a * The msgpack array format is unpacked into a Python list, unless it is the key of a map, in which case it is unpacked into a Python tuple * Python tuples and lists are both packed into the msgpack array format * Python float types are packed into the msgpack float32 or float64 format depending on the system's `sys.float_info` +* The Python `datetime.datetime` type is packed into, and unpacked from, the msgpack `timestamp` format + * Note that this Python type only supports microsecond resolution, while the msgpack `timestamp` format supports nanosecond resolution. Timestamps with finer than microsecond resolution will lose precision during unpacking. ## Testing diff --git a/msgpack.org.md b/msgpack.org.md index c94d371..009e629 100644 --- a/msgpack.org.md +++ b/msgpack.org.md @@ -82,26 +82,22 @@ b'\x01\x02\x03' Serializing and deserializing application-defined types with Ext handlers: ``` python ->>> umsgpack.packb([complex(1,2), datetime.datetime.now()], -... ext_handlers = { -... complex: lambda obj: umsgpack.Ext(0x30, -... struct.pack("ff", obj.real, obj.imag)), -... datetime.datetime: lambda obj: umsgpack.Ext(0x40, -... obj.strftime("%Y%m%dT%H:%M:%S.%f").encode()), -... }) -b'\x92\xd70\x00\x00\x80?\x00\x00\x00@\xc7\x18@20161017T00:12:53.7' -b'19377' +>>> umsgpack.packb([complex(1,2), decimal.Decimal("0.31")], +... ext_handlers = { +... complex: lambda obj: +... umsgpack.Ext(0x30, struct.pack("ff", obj.real, obj.imag)), +... decimal.Decimal: lambda obj: +... 
umsgpack.Ext(0x40, str(obj).encode()), +... }) +b'\x92\xd70\x00\x00\x80?\x00\x00\x00@\xd6@0.31' >>> umsgpack.unpackb(_, -... ext_handlers = { -... 0x30: lambda ext: -... complex(*struct.unpack("ff", ext.data)), -... 0x40: lambda ext: -... datetime.datetime.strptime( -... ext.data.decode(), -... "%Y%m%dT%H:%M:%S.%f" -... ), -... }) -[(1+2j), datetime.datetime(2016, 10, 17, 0, 12, 53, 719377)] +... ext_handlers = { +... 0x30: lambda ext: +... complex(*struct.unpack("ff", ext.data)), +... 0x40: lambda ext: +... decimal.Decimal(ext.data.decode()), +... }) +[(1+2j), Decimal('0.31')] >>> ``` diff --git a/test_umsgpack.py b/test_umsgpack.py index e500276..926e0c6 100644 --- a/test_umsgpack.py +++ b/test_umsgpack.py @@ -11,6 +11,7 @@ import sys import struct import unittest +import datetime import io from collections import OrderedDict, namedtuple @@ -116,6 +117,27 @@ ["empty array", [], b"\x90"], # Empty Map ["empty map", {}, b"\x80"], + # 32-bit Timestamp + ["32-bit timestamp", datetime.datetime(1970, 1, 1, 0, 0, 0, 0, umsgpack._utc_tzinfo), + b"\xd6\xff\x00\x00\x00\x00"], + ["32-bit timestamp", datetime.datetime(2000, 1, 1, 10, 5, 2, 0, umsgpack._utc_tzinfo), + b"\xd6\xff\x38\x6d\xd1\x4e"], + # 64-bit Timestamp + ["64-bit timestamp", datetime.datetime(2000, 1, 1, 10, 5, 2, 1234, umsgpack._utc_tzinfo), + b"\xd7\xff\x00\x4b\x51\x40\x38\x6d\xd1\x4e"], + ["64-bit timestamp", datetime.datetime(2200, 1, 1, 10, 5, 2, 0, umsgpack._utc_tzinfo), + b"\xd7\xff\x00\x00\x00\x01\xb0\x9e\xa6\xce"], + ["64-bit timestamp", datetime.datetime(2200, 1, 1, 10, 5, 2, 1234, umsgpack._utc_tzinfo), + b"\xd7\xff\x00\x4b\x51\x41\xb0\x9e\xa6\xce"], + # 96-bit Timestamp + ["96-bit timestamp", datetime.datetime(1900, 1, 1, 10, 5, 2, 0, umsgpack._utc_tzinfo), + b"\xc7\x0c\xff\x00\x00\x00\x00\xff\xff\xff\xff\x7c\x56\x0f\x4e"], + ["96-bit timestamp", datetime.datetime(1900, 1, 1, 10, 5, 2, 1234, umsgpack._utc_tzinfo), + b"\xc7\x0c\xff\x00\x12\xd4\x50\xff\xff\xff\xff\x7c\x56\x0f\x4e"], + ["96-bit 
timestamp", datetime.datetime(3000, 1, 1, 10, 5, 2, 0, umsgpack._utc_tzinfo), + b"\xc7\x0c\xff\x00\x00\x00\x00\x00\x00\x00\x07\x91\x5f\x59\xce"], + ["96-bit timestamp", datetime.datetime(3000, 1, 1, 10, 5, 2, 1234, umsgpack._utc_tzinfo), + b"\xc7\x0c\xff\x00\x12\xd4\x50\x00\x00\x00\x07\x91\x5f\x59\xce"], ] composite_test_vectors = [ @@ -262,6 +284,9 @@ # Reserved code (0xc1) ["reserved code", b"\xc1", umsgpack.ReservedCodeException], + # Unsupported timestamp (unsupported data length) + ["unsupported timestamp", b"\xc7\x02\xff\xaa\xbb", + umsgpack.UnsupportedTimestampException], # Invalid string (non utf-8) ["invalid string", b"\xa1\x80", umsgpack.InvalidStringException], @@ -318,6 +343,7 @@ "UnsupportedTypeException", "InsufficientDataException", "InvalidStringException", + "UnsupportedTimestampException", "ReservedCodeException", "UnhashableKeyException", "DuplicateKeyException", @@ -519,7 +545,7 @@ def test_namespacing(self): exported_vars = list(filter(lambda x: not x.startswith("_"), dir(umsgpack))) # Ignore imports - exported_vars = list(filter(lambda x: x != "struct" and x != "collections" and x != + exported_vars = list(filter(lambda x: x != "struct" and x != "collections" and x != "datetime" and x != "sys" and x != "io" and x != "xrange", exported_vars)) self.assertTrue(len(exported_vars) == len(exported_vars_test_vector)) diff --git a/umsgpack.py b/umsgpack.py index cd7a203..7e2beb4 100644 --- a/umsgpack.py +++ b/umsgpack.py @@ -45,6 +45,7 @@ """ import struct import collections +import datetime import sys import io @@ -168,6 +169,11 @@ class InvalidStringException(UnpackException): pass +class UnsupportedTimestampException(UnpackException): + "Unsupported timestamp format encountered during unpacking." + pass + + class ReservedCodeException(UnpackException): "Reserved code encountered during unpacking." 
pass @@ -341,6 +347,29 @@ def _pack_ext(obj, fp, options): raise UnsupportedTypeException("huge ext data") +def _pack_ext_timestamp(obj, fp, options): + delta = (obj.replace(tzinfo=_utc_tzinfo) if obj.tzinfo is None else obj) - _epoch + seconds = delta.seconds + delta.days * 86400 + microseconds = delta.microseconds + + if microseconds == 0 and 0 <= seconds <= 2**32 - 1: + # 32-bit timestamp + fp.write(b"\xd6\xff" + + struct.pack(">I", seconds)) + elif 0 <= seconds <= 2**34 - 1: + # 64-bit timestamp + value = ((microseconds * 1000) << 34) | seconds + fp.write(b"\xd7\xff" + + struct.pack(">Q", value)) + elif -2**63 <= seconds <= 2**63 - 1: + # 96-bit timestamp + fp.write(b"\xc7\x0c\xff" + + struct.pack(">I", microseconds * 1000) + + struct.pack(">q", seconds)) + else: + raise UnsupportedTypeException("huge timestamp") + + def _pack_array(obj, fp, options): if len(obj) <= 15: fp.write(struct.pack("B", 0x90 | len(obj))) @@ -428,6 +457,8 @@ def _pack2(obj, fp, **options): _pack_array(obj, fp, options) elif isinstance(obj, dict): _pack_map(obj, fp, options) + elif isinstance(obj, datetime.datetime): + _pack_ext_timestamp(obj, fp, options) elif isinstance(obj, Ext): _pack_ext(obj, fp, options) elif ext_handlers: @@ -498,6 +529,8 @@ def _pack3(obj, fp, **options): _pack_array(obj, fp, options) elif isinstance(obj, dict): _pack_map(obj, fp, options) + elif isinstance(obj, datetime.datetime): + _pack_ext_timestamp(obj, fp, options) elif isinstance(obj, Ext): _pack_ext(obj, fp, options) elif ext_handlers: @@ -703,7 +736,15 @@ def _unpack_ext(code, fp, options): else: raise Exception("logic error, not ext: 0x%02x" % ord(code)) - ext = Ext(ord(_read_except(fp, 1)), _read_except(fp, length)) + ext_type = struct.unpack("b", _read_except(fp, 1))[0] + ext_data = _read_except(fp, length) + + # Timestamp extension + if ext_type == -1: + return _unpack_ext_timestamp(code, ext_data, options) + + # Application extension + ext = Ext(ext_type, ext_data) # Unpack with ext handler, if we have one ext_handlers = options.get("ext_handlers") @@ 
-713,6 +754,28 @@ def _unpack_ext(code, fp, options): return ext +def _unpack_ext_timestamp(code, data, options): + if len(data) == 4: + # 32-bit timestamp + seconds = struct.unpack(">I", data)[0] + microseconds = 0 + elif len(data) == 8: + # 64-bit timestamp + value = struct.unpack(">Q", data)[0] + seconds = value & 0x3ffffffff + microseconds = (value >> 34) // 1000 + elif len(data) == 12: + # 96-bit timestamp + seconds = struct.unpack(">q", data[4:12])[0] + microseconds = struct.unpack(">I", data[0:4])[0] // 1000 + else: + raise UnsupportedTimestampException( + "unsupported timestamp with data length %d" % len(data)) + + return _epoch + datetime.timedelta(seconds=seconds, + microseconds=microseconds) + + def _unpack_array(code, fp, options): if (ord(code) & 0xf0) == 0x90: length = (ord(code) & ~0xf0) @@ -801,6 +864,8 @@ def _unpack2(fp, **options): Insufficient data to unpack the serialized object. InvalidStringException(UnpackException): Invalid UTF-8 string encountered during unpacking. + UnsupportedTimestampException(UnpackException): + Unsupported timestamp format encountered during unpacking. ReservedCodeException(UnpackException): Reserved code encountered during unpacking. UnhashableKeyException(UnpackException): @@ -843,6 +908,8 @@ def _unpack3(fp, **options): Insufficient data to unpack the serialized object. InvalidStringException(UnpackException): Invalid UTF-8 string encountered during unpacking. + UnsupportedTimestampException(UnpackException): + Unsupported timestamp format encountered during unpacking. ReservedCodeException(UnpackException): Reserved code encountered during unpacking. UnhashableKeyException(UnpackException): @@ -888,6 +955,8 @@ def _unpackb2(s, **options): Insufficient data to unpack the serialized object. InvalidStringException(UnpackException): Invalid UTF-8 string encountered during unpacking. + UnsupportedTimestampException(UnpackException): + Unsupported timestamp format encountered during unpacking. 
ReservedCodeException(UnpackException): Reserved code encountered during unpacking. UnhashableKeyException(UnpackException): @@ -934,6 +1003,8 @@ def _unpackb3(s, **options): Insufficient data to unpack the serialized object. InvalidStringException(UnpackException): Invalid UTF-8 string encountered during unpacking. + UnsupportedTimestampException(UnpackException): + Unsupported timestamp format encountered during unpacking. ReservedCodeException(UnpackException): Reserved code encountered during unpacking. UnhashableKeyException(UnpackException): @@ -966,6 +1037,8 @@ def __init(): global load global loads global compatibility + global _epoch + global _utc_tzinfo global _float_precision global _unpack_dispatch_table global xrange @@ -973,6 +1046,14 @@ def __init(): # Compatibility mode for handling strings/bytes with the old specification compatibility = False + if sys.version_info[0] == 3: + _utc_tzinfo = datetime.timezone.utc + else: + _utc_tzinfo = None + + # Calculate epoch datetime + _epoch = datetime.datetime(1970, 1, 1, tzinfo=_utc_tzinfo) + # Auto-detect system float precision if sys.float_info.mant_dig == 53: _float_precision = "double"