Skip to content

Commit

Permalink
minor changes to the codebase from uqfoundation#475
Browse files Browse the repository at this point in the history
  • Loading branch information
leogama committed Oct 4, 2022
1 parent 25a7e45 commit 372de38
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 114 deletions.
13 changes: 6 additions & 7 deletions dill/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
from .__info__ import __version__, __author__, __doc__, __license__
except: # pragma: no cover
import os
import sys
import sys
parent = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
sys.path.append(parent)
# get distribution meta info
# get distribution meta info
from version import (__version__, __author__,
get_license_text, get_readme_as_rst)
__license__ = get_license_text(os.path.join(parent, 'LICENSE'))
Expand All @@ -24,9 +24,9 @@


from ._dill import (
Pickler, Unpickler,
check, copy, dump, dumps, load, loads, pickle, pickles, register,
DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, CONTENTS_FMODE, FILE_FMODE, HANDLE_FMODE,
dump, dumps, load, loads, copy,
Pickler, Unpickler, register, pickle, pickles, check,
DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE,
PickleError, PickleWarning, PicklingError, PicklingWarning, UnpicklingError,
UnpicklingWarning,
)
Expand All @@ -42,8 +42,6 @@
# make sure "trace" is turned off
logger.trace(False)

from importlib import reload

objects = {}
# local import of dill._objects
#from . import _objects
Expand All @@ -68,6 +66,7 @@ def load_types(pickleable=True, unpickleable=True):
Returns:
None
"""
from importlib import reload
# local import of dill.objects
from . import _objects
if pickleable:
Expand Down
117 changes: 77 additions & 40 deletions dill/_dill.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,27 @@
"""
dill: a utility for serialization of python objects
The main API of the package are the functions :func:`dump` and
:func:`dumps` for serialization ("pickling"), and :func:`load`
and :func:`loads` for deserialization ("unpickling"). The
functions :func:`~dill.session.dump_module` and
:func:`~dill.session.load_module` can be used to save and restore
the intepreter session.
Based on code written by Oren Tirosh and Armin Ronacher.
Extended to a (near) full set of the builtin types (in types module),
and coded to the pickle interface, by <[email protected]>.
Initial port to python3 by Jonathan Dobson, continued by mmckerns.
Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns.
Test against CH16+ Std. Lib. ... TBD.
"""

from __future__ import annotations

__all__ = [
'Pickler','Unpickler',
'check','copy','dump','dumps','load','loads','pickle','pickles','register',
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','CONTENTS_FMODE','FILE_FMODE','HANDLE_FMODE',
'dump','dumps','load','loads','copy',
'Pickler','Unpickler','register','pickle','pickles','check',
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE',
'PickleError','PickleWarning','PicklingError','PicklingWarning','UnpicklingError',
'UnpicklingWarning',
]
Expand All @@ -39,6 +49,7 @@
#XXX: get types from .objtypes ?
import builtins as __builtin__
from pickle import _Pickler as StockPickler, Unpickler as StockUnpickler
from pickle import GLOBAL, POP
from _thread import LockType
from _thread import RLock as RLockType
#from io import IOBase
Expand All @@ -58,6 +69,7 @@
import marshal
import gc
# import zlib
import dataclasses
from weakref import ReferenceType, ProxyType, CallableProxyType
from collections import OrderedDict
from functools import partial
Expand Down Expand Up @@ -158,22 +170,19 @@ def get_file_type(*args, **kwargs):
from socket import socket as SocketType
#FIXME: additionally calls ForkingPickler.register several times
from multiprocessing.reduction import _reduce_socket as reduce_socket
try:
try: #pragma: no cover
IS_IPYTHON = __IPYTHON__ # is True
ExitType = None # IPython.core.autocall.ExitAutocall
singletontypes = ['exit', 'quit', 'get_ipython']
ExitType = None # IPython.core.autocall.ExitAutocall
IPYTHON_SINGLETONS = ('exit', 'quit', 'get_ipython')
except NameError:
IS_IPYTHON = False
try: ExitType = type(exit) # apparently 'exit' can be removed
except NameError: ExitType = None
singletontypes = []
IPYTHON_SINGLETONS = ()

import inspect
import dataclasses
import typing

from pickle import GLOBAL


### Shims for different versions of Python and dill
class Sentinel(object):
Expand Down Expand Up @@ -212,6 +221,9 @@ def __reduce_ex__(self, protocol):
#: Pickles the entire file (handle and contents), preserving mode and position.
FILE_FMODE = 2

# Exceptions commonly raised by unpickleable objects in the Standard Library.
UNPICKLEABLE_ERRORS = (PicklingError, TypeError, ValueError, NotImplementedError)

### Shorthands (modified from python2.5/lib/pickle.py)
def copy(obj, *args, **kwds):
"""
Expand Down Expand Up @@ -320,9 +332,20 @@ class UnpicklingWarning(PickleWarning, UnpicklingError):
### Extend the Picklers
class Pickler(StockPickler):
"""python's Pickler extended to interpreter sessions"""
dispatch = MetaCatchingDict(StockPickler.dispatch.copy())
_session = False
dispatch: typing.Dict[type, typing.Callable[[Pickler, typing.Any], None]] \
= MetaCatchingDict(StockPickler.dispatch.copy())
"""The dispatch table, a dictionary of serializing functions used
by Pickler to save objects of specific types. Use :func:`pickle`
or :func:`register` to associate types to custom functions.
:meta hide-value:
"""
from .settings import settings
# Flags set by dump_module() is dill.session:
_refimported = False
_refonfail = False
_session = False
_first_pass = False

def __init__(self, file, *args, **kwds):
settings = Pickler.settings
Expand All @@ -341,11 +364,12 @@ def __init__(self, file, *args, **kwds):
self._file = file

def save(self, obj, save_persistent_id=True):
# register if the object is a numpy ufunc
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
# numpy hack
obj_type = type(obj)
if NumpyArrayType and not (obj_type is type or obj_type in Pickler.dispatch):
if NumpyUfuncType and numpyufunc(obj_type):
# register if the object is a numpy ufunc
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
if numpyufunc(obj_type):
@register(obj_type)
def save_numpy_ufunc(pickler, obj):
logger.trace(pickler, "Nu: %s", obj)
Expand All @@ -359,7 +383,7 @@ def save_numpy_ufunc(pickler, obj):
# def uload(name): return getattr(numpy, name)
# copy_reg.pickle(NumpyUfuncType, udump, uload)
# register if the object is a numpy dtype
if NumpyDType and numpydtype(obj_type):
if numpydtype(obj_type):
@register(obj_type)
def save_numpy_dtype(pickler, obj):
logger.trace(pickler, "Dt: %s", obj)
Expand All @@ -372,7 +396,7 @@ def save_numpy_dtype(pickler, obj):
# def udump(f): return uload, (f.type,)
# copy_reg.pickle(NumpyDTypeType, udump, uload)
# register if the object is a subclassed numpy array instance
if NumpyArrayType and ndarraysubclassinstance(obj_type):
if ndarraysubclassinstance(obj_type):
@register(obj_type)
def save_numpy_array(pickler, obj):
logger.trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype)
Expand All @@ -381,8 +405,9 @@ def save_numpy_array(pickler, obj):
pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj)
logger.trace(pickler, "# Nu")
return
# end hack
if GENERATOR_FAIL and type(obj) == GeneratorType:
# end numpy hack

if GENERATOR_FAIL and obj_type is GeneratorType:
msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
raise PicklingError(msg)
StockPickler.save(self, obj, save_persistent_id)
Expand All @@ -392,7 +417,6 @@ def save_numpy_array(pickler, obj):
def dump(self, obj): #NOTE: if settings change, need to update attributes
logger.trace_setup(self)
StockPickler.dump(self, obj)

dump.__doc__ = StockPickler.dump.__doc__

class Unpickler(StockUnpickler):
Expand Down Expand Up @@ -436,12 +460,12 @@ def dispatch_table():
pickle_dispatch_copy = StockPickler.dispatch.copy()

def pickle(t, func):
"""expose dispatch table for user-created extensions"""
"""expose :attr:`~Pickler.dispatch` table for user-created extensions"""
Pickler.dispatch[t] = func
return

def register(t):
"""register type to Pickler's dispatch table """
"""decorator to register types to Pickler's :attr:`~Pickler.dispatch` table"""
def proxy(func):
Pickler.dispatch[t] = func
return func
Expand All @@ -460,7 +484,7 @@ def use_diff(on=True):
Reduces size of pickles by only including object which have changed.
Decreases pickle size but increases CPU time needed.
Also helps avoid some unpicklable objects.
Also helps avoid some unpickleable objects.
MUST be called at start of script, otherwise changes will not be recorded.
"""
global _use_diff, diff
Expand Down Expand Up @@ -1088,7 +1112,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO
else:
pickler.save_reduce(*reduction)
# pop None created by calling preprocessing step off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)

#@register(CodeType)
#def save_code(pickler, obj):
Expand Down Expand Up @@ -1158,7 +1182,7 @@ def save_code(pickler, obj):
return

def _repr_dict(obj):
"""make a short string representation of a dictionary"""
"""Make a short string representation of a dictionary."""
return "<%s object at %#012x>" % (type(obj).__name__, id(obj))

@register(dict)
Expand Down Expand Up @@ -1470,7 +1494,7 @@ def save_cell(pickler, obj):
# The result of this function call will be None
pickler.save_reduce(_shims._delattr, (obj, 'cell_contents'))
# pop None created by calling _delattr off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)
logger.trace(pickler, "# Ce3")
return
if is_dill(pickler, child=True):
Expand Down Expand Up @@ -1606,7 +1630,7 @@ def save_module(pickler, obj):
pass
else:
logger.trace(pickler, "M2: %s with diff", obj)
logger.trace(pickler, "Diff: %s", changed.keys())
logger.info("Diff: %s", changed.keys())
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj,
state=changed)
logger.trace(pickler, "# M2")
Expand All @@ -1617,15 +1641,20 @@ def save_module(pickler, obj):
logger.trace(pickler, "# M1")
else:
builtin_mod = _is_builtin_module(obj)
if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \
is_dill(pickler, child=True) and obj is pickler._main:
is_session_main = is_dill(pickler, child=True) and obj is pickler._main
if (obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod
or is_session_main):
logger.trace(pickler, "M1: %s", obj)
_main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy?
[_main_dict.pop(item, None) for item in singletontypes
+ ["__builtins__", "__loader__"]]
# Hack for handling module-type objects in load_module().
mod_name = obj.__name__ if _is_imported_module(obj) else '__runtime__.%s' % obj.__name__
pickler.save_reduce(_import_module, (mod_name,), obj=obj,
state=_main_dict)
# Second references are saved as __builtin__.__main__ in save_module_dict().
main_dict = obj.__dict__.copy()
for item in ('__builtins__', '__loader__'):
main_dict.pop(item, None)
for item in IPYTHON_SINGLETONS: #pragma: no cover
if getattr(main_dict.get(item), '__module__', '').startswith('IPython'):
del main_dict[item]
pickler.save_reduce(_import_module, (mod_name,), obj=obj, state=main_dict)
logger.trace(pickler, "# M1")
elif obj.__name__ == "dill._dill":
logger.trace(pickler, "M2: %s", obj)
Expand All @@ -1635,7 +1664,6 @@ def save_module(pickler, obj):
logger.trace(pickler, "M2: %s", obj)
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj)
logger.trace(pickler, "# M2")
return
return

@register(TypeType)
Expand All @@ -1661,7 +1689,7 @@ def save_type(pickler, obj, postproc_list=None):
elif obj is type(None):
logger.trace(pickler, "T7: %s", obj)
#XXX: pickler.save_reduce(type, (None,), obj=obj)
pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8'))
pickler.write(GLOBAL + b'__builtin__\nNoneType\n')
logger.trace(pickler, "# T7")
elif obj is NotImplementedType:
logger.trace(pickler, "T7: %s", obj)
Expand Down Expand Up @@ -1702,9 +1730,18 @@ def save_type(pickler, obj, postproc_list=None):
else:
logger.trace(pickler, "T4: %s", obj)
if incorrectly_named:
warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning)
warnings.warn(
"Cannot locate reference to %r." % (obj,),
PicklingWarning,
stacklevel=3,
)
if obj_recursive:
warnings.warn('Cannot pickle %r: %s.%s has recursive self-references that trigger a RecursionError.' % (obj, obj.__module__, obj_name), PicklingWarning)
warnings.warn(
"Cannot pickle %r: %s.%s has recursive self-references that "
"trigger a RecursionError." % (obj, obj.__module__, obj_name),
PicklingWarning,
stacklevel=3,
)
#print (obj.__dict__)
#print ("%s\n%s" % (type(obj), obj.__name__))
#print ("%s\n%s" % (obj.__bases__, obj.__dict__))
Expand Down Expand Up @@ -1840,7 +1877,7 @@ def save_function(pickler, obj):
# Change the value of the cell
pickler.save_reduce(*possible_postproc)
# pop None created by calling preprocessing step off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)

logger.trace(pickler, "# F1")
else:
Expand Down Expand Up @@ -1949,7 +1986,7 @@ def pickles(obj,exact=False,safe=False,**kwds):
"""
if safe: exceptions = (Exception,) # RuntimeError, ValueError
else:
exceptions = (TypeError, AssertionError, NotImplementedError, PicklingError, UnpicklingError)
exceptions = UNPICKLEABLE_ERRORS + (AssertionError, UnpicklingError)
try:
pik = copy(obj, **kwds)
#FIXME: should check types match first, then check content if "exact"
Expand Down
18 changes: 17 additions & 1 deletion dill/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,29 @@ def dump_module(
>>> [foo.sin(x) for x in foo.values]
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
- Use `refimported` to save imported objects by reference:
>>> import dill
>>> from html.entities import html5
>>> type(html5), len(html5)
(dict, 2231)
>>> import io
>>> buf = io.BytesIO()
>>> dill.dump_module(buf) # saves __main__, with html5 saved by value
>>> len(buf.getvalue()) # pickle size in bytes
71665
>>> buf = io.BytesIO()
>>> dill.dump_module(buf, refimported=True) # html5 saved by reference
>>> len(buf.getvalue())
438
*Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
``module`` and ``refimported``, respectively.
Note:
Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
don't apply to this function.`
don't apply to this function.
"""
for old_par, par in [('main', 'module'), ('byref', 'refimported')]:
if old_par in kwds:
Expand Down
Loading

0 comments on commit 372de38

Please sign in to comment.