"""Functionality related with filters in a PyTables file."""
import warnings
from typing import Any, Literal, Optional, TYPE_CHECKING
import numpy as np
from . import (
utilsextension,
blosc_compressor_list,
blosc_compcode_to_compname,
blosc2_compressor_list,
blosc2_compcode_to_compname,
)
from .exceptions import FiltersWarning
from packaging.version import Version
import tables as tb
if TYPE_CHECKING:
from .leaf import Leaf
# Element 1 of the tuple returned by ``which_lib_version`` is parsed into a
# comparable ``Version`` object for each Blosc flavour.
blosc_version = Version(tb.which_lib_version("blosc")[1])
blosc2_version = Version(tb.which_lib_version("blosc2")[1])

__docformat__ = 'reStructuredText'
"""The format of documentation strings in this module."""

# NOTE: the order of this list is significant.  ``Filters._pack`` stores the
# 1-based position of a library in this list as its id, and
# ``Filters._unpack`` maps the id back through the same list.
all_complibs = ['zlib', 'lzo', 'bzip2', 'blosc', 'blosc2']
all_complibs += [f'blosc:{cname}' for cname in blosc_compressor_list()]
all_complibs += [f'blosc2:{cname}' for cname in blosc2_compressor_list()]
"""List of all compression libraries."""

foreign_complibs = ['szip']
"""List of known but unsupported compression libraries."""

default_complib = 'zlib'
"""The default compression library."""

# Bit flags for the "parameterless filters" byte of a packed ``Filters``.
_shuffle_flag = 0x1
_fletcher32_flag = 0x2
_rounding_flag = 0x4
_bitshuffle_flag = 0x8
class Filters:
    """Container for filter properties.

    This class is meant to serve as a container that keeps information about
    the filter properties associated with the chunked leaves, that is Table,
    CArray, EArray and VLArray.

    Instances of this class can be directly compared for equality.

    Parameters
    ----------
    complevel : int
        Specifies a compression level for data. The allowed
        range is 0-9. A value of 0 (the default) disables
        compression.
    complib : str
        Specifies the compression library to be used.  Right now, 'zlib' (the
        default), 'lzo', 'bzip2', 'blosc' and 'blosc2' are supported.
        Additional compressors for Blosc like 'blosc:blosclz' ('blosclz' is
        the default in case the additional compressor is not specified),
        'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib' and 'blosc:zstd' are
        supported too.
        Also, additional compressors for Blosc2 like 'blosc2:blosclz'
        ('blosclz' is the default in case the additional compressor is not
        specified), 'blosc2:lz4', 'blosc2:lz4hc', 'blosc2:zlib' and
        'blosc2:zstd' are supported too.
        Specifying a compression library which is not available
        in the system issues a FiltersWarning and sets the library to the
        default one.
    shuffle : bool
        Whether to use the *Shuffle* filter in the HDF5 library.
        This is normally used to improve the compression ratio.
        A false value disables shuffling and a true one enables
        it. The default value depends on whether compression is
        enabled or not; if compression is enabled, shuffling defaults
        to be enabled, else shuffling is disabled. Shuffling can only
        be used when compression is enabled.
    bitshuffle : bool
        Whether to use the *BitShuffle* filter in the Blosc/Blosc2
        libraries. This is normally used to improve the compression
        ratio. A false value disables bitshuffling and a true one
        enables it. The default value is disabled.
    fletcher32 : bool
        Whether to use the *Fletcher32* filter in the HDF5 library.
        This is used to add a checksum on each data chunk. A false
        value (the default) disables the checksum.
    least_significant_digit : int
        If specified, data will be truncated (quantized). In conjunction
        with enabling compression, this produces 'lossy', but
        significantly more efficient compression. For example, if
        *least_significant_digit=1*, data will be quantized using
        ``around(scale*data)/scale``, where ``scale = 2**bits``, and
        bits is determined so that a precision of 0.1 is retained (in
        this case bits=4). Default is *None*, or no quantization.

        .. note::

            quantization is only applied if some form of compression is
            enabled

    Examples
    --------
    This is a small example on using the Filters class::

        import numpy as np
        import tables as tb

        fileh = tb.open_file('test5.h5', mode='w')
        atom = tb.Float32Atom()
        filters = tb.Filters(complevel=1, complib='blosc', fletcher32=True)
        arr = fileh.create_earray(fileh.root, 'earray', atom, (0,2),
                                  "A growable array", filters=filters)

        # Append several rows in only one call
        arr.append(np.array([[1., 2.],
                             [2., 3.],
                             [3., 4.]], dtype=np.float32))

        # Print information on that enlargeable array
        print("Result Array:")
        print(repr(arr))
        fileh.close()

    This enforces the use of the Blosc library, a compression level of 1 and a
    Fletcher32 checksum filter as well. See the output of this example::

        Result Array:
        /earray (EArray(3, 2), fletcher32, shuffle, blosc(1)) 'A growable array'
          type = float32
          shape = (3, 2)
          itemsize = 4
          nrows = 3
          extdim = 0
          flavor = 'numpy'
          byteorder = 'little'

    .. rubric:: Filters attributes

    .. attribute:: fletcher32

        Whether the *Fletcher32* filter is active or not.

    .. attribute:: complevel

        The compression level (0 disables compression).

    .. attribute:: complib

        The compression filter used (irrelevant when compression is not
        enabled).

    .. attribute:: shuffle

        Whether the *Shuffle* filter is active or not.

    .. attribute:: bitshuffle

        Whether the *BitShuffle* filter is active or not (Blosc/Blosc2 only).

    .. attribute:: least_significant_digit

        The least significant digit to which data shall be truncated
        (*None* means no quantization).

    """

    @property
    def shuffle_bitshuffle(self) -> Literal[0, 1, 2]:
        """Encode NoShuffle (0), Shuffle (1) and BitShuffle (2) filters.

        Raises
        ------
        ValueError
            If both `shuffle` and `bitshuffle` are active.

        """
        if self.shuffle and self.bitshuffle:
            raise ValueError(
                "Shuffle and BitShuffle cannot be active at the same time")
        if self.shuffle:
            return 1
        if self.bitshuffle:
            return 2
        return 0

    @classmethod
    def _from_leaf(cls, leaf: "Leaf") -> "Filters":
        """Create a `Filters` object from the filters reported for `leaf`.

        The filter information is obtained from
        ``utilsextension.get_filters``; no availability checks are made on
        the compression library (the instance is built with ``_new=False``).

        """
        # Get a dictionary with all the filters
        parent = leaf._v_parent
        filters_dict = utilsextension.get_filters(parent._v_objectid,
                                                  leaf._v_name)
        if filters_dict is None:
            filters_dict = {}  # not chunked

        # Keyword arguments are all off
        kwargs = dict(complevel=0, shuffle=False, bitshuffle=False,
                      fletcher32=False, least_significant_digit=None,
                      _new=False)
        for (name, values) in filters_dict.items():
            if name == 'deflate':
                name = 'zlib'
            if name in all_complibs:
                kwargs['complib'] = name
                if name in ('blosc', 'blosc2'):
                    kwargs['complevel'] = values[4]
                    if values[5] == 1:
                        # Shuffle filter is internal to blosc/blosc2
                        kwargs['shuffle'] = True
                    elif values[5] == 2:
                        # BitShuffle filter is internal to blosc/blosc2
                        kwargs['bitshuffle'] = True
                    # From Blosc 1.3 on, parameter 6 is used for the compressor
                    if len(values) > 6:
                        if name == "blosc":
                            cname = blosc_compcode_to_compname(values[6])
                            kwargs['complib'] = "blosc:%s" % cname
                        else:
                            cname = blosc2_compcode_to_compname(values[6])
                            kwargs['complib'] = "blosc2:%s" % cname
                else:
                    kwargs['complevel'] = values[0]
            elif name in foreign_complibs:
                kwargs['complib'] = name
                kwargs['complevel'] = 1  # any nonzero value will do
            elif name in ['shuffle', 'fletcher32']:
                kwargs[name] = True
        return cls(**kwargs)

    @classmethod
    def _unpack(cls, packed: int) -> "Filters":
        """Create a new `Filters` object from a packed version.

        >>> Filters._unpack(0)
        Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
        >>> Filters._unpack(0x101)
        Filters(complevel=1, complib='zlib', shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
        >>> Filters._unpack(0x30109)
        Filters(complevel=9, complib='zlib', shuffle=True, bitshuffle=False, fletcher32=True, least_significant_digit=None)
        >>> Filters._unpack(0x3010A)
        Traceback (most recent call last):
          ...
        ValueError: compression level must be between 0 and 9
        >>> Filters._unpack(0x1)
        Traceback (most recent call last):
          ...
        ValueError: invalid compression library id: 0

        """
        kwargs = {'_new': False}

        # Byte 0: compression level.
        kwargs['complevel'] = complevel = packed & 0xff
        packed >>= 8

        # Byte 1: compression library id (0 for none).
        if complevel > 0:
            complib_id = int(packed & 0xff)
            if not (0 < complib_id <= len(all_complibs)):
                raise ValueError("invalid compression library id: %d"
                                 % complib_id)
            kwargs['complib'] = all_complibs[complib_id - 1]
        packed >>= 8

        # Byte 2: parameterless filters.
        kwargs['shuffle'] = bool(packed & _shuffle_flag)
        kwargs['bitshuffle'] = bool(packed & _bitshuffle_flag)
        kwargs['fletcher32'] = bool(packed & _fletcher32_flag)
        has_rounding = packed & _rounding_flag
        packed >>= 8

        # Byte 3: least significant digit.
        if has_rounding:
            # Decode the byte explicitly as a signed (two's complement)
            # 8-bit value instead of relying on ``np.int8`` wrapping values
            # above 127, which is not guaranteed across NumPy versions.
            lsd = int(packed & 0xff)
            if lsd > 0x7f:
                lsd -= 0x100
            kwargs['least_significant_digit'] = np.int8(lsd)
        else:
            kwargs['least_significant_digit'] = None

        return cls(**kwargs)

    def _pack(self) -> np.int64:
        """Pack the `Filters` object into a 64-bit NumPy integer.

        Layout (from least to most significant byte): compression level,
        compression library id (0 for none), parameterless filter flags,
        least significant digit.

        """
        packed = np.int64(0)
        lsd = self.least_significant_digit

        # Byte 3: least significant digit.
        if lsd is not None:
            # Keep only the low 8 bits: OR-ing a negative value directly
            # would sign-extend and set every higher bit of the result.
            packed |= int(lsd) & 0xff
        packed <<= 8

        # Byte 2: parameterless filters.
        if self.shuffle:
            packed |= _shuffle_flag
        if self.bitshuffle:
            packed |= _bitshuffle_flag
        if self.fletcher32:
            packed |= _fletcher32_flag
        if lsd is not None:
            # Test against None, not truthiness: 0 is a valid digit and must
            # still be flagged so that `_unpack` restores it.
            packed |= _rounding_flag
        packed <<= 8

        # Byte 1: compression library id (0 for none).
        if self.complevel > 0:
            packed |= all_complibs.index(self.complib) + 1
        packed <<= 8

        # Byte 0: compression level.
        packed |= self.complevel

        return packed

    def __init__(self,
                 complevel: int = 0,
                 complib: Optional[str] = default_complib,
                 shuffle: bool = True,
                 bitshuffle: bool = False,
                 fletcher32: bool = False,
                 least_significant_digit: Optional[int] = None,
                 _new: bool = True) -> None:
        """Validate and normalize the filter properties.

        See the class docstring for the meaning of the public parameters.
        `_new` is private: when false (filters loaded from disk) the
        compression library is neither validated nor checked for
        availability.

        Raises
        ------
        ValueError
            If `complevel` is outside 0-9, if a new filter names an
            unsupported `complib`, or if `bitshuffle` is requested with a
            compression library other than Blosc/Blosc2.

        """
        if not (0 <= complevel <= 9):
            raise ValueError("compression level must be between 0 and 9")

        if _new and complevel > 0:
            # These checks are not performed when loading filters from disk.
            if complib not in all_complibs:
                raise ValueError(
                    "compression library ``%s`` is not supported; "
                    "it must be one of: %s"
                    % (complib, ", ".join(all_complibs)))
            if utilsextension.which_lib_version(complib) is None:
                warnings.warn("compression library ``%s`` is not available; "
                              "using ``%s`` instead"
                              % (complib, default_complib), FiltersWarning)
                complib = default_complib  # always available

        complevel = int(complevel)
        complib = str(complib)
        shuffle = bool(shuffle)
        bitshuffle = bool(bitshuffle)
        fletcher32 = bool(fletcher32)
        if least_significant_digit is not None:
            least_significant_digit = np.int8(least_significant_digit)

        if complevel == 0:
            # Override some inputs when compression is not enabled.
            complib = None  # make it clear there is no compression
            shuffle = False  # shuffling and not compressing makes no sense
            least_significant_digit = None
        elif complib not in all_complibs:
            # Do not try to use a meaningful level for unsupported libs.
            complevel = -1

        self.complevel = complevel
        """The compression level (0 disables compression)."""

        self.complib = complib
        """The compression filter used (irrelevant when compression is
        not enabled).
        """

        self.shuffle = shuffle
        """Whether the *Shuffle* filter is active or not."""

        self.bitshuffle = bitshuffle
        """Whether the *BitShuffle* filter is active or not."""

        if (self.complib and
                self.bitshuffle and
                not self.complib.startswith('blosc')):
            raise ValueError("BitShuffle can only be used inside Blosc/Blosc2")

        if self.shuffle and self.bitshuffle:
            # BitShuffle has priority in case both are specified
            self.shuffle = False

        self.fletcher32 = fletcher32
        """Whether the *Fletcher32* filter is active or not."""

        self.least_significant_digit = least_significant_digit
        """The least significant digit to which data shall be truncated."""

    def __repr__(self) -> str:
        """Return a constructor-like representation of the filters."""
        args = []
        if self.complevel >= 0:  # meaningful compression level
            args.append(f'complevel={self.complevel}')
        if self.complevel != 0:  # compression enabled (-1 or > 0)
            args.append(f'complib={self.complib!r}')
        args.append(f'shuffle={self.shuffle}')
        args.append(f'bitshuffle={self.bitshuffle}')
        args.append(f'fletcher32={self.fletcher32}')
        args.append(f'least_significant_digit={self.least_significant_digit}')
        return f'{self.__class__.__name__}({", ".join(args)})'

    def __str__(self) -> str:
        """Return the same text as ``repr(self)``."""
        return repr(self)

    def __eq__(self, other: Any) -> bool:
        """Return whether `other` is a `Filters` with equal attributes."""
        if not isinstance(other, self.__class__):
            return False
        for attr in self.__dict__:
            if getattr(self, attr) != getattr(other, attr):
                return False
        return True

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, self.complevel, self.complib,
    #                 self.shuffle, self.bitshuffle, self.fletcher32))

    def copy(self, **override) -> "Filters":
        """Get a copy of the filters, possibly overriding some arguments.

        Constructor arguments to be overridden must be passed as keyword
        arguments.

        Using this method is recommended over replacing the attributes of an
        instance, since instances of this class may become immutable in the
        future::

            >>> filters1 = Filters()
            >>> filters2 = filters1.copy()
            >>> filters1 == filters2
            True
            >>> filters1 is filters2
            False
            >>> filters3 = filters1.copy(complevel=1)  #doctest: +ELLIPSIS
            Traceback (most recent call last):
              ...
            ValueError: compression library ``None`` is not supported...
            >>> filters3 = filters1.copy(complevel=1, complib='zlib')
            >>> print(filters1)
            Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
            >>> print(filters3)
            Filters(complevel=1, complib='zlib', shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
            >>> filters1.copy(foobar=42)  #doctest: +ELLIPSIS
            Traceback (most recent call last):
              ...
            TypeError: ...__init__() got an unexpected keyword argument 'foobar'

        """
        # The instance attributes share their names with the constructor
        # arguments, so they can be fed straight back to ``__init__``.
        newargs = self.__dict__.copy()
        newargs.update(override)
        return self.__class__(**newargs)
def _test() -> None:
"""Run ``doctest`` on this module."""
import doctest
doctest.testmod()
if __name__ == '__main__':
_test()