Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 51 additions & 22 deletions pandas/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
import warnings

from pandas.compat import pickle_compat
from pandas.util._decorators import (
doc,
set_module,
)

from pandas.core.shared_docs import _shared_docs
from pandas.util._decorators import set_module

from pandas.io.common import get_handle

Expand All @@ -35,10 +30,6 @@


@set_module("pandas")
@doc(
storage_options=_shared_docs["storage_options"],
compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
obj: Any,
filepath_or_buffer: FilePath | WriteBuffer[bytes],
Expand All @@ -57,8 +48,21 @@ def to_pickle(
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``write()`` function.
Also accepts a URL; the URL must point to S3 or GCS.
{compression_options}

compression : str or dict, default 'infer'
For on-the-fly compression of the output data. If 'infer' and
'filepath_or_buffer' is path-like, then detect compression from the
following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
'.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
Set to ``None`` for no compression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
``'tar'``} and other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for faster compression
and to create a reproducible gzip archive:
``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
protocol : int
Int which indicates which protocol should be used by the pickler,
default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
Expand All @@ -67,8 +71,15 @@ def to_pickle(
For Python >= 3.4, 4 is a valid value. A negative value for the
protocol parameter is equivalent to setting its value to
HIGHEST_PROTOCOL.

{storage_options}
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to ``urllib.request.Request`` as header options. For other
URLs (e.g. starting with "s3://" or "gcs://") the key-value pairs are
forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
details, and for more examples on storage options refer `here
<https://pandas.pydata.org/docs/user_guide/io.html?
highlight=storage_options#reading-writing-remote-files>`_.

.. [1] https://docs.python.org/3/library/pickle.html

Expand Down Expand Up @@ -117,10 +128,6 @@ def to_pickle(


@set_module("pandas")
@doc(
storage_options=_shared_docs["storage_options"],
decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
filepath_or_buffer: FilePath | ReadPickleBuffer,
compression: CompressionOptions = "infer",
Expand All @@ -140,10 +147,32 @@ def read_pickle(
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``readlines()`` function.
Also accepts URL. URL is not limited to S3 and GCS.

{decompression_options}

{storage_options}
compression : str or dict, default 'infer'
For on-the-fly decompression of on-disk data. If 'infer' and
'filepath_or_buffer' is path-like, then detect compression from the
following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
'.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
If using 'zip' or 'tar', the archive must contain only one data file
to be read in.
Set to ``None`` for no decompression.
Can also be a dict with key ``'method'`` set
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
``'tar'``} and other key-value pairs are forwarded to
``zipfile.ZipFile``, ``gzip.GzipFile``,
``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
``tarfile.TarFile``, respectively.
As an example, the following could be passed for Zstandard decompression
using a custom compression dictionary:
``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to ``urllib.request.Request`` as header options. For other
URLs (e.g. starting with "s3://" or "gcs://") the key-value pairs are
forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
details, and for more examples on storage options refer `here
<https://pandas.pydata.org/docs/user_guide/io.html?
highlight=storage_options#reading-writing-remote-files>`_.

Returns
-------
Expand Down
Loading