pickle and cPickle

Table of Contents

The pickle module implements an algorithm for turning an arbitrary Python object into a series of bytes.

The cPickle module implements the same algorithm in C instead of Python. It is many times faster than the Python implementation, but does not allow the user to subclass Pickler or Unpickler.

Importing

try:
    import cPickle as pickle
except ImportError:
    # cPickle is not available; fall back to the pure-Python module.
    import pickle

Encoding and Decoding Data in strings

try:
    import cPickle as pickle
except ImportError:
    # Only a missing cPickle should trigger the fallback; a bare except
    # would also hide unrelated errors.
    import pickle
import pprint

# The structure to serialize.
data1 = [{'a': 'A', 'b': 2, 'c': 3.0}]
print('BEFORE:')
pprint.pprint(data1)

# dumps() returns the pickled representation as a byte string instead of
# writing it to a file.
data1_string = pickle.dumps(data1)

print('PICKLE:')
print(data1_string)

# loads() rebuilds an equal, but separate, object from that string.
data2 = pickle.loads(data1_string)
print('AFTER:')
pprint.pprint(data2)

output:

$ python ./pickle_unpickle.py
BEFORE:
[{'a': 'A', 'b': 2, 'c': 3.0}]
PICKLE:
(lp1
(dp2
S'a'
S'A'
sS'c'
F3
sS'b'
I2
sa.
AFTER:
[{'a': 'A', 'b': 2, 'c': 3.0}]

Encoding and Decoding Data in files

try:
    import cPickle as pickle
except ImportError:
    # Only a missing cPickle should trigger the fallback.
    import pickle
import pprint
import sys

data1 = [{'a': 'A', 'b': 2, 'c': 3.0}]
print('BEFORE:')
pprint.pprint(data1)

# The file to write and read back is the first command-line argument.
try:
    filename = sys.argv[1]
except IndexError:
    raise RuntimeError(
        'Please specify a filename as an argument to %s' % sys.argv[0])

# Write the pickled object to the file, opened in binary mode.
with open(filename, 'wb') as out_s:
    pickle.dump(data1, out_s)

print('AFTER:')
with open(filename, 'rb') as in_s:
    # Keep loading objects until load() raises EOFError at the end of
    # the file.
    while True:
        try:
            data2 = pickle.load(in_s)
        except EOFError:
            break
        else:
            pprint.pprint(data2)
$ python pickle_unpickle_in_files.py test.dat
BEFORE:
[{'a': 'A', 'b': 2, 'c': 3.0}]
AFTER:
[{'a': 'A', 'b': 2, 'c': 3.0}]

Circular References [1]

The pickle protocol automatically handles circular references between objects, so a graph that contains cycles can be pickled and restored without any special handling.

circular-graph.png

import pickle

class Node(object):
    """One vertex of a directed graph.

    Every node keeps its name and the list of nodes that its outgoing
    edges point to.
    """

    def __init__(self, name):
        # Outgoing edges, in the order they were added.
        self.connections = []
        self.name = name

    def add_edge(self, node):
        """Add a directed edge from this node to ``node``."""
        self.connections.append(node)

    def __iter__(self):
        """Iterate over the nodes this node leads to."""
        targets = self.connections
        return iter(targets)

def preorder_traversal(root, seen=None, parent=None):
    """Yield the graph's edges as ``(parent, node)`` pairs in preorder.

    The pair leading to ``root`` comes first (``parent`` is ``None`` for
    the starting node), followed by the pairs reachable through ``root``.
    ``seen`` holds the nodes whose outgoing edges were already walked: a
    node reached a second time is still reported, but not expanded again,
    so the traversal also terminates on graphs that contain cycles.
    """
    visited = set() if seen is None else seen
    # Report the edge before the visited check so that edges leading
    # back to an already expanded node are not lost.
    yield (parent, root)
    if root not in visited:
        visited.add(root)
        for child in root:
            for edge in preorder_traversal(child, visited, root):
                yield edge

def show_edges(root):
    """Print every edge of the graph reachable from ``root``.

    Each line shows the parent's name, the child's name and ``id(child)``;
    the id makes it visible when two edges lead to the same object.
    """
    for parent, child in preorder_traversal(root):
        # The traversal starts with a (None, root) pair, which is not an
        # edge. Compare against the None sentinel explicitly instead of
        # relying on the truthiness of a node object.
        if parent is None:
            continue
        print('%5s -> %2s (%s)' % (parent.name, child.name, id(child)))

# Create the four nodes of the example graph.
root, a, b, c = Node('root'), Node('a'), Node('b'), Node('c')

# Connect them. The graph contains a cycle between a and b
# and an edge from a to itself.
for source, target in [
        (root, a),
        (root, b),
        (a, b),
        (b, a),
        (b, c),
        (a, a),
]:
    source.add_edge(target)

print('ORIGINAL GRAPH:')
show_edges(root)

# Round-trip the whole graph through pickle to obtain
# a new set of node objects.
dumped = pickle.dumps(root)
reloaded = pickle.loads(dumped)

print('')
print('RELOADED GRAPH:')
show_edges(reloaded)
$ python pickle_cycle.py

ORIGINAL GRAPH:
 root ->  a (4299721744)
    a ->  b (4299721808)
    b ->  a (4299721744)
    b ->  c (4299721872)
    a ->  a (4299721744)
 root ->  b (4299721808)

RELOADED GRAPH:
 root ->  a (4299722000)
    a ->  b (4299722064)
    b ->  a (4299722000)
    b ->  c (4299722128)
    a ->  a (4299722000)
 root ->  b (4299722064)

Footnotes:

Author: Shi Shougang

Created: 2015-03-05 Thu 23:19

Emacs 24.3.1 (Org mode 8.2.10)

Validate