shelve Module Complexity

The shelve module provides a persistent, dictionary-like store backed by a DBM database: keys are strings, values are arbitrary pickleable Python objects, and entries are written to and read from disk on demand.

Complexity Reference

Operation           | Time                 | Space | Notes
shelve.open()       | O(1)                 | O(1)  | Open/create database
shelf[key] = value  | O(1) avg, O(n) worst | O(k)  | Store pickled value (k = pickled size); worst case from hash collisions
shelf[key]          | O(1) avg, O(n) worst | O(k)  | Retrieve and unpickle; worst case from hash collisions
del shelf[key]      | O(1) avg, O(n) worst | O(1)  | Delete key; worst case from hash collisions
key in shelf        | O(1) avg, O(n) worst | O(1)  | Key lookup; worst case from hash collisions
len(shelf)          | O(n)                 | O(1)  | Scans all keys
shelf.keys()        | O(n)                 | O(n)  | Returns all keys
shelf.close()       | O(n)                 | O(1)  | Flush and close

Basic Usage

Create and Store

import shelve

# Open or create database - O(1)
shelf = shelve.open('mydata.db')

# Store objects - O(1) avg each (hash-based DBM)
shelf['name'] = 'Alice'
shelf['age'] = 30
shelf['scores'] = [95, 87, 92]
shelf['config'] = {'debug': True, 'timeout': 30}

# Writes go to the underlying dbm; buffering depends on the backend
shelf.close()  # O(n) - flush and close

Retrieve Data

import shelve

# Open existing database - O(1)
shelf = shelve.open('mydata.db')

# Retrieve objects - O(1) avg each (hash-based DBM)
name = shelf['name']        # 'Alice'
age = shelf['age']          # 30
scores = shelf['scores']    # [95, 87, 92]

# Check existence - O(1) avg
if 'name' in shelf:
    print(f"Hello {shelf['name']}")

# Get with default - O(1) avg
email = shelf.get('email', 'no-email')

shelf.close()
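
Missing keys raise KeyError, just as with a regular dict; when a default via get() isn't enough, catch the exception explicitly. A minimal sketch:

import shelve

with shelve.open('mydata.db') as shelf:

    # Missing keys raise KeyError - O(1) avg
    try:
        phone = shelf['phone']
    except KeyError:
        phone = None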

Context Manager Usage

Automatic Cleanup

import shelve

# Use with statement for automatic close - O(1) open/close
with shelve.open('mydata.db') as shelf:

    # Store data - O(1) avg
    shelf['user'] = {'name': 'Bob', 'age': 25}

    # Retrieve data - O(1) avg
    user = shelf['user']
    print(user)

# Automatically closed and flushed
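
For reference, the with statement above is roughly equivalent to a manual try/finally that guarantees close() runs even if an exception occurs:

import shelve

# Manual equivalent of the context manager
shelf = shelve.open('mydata.db')
try:
    shelf['user'] = {'name': 'Bob', 'age': 25}
finally:
    shelf.close()  # always runs, so data is flushed even on error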

Iteration and Keys

Iterate Over Keys

import shelve

with shelve.open('mydata.db') as shelf:

    # Store multiple items - O(1) avg each
    shelf['user1'] = {'name': 'Alice'}
    shelf['user2'] = {'name': 'Bob'}
    shelf['user3'] = {'name': 'Charlie'}

    # Get all keys - O(n)
    keys = shelf.keys()
    for key in keys:
        print(f"{key}: {shelf[key]}")

    # Iterate directly - O(n)
    for key in shelf:
        print(f"{key}: {shelf[key]}")

    # Number of items - O(n)
    count = len(shelf)

Iterate Over Items

import shelve

with shelve.open('mydata.db') as shelf:

    # Store data
    shelf['a'] = [1, 2, 3]
    shelf['b'] = [4, 5, 6]
    shelf['c'] = [7, 8, 9]

    # Iterate items - O(n)
    for key, value in shelf.items():
        print(f"{key}: {value}")

    # Get all values - O(n)
    values = shelf.values()
    for v in values:
        print(v)

Modifications and Deletion

Update Values

import shelve

with shelve.open('mydata.db') as shelf:

    # Store initial data - O(1) avg
    shelf['counter'] = 0

    # To update a value: read it, change it, reassign it - O(1) avg
    counter = shelf['counter']
    counter += 1
    shelf['counter'] = counter  # Reassign required

    # Modifying a list in-place won't persist (see the writeback sketch below)
    shelf['scores'] = [1, 2, 3]
    shelf['scores'].append(4)  # Changes don't persist!
    shelf['scores'] = [1, 2, 3, 4]  # Must reassign

    # Same for dictionaries: read, modify, reassign - O(1) avg
    config = shelf['config']
    config['debug'] = False
    shelf['config'] = config  # Must reassign
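
If reassignment becomes tedious, shelve.open() accepts writeback=True: accessed entries are kept in an in-memory cache and written back on sync() and close(), so in-place mutation does persist. A short sketch reusing the keys from the example above; the trade-off is extra memory for every accessed entry and an O(n) write-back at close:

import shelve

# writeback=True caches accessed entries and writes them back on sync()/close()
with shelve.open('mydata.db', writeback=True) as shelf:

    shelf['scores'] = [1, 2, 3]
    shelf['scores'].append(4)         # persists - no reassignment needed

    shelf['config'] = {'debug': True}
    shelf['config']['debug'] = False  # also persists

# Cost: memory for the cache, and O(cached entries) work at sync()/close()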

Delete Keys

import shelve

with shelve.open('mydata.db') as shelf:

    # Store items
    shelf['temp1'] = 'data1'
    shelf['temp2'] = 'data2'
    shelf['keep'] = 'data3'

    # Delete key - O(1) avg
    del shelf['temp1']
    del shelf['temp2']

    # Delete with check - O(1) avg
    if 'temp1' in shelf:
        del shelf['temp1']

    # Remaining key
    print(shelf['keep'])  # 'data3'
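
Shelf also implements the standard MutableMapping helpers, so pop() can retrieve and delete in one step; a brief sketch:

import shelve

with shelve.open('mydata.db') as shelf:

    shelf['temp'] = 'data'

    # Retrieve and delete in one step - O(1) avg
    value = shelf.pop('temp')           # 'data'

    # With a default, a missing key does not raise KeyError
    value = shelf.pop('missing', None)  # None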

Database Options

Specify Backend

import shelve

# Use the default backend chosen by dbm (dbm.gnu, dbm.ndbm, or dbm.dumb) - O(1)
shelf = shelve.open('data')
shelf.close()

# To force a specific backend, open it directly and wrap it in shelve.Shelf
import dbm.dumb

# dbm.dumb (pure Python, slow, but always available) - O(1)
db = dbm.dumb.open('data_dumb', 'c')
shelf = shelve.Shelf(db)
shelf.close()

# Check which backend created an existing database
import dbm
print(dbm.whichdb('data'))  # e.g. 'dbm.gnu', 'dbm.ndbm', or 'dbm.dumb'

Open Flags

import shelve

# 'r' = read-only, database must already exist - O(1)
shelf = shelve.open('data.db', flag='r')

# 'w' = read-write, database must already exist - O(1)
shelf = shelve.open('data.db', flag='w')

# 'c' = read-write, create if missing (default) - O(1)
shelf = shelve.open('data.db', flag='c')

# 'n' = read-write, always create a new, empty database - O(1)
shelf = shelve.open('data.db', flag='n')
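
Note that 'r' and 'w' require the database to already exist, and writing through a read-only shelf fails. The exact exception comes from the underlying dbm backend, so the sketch below catches dbm.error (which also covers OSError):

import dbm
import shelve

try:
    # Fails if the database doesn't exist; writes fail if it is read-only
    with shelve.open('data.db', flag='r') as shelf:
        shelf['key'] = 'value'
except dbm.error as e:
    print(f"dbm error: {e}")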

Object Compatibility

Pickleable Objects

import shelve

with shelve.open('objects.db') as shelf:

    # Primitive types - O(1) avg DBM + O(k) pickle
    shelf['int'] = 42
    shelf['str'] = 'hello'
    shelf['list'] = [1, 2, 3]
    shelf['dict'] = {'a': 1, 'b': 2}
    shelf['tuple'] = (1, 2, 3)

    # Complex objects - O(1) avg DBM + O(k) pickle
    shelf['set'] = {1, 2, 3}
    shelf['bytes'] = b'binary'

    # Nested structures - O(1) avg DBM + O(k) pickle
    shelf['complex'] = {
        'data': [1, 2, 3],
        'config': {'debug': True},
        'items': [(1, 'a'), (2, 'b')]
    }
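
The flip side: anything pickle cannot serialize (open files, sockets, lambdas, database connections) cannot be shelved. The exception type varies with the object, so the sketch below catches the usual candidates:

import pickle
import shelve

with shelve.open('objects.db') as shelf:

    try:
        shelf['fn'] = lambda x: x  # lambdas can't be pickled
    except (TypeError, AttributeError, pickle.PicklingError) as e:
        print(f"Cannot store: {e}")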

Custom Classes

import shelve

class Person:
    def __init__(self, name, age):
        self.name = name
        self.age = age

    def __repr__(self):
        return f"Person({self.name}, {self.age})"

with shelve.open('people.db') as shelf:

    # Store custom object - O(1) avg DBM + O(k) pickle
    shelf['person1'] = Person('Alice', 30)
    shelf['person2'] = Person('Bob', 25)

    # Retrieve and use - O(1) avg DBM + O(k) unpickle
    person = shelf['person1']
    print(person)  # Person(Alice, 30)
    print(person.age)  # 30
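
One caveat: unpickling looks the class up by module and name, so the same Person definition must be importable in whatever process reads the shelf back. A minimal sketch of the failure mode:

import shelve

with shelve.open('people.db') as shelf:

    try:
        person = shelf['person1']  # needs the Person class to be importable
    except (AttributeError, ModuleNotFoundError) as e:
        print(f"Cannot load: {e}")  # raised when the class definition is missing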

Performance Considerations

Time Complexity

  • Storage operations: O(1) avg for hash-based DBM + disk I/O
  • Retrieval: O(1) avg + unpickling time
  • Iteration: O(n) to scan all keys (see the timing sketch below)
  • Deletion: O(1) avg for hash-based DBM
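
These costs can be checked empirically; a rough timing sketch (the file name bench.db and the entry count are arbitrary):

import shelve
import time

with shelve.open('bench.db', flag='n') as shelf:

    for i in range(10_000):
        shelf[f'key{i}'] = i

    start = time.perf_counter()
    _ = shelf['key5000']    # single lookup - O(1) avg
    lookup_time = time.perf_counter() - start

    start = time.perf_counter()
    count = len(shelf)      # scans all keys - O(n)
    scan_time = time.perf_counter() - start

    print(f"lookup: {lookup_time:.6f}s  len(): {scan_time:.6f}s")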

Space Complexity

  • Database: O(n) for n stored objects
  • Memory: objects are loaded on demand; they are cached in memory only when writeback=True

Buffering and Flushing

import shelve

shelf = shelve.open('data.db')

# Store items - O(1) avg per item
for i in range(1000):
    shelf[f'key{i}'] = f'value{i}'

# Data may still be buffered (by the writeback cache and/or the dbm backend)
# Force a flush - O(n): writes back cached entries and syncs the backend
shelf.sync()

# Call sync() before another process reads the database

# Close also flushes - O(n)
shelf.close()
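
When loading many entries with writeback=True, the cache grows with every assignment; calling sync() periodically writes the cached entries back and empties the cache, which bounds memory use (the batch size of 1000 below is arbitrary):

import shelve

with shelve.open('data.db', writeback=True) as shelf:

    for i in range(10_000):
        shelf[f'key{i}'] = {'value': i}

        # Flush and clear the writeback cache every 1000 items
        if i % 1000 == 999:
            shelf.sync()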

Common Patterns

Cache Implementation

import shelve
import time

class Cache:
    """Simple persistent cache"""

    def __init__(self, db_path='cache.db'):
        self.shelf = shelve.open(db_path)

    # Store with optional TTL
    def set(self, key, value, ttl=None):
        """Cache value - O(1) avg"""
        entry = {
            'value': value,
            'time': time.time(),
            'ttl': ttl
        }
        self.shelf[key] = entry
        self.shelf.sync()

    # Retrieve with expiration check
    def get(self, key, default=None):
        """Get cached value - O(1) avg"""
        if key not in self.shelf:
            return default

        entry = self.shelf[key]

        # Check expiration
        if entry['ttl'] and time.time() - entry['time'] > entry['ttl']:
            del self.shelf[key]
            return default

        return entry['value']

# Usage
cache = Cache()
cache.set('user:1', {'name': 'Alice'})
user = cache.get('user:1')

Session Storage

import shelve
import time

class SessionStore:
    """Store web session data"""

    def __init__(self, path='sessions.db'):
        self.shelf = shelve.open(path)

    # Create session - O(1) avg
    def create_session(self, session_id, data):
        self.shelf[session_id] = {
            'data': data,
            'created': time.time()
        }
        self.shelf.sync()

    # Get session - O(1) avg
    def get_session(self, session_id):
        if session_id in self.shelf:
            return self.shelf[session_id]['data']
        return None

    # Update session - O(1) avg
    def update_session(self, session_id, data):
        if session_id in self.shelf:
            session = self.shelf[session_id]
            session['data'].update(data)
            self.shelf[session_id] = session
            self.shelf.sync()

    # Delete session - O(1) avg
    def delete_session(self, session_id):
        if session_id in self.shelf:
            del self.shelf[session_id]
            self.shelf.sync()

# Usage
store = SessionStore()
store.create_session('abc123', {'user_id': 1, 'role': 'admin'})

Configuration Storage

import shelve

class ConfigStore:
    """Persistent configuration"""

    def __init__(self, path='config.db'):
        self.shelf = shelve.open(path)

    # Save config - O(1) avg
    def save(self, key, value):
        self.shelf[key] = value
        self.shelf.sync()

    # Load config - O(1) avg
    def load(self, key, default=None):
        return self.shelf.get(key, default)

    # Load all - O(n)
    def load_all(self):
        return dict(self.shelf)

    # Remove - O(1) avg
    def remove(self, key):
        if key in self.shelf:
            del self.shelf[key]
            self.shelf.sync()

# Usage
config = ConfigStore()
config.save('database.host', 'localhost')
config.save('database.port', 5432)
host = config.load('database.host')

Limitations and Alternatives

Limitations

  • Not thread-safe for concurrent writes (see the lock sketch after this list)
  • Object must be pickleable
  • Limited query capabilities
  • Not suitable for large-scale data
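
shelve provides no locking of its own. For threads inside a single process, a thin wrapper around a threading.Lock (a hypothetical LockedShelf, sketched below) is enough to serialize access; it does nothing for separate processes, where a proper database is the better fit.

import shelve
import threading

class LockedShelf:
    """Hypothetical wrapper that serializes shelf access across threads."""

    def __init__(self, path):
        self._shelf = shelve.open(path)
        self._lock = threading.Lock()

    def set(self, key, value):
        with self._lock:
            self._shelf[key] = value

    def get(self, key, default=None):
        with self._lock:
            return self._shelf.get(key, default)

    def close(self):
        with self._lock:
            self._shelf.close()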

Alternatives

# For simple cases: pickle
import pickle
data = {'key': 'value'}
with open('data.pkl', 'wb') as f:
    pickle.dump(data, f)

# For larger data: SQLite
import sqlite3
conn = sqlite3.connect('data.db')
cursor = conn.cursor()

# For web applications: Redis
import redis
client = redis.Redis()
client.set('key', 'value')

# For complex queries: PostgreSQL/MySQL
# Use SQLAlchemy or psycopg2

Best Practices

Do's

  • Use context manager (with statement)
  • Call sync() before other processes read
  • Use for simple persistent storage
  • Test data pickling compatibility

Don'ts

  • Don't share between processes without syncing
  • Don't store non-pickleable objects
  • Don't use for large datasets
  • Don't modify mutable objects in-place (unless writeback=True)