zipfile Module Complexity¶
The zipfile module provides tools for working with ZIP archives.
Functions & Methods¶
| Operation | Time | Space | Notes |
|---|---|---|---|
ZipFile(filename, 'r') |
O(n) | O(n) | Open and read central directory; n = file count |
ZipFile.read(name) |
O(m) | O(m) | Read file, m = uncompressed size |
ZipFile.write(filename) |
O(m) | O(m) | Write file to archive |
ZipFile.namelist() |
O(n) | O(n) | List all files, n = file count |
ZipFile.getinfo(name) |
O(n) | O(1) | Get file info; linear search in central directory |
ZipFile.extractall() |
O(m) | O(m) | Extract all files |
Opening Archives¶
Time Complexity: O(n)¶
Where n = number of files in archive (reading central directory).
from zipfile import ZipFile
# Opening ZIP: O(n) where n = number of files
# Reads central directory
with ZipFile('archive.zip', 'r') as zf:
# O(n) to open and read directory
files = zf.namelist() # O(n)
Space Complexity: O(n)¶
from zipfile import ZipFile
# Memory for central directory
with ZipFile('archive.zip', 'r') as zf:
# Stores info for all files: O(n)
info = zf.infolist() # O(n) memory
Reading Files¶
Time Complexity: O(m)¶
Where m = uncompressed file size.
from zipfile import ZipFile
with ZipFile('archive.zip', 'r') as zf:
# Reading single file: O(m)
# m = file size in archive
content = zf.read('file.txt') # O(m)
# Compressed: decompress O(m)
# Uncompressed: just read O(m)
Space Complexity: O(m)¶
from zipfile import ZipFile
with ZipFile('archive.zip', 'r') as zf:
# Full file loaded to memory
content = zf.read('large_file.bin') # O(m) memory
# Streaming alternative
with zf.open('file.txt') as f:
# Can read chunks: O(k) memory per chunk
while True:
chunk = f.read(4096) # O(k) memory, k = chunk size
if not chunk:
break
Writing Archives¶
Time Complexity: O(m)¶
Where m = total size of all files being added.
from zipfile import ZipFile
with ZipFile('output.zip', 'w') as zf:
# Writing single file: O(m)
zf.write('file.txt') # O(m) to read and compress
# Multiple files: O(sum of sizes)
for filename in ['a.txt', 'b.txt', 'c.txt']:
zf.write(filename) # O(m) per file
Space Complexity: O(m)¶
from zipfile import ZipFile
# Compression buffer
with ZipFile('output.zip', 'w', compression=ZIP_DEFLATED) as zf:
# Space for compression buffers
zf.write('large_file.bin') # O(m) space for compression
Listing Contents¶
Time Complexity: O(n)¶
Where n = number of files in archive.
from zipfile import ZipFile
with ZipFile('archive.zip', 'r') as zf:
# List all files: O(n)
names = zf.namelist() # O(n)
# Get all info: O(n)
info_list = zf.infolist() # O(n)
# Individual lookup: O(n) in worst case
# (linear search through central directory)
info = zf.getinfo('specific.txt') # O(n)
Space Complexity: O(n)¶
from zipfile import ZipFile
with ZipFile('archive.zip', 'r') as zf:
# Stores list of all files
names = zf.namelist() # O(n) space
Extracting Archives¶
Time Complexity: O(m)¶
Where m = total uncompressed size of all files.
from zipfile import ZipFile
with ZipFile('archive.zip', 'r') as zf:
# Extract single file: O(m)
# m = uncompressed size
zf.extract('file.txt') # O(m)
# Extract all: O(sum of all sizes)
zf.extractall() # O(m) total
# Extract with path: O(m) + file I/O
zf.extractall(path='output_dir') # O(m)
Space Complexity: O(m)¶
from zipfile import ZipFile
# Memory usage for extraction
with ZipFile('archive.zip', 'r') as zf:
# Temporary buffers for decompression
zf.extractall() # O(k) space per file being extracted
# k = max file size in archive
Compression Methods¶
Uncompressed (STORED)¶
from zipfile import ZipFile, ZIP_STORED
with ZipFile('archive.zip', 'w', compression=ZIP_STORED) as zf:
# No compression overhead
zf.write('file.txt') # O(m) time, minimal CPU
DEFLATE Compression (Default)¶
from zipfile import ZipFile, ZIP_DEFLATED
with ZipFile('archive.zip', 'w', compression=ZIP_DEFLATED) as zf:
# Compression adds CPU overhead: O(m log m)
# But reduces file size
zf.write('file.txt') # O(m log m) time
Common Patterns¶
Reading with Context Manager¶
from zipfile import ZipFile
# Safe handling: O(n) to open, O(m) to read
with ZipFile('archive.zip', 'r') as zf:
if 'target.txt' in zf.namelist(): # O(n)
data = zf.read('target.txt') # O(m)
Batch Operations¶
from zipfile import ZipFile
# Process all files: O(sum of sizes)
with ZipFile('archive.zip', 'r') as zf:
for file_info in zf.infolist(): # O(n)
if file_info.filename.endswith('.txt'):
content = zf.read(file_info.filename) # O(m) per file
# Process content
Creating Archives from Directory¶
from zipfile import ZipFile
import os
def create_archive(directory, archive_name):
with ZipFile(archive_name, 'w') as zf:
for root, dirs, files in os.walk(directory):
for file in files: # O(n) iterations
file_path = os.path.join(root, file)
zf.write(file_path) # O(m) per file
# Total: O(sum of file sizes)
# Time: O(n files * average size)
# Space: O(archive size)
create_archive('my_dir', 'backup.zip')
Incremental Backup¶
from zipfile import ZipFile
# Add files without rewriting entire archive
with ZipFile('archive.zip', 'a') as zf: # 'a' = append
# Appending: O(m) to add new file
# Efficient: doesn't rewrite existing files
zf.write('new_file.txt') # O(m) for new file
Performance Characteristics¶
Best Practices¶
from zipfile import ZipFile
# Good: Use context manager
with ZipFile('archive.zip', 'r') as zf:
data = zf.read('file.txt') # Automatic cleanup
# Avoid: Manual cleanup prone to errors
zf = ZipFile('archive.zip', 'r')
data = zf.read('file.txt')
zf.close() # Could be skipped if exception occurs
# Good: Check existence before reading
if 'target.txt' in zf.namelist():
data = zf.read('target.txt')
# Avoid: Try-catch for missing files (slower)
try:
data = zf.read('target.txt')
except KeyError:
pass
Memory Efficiency¶
from zipfile import ZipFile
# Bad: Load entire large file
with ZipFile('archive.zip', 'r') as zf:
content = zf.read('huge_file.bin') # All in memory
# Better: Stream large files
with ZipFile('archive.zip', 'r') as zf:
with zf.open('huge_file.bin') as f:
while True:
chunk = f.read(4096) # Process in chunks
if not chunk:
break
process_chunk(chunk)
Version Notes¶
- Python 2.6+: Basic zipfile support
- Python 3.0+: Enhanced features
- Python 3.6+: Support for bz2 and lzma compression
- Python 3.8+: Performance improvements
Related Documentation¶
- tarfile Module - TAR archive handling
- gzip Module - GZIP compression
- bz2 Module - BZIP2 compression