struct Module Complexity¶
The struct module handles binary data conversions, packing Python values into bytes and unpacking bytes into Python values using format strings.
Complexity Reference¶
| Operation | Time | Space | Notes |
|---|---|---|---|
Struct() compilation |
O(m) | O(m) | m = format string length; compile once |
pack() |
O(k) | O(k) | k = number of fields; module-level calls also look the format up in an internal cache (O(m) parse only on a cache miss) |
unpack() |
O(k) | O(k) | k = number of fields |
pack_into() |
O(k) | O(1) | k = number of fields; writes into an existing buffer |
unpack_from() |
O(k) | O(k) | k = number of fields |
calcsize() |
O(m) | O(1) | m = format string length |
Format Strings¶
Character Types¶
import struct
# Format string characters
# (sizes shown are the standard sizes used with the '=', '<', '>' and '!' prefixes)
# 'b' = signed char (1 byte)
# 'B' = unsigned char (1 byte)
# 'h' = signed short (2 bytes)
# 'H' = unsigned short (2 bytes)
# 'i' = signed int (4 bytes)
# 'I' = unsigned int (4 bytes)
# 'l' = signed long (4 bytes standard; native size may be 8, e.g. 64-bit Linux/macOS)
# 'L' = unsigned long (4 bytes standard; native size may be 8)
# 'q' = signed long long (8 bytes)
# 'Q' = unsigned long long (8 bytes)
# 'f' = float (4 bytes)
# 'd' = double (8 bytes)
# 's' = char[] (length given by the repeat count, e.g. '5s' is 5 bytes)
# 'p' = pascal string (length given by the repeat count; first byte stores the string length)
# 'P' = void* (pointer; available in native mode only)
# Calculate size - O(m) where m = format length
# (no prefix means native mode, so alignment padding is included where needed)
size = struct.calcsize('i') # 4 bytes
size = struct.calcsize('ihh') # 8 bytes (4+2+2)
size = struct.calcsize('ihhf') # 12 bytes (4+2+2+4)
Byte Order and Alignment¶
import struct
# Byte order, size and alignment prefix (optional, default='@')
# '@' = native byte order, native sizes, native alignment (default)
# '=' = native byte order, standard sizes, no alignment
# '<' = little-endian, standard sizes, no alignment
# '>' = big-endian, standard sizes, no alignment
# '!' = network (big-endian), standard sizes, no alignment
# calcsize() walks the format string, so it is O(m) in the format length;
# for the one- and two-character formats below that cost is negligible.
# Native order
native = struct.calcsize('i')  # 4 on common platforms
# Little-endian
little = struct.calcsize('<i')  # 4 (standard size)
# Big-endian
big = struct.calcsize('>i')  # 4 (standard size)
# Alignment only shows up when a small field precedes a larger one:
# in native mode the int is aligned, so padding follows the char.
aligned = struct.calcsize('@ci')  # 8 on typical platforms (1 + 3 padding + 4)
unaligned = struct.calcsize('=ci')  # 5 (1 + 4, no padding)
Packing Data¶
Simple Packing¶
import struct
# Pack single value - O(k) for k fields (here k = 1)
# Format: integer (4 bytes)
bytes_data = struct.pack('i', 42)
print(bytes_data) # b'*\x00\x00\x00' on a little-endian machine (no prefix = native byte order)
# Pack multiple values - O(k) for k values
bytes_data = struct.pack('ihh', 100, 200, 300)
# 4 bytes (int) + 2 bytes (short) + 2 bytes (short) = 8 bytes
# Pack with explicit byte order - the result no longer depends on the host
bytes_data = struct.pack('>i', 42) # Big-endian: b'\x00\x00\x00*'
bytes_data = struct.pack('<i', 42) # Little-endian: b'*\x00\x00\x00'
String Packing¶
import struct
# Pack fixed-length string - O(n) for n bytes
# ('s' takes bytes, so text must be encoded first)
text = "Hello"
bytes_data = struct.pack('5s', text.encode()) # 5-byte string
# Pack with padding - input shorter than the count is padded with null bytes
name = "Bob"
bytes_data = struct.pack('10s', name.encode()) # b'Bob' followed by 7 null bytes
# Pack multiple strings
bytes_data = struct.pack('5s3s', b"Hello", b"Bob") # 8 bytes total
Pack Into Buffer¶
import struct
# Pack into existing buffer - O(k) for k fields, written in place
buffer = bytearray(20)
# Write at offset 0
struct.pack_into('i', buffer, 0, 42)
# Write at offset 4 (directly after the 4-byte int)
struct.pack_into('h', buffer, 4, 100)
# Write at offset 6 (directly after the 2-byte short)
struct.pack_into('f', buffer, 6, 3.14)
print(buffer[:10]) # First 10 bytes with packed data
Unpacking Data¶
Simple Unpacking¶
import struct
# Pack first
bytes_data = struct.pack('ihhf', 100, 200, 300, 3.14)
# Unpack all - O(k) for k fields; the result is always a tuple
values = struct.unpack('ihhf', bytes_data)
print(values) # (100, 200, 300, 3.140000104904175)
# Unpack specific subset - the slice must be exactly calcsize('ih') == 6 bytes
values = struct.unpack('ih', bytes_data[:6]) # Skip last values
print(values) # (100, 200)
Unpack From Buffer¶
import struct
# Create buffer with packed data
buffer = bytearray(20)
struct.pack_into('i', buffer, 0, 42)
struct.pack_into('h', buffer, 4, 100)
struct.pack_into('f', buffer, 6, 3.14)
# Unpack from buffer - O(k) for k fields; reads at the given offset without slicing
# unpack_from returns a tuple, hence the [0]
value1 = struct.unpack_from('i', buffer, 0)[0] # 42
value2 = struct.unpack_from('h', buffer, 4)[0] # 100
value3 = struct.unpack_from('f', buffer, 6)[0] # 3.140000104904175 (3.14 rounded to a 32-bit float)
Struct Objects (Compiled Format)¶
Create and Reuse Struct¶
import struct
# Create compiled struct - the format is parsed once, O(m) for format length m
header_format = struct.Struct('4sI') # 4-char string + unsigned int
# Pack with compiled struct - O(k) for k fields, no format parsing
bytes_data = header_format.pack(b"HEAD", 12345)
# Unpack with compiled struct - O(k) for k fields
header, version = header_format.unpack(bytes_data)
print(header) # b'HEAD'
print(version) # 12345
# Size calculation - O(1), read from an attribute of the compiled Struct
size = header_format.size # 8
Struct for Network Protocol¶
import struct
# Message format: type(1) + length(2) + timestamp(4) + data
class Message:
    """Length-prefixed message with a fixed-size network-order header.

    Wire layout: type (1 byte) + payload length (2 bytes) + timestamp
    (4 bytes), followed by the payload bytes.
    """

    HEADER_FORMAT = struct.Struct('!BHI')  # Network byte order

    def __init__(self, msg_type, timestamp, data):
        """Store the message type, timestamp and payload bytes."""
        self.type = msg_type
        self.timestamp = timestamp
        self.data = data

    def serialize(self):
        """Pack to bytes - O(n) in the payload length.

        Returns:
            The packed header followed by the payload.

        Raises:
            struct.error: If a header field does not fit its format code,
                e.g. a payload longer than 65535 bytes for 'H'.
        """
        header = self.HEADER_FORMAT.pack(
            self.type,
            len(self.data),
            self.timestamp,
        )
        return header + self.data

    @classmethod
    def deserialize(cls, data):
        """Unpack from bytes - O(n) in the payload length.

        Bytes after the declared payload length are ignored.

        Raises:
            struct.error: If ``data`` is shorter than the header, or shorter
                than the payload length the header declares.
        """
        header_size = cls.HEADER_FORMAT.size
        # unpack_from reads the header in place (no slice copy) and raises
        # struct.error itself when the buffer is shorter than the header.
        msg_type, length, timestamp = cls.HEADER_FORMAT.unpack_from(data)
        payload = data[header_size:header_size + length]
        # Slicing never fails, so a truncated buffer must be detected here
        # rather than silently yielding a short payload.
        if len(payload) != length:
            raise struct.error(
                f"payload truncated: expected {length} bytes, got {len(payload)}"
            )
        return cls(msg_type, timestamp, payload)
# Usage: round-trip a message through its wire format
msg = Message(1, 1234567890, b"Hello")
bytes_msg = msg.serialize() # 7-byte header + 5-byte payload
msg2 = Message.deserialize(bytes_msg) # msg2.data == b"Hello"
Common Patterns¶
Binary File I/O¶
import struct
class BinaryWriter:
    """Write binary data to file.

    Usable as a context manager so the file is closed even when a write
    raises; calling close() explicitly still works.
    """

    # Compiled once and shared by all instances; native int layout.
    _INT = struct.Struct('i')

    def __init__(self, filename):
        """Open ``filename`` for binary writing (truncates existing files)."""
        self.file = open(filename, 'wb')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def write_int(self, value):
        """Write integer - O(1).

        Raises struct.error if ``value`` does not fit a native int.
        """
        self.file.write(self._INT.pack(value))

    def write_string(self, value, length):
        """Write fixed-length string - O(n).

        ``value`` is encoded with the default str.encode() encoding, then
        truncated or null-padded by struct to exactly ``length`` bytes.
        """
        self.file.write(struct.pack(f'{length}s', value.encode()))

    def close(self):
        """Close the underlying file."""
        self.file.close()
class BinaryReader:
    """Read binary data from file.

    Usable as a context manager so the file is closed even when a read
    raises; calling close() explicitly still works.
    """

    # Compiled once; its .size replaces a hard-coded byte count in read_int.
    _INT = struct.Struct('i')

    def __init__(self, filename):
        """Open ``filename`` for binary reading."""
        self.file = open(filename, 'rb')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def read_int(self):
        """Read integer - O(1).

        Raises struct.error if fewer bytes remain than a native int needs.
        """
        return self._INT.unpack(self.file.read(self._INT.size))[0]

    def read_string(self, length):
        """Read fixed-length string - O(n).

        Returns the raw ``length`` bytes, including any null padding; the
        caller decides how to strip and decode them. Raises struct.error
        if fewer than ``length`` bytes remain.
        """
        return struct.unpack(f'{length}s', self.file.read(length))[0]

    def close(self):
        """Close the underlying file."""
        self.file.close()
# Usage: write an int and a 10-byte string field, then read them back
writer = BinaryWriter("data.bin")
writer.write_int(42)
writer.write_string("Hello", 10) # stored as b"Hello" plus 5 null bytes
writer.close()
reader = BinaryReader("data.bin")
value = reader.read_int()
text = reader.read_string(10) # bytes, still null-padded: b'Hello\x00\x00\x00\x00\x00'
reader.close()
Network Packet Parsing¶
import struct
class PacketHeader:
    """Fixed-size packet header decoded from network-order bytes."""

    # Wire layout, in order:
    #   H  destination port
    #   H  source port
    #   B  sequence
    #   B  flags
    #   H  window
    #   H  checksum
    FORMAT = struct.Struct('!HHBBHH')  # Network byte order

    def __init__(self, data):
        """Decode the leading header bytes of ``data``; extra bytes are ignored.

        Raises:
            ValueError: If ``data`` is shorter than the header.
        """
        needed = self.FORMAT.size
        if len(data) < needed:
            raise ValueError("Insufficient data")
        fields = self.FORMAT.unpack(data[:needed])
        (
            self.dst_port,
            self.src_port,
            self.seq,
            self.flags,
            self.window,
            self.checksum,
        ) = fields

    def to_bytes(self):
        """Re-encode the header fields in wire order."""
        ordered = (
            self.dst_port,
            self.src_port,
            self.seq,
            self.flags,
            self.window,
            self.checksum,
        )
        return self.FORMAT.pack(*ordered)
# Parse packet (exactly 10 bytes, the size of the '!HHBBHH' header)
packet_data = b'\x00P\x00P\x01\x00\x00\x10\x12\x34'
header = PacketHeader(packet_data)
print(f"Dst: {header.dst_port}, Src: {header.src_port}") # Dst: 80, Src: 80
C Structure Mapping¶
import struct
# Map C struct: typedef struct { int id; float score; } Result;
class CResult:
    """Python mirror of ``typedef struct { int id; float score; } Result;``."""

    STRUCT_FORMAT = struct.Struct('if')  # int + float

    def __init__(self, id=0, score=0.0):
        """Store the two struct members."""
        self.id, self.score = id, score

    def pack(self):
        """Return the members encoded with STRUCT_FORMAT."""
        members = (self.id, self.score)
        return self.STRUCT_FORMAT.pack(*members)

    @classmethod
    def unpack(cls, data):
        """Build an instance from bytes laid out as STRUCT_FORMAT."""
        return cls(*cls.STRUCT_FORMAT.unpack(data))
# Usage: round-trip a result through its packed representation
result = CResult(42, 95.5)
data = result.pack() # bytes laid out as 'if' (int + float)
result2 = CResult.unpack(data)
Performance Considerations¶
Time Complexity¶
- pack(): O(n) for n fields (linear in data size)
- unpack(): O(n) for n fields
- Struct creation: O(m) one-time cost (m = format length)
- pack_into(): O(n) for n fields
Space Complexity¶
- Output: O(n) for packed data (n = total bytes)
- Input: O(n) for unpacked tuple
Optimization Tips¶
import struct
# Slower: a module-level call must look the format string up in the struct
# module's internal cache of compiled formats on every iteration
# (the format is only re-parsed on a cache miss, not on every call)
for i in range(1000):
    data = struct.pack('i', i)  # format lookup on each call
# Better in hot loops: compile once - O(m) - then reuse the Struct object
int_struct = struct.Struct('i')
for i in range(1000):
    data = int_struct.pack(i)  # no format lookup or parsing
# The gain is per-call overhead only; measure with timeit before relying on it
Format Modifiers¶
Repetition¶
import struct
# Pack 10 integers - O(k) for k values; '10i' is shorthand for 'iiiiiiiiii'
data = struct.pack('10i', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
# Unpack 10 integers - O(k)
values = struct.unpack('10i', data)
print(len(values)) # 10
# With byte order
data = struct.pack('>10H', *range(10)) # 10 big-endian shorts (20 bytes)
Error Handling¶
import struct
# The buffer length has to equal the size the format requires
try:
    struct.unpack('i', b'AB')  # 2 bytes supplied, 'i' needs 4
except struct.error as e:
    print(f"Unpack error: {e}")

# An unknown format character is rejected as well
try:
    struct.pack('z', 42)  # 'z' is not a format character
except struct.error as e:
    print(f"Pack error: {e}")