Skip to content

io Module Complexity

The io module provides core I/O classes for working with binary and text data, including in-memory streams and file-like objects.

Complexity Reference

Operation Time Space Notes
StringIO() creation O(1) O(1) Create empty string buffer
StringIO.write() O(n) amortized O(n) n = string length; amortized due to buffer resizing
StringIO.read() O(n) O(n) n = characters to read
StringIO.getvalue() O(n) O(n) n = total buffer size; returns copy
BytesIO() creation O(1) O(1) Create empty bytes buffer
BytesIO.write() O(n) amortized O(n) n = bytes length; amortized due to buffer resizing
BytesIO.read() O(n) O(n) n = bytes to read
BytesIO.getvalue() O(n) O(n) n = total buffer size; returns copy
seek() position change O(1) O(1) Random access pointer
tell() get position O(1) O(1) Return current position

In-Memory Text Streams

StringIO Basics

from io import StringIO

# Create in-memory text stream - O(1)
stream = StringIO()

# Write strings - O(k) amortized for k characters (StringIO holds text, not bytes)
stream.write("Hello\n")   # O(6): five letters plus the newline
stream.write("World\n")   # O(6)

# Get all content - O(n)
content = stream.getvalue()  # O(12) for "Hello\nWorld\n"
print(content)
# Hello
# World

# Reset position to beginning - O(1)
stream.seek(0)

# Read all - O(n)
data = stream.read()
print(data)  # "Hello\nWorld\n"

Writing and Reading

from io import StringIO

stream = StringIO()

# Write data - O(k) per write of k characters
lines = ['apple', 'banana', 'cherry']
for line in lines:
    stream.write(line + '\n')

# Get value - O(n)
output = stream.getvalue()
print(output)
# apple
# banana
# cherry

# Reset and read line by line - O(n)
stream.seek(0)
for line in stream:  # O(n) iteration
    print(f"Line: {line.strip()}")

# Close stream - this discards the buffer rather than clearing it for reuse;
# any later read/write/getvalue() on the closed stream raises ValueError
stream.close()

In-Memory Binary Streams

BytesIO Basics

from io import BytesIO

# Start with an empty in-memory binary buffer - O(1)
stream = BytesIO()

# Append each piece in turn - O(k) amortized per write (7 bytes, then 4 bytes)
for piece in (b"Binary ", b"data"):
    stream.write(piece)

# Snapshot the whole buffer - O(n)
content = stream.getvalue()  # b"Binary data"
print(content)

# Rewind (O(1)) and read everything back (O(n))
stream.seek(0)
data = stream.read()
print(data)  # b"Binary data"

Binary Data Manipulation

from io import BytesIO
import struct

stream = BytesIO()

# Pack each (format, value) pair and append it - O(k) per write
for fmt, value in (('i', 42), ('f', 3.14), ('2s', b'AB')):
    stream.write(struct.pack(fmt, value))  # 4 + 4 + 2 bytes

# Snapshot the packed data - O(n)
binary = stream.getvalue()  # 10 bytes total

# Rewind (O(1)) and decode the fields in the order they were written
stream.seek(0)
(value1,) = struct.unpack('i', stream.read(4))
(value2,) = struct.unpack('f', stream.read(4))
(value3,) = struct.unpack('2s', stream.read(2))

Stream Position Operations

Seeking and Telling

from io import StringIO

stream = StringIO("Hello World")

# Current position - O(1)
print(stream.tell())  # 0

# Seek to position - O(1)
stream.seek(6)
print(stream.tell())  # 6

# Read from position - O(n)
print(stream.read())  # "World"

# Seek relative to the end - O(1)
# Text streams such as StringIO only accept offset 0 with whence=1 or whence=2
# (a nonzero offset raises OSError), so jump to the end first and then seek to
# an absolute position computed from it.
end = stream.seek(0, 2)  # 2 = os.SEEK_END; seek() returns the new position
stream.seek(end - 5)
print(stream.read())  # "World"

# Seek relative to the current position - O(1)
stream.seek(0)
stream.read(5)                  # Read "Hello"
stream.seek(stream.tell() + 1)  # Skip the space with an absolute seek
print(stream.read())  # "World"

# BytesIO, being a binary stream, supports seek(-5, 2) and seek(1, 1) directly.

Truncate and Resize

from io import StringIO

stream = StringIO("Hello World")

# Get length via tell - O(1)
stream.seek(0, 2)  # Seek to end (2 = os.SEEK_END; offset 0 is allowed on text streams)
size = stream.tell()
print(size)  # 11

# Truncate to position - O(1)
stream.seek(5)
stream.truncate()  # No argument: truncate at the current position (5)

# Get value after truncate - O(n)
print(stream.getvalue())  # "Hello"

# Truncate to size - O(1)
stream.truncate(3)  # Explicit size; the stream position itself is not moved
print(stream.getvalue())  # "Hel"

Using Streams in Functions

Capture Output

from io import StringIO
import sys

# Capture stdout - O(1) setup
captured_output = StringIO()

# Redirect stdout - O(1)
original_stdout = sys.stdout
sys.stdout = captured_output
try:
    # Code writes to captured stream
    print("Line 1")
    print("Line 2")
finally:
    # Restore stdout - O(1)
    # Done in `finally` so stdout is restored even if the captured code raises;
    # otherwise all later output would silently go into the buffer.
    sys.stdout = original_stdout

# Get captured output - O(n)
output = captured_output.getvalue()
print(f"Captured:\n{output}")

# contextlib.redirect_stdout(captured_output) wraps the same pattern in a `with` block.

Stream Wrapping

from io import StringIO, TextIOWrapper
from io import BytesIO

# Wrap BytesIO with text layer
# newline="\n" disables newline translation on write; with the default
# (newline=None) "\n" is written as os.linesep, i.e. "\r\n" on Windows.
bytes_stream = BytesIO()
text_stream = TextIOWrapper(bytes_stream, encoding='utf-8', newline='\n')

# Write text - O(k)
text_stream.write("Hello\nWorld\n")
text_stream.flush()  # Push the wrapper's pending text down into bytes_stream

# Get bytes - O(n)
print(bytes_stream.getvalue())  # b"Hello\nWorld\n"

Advanced Stream Operations

Read Entire File-like Object

from io import StringIO

def process_stream(stream):
    """Return every line of a file-like object with surrounding whitespace removed - O(n)"""
    cleaned = []
    for raw in stream.readlines():  # O(n): pulls all remaining lines at once
        cleaned.append(raw.strip())
    return cleaned


stream = StringIO("line1\nline2\nline3\n")
result = process_stream(stream)
print(result)  # ['line1', 'line2', 'line3']

Read with Buffer

from io import BytesIO

def process(data):
    """Report the size of one chunk."""
    print(f"Processing {len(data)} bytes")


# process() is defined before the loop: the loop runs at module level, so the
# name must already exist when the first chunk is handled.
stream = BytesIO(b"x" * 1000)

# Read in chunks - O(n) total, O(k) per chunk of k bytes
chunk_size = 100
while True:
    chunk = stream.read(chunk_size)  # O(k) per chunk
    if not chunk:  # read() returns b"" at end of stream
        break
    process(chunk)  # Process chunk

Seek Performance

from io import BytesIO

# Large stream - O(n) space: the full 1,000,000-byte buffer is held in memory
stream = BytesIO(b"A" * 1000000)

# Random access is fast - O(1) seek, then O(k) to read k bytes
stream.seek(500000)
data = stream.read(100)

stream.seek(100000)
data = stream.read(100)

# Much faster than sequential file I/O

Use Cases

String Formatting

from io import StringIO

def format_table(rows):
    """Render rows as fixed-width text lines, built in memory (no disk I/O)"""
    buffer = StringIO()
    # Each of the first three cells of a row is padded to width 10
    buffer.writelines(
        f"{row[0]:10} {row[1]:10} {row[2]:10}\n" for row in rows
    )
    return buffer.getvalue()


data = [
    ('Name', 'Age', 'Score'),
    ('Alice', '30', '95'),
    ('Bob', '25', '87')
]

table = format_table(data)
print(table)

CSV Processing

from io import StringIO
import csv

# Build the CSV text in an in-memory buffer
output = StringIO()
writer = csv.writer(output)

# Emit header and data rows in one call - O(n)
writer.writerows([
    ['Name', 'Age', 'City'],
    ['Alice', '30', 'NYC'],
    ['Bob', '25', 'LA'],
])

# Snapshot the CSV text - O(n)
csv_data = output.getvalue()
print(csv_data)

# Parse it back, one dict per data row - O(n)
input_stream = StringIO(csv_data)
reader = csv.DictReader(input_stream)
for row in reader:
    print(row)

JSON Processing

from io import StringIO
import json

# Sample payload to serialize in memory
data = {
    'users': [
        {'name': name, 'age': age}
        for name, age in (('Alice', 30), ('Bob', 25))
    ]
}

output = StringIO()

# Serialize straight into the buffer - O(n)
json.dump(data, output, indent=2)

# Snapshot the JSON text - O(n)
json_str = output.getvalue()
print(json_str)

# Load it back from a fresh stream - O(n)
input_stream = StringIO(json_str)
parsed = json.load(input_stream)

Common Patterns

Multiline String Construction

from io import StringIO

output = StringIO()

# Assemble a multi-line string in one buffer - O(n)
lines = [f"Line {i}\n" for i in range(100)]
output.writelines(lines)  # writes each string in order; no separators are added

result = output.getvalue()  # Single string, no concatenation overhead

Filter and Transform

from io import StringIO

def filter_and_transform(text):
    """Upper-case and strip each line of text, returning newline-terminated lines"""
    source = StringIO(text)
    sink = StringIO()

    # Stream line by line from source to sink - O(n)
    sink.writelines(row.upper().strip() + '\n' for row in source)

    return sink.getvalue()


result = filter_and_transform("hello\nworld\npython")
print(result)

Performance Comparison

StringIO vs String Concatenation

from io import StringIO

# Bad: String concatenation - O(n²) in the worst case
# (CPython can often extend the string in place, but that optimization is
# implementation-specific and not guaranteed)
result = ""
for i in range(1000):
    result += f"Line {i}\n"  # May create a new string each time

# Good: StringIO - O(n)
output = StringIO()
for i in range(1000):
    output.write(f"Line {i}\n")
result = output.getvalue()

# StringIO is much faster for many writes

File vs In-Memory

import tempfile
import time
from io import StringIO

data = "x" * 1000

# File I/O - slower, disk bound
# A TemporaryFile is deleted automatically when closed, so the benchmark does
# not leave a stray file in the working directory. perf_counter() is the
# monotonic, high-resolution clock intended for measuring short durations.
start = time.perf_counter()
with tempfile.TemporaryFile('w', encoding='utf-8') as f:
    for _ in range(1000):
        f.write(data)
file_time = time.perf_counter() - start  # includes the flush performed on close

# In-memory - faster, CPU bound
start = time.perf_counter()
stream = StringIO()
for _ in range(1000):
    stream.write(data)
memory_time = time.perf_counter() - start

print(f"File: {file_time:.4f}s, Memory: {memory_time:.4f}s")
# Memory is usually faster; the exact ratio depends on OS caching and storage

Memory Efficiency

When to Use io Module

from io import StringIO, BytesIO

# NOTE: some_output, items and format_item are placeholders that this snippet
# does not define; substitute your own data and formatter.

# Good: Temporary buffers, testing
stream = StringIO()
stream.write(some_output)
assert "expected" in stream.getvalue()

# Good: In-memory formatting
output = StringIO()
for item in items:
    output.write(format_item(item))
result = output.getvalue()

# Avoid: Very large data (use generators/streaming)
# The whole buffer is held in memory, so don't accumulate huge datasets in StringIO/BytesIO