Skip to content

graphlib Module Complexity

The graphlib module provides a topological sort implementation for resolving dependencies between tasks, useful for build systems, task scheduling, and dependency resolution.

Complexity Reference

Operation Time Space Notes
TopologicalSorter() init O(1) O(1) Create sorter
add(node, *predecessors) O(k) O(k) Add node with k predecessors
prepare() O(v + e) O(v) Prepare sort, v = vertices, e = edges
get_ready() O(1) amortized O(k) Get all ready nodes as tuple, k = ready count
done(node) O(d) O(1) Mark done, d = node degree
static_order() O(v + e) O(v + e) Complete topological sort

Basic Topological Sort

Simple Dependency Order

from graphlib import TopologicalSorter

# Create sorter - O(1)
ts = TopologicalSorter()

# Add dependencies - O(k) for k predecessors
# format: add(node, *predecessors)
ts.add('lunch', 'cook')    # lunch depends on cook
ts.add('cook', 'shop')     # cook depends on shop
ts.add('shop')              # shop has no dependencies

# Prepare for iteration - O(v + e)
ts.prepare()

# Get nodes in dependency order - O(v)
for task in ts.static_order():
    print(task)

# Output:
# shop
# cook
# lunch

Dynamic Topological Sort

Process While Sorting

from graphlib import TopologicalSorter

# Create sorter - O(1)
ts = TopologicalSorter()

# Add tasks and dependencies
ts.add('compile', 'preprocess')
ts.add('preprocess', 'download')
ts.add('download')
ts.add('test', 'compile')
ts.add('package', 'test')

# Prepare - O(v + e)
ts.prepare()

# Process nodes dynamically - O(v + e) total
processed = []
while ts.is_active():
    # Get all ready nodes - O(1) amortized
    ready = ts.get_ready()  # Returns tuple of ready nodes

    if not ready:
        break

    # Process each ready node
    for node in ready:
        print(f"Processing: {node}")
        processed.append(node)
        # Mark as done - O(d) for degree d
        ts.done(node)

print(f"Processed: {processed}")

Graph Structure

Complex Dependencies

from graphlib import TopologicalSorter

# Build dependency graph - O(v + e)
ts = TopologicalSorter()

# Task graph with multiple dependencies
ts.add('main.o', 'main.c', 'defs.h')
ts.add('util.o', 'util.c', 'util.h', 'defs.h')
ts.add('prog', 'main.o', 'util.o')

# Add files with no dependencies
for file in ['main.c', 'util.c', 'util.h', 'defs.h']:
    ts.add(file)

# Get execution order - O(v + e)
ts.prepare()
order = list(ts.static_order())
print(order)

# Output: source files first, then object files, then executable

Static vs Dynamic Sort

Static Order

from graphlib import TopologicalSorter

# Static sort - simpler, complete result
ts = TopologicalSorter()

ts.add('D', 'B', 'C')
ts.add('B', 'A')
ts.add('C', 'A')
ts.add('A')

# Get complete order - O(v + e) returns list
order = ts.static_order()
print(list(order))  # ['A', 'B', 'C', 'D'] or valid permutation

Dynamic Processing

from graphlib import TopologicalSorter

# Dynamic sort - process nodes as they become available
ts = TopologicalSorter()

ts.add('compile', 'preprocess')
ts.add('preprocess', 'download')
ts.add('download')
ts.add('test', 'compile')

ts.prepare()

# Process dynamically - O(1) per get_ready
while ts.is_active():
    ready = ts.get_ready()  # Returns tuple of ready nodes
    for node in ready:
        print(f"Processing {node}")
        # Do work...
        ts.done(node)

Error Detection

Cycle Detection

from graphlib import TopologicalSorter, CycleError

ts = TopologicalSorter()

# Add cyclic dependencies
ts.add('A', 'B')
ts.add('B', 'C')
ts.add('C', 'A')  # Creates cycle: A -> B -> C -> A

# Detect cycle when preparing - O(v + e)
try:
    ts.prepare()
except CycleError as e:
    print(f"Cycle detected: {e}")
    print(f"Nodes in cycle: {e.args[1]}")

Common Patterns

Build System

from graphlib import TopologicalSorter

class BuildSystem:
    """Simple build system with dependency resolution"""

    def __init__(self):
        self.sorter = TopologicalSorter()
        self.targets = {}

    # Add build target with dependencies
    def add_target(self, target, *dependencies):
        """Register build target - O(k)"""
        self.sorter.add(target, *dependencies)
        self.targets[target] = dependencies

    # Get build order
    def get_build_order(self):
        """Get topological order - O(v + e)"""
        try:
            self.sorter.prepare()
            return list(self.sorter.static_order())
        except Exception as e:
            return None

    # Build targets
    def build(self):
        """Build all targets in order - O(v + e)"""
        self.sorter.prepare()

        built = set()
        while True:
            target = self.sorter.get_node()
            if target is None:
                break

            # Skip already built
            if target in built:
                self.sorter.done(target)
                continue

            print(f"Building {target}...")
            # Simulate build
            built.add(target)
            self.sorter.done(target)

        return built

# Usage
build = BuildSystem()
build.add_target('main.o', 'main.c', 'defs.h')
build.add_target('util.o', 'util.c', 'defs.h')
build.add_target('prog', 'main.o', 'util.o')
build.add_target('main.c')
build.add_target('util.c')
build.add_target('defs.h')

print("Build order:", build.get_build_order())
build.build()

Task Scheduler

from graphlib import TopologicalSorter
import time

class TaskScheduler:
    """Schedule tasks respecting dependencies"""

    def __init__(self):
        self.sorter = TopologicalSorter()
        self.tasks = {}

    # Register task with dependencies
    def add_task(self, name, func, *dependencies):
        """Register task - O(k)"""
        self.sorter.add(name, *dependencies)
        self.tasks[name] = func

    # Execute all tasks in order
    def execute(self):
        """Execute tasks in dependency order - O(v + e)"""
        try:
            self.sorter.prepare()
        except Exception as e:
            print(f"Dependency error: {e}")
            return False

        executed = {}
        start_time = time.time()

        while True:
            # Get next ready task - O(1) amortized
            task_name = self.sorter.get_node()
            if task_name is None:
                break

            # Execute task - O(?)
            if task_name in self.tasks:
                print(f"Executing {task_name}...")
                start = time.time()
                result = self.tasks[task_name]()
                elapsed = time.time() - start
                executed[task_name] = (result, elapsed)
                print(f"  Completed in {elapsed:.3f}s")

            # Mark done - O(d)
            self.sorter.done(task_name)

        total = time.time() - start_time
        print(f"Total execution time: {total:.3f}s")
        return executed

# Usage
scheduler = TaskScheduler()

def download():
    time.sleep(0.1)
    return "downloaded"

def process():
    time.sleep(0.1)
    return "processed"

def upload():
    time.sleep(0.1)
    return "uploaded"

scheduler.add_task('download', download)
scheduler.add_task('process', process, 'download')
scheduler.add_task('upload', upload, 'process')

results = scheduler.execute()

Package Dependency Resolver

from graphlib import TopologicalSorter

class DependencyResolver:
    """Resolve package installation order"""

    def __init__(self):
        self.sorter = TopologicalSorter()
        self.packages = {}

    # Add package with dependencies
    def add_package(self, name, version='latest', *dependencies):
        """Add package - O(k)"""
        self.sorter.add((name, version), *dependencies)
        self.packages[(name, version)] = dependencies

    # Get installation order
    def get_install_order(self):
        """Get topological order - O(v + e)"""
        try:
            self.sorter.prepare()
            return list(self.sorter.static_order())
        except Exception as e:
            print(f"Circular dependency: {e}")
            return None

# Usage
resolver = DependencyResolver()

# Typical dependency graph
resolver.add_package('flask', '2.0')
resolver.add_package('werkzeug', '2.0', ('flask', '2.0'))
resolver.add_package('jinja2', '3.0', ('flask', '2.0'))
resolver.add_package('itsdangerous', '2.0')
resolver.add_package('click', '8.0', ('flask', '2.0'))

order = resolver.get_install_order()
print("Install order:")
for pkg, ver in order:
    print(f"  {pkg} {ver}")

Performance Characteristics

Time Complexity

  • Initialization: O(1)
  • Adding nodes: O(k) for k predecessors
  • prepare(): O(v + e) where v = vertices, e = edges
  • Complete sort: O(v + e)
  • Dynamic iteration: O(v + e) total for all operations

Space Complexity

  • Graph storage: O(v + e) for v nodes and e edges
  • Cycle tracking: O(v + e) during prepare

Scalability

from graphlib import TopologicalSorter
import time

# Performance test
def benchmark_sort_size(num_nodes):
    ts = TopologicalSorter()

    # Create linear dependency chain
    for i in range(1, num_nodes):
        ts.add(i, i-1)
    ts.add(0)

    start = time.time()
    ts.prepare()
    list(ts.static_order())
    elapsed = time.time() - start

    return elapsed

# Test various sizes
for size in [100, 1000, 10000]:
    time_taken = benchmark_sort_size(size)
    print(f"Nodes: {size}, Time: {time_taken:.4f}s")

Best Practices

Do's

  • Use for dependency resolution
  • Detect cycles before processing
  • Use static_order() when you need complete result
  • Use dynamic approach for large graphs or concurrent processing

Avoid's

  • Don't assume order within independent nodes
  • Don't modify graph during iteration
  • Don't rely on specific order of equally-ranked nodes

Limitations

  • Only handles DAGs (Directed Acyclic Graphs)
  • No weights on edges
  • No path finding between nodes
  • Cannot represent "soft" dependencies

Alternatives

For Complex Graph Processing

# networkx - full graph library
# pip install networkx

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([('A', 'B'), ('B', 'C')])
order = list(nx.topological_sort(G))

# For constraint solving
# Use PuLP, OR-Tools, or similar for optimization