Skip to content

Pkgutil Module

The pkgutil module provides utilities for working with packages and module search paths.

Complexity Reference

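| Operation | Time | Space | Notes |
|---|---|---|---|
| iter_modules(path) | O(n) | O(n) | n = modules in path |
| walk_packages() | O(n) | O(n) | n = all subpackages |
| find_loader(name) | O(1) avg, O(n) worst | O(1) | Check sys.modules; O(n) worst case due to hash collisions. Deprecated since Python 3.12 (removed in 3.14); use importlib.util.find_spec() instead |
| get_data(name) | O(n) | O(n) | n = file size |
| extend_path() | O(n) | O(n) | n = path entries |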

Common Operations

Finding Modules in a Path

import pkgutil

# List the modules and packages found directly in ./plugins (no recursion).
# O(n) where n = modules in directory
for importer, modname, ispkg in pkgutil.iter_modules(['./plugins']):
    print(f"{'Package' if ispkg else 'Module'}: {modname}")

    # ispkg = True if the entry is a package, False if it is a plain module
    # importer = the finder that located the entry (not a loader); each item
    # is a pkgutil.ModuleInfo(module_finder, name, ispkg) named tuple

Walking Package Tree

import pkgutil
import sys

# O(n) where n = all subpackages/modules recursively
# Every name is reported with the given prefix prepended.
# Note: to descend into a subpackage, walk_packages() has to import it, so
# 'mypackage' must be importable (e.g. its parent directory is on sys.path).
for importer, modname, ispkg in pkgutil.walk_packages(
    path=['./mypackage'], 
    prefix='mypackage.'
):
    print(modname)

# Example output:
# mypackage.module1
# mypackage.module2
# mypackage.subpkg
# mypackage.subpkg.module3

Getting Package Data Files

import pkgutil

# O(n) where n = file size
data = pkgutil.get_data('mypackage', 'data.txt')
# Returns bytes (or None when the package cannot be located or its loader
# does not support get_data())

# The package argument may be a dotted name; the resource path is relative
# to that package - O(n)
data = pkgutil.get_data('mypackage.subpackage', 'resource.json')

# Example: loading JSON data
import json

config = {}  # fallback used when the package or the resource is unavailable
try:
    raw_data = pkgutil.get_data('myapp', 'config.json')
except (ImportError, FileNotFoundError):
    # ImportError: a parent package of the requested package is missing.
    # FileNotFoundError: the package exists but the resource file does not.
    raw_data = None

# get_data() reports "package not found" (or a loader without data support)
# by returning None rather than raising, so guard before parsing;
# json.loads(None) would raise TypeError.
if raw_data is not None:
    config = json.loads(raw_data)  # O(n) to parse

Common Use Cases

Discovering Plugins

import pkgutil
import importlib

def load_plugins(plugin_package):
    """Import each plain module of a plugin package and instantiate its plugin.

    Sub-packages are skipped, as are modules that do not expose a ``Plugin``
    attribute.  Cost is O(n*m): n modules found, O(m) to import each one.

    Returns a dict mapping a module's short name to ``module.Plugin()``.
    """
    found = {}

    for _finder, short_name, is_package in pkgutil.iter_modules(
        plugin_package.__path__
    ):
        if is_package:
            continue  # only plain modules are treated as plugins

        loaded = importlib.import_module(
            f"{plugin_package.__name__}.{short_name}"
        )
        if not hasattr(loaded, 'Plugin'):
            continue  # module does not follow the 'Plugin' class convention

        found[short_name] = loaded.Plugin()

    return found

# Usage - O(n*m) where n = plugins, m = avg module size
# 'plugins' must be an importable package: load_plugins() reads its __path__
# and __name__.  The result maps each module's short name to an instance of
# that module's Plugin class.
import plugins as plugin_package
loaded = load_plugins(plugin_package)

Checking Package Contents

import pkgutil

def get_submodules(package_name):
    """Return the names of everything directly inside a package - O(n).

    The package is imported so that its ``__path__`` can be scanned.  Both
    plain modules and sub-packages are reported; the scan does not recurse.
    """
    import importlib

    search_path = importlib.import_module(package_name).__path__

    # O(n) where n = direct submodules
    return [name for _, name, _ in pkgutil.iter_modules(search_path)]

# Usage
modules = get_submodules('email')
print(modules)  # e.g. [..., 'generator', ..., 'mime', 'parser', ...]

Extending Path for Namespace Packages

import pkgutil
import sys

# Extend path for namespace packages - O(n)
# This belongs in the package's own __init__.py, where __path__ and __name__
# are the package's module-level attributes.  extend_path() returns a new
# list and leaves its argument untouched, so the result has to be assigned
# back to __path__ for the import system to see the extra directories.
__path__ = pkgutil.extend_path(
    __path__,  # Current package path
    __name__   # Current package name
)
extended = __path__  # same list, kept under the name this example used before

# Allows finding modules in multiple locations
# Useful for plugin directories

Gathering Metadata

import pkgutil
import importlib

def analyze_package(package_name):
    """Summarize the direct contents of a package - O(n).

    Args:
        package_name: Dotted name of an importable package.

    Returns:
        A dict with the keys:
          'modules'      - names of the plain modules directly inside
          'subpackages'  - names of the packages directly inside
          'module_count' - total number of entries (modules + subpackages)
          'has_init'     - True for a regular package, False for a
                           namespace package (one without __init__.py)

    Raises:
        ImportError: if the package cannot be imported.
        AttributeError: if the name refers to a plain module, which has no
            __path__ to scan.
    """
    package = importlib.import_module(package_name)

    analysis = {
        'modules': [],
        'subpackages': [],
        'module_count': 0,
        # Namespace packages carry __file__ = None on Python 3.7+, so a
        # plain hasattr() check would report True for them as well.
        'has_init': getattr(package, '__file__', None) is not None,
    }

    # O(n) to iterate
    for importer, modname, ispkg in pkgutil.iter_modules(
        package.__path__
    ):
        if ispkg:
            analysis['subpackages'].append(modname)
        else:
            analysis['modules'].append(modname)
        analysis['module_count'] += 1

    return analysis

# Usage
# 'module_count' counts plain modules and subpackages together.
info = analyze_package('collections')
print(f"Modules: {info['module_count']}")
print(f"Subpackages: {info['subpackages']}")

Performance Tips

Cache pkgutil Results

import pkgutil
import importlib

class PackageCache:
    """Remember the direct submodule names of packages, keyed by package name."""

    def __init__(self):
        self._cache = {}

    def get_modules(self, package_name):
        """Return the submodule names of a package, scanning it only once.

        The first request for a name imports the package and lists its
        ``__path__`` (O(n)); later requests return the stored list (O(1)).
        The same list object is handed back on every call.
        """
        try:
            # O(1) subsequent calls
            return self._cache[package_name]
        except KeyError:
            pass

        # O(n) first time
        package = importlib.import_module(package_name)
        names = [name for _, name, _ in pkgutil.iter_modules(package.__path__)]
        self._cache[package_name] = names
        return names

# Usage
cache = PackageCache()
# First call imports the package and scans its __path__.
modules = cache.get_modules('email')  # O(n)
# Second call returns the list stored by the first call (same object).
modules = cache.get_modules('email')  # O(1)

Lazy Load Heavy Modules

import pkgutil
import importlib

def lazy_load_plugins(plugin_package):
    """Build a dict of deferred importers for a plugin package - O(n).

    ``plugin_package`` is the dotted name of the package.  The package
    itself is imported right away (its ``__path__`` is needed for the scan),
    but none of its children are: each dict value is a callable that imports
    and returns the corresponding child when invoked.
    """
    search_path = importlib.import_module(plugin_package).__path__

    # O(n) to setup loaders, O(1) per load.
    # The dotted name is bound as a default argument so every callable keeps
    # its own name; a plain closure would see only the last one iterated.
    return {
        short_name: (
            lambda fn=f"{plugin_package}.{short_name}": importlib.import_module(fn)
        )
        for _, short_name, _ in pkgutil.iter_modules(search_path)
    }

# Usage - O(n) setup, O(1) per lazy load
# Each value in the returned dict is a callable that imports and returns
# the corresponding module when called with no arguments.
plugins = lazy_load_plugins('myapp.plugins')

# Load only when needed - O(m) for each plugin
plugin1 = plugins['plugin1']()  # Loads on demand
plugin2 = plugins['plugin2']()  # Loads on demand

Limit Walk Depth for Large Trees

import pkgutil
import importlib

def walk_packages_limited(package_name, max_depth=2):
    """Walk a package tree down to a maximum depth - O(n).

    Depth 0 is the package's direct children, so ``max_depth=0`` lists only
    those, ``max_depth=1`` also lists the children of its subpackages, and
    so on.  A negative ``max_depth`` yields an empty list.

    Args:
        package_name: Dotted name of an importable package.
        max_depth: Deepest level whose entries are listed.

    Returns:
        A list of fully qualified module/package names; each subpackage is
        followed immediately by its own contents.

    Raises:
        ImportError: if the package, or a subpackage that has to be
            descended into, cannot be imported.
    """
    package = importlib.import_module(package_name)

    def walk(path, prefix, depth):
        if depth > max_depth:
            return

        # O(k) at each depth
        for _, modname, ispkg in pkgutil.iter_modules(path):
            full_name = f"{prefix}.{modname}"
            yield full_name
            # Import a subpackage only when its contents will be listed:
            # importing runs its __init__ code, which is wasted work (and a
            # needless side effect) at the deepest level.
            if ispkg and depth < max_depth:
                subpackage = importlib.import_module(full_name)
                yield from walk(subpackage.__path__, full_name, depth + 1)

    return list(walk(package.__path__, package_name, 0))

Version Notes

  • Python 2.6+: Basic functionality
  • Python 3.3+: Namespace packages support
  • Python 3.9+: Enhanced path handling
  • Python 3.10+: Various optimizations