Skip to content

glob Module Complexity

The glob module provides Unix shell-style pathname expansion using wildcard patterns to find files matching specific criteria.

Complexity Reference

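| Operation | Time | Space | Notes |
|---|---|---|---|
| glob() function | O(n) | O(m) | n = directory entries scanned, m = matching paths returned |
| iglob() function | O(1) to create, work done lazily per item | O(k) | Iterator; k = entries in the directory currently being listed (CPython reads one directory listing at a time) |
| Pattern matching | O(n) | O(1) per name | n = entries in the directory |
| Recursive search ** | O(N) | O(1) per file | N = total files and directories under the starting directory (the whole tree is walked, not just one path of depth d) |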

Basic Globbing

Simple Patterns

import glob

# Find all Python files - O(n)
py_files = glob.glob('*.py')
print(py_files)  # ['script.py', 'test.py', ...]

# Find all files in directory - O(n)
all_files = glob.glob('*')
print(all_files)  # All files in current directory

# Find specific pattern - O(n)
data_files = glob.glob('data/*.csv')
print(data_files)  # ['data/file1.csv', 'data/file2.csv', ...]

Pattern Wildcards

import glob

# *      - matches any sequence of characters (but not a leading dot)
# ?      - matches any single character
# [abc]  - matches any one character listed in the brackets
# [a-z]  - matches any one character in the range
# [!abc] - matches any one character NOT listed in the brackets
# **     - matches zero or more directories, but only when recursive=True
#          is passed; without it, ** behaves like a plain *

# Examples
print(glob.glob('test*.py'))                 # test.py, test_utils.py, etc.
print(glob.glob('file?.txt'))                # file1.txt, fileA.txt, etc.
print(glob.glob('[a-c]*.txt'))               # a*, b*, or c* txt files
print(glob.glob('**/*.py', recursive=True))  # All .py files recursively

Iterator vs List

Using iglob for Large Directories

import glob

# glob() returns list - O(n) space, all at once
files = glob.glob('*.py')  # O(n) - loads all results
for file in files:
    print(file)

# iglob() returns iterator - O(1) space, lazy evaluation
files = glob.iglob('*.py')  # O(1) - returns iterator
for file in files:
    print(file)  # Each iteration finds one match

Memory Efficiency

import glob
import sys

# Large directory with many matches
# glob() - loads all in memory
all_files = glob.glob('**/*.txt', recursive=True)
print(f"Memory used by glob: {sys.getsizeof(all_files)} bytes")

# iglob() - loads one at a time
file_iter = glob.iglob('**/*.txt', recursive=True)
print(f"Memory used by iglob: {sys.getsizeof(file_iter)} bytes")

# For large results, iglob is better
for file in file_iter:
    process(file)  # Process one at a time

Recursive Globbing

Find Files in Subdirectories

import glob

# Recursive search with ** - O(N) where N = total files and directories in the tree
# Must use recursive=True parameter

# Find all Python files recursively
py_files = glob.glob('**/*.py', recursive=True)
print(py_files)

# Find files at specific depth
# All .txt files in any subdirectory
txt_files = glob.glob('*/*.txt')  # One level deep

# All .txt files recursively
txt_files = glob.glob('**/*.txt', recursive=True)

# Find in nested structure
nested = glob.glob('src/**/test_*.py', recursive=True)

Common Patterns

Find Configuration Files

import glob

# Find all config files - O(n)
configs = glob.glob('**/*.conf', recursive=True)
configs.extend(glob.glob('**/*.ini', recursive=True))
configs.extend(glob.glob('**/*.yaml', recursive=True))

print(f"Found {len(configs)} config files")

Find Files by Extension

import glob

class FileCollector:
    """Collect files below a root directory by file extension.

    The root directory is treated as a literal path: it is passed through
    ``glob.escape`` so that a root such as ``proj[1]`` is not misread as a
    wildcard pattern. Extensions may be given with or without the leading
    dot (``'py'`` and ``'.py'`` are equivalent).
    """

    def __init__(self, root_dir='.'):
        """Remember *root_dir* as the directory all searches start from."""
        self.root = root_dir

    def _pattern(self, *parts):
        """Return a glob pattern made of the escaped root followed by *parts*."""
        # '/' is kept as the joiner so returned paths look the same as the
        # patterns callers are used to (e.g. './sub/a.py' for root '.').
        return '/'.join((glob.escape(str(self.root)), *parts))

    @staticmethod
    def _suffix_pattern(ext):
        """Return the '*.<ext>' pattern for *ext*, ignoring a leading dot."""
        return f"*.{ext.lstrip('.')}"

    # Find by extension - one recursive walk of the tree below the root
    def find_by_extension(self, ext):
        """Return all paths below the root whose name ends in ``.ext``.

        Names starting with a dot are skipped, as glob does by default.
        """
        pattern = self._pattern('**', self._suffix_pattern(ext))
        return glob.glob(pattern, recursive=True)

    # Find multiple extensions - one recursive walk per extension
    def find_by_extensions(self, *exts):
        """Return paths matching any of *exts*, without duplicates.

        Results keep first-seen order; passing the same extension twice does
        not list its files twice.
        """
        found = {}
        for ext in exts:
            # dict keys give order-preserving de-duplication.
            found.update(dict.fromkeys(self.find_by_extension(ext)))
        return list(found)

    # Find in specific directory - scans that single directory only
    def find_in_dir(self, subdir, ext):
        """Return ``.ext`` files directly inside ``<root>/<subdir>``.

        *subdir* is inserted into the pattern as given, so it may itself
        contain wildcards.
        """
        pattern = self._pattern(subdir, self._suffix_pattern(ext))
        return glob.glob(pattern)

# Usage
collector = FileCollector('.')
py_files = collector.find_by_extension('py')
all_code = collector.find_by_extensions('py', 'js', 'ts')

Batch File Processing

import glob
import os

def process_images(directory):
    """Report the size of every image file below *directory*.

    The tree is searched once per image pattern (jpg, jpeg, png, gif), so
    the cost is proportional to the tree size times the number of patterns.
    *directory* is taken literally: glob metacharacters in its name are
    escaped. Only regular files are counted; directories whose names look
    like images, and files that disappear during the walk, are skipped.

    Returns:
        A ``(count, total_size)`` tuple: the number of images found and the
        sum of their sizes in bytes.
    """

    image_exts = ['*.jpg', '*.jpeg', '*.png', '*.gif']
    # Escape the root so a name like 'photos[2024]' is not read as a pattern.
    root = glob.escape(str(directory))

    total_size = 0
    count = 0

    # One lazy recursive walk per pattern
    for ext in image_exts:
        for image_path in glob.iglob(f'{root}/**/{ext}', recursive=True):
            # glob matches on names only, so 'album.gif/' would match too.
            if not os.path.isfile(image_path):
                continue
            try:
                size = os.path.getsize(image_path)
            except OSError:
                # The file vanished or became unreadable after being listed.
                continue

            total_size += size
            count += 1

            print(f"Processing: {image_path} ({size} bytes)")

    return count, total_size

# Usage
count, total = process_images('photos')
print(f"Processed {count} images, {total} bytes")

Compare with os.listdir

import glob
import os

# os.listdir() - non-recursive, simple
files = os.listdir('.')  # O(n) - one level only
print(files)

# glob.glob() - pattern matching, can recurse
files = glob.glob('**/*.txt', recursive=True)  # O(n) - all levels
print(files)

# glob is better for searching, os.listdir better for listing

Pattern Escaping

Handle Special Characters

import glob

# Filenames with special characters need escaping
# * ? and [ ] are glob metacharacters (Python's glob has no { } brace expansion)

# Files that contain brackets
# File: test[1].txt

# glob.escape() wraps each metacharacter in brackets so it matches literally
pattern = glob.escape('test[1].txt')  # Returns 'test[[]1].txt'
result = glob.glob(pattern)

# Escape before using in patterns
filename = "data[backup].csv"
pattern = glob.escape(filename)
result = glob.glob(pattern)

Common Use Cases

Find Recent Files

import glob
import os
import time

def find_recent_files(directory, minutes=60):
    """Find regular files below *directory* modified in the last *minutes*.

    The whole tree is walked once. *directory* is taken literally (glob
    metacharacters in its name are escaped). Names starting with a dot are
    not visited, because glob skips them by default.

    Returns:
        A list of ``(path, mtime)`` tuples, most recently modified first.
    """

    cutoff_time = time.time() - (minutes * 60)
    recent = []

    # Escape the root so a name like 'logs[1]' is not read as a pattern.
    pattern = f'{glob.escape(str(directory))}/**/*'

    # Walk all entries recursively, lazily
    for file_path in glob.iglob(pattern, recursive=True):
        if not os.path.isfile(file_path):
            continue
        try:
            mtime = os.path.getmtime(file_path)
        except OSError:
            # The file was removed between the isfile() check and here.
            continue
        if mtime > cutoff_time:
            recent.append((file_path, mtime))

    return sorted(recent, key=lambda x: x[1], reverse=True)

# Usage
recent = find_recent_files('.', minutes=30)
for path, mtime in recent:
    print(f"{path}: {time.ctime(mtime)}")

Build System File Finding

import glob

class BuildSystem:
    """Find source and test files below a project root.

    The project root is treated as a literal path; glob metacharacters in
    it are escaped before patterns are built.
    """

    # Language name -> glob patterns, relative to the project root.
    _SOURCE_PATTERNS = {
        'python': ('**/*.py',),
        'javascript': ('**/*.js',),
        'cpp': ('**/*.cpp', '**/*.h'),
        'java': ('**/*.java',),
    }

    # Naming conventions recognised as Python test files.
    _TEST_PATTERNS = ('**/test_*.py', '**/*_test.py', '**/tests.py')

    def __init__(self, project_root):
        """Remember *project_root* as the directory all searches start from."""
        self.root = project_root

    def _glob_all(self, patterns):
        """Return the concatenated recursive matches of *patterns* under the root."""
        root = glob.escape(str(self.root))
        matches = []
        for pattern in patterns:
            matches.extend(glob.glob(f'{root}/{pattern}', recursive=True))
        return matches

    # Find source files
    def find_sources(self, language):
        """Return source files for *language*, or ``[]`` if it is unknown.

        The tree is walked once per pattern registered for the language
        ('cpp' has two patterns, the others one).
        """
        patterns = self._SOURCE_PATTERNS.get(language)
        if patterns is None:
            return []
        return self._glob_all(patterns)

    # Find test files
    def find_tests(self):
        """Return Python test files under the root, de-duplicated and sorted.

        A file such as ``test_x_test.py`` matches more than one pattern, so
        duplicates are removed; sorting makes the result order deterministic.
        """
        return sorted(set(self._glob_all(self._TEST_PATTERNS)))

# Usage
build = BuildSystem('.')
py_sources = build.find_sources('python')
tests = build.find_tests()

Package Discovery

import glob
import os

def find_python_packages(directory):
    """Return the dotted names of all Python packages below *directory*.

    A package is any directory containing an ``__init__.py``. Names are
    built from the path relative to the import root, so the result never
    contains leading dots or absolute-path components:

    * if *directory* is itself a package, names start with its own name
      (``mypkg``, ``mypkg.sub``);
    * otherwise *directory* is treated as the import root
      (``src/pkg/sub`` -> ``pkg.sub``).

    The tree is walked once. Directories whose names start with a dot are
    not visited, because glob skips them by default.

    Returns:
        A sorted list of dotted package names.
    """

    root = os.path.abspath(directory)

    # Anchor names at the parent when the search root is itself a package,
    # so the root's own name is part of every dotted name.
    if os.path.isfile(os.path.join(root, '__init__.py')):
        anchor = os.path.dirname(root)
    else:
        anchor = root

    # Escape the root so glob metacharacters in the path are taken literally.
    pattern = os.path.join(glob.escape(root), '**', '__init__.py')

    packages = set()
    for init_file in glob.iglob(pattern, recursive=True):
        # Package is the directory containing __init__.py
        package_dir = os.path.relpath(os.path.dirname(init_file), anchor)
        # Convert the relative path to a dotted module name
        packages.add('.'.join(package_dir.split(os.sep)))

    return sorted(packages)

# Usage
packages = find_python_packages('src')
print("Found packages:", packages)

Performance Characteristics

Time Complexity

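  • glob(): O(n) where n = directory entries scanned (not just the entries that match)
  • iglob(): O(1) to create the iterator; the scanning work is spread across iteration
  • Pattern matching: O(n) to test every name in a directory
  • Recursive search: O(N) where N = total files and directories in the tree, regardless of depth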

Space Complexity

  • glob(): O(n) for result list
  • iglob(): does not build the full result list; memory is bounded by one directory listing at a time
  • Pattern processing: O(1) per match

Performance Tips

import glob
import time

# Slow: Multiple glob calls
start = time.time()
py_files = glob.glob('*.py')
txt_files = glob.glob('*.txt')
md_files = glob.glob('*.md')
time1 = time.time() - start

# Better: one directory scan, filtered by suffix.
# str.endswith accepts a tuple, so this selects exactly the .py, .txt and
# .md files - a character class such as '*.[pytm]*' would also match
# .png, .toml, .yaml, ... and is not an equivalent pattern.
start = time.time()
all_files = [name for name in glob.iglob('*')
             if name.endswith(('.py', '.txt', '.md'))]
time2 = time.time() - start

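# Streaming: iglob yields matches lazily (low memory), but this walks the whole tree, so its time is not comparable to the single-directory timings above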
start = time.time()
for file in glob.iglob('**/*', recursive=True):
    if file.endswith('.py'):
        process(file)
time3 = time.time() - start

print(f"Multiple: {time1:.4f}s")
print(f"Single: {time2:.4f}s")
print(f"Iterator: {time3:.4f}s")

Limitations

  • Cannot filter by file type: results include directories as well as files, so check with os.path.isfile() afterwards
  • No built-in size filtering
  • No date/time filtering
  • Patterns are shell-style, not regex

Alternatives

# For more control, use pathlib
from pathlib import Path

# glob with pathlib
path = Path('.')
py_files = list(path.glob('**/*.py'))

# For regex patterns, use os.walk + re
import os
import re

for root, dirs, files in os.walk('.'):
    for file in files:
        if re.match(r'test_.*\.py$', file):
            print(os.path.join(root, file))

# For filtering by attributes
from pathlib import Path
import os

py_files = [f for f in Path('.').glob('**/*.py') 
            if f.stat().st_size < 1000]  # < 1KB

Best Practices

Do's

  • Use glob for simple wildcard matching
  • Use iglob for large result sets
  • Escape filenames with special characters
  • Use recursive=True for deep searches
  • Cache results if used multiple times

Don'ts

  • Don't use glob for complex filtering
  • Don't use glob inside tight loops
  • Don't assume pattern will find files quickly
  • Don't rely on glob for file existence checking