Statistics Module Complexity

The statistics module provides functions for calculating basic statistical properties of numeric data.

Common Operations

Operation        Time        Space  Notes
mean(data)       O(n)        O(1)   Calculate average
median(data)     O(n log n)  O(n)   Calculate median; requires sorting
mode(data)       O(n)        O(n)   Find most common value
stdev(data)      O(n)        O(1)   Standard deviation; two passes
variance(data)   O(n)        O(1)   Calculate variance; two passes
quantiles(data)  O(n log n)  O(n)   Calculate quantiles; requires sorting

Mean

mean()

Time Complexity: O(n)

Where n = number of data points.

from statistics import mean

# Calculate mean: O(n)
data = [1, 2, 3, 4, 5]
avg = mean(data)  # O(5) = 3.0

# Large dataset: O(n)
large_data = range(1000000)
result = mean(large_data)  # O(1000000)

# Single pass through data
result = mean([10, 20, 30, 40, 50])  # O(5)

Space Complexity: O(1)

from statistics import mean

result = mean(data)  # O(1) - only stores sum and count
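
Since mean() accepts any iterable, a generator works too; a minimal sketch (nothing beyond the standard library is assumed) showing that large inputs never need to be materialized as a list:

from statistics import mean

# Generator expression: values are produced lazily, one at a time
squares = (x * x for x in range(1_000_000))

# mean() consumes the generator in a single pass; only running totals
# are kept in memory, not the million squared values
avg = mean(squares)  # O(n) time, O(1) extra space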

Median

median()

Time Complexity: O(n log n)

from statistics import median

# Calculate median: O(n log n) - requires sorting
data = [5, 2, 8, 1, 9]
mid = median(data)  # O(n log n)

# Already-sorted data: median() still sorts a copy
# (Timsort finishes in O(n) on sorted input, but the copy is still made)
data = [1, 2, 3, 4, 5]
mid = median(data)  # 3

# Even/odd length handled automatically
mid = median([1, 2, 3])  # 2 (odd)
mid = median([1, 2, 3, 4])  # 2.5 (even - average)

Space Complexity: O(n)

from statistics import median

# Sorting requires O(n) space
result = median(large_data)  # O(n) space
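
Because median() sorts a separate copy rather than sorting in place, the input list is left untouched; a small sketch:

from statistics import median

data = [5, 2, 8, 1, 9]
mid = median(data)  # 5

# The original ordering is preserved; only the internal copy was sorted
print(data)  # [5, 2, 8, 1, 9]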

median_low() and median_high()

Time Complexity: O(n log n)

from statistics import median_low, median_high

data = [1, 2, 3, 4, 5]

# Median low (lower middle): O(n log n)
low = median_low(data)  # 3

# Median high (upper middle): O(n log n)
high = median_high(data)  # 3

# Even-length data
data = [1, 2, 3, 4]
low = median_low(data)   # 2
high = median_high(data)  # 3

Space Complexity: O(n)

from statistics import median_low

result = median_low(data)  # O(n) space for sorting
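
A useful property worth a quick sketch: median_low() and median_high() always return an element of the dataset, so they never introduce an interpolated value the way median() can on even-length data:

from statistics import median, median_low, median_high

data = [10, 20, 30, 40]

print(median(data))       # 25.0 - interpolated, not in the data
print(median_low(data))   # 20   - always an actual data point
print(median_high(data))  # 30   - always an actual data point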

Mode

mode()

Time Complexity: O(n)

from statistics import mode

# Find most common: O(n)
data = [1, 1, 1, 2, 2, 3]
most_common = mode(data)  # O(6) = 1

# Multimodal data: returns the first mode encountered (Python 3.8+);
# Python 3.7 and earlier raised StatisticsError for multimodal data
data = [1, 1, 2, 2, 3, 3]
m = mode(data)  # 1 (first encountered)

data = [1, 2, 3]  # All values equally common
m = mode(data)    # 1 (first encountered)

Space Complexity: O(n)

from statistics import mode

# Tracks frequencies of all values
result = mode(data)  # O(n) space for frequency table
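
A rough sketch of the frequency-table idea behind mode(); the real implementation differs in its details, but collections.Counter captures the O(n) time and O(n) space behaviour:

from collections import Counter

def mode_sketch(data):
    # One pass builds a table with one entry per distinct value
    counts = Counter(data)              # O(n) time, O(n) space
    # most_common(1) scans the table for the highest count
    return counts.most_common(1)[0][0]  # O(n) time

print(mode_sketch([1, 1, 1, 2, 2, 3]))  # 1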

multimode() - All Modes

Time Complexity: O(n)

from statistics import multimode

# Get all modes: O(n)
data = [1, 1, 2, 2, 3]
modes = multimode(data)  # O(5) = [1, 2]

# Single mode returns list
data = [1, 1, 1, 2, 3]
modes = multimode(data)  # O(5) = [1]

# Multiple modes
data = [5, 5, 5, 6, 6, 6]
modes = multimode(data)  # [5, 6]

Space Complexity: O(n)

from statistics import multimode

modes = multimode(data)  # O(n) for frequency tracking
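
Unlike mode(), multimode() never raises for awkward input; an empty iterable simply yields an empty list, and any iterable (including a string) is accepted. A small sketch:

from statistics import multimode

print(multimode([]))                      # []
print(multimode("aabbbbccddddeeffffgg"))  # ['b', 'd', 'f']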

Variance and Standard Deviation

variance() and stdev()

Time Complexity: O(n)

from statistics import variance, stdev, pvariance, pstdev

data = [1, 2, 3, 4, 5]

# Calculate variance: O(n) - two passes through data
var = variance(data)  # O(5) = 2.5

# Standard deviation: O(n) - two passes (mean then variance)
std = stdev(data)  # O(5) ≈ 1.58

# Sample vs population variance
var_sample = variance(data)        # O(n) - sample (n-1 denominator)
var_pop = pvariance(data)          # O(n) - population (n denominator)

# Sample vs population stdev
std_sample = stdev(data)           # O(n) - sample
std_pop = pstdev(data)             # O(n) - population

Space Complexity: O(1)

from statistics import variance, stdev

# Two passes through data: O(1) space
var = variance(data)  # O(1) - only stores intermediate values
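
A simplified sketch of the two-pass approach; the stdlib version adds exact-arithmetic safeguards against rounding error, so treat this as an illustration of the complexity, not of the real implementation:

import math

def two_pass_variance(data, sample=True):
    n = len(data)
    mu = sum(data) / n                     # pass 1: mean, O(n)
    ss = sum((x - mu) ** 2 for x in data)  # pass 2: squared deviations, O(n)
    return ss / (n - 1 if sample else n)   # sample vs population denominator

data = [1, 2, 3, 4, 5]
print(two_pass_variance(data))             # 2.5
print(math.sqrt(two_pass_variance(data)))  # ~1.58 (standard deviation)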

Quantiles

quantiles()

Time Complexity: O(n log n)

from statistics import quantiles

data = list(range(1, 101))  # 1-100

# Get quartiles: O(n log n) - sorting required
q = quantiles(data, n=4)  # [25.25, 50.5, 75.75] with the default 'exclusive' method

# Deciles: O(n log n)
dec = quantiles(data, n=10)  # 9 cut points dividing the data into 10 groups

# Percentiles: O(n log n)
q = quantiles(data, n=100)  # 99 cut points

Space Complexity: O(n)

from statistics import quantiles

result = quantiles(data, n=4)  # O(n) space for sorting
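
The method parameter changes the returned cut points but not the complexity; a small sketch (the values shown are what I'd expect for the integers 1-100):

from statistics import quantiles

data = list(range(1, 101))

# 'exclusive' (default): data is treated as a sample of a larger population
print(quantiles(data, n=4))                      # [25.25, 50.5, 75.75]

# 'inclusive': data is treated as the entire population
print(quantiles(data, n=4, method='inclusive'))  # [25.75, 50.5, 75.25]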

Common Patterns

Analyze Dataset

from statistics import mean, stdev, median

def analyze(data):
    """Comprehensive analysis: O(n log n)"""
    return {
        'mean': mean(data),           # O(n)
        'median': median(data),       # O(n log n)
        'stdev': stdev(data),         # O(n)
        'count': len(data),           # O(1)
        'min': min(data),             # O(n)
        'max': max(data),             # O(n)
    }

stats = analyze([10, 20, 30, 40, 50])  # O(n log n) total

Quality Control

from statistics import mean, stdev

def is_outlier(data, value, stddev_limit=2):
    """Check if value is outlier: O(n)"""
    avg = mean(data)      # O(n)
    std = stdev(data)     # O(n)

    return abs(value - avg) > stddev_limit * std  # O(1)

measurements = [5.0, 5.1, 4.9, 5.2, 5.0]  # example readings
if is_outlier(measurements, 5.5):
    print("Outlier detected")

Data Validation

from statistics import mean, stdev, StatisticsError

def validate_data(data, min_size=3):
    """Validate statistical data: O(n)"""
    if len(data) < min_size:
        raise ValueError(f"Need at least {min_size} values")

    try:
        avg = mean(data)     # O(n)
        std = stdev(data)    # O(n)
        return True
    except StatisticsError:
        return False
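
A short usage sketch for the validate_data() helper defined above:

print(validate_data([10, 12, 11, 13]))  # True

try:
    validate_data([10, 12])             # fewer than min_size values
except ValueError as exc:
    print(exc)                          # Need at least 3 values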

Performance Characteristics

Best Practices

from statistics import mean, median, stdev, quantiles

# Good: linear-time statistics stay cheap even when combined
data = [1, 2, 3, 4, 5]
m = mean(data)      # O(n)
std = stdev(data)   # O(n)
# Total: two linear passes, still O(n)

# Careful: each of these sorts its own copy of the data
med = median(data)  # O(n log n) - sorts internally
q = quantiles(data) # O(n log n) - sorts again

# Better for repeated analysis: sort once yourself
sorted_data = sorted(data)  # O(n log n)
# Already-sorted input lets the internal Timsort finish in O(n),
# and simple statistics can be read off the sorted list directly
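
When the data is already sorted, the middle value can simply be read off by index; a rough sketch (this bypasses the statistics functions entirely, so it illustrates the idea rather than their behaviour):

def median_from_sorted(sorted_data):
    """Median of an already-sorted sequence: O(1) time and space."""
    n = len(sorted_data)
    mid = n // 2
    if n % 2:  # odd length: middle element
        return sorted_data[mid]
    # even length: average of the two middle elements
    return (sorted_data[mid - 1] + sorted_data[mid]) / 2

sorted_data = sorted([5, 2, 8, 1, 9])   # O(n log n), done once
print(median_from_sorted(sorted_data))  # 5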

Memory Usage

from statistics import mean, median, stdev

# Good: mean() uses O(1) memory
avg = mean(huge_dataset)  # O(1) memory

# Careful: median() uses O(n) memory (sorts)
med = median(huge_dataset)  # O(n) memory - creates sorted copy

# Good: stdev() uses O(1) memory
std = stdev(huge_dataset)  # O(1) memory

Comparison with NumPy

from statistics import mean
import numpy as np

# statistics module (simple)
data = [1, 2, 3, 4, 5]
avg = mean(data)  # O(n) - Python

# NumPy (powerful)
arr = np.array([1, 2, 3, 4, 5])
avg = np.mean(arr)  # Faster - optimized C code

# Use statistics for small datasets
# Use NumPy for large datasets or advanced operations
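
Within the standard library itself, fmean() (Python 3.8+) is a faster, float-only alternative to mean(); a small sketch:

from statistics import fmean, mean

data = [1.5, 2.5, 3.5, 4.5]

print(mean(data))   # 3.0 - exact arithmetic, supports Fraction/Decimal inputs
print(fmean(data))  # 3.0 - single fast pass, always returns a float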

Exception Handling

from statistics import mean, mode, StatisticsError

# mode() raises StatisticsError only for empty data on Python 3.8+
# (3.7 and earlier also raised it for multimodal data)
try:
    m = mode([])
except StatisticsError:
    print("No mode for empty data")

# Empty data
try:
    avg = mean([])
except StatisticsError:
    print("No data")

Version Notes

  • Python 3.4+: statistics module introduced
  • Python 3.8+: quantiles(), multimode(), and fmean() added; mode() now returns the first mode for multimodal data
  • Python 3.11+: mean(), variance(), stdev() consume iterators in one pass (O(1) space vs O(n) when given an iterator instead of a list)

Related

  • Math Module - Mathematical functions
  • Random Module - Random number generation