Python ships with an absurdly complete standard library — “batteries included” is not marketing fluff. This lesson covers the modules and patterns you will reach for daily. If you are coming from a language where you npm install or cargo add for every small task, prepare to be surprised by how much Python gives you out of the box.
collections
The collections module provides specialized container types that solve problems dict and list handle poorly.
defaultdict
from collections import defaultdict
# Group items without checking key existence
logs = [("web", "200 OK"), ("db", "timeout"), ("web", "404"), ("db", "connected")]
by_service = defaultdict(list)
for service, msg in logs:
by_service[service].append(msg)
# {"web": ["200 OK", "404"], "db": ["timeout", "connected"]}
# Nested defaultdict for multi-level grouping
tree = lambda: defaultdict(tree)
config = tree()
config["database"]["primary"]["host"] = "10.0.0.1"
config["database"]["primary"]["port"] = 5432
Counter
from collections import Counter
words = "the cat sat on the mat the cat".split()
freq = Counter(words)
freq.most_common(2) # [("the", 3), ("cat", 2)]
# Arithmetic on counters
a = Counter(x=4, y=2)
b = Counter(x=1, y=3)
a - b # Counter({"x": 3}) — drops zero/negative
a & b # Counter({"x": 1, "y": 2}) — min per key
a | b # Counter({"x": 4, "y": 3}) — max per key
deque
from collections import deque
# O(1) append/pop from both ends — unlike list which is O(n) for left ops
buf = deque(maxlen=3)
for i in range(5):
buf.append(i)
print(buf) # deque([2, 3, 4], maxlen=3) — oldest items evicted
# Rotate
d = deque([1, 2, 3, 4, 5])
d.rotate(2) # deque([4, 5, 1, 2, 3]) — positive = rotate right
d.rotate(-1) # deque([5, 1, 2, 3, 4])
namedtuple and ChainMap
from collections import namedtuple, ChainMap
# namedtuple — lightweight immutable record (prefer dataclasses for new code)
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
print(p.x, p[0]) # 3, 3 — attribute and index access
# ChainMap — layer multiple dicts, first match wins
defaults = {"theme": "light", "lang": "en", "timeout": 30}
user_prefs = {"theme": "dark"}
request = {"lang": "fr"}
settings = ChainMap(request, user_prefs, defaults)
settings["theme"] # "dark" — user_prefs layer
settings["lang"] # "fr" — request layer
settings["timeout"] # 30 — defaults layer
itertools
These are composable building blocks for iteration. They return lazy iterators — zero memory overhead regardless of input size.
from itertools import chain, islice, groupby, combinations, permutations, product, zip_longest, starmap
# chain — flatten multiple iterables into one
all_items = chain([1, 2], [3, 4], [5])
list(all_items) # [1, 2, 3, 4, 5]
# chain.from_iterable — flatten a list of lists
nested = [[1, 2], [3], [4, 5, 6]]
list(chain.from_iterable(nested)) # [1, 2, 3, 4, 5, 6]
# islice — slice an iterator without materializing it
from itertools import count
first_10_evens = islice((x for x in count() if x % 2 == 0), 10)
list(first_10_evens) # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
# groupby — group consecutive items (data MUST be sorted by key first)
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
data.sort(key=lambda x: x[0]) # sort first!
for key, group in groupby(data, key=lambda x: x[0]):
print(key, list(group))
# A [("A", 1), ("A", 2), ("A", 5)]
# B [("B", 3), ("B", 4)]
# combinations and permutations
list(combinations("ABCD", 2)) # [("A","B"), ("A","C"), ("A","D"), ("B","C"), ...]
list(permutations("ABC", 2)) # [("A","B"), ("A","C"), ("B","A"), ("B","C"), ...]
# product — cartesian product (replaces nested loops)
sizes = ["S", "M", "L"]
colors = ["red", "blue"]
list(product(sizes, colors))
# [("S","red"), ("S","blue"), ("M","red"), ("M","blue"), ("L","red"), ("L","blue")]
# zip_longest — zip without truncation
list(zip_longest([1, 2, 3], ["a", "b"], fillvalue="-"))
# [(1, "a"), (2, "b"), (3, "-")]
# starmap — apply function to pre-grouped argument tuples
list(starmap(pow, [(2, 10), (3, 5), (10, 3)])) # [1024, 243, 1000]
Gotcha: groupby groups consecutive elements. If your data is not sorted by the grouping key, you will get multiple groups for the same key. Always sort first, or use defaultdict(list) instead.
functools
Higher-order function utilities that modify or compose callable behavior.
from functools import lru_cache, partial, reduce, total_ordering, singledispatch
# lru_cache — memoization with LRU eviction
@lru_cache(maxsize=256)
def fib(n):
    """Return the n-th Fibonacci number.

    The naive double recursion would be exponential; lru_cache memoizes
    each n, so every value 0..n is computed exactly once.
    """
    return n if n < 2 else fib(n - 1) + fib(n - 2)
fib(100) # instant — 21-digit result, computed in microseconds
fib.cache_info() # CacheInfo(hits=98, misses=101, maxsize=256, currsize=101)
fib.cache_clear() # reset cache
# partial — freeze some arguments
import json
pretty_json = partial(json.dumps, indent=2, sort_keys=True, ensure_ascii=False)
pretty_json({"name": "Gyan", "items": [1, 2, 3]})
# reduce — fold a sequence into a single value
from operator import mul
factorial_10 = reduce(mul, range(1, 11)) # 3628800
# total_ordering — define __eq__ and one comparison, get the rest free
@total_ordering
class Version:
    """A major.minor version number.

    Only __eq__ and __lt__ are defined by hand; @total_ordering
    derives __le__, __gt__, and __ge__ from them.
    """

    def __init__(self, major, minor):
        self.major = major
        self.minor = minor

    def __eq__(self, other):
        return self.major == other.major and self.minor == other.minor

    def __lt__(self, other):
        # Compare major first; fall back to minor only on a tie —
        # the same ordering a (major, minor) tuple comparison gives.
        if self.major != other.major:
            return self.major < other.major
        return self.minor < other.minor
Version(1, 2) <= Version(1, 3) # True — __le__ auto-generated
# singledispatch — function overloading by first argument type
@singledispatch
def serialize(obj):
    """Serialize obj to a string, dispatching on its type.

    This base implementation is the fallback for unregistered types.
    """
    raise TypeError(f"Cannot serialize {type(obj)}")


@serialize.register(str)
def _serialize_str(obj):
    # Strings get wrapped in double quotes.
    return f'"{obj}"'


@serialize.register(int)
@serialize.register(float)
def _serialize_number(obj):
    # Both numeric types share one handler via stacked register calls.
    return str(obj)
serialize("hello") # '"hello"'
serialize(42) # "42"
Tip: In Python 3.9+, use @cache instead of @lru_cache(maxsize=None) for unbounded memoization.
datetime
Date/time handling is notoriously tricky. Python’s datetime module plus zoneinfo (3.9+) covers most needs.
from datetime import datetime, date, time, timedelta, timezone
from zoneinfo import ZoneInfo
# Current time — always prefer timezone-aware
now_utc = datetime.now(timezone.utc)
now_tokyo = datetime.now(ZoneInfo("Asia/Tokyo"))
# Parsing and formatting
ts = datetime.strptime("2026-03-28T14:30:00", "%Y-%m-%dT%H:%M:%S")
ts.strftime("%B %d, %Y at %I:%M %p") # "March 28, 2026 at 02:30 PM"
# ISO 8601 (preferred for APIs)
dt = datetime.fromisoformat("2026-03-28T14:30:00+05:30")
dt.isoformat() # "2026-03-28T14:30:00+05:30"
# Arithmetic with timedelta
tomorrow = date.today() + timedelta(days=1)
two_hours_later = now_utc + timedelta(hours=2)
delta = datetime(2026, 12, 31) - datetime(2026, 1, 1)
delta.days # 364
# Convert between timezones
utc_time = datetime(2026, 3, 28, 12, 0, tzinfo=timezone.utc)
eastern = utc_time.astimezone(ZoneInfo("America/New_York"))
Gotcha: Never use datetime.now() without a timezone argument. Naive datetimes (no tz info) are a constant source of bugs. Always work in UTC internally and convert to local time only for display.
Regular Expressions
The re module gives you Perl-style regex. Compile patterns you reuse for performance.
import re
# compile for reuse — avoids recompilation overhead
email_re = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")
# match vs search — match anchors at start, search finds anywhere
re.match(r"\d+", "abc123") # None — no digits at start
re.search(r"\d+", "abc123") # <Match "123">
# findall — all non-overlapping matches
text = "Call 555-1234 or 555-5678 for info"
re.findall(r"\d{3}-\d{4}", text) # ["555-1234", "555-5678"]
# Groups — capture parts of the match
m = re.search(r"(\d{4})-(\d{2})-(\d{2})", "Date: 2026-03-28")
m.group(0) # "2026-03-28" — full match
m.group(1) # "2026"
m.groups() # ("2026", "03", "28")
# Named groups
pattern = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})")
m = pattern.search("Date: 2026-03-28")
m.group("year") # "2026"
m.groupdict() # {"year": "2026", "month": "03", "day": "28"}
# sub — find and replace
cleaned = re.sub(r"\s+", " ", "too   many   spaces") # "too many spaces"
# Split on pattern
re.split(r"[,;\s]+", "one, two; three four") # ["one", "two", "three", "four"]
Tip: Use raw strings (r"...") for all regex patterns to avoid backslash escaping confusion. For anything more complex than simple patterns, consider the regex third-party library which supports Unicode categories and atomic groups.
logging
print() debugging does not scale. The logging module is built in and production-ready.
import logging
# Basic setup — usually in your entry point / main
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Per-module logger — always use __name__
logger = logging.getLogger(__name__)
logger.debug("Verbose detail, hidden at INFO level")
logger.info("Server started on port %d", 8080) # lazy formatting
logger.warning("Disk usage at %d%%", 92)
logger.error("Connection failed: %s", "timeout")
logger.exception("Unhandled error") # includes traceback automatically
# File handler with rotation
from logging.handlers import RotatingFileHandler
handler = RotatingFileHandler("app.log", maxBytes=5_000_000, backupCount=3)
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)
Best practices:
- Use `logger.info("msg %s", val)`, not `logger.info(f"msg {val}")` — lazy formatting avoids string construction when the level is filtered out.
- One logger per module via `logging.getLogger(__name__)`.
- Never call `logging.basicConfig()` in library code — only in application entry points.
subprocess
Run external commands from Python. The run() function (Python 3.5+) is the only API you need for most cases.
import subprocess
# Simple command
result = subprocess.run(["ls", "-la", "/tmp"], capture_output=True, text=True)
print(result.stdout)
print(result.returncode) # 0 on success
# Check for errors — raises CalledProcessError on non-zero exit
subprocess.run(["git", "status"], check=True, capture_output=True, text=True)
# Pass input via stdin
result = subprocess.run(
["grep", "error"],
input="line1\nerror found\nline3\n",
capture_output=True, text=True,
)
print(result.stdout) # "error found\n"
# Pipe between commands (prefer this over shell=True)
ps = subprocess.run(["ps", "aux"], capture_output=True, text=True)
grep = subprocess.run(
["grep", "python"],
input=ps.stdout,
capture_output=True, text=True,
)
# Timeout
try:
subprocess.run(["sleep", "30"], timeout=5)
except subprocess.TimeoutExpired:
print("Command timed out")
Gotcha: Avoid shell=True unless you genuinely need shell features (globbing, pipes). It introduces shell injection vulnerabilities and platform-dependent behavior.
Testing
Python has unittest built in, but pytest is the de facto standard. Both work together — pytest can run unittest-style tests.
unittest basics
import unittest
class TestStringMethods(unittest.TestCase):
def setUp(self):
self.data = ["alice", "bob", "charlie"]
def test_upper(self):
self.assertEqual("foo".upper(), "FOO")
def test_contains(self):
self.assertIn("alice", self.data)
def test_raises(self):
with self.assertRaises(ValueError):
int("not_a_number")
pytest — less boilerplate, more power
# test_users.py — just functions, no classes needed
import pytest
def test_addition():
assert 1 + 1 == 2
def test_exception():
with pytest.raises(ZeroDivisionError):
1 / 0
# Fixtures — dependency injection for tests
@pytest.fixture
def db_connection():
conn = create_connection("test.db")
yield conn # test runs here
conn.close() # teardown after test
def test_query(db_connection):
result = db_connection.execute("SELECT 1")
assert result.fetchone()[0] == 1
# Parametrize — run same test with different inputs
@pytest.mark.parametrize("input,expected", [
("hello", 5),
("", 0),
("world!", 6),
])
def test_string_length(input, expected):
assert len(input) == expected
Mocking
from unittest.mock import patch, MagicMock
# Patch an external dependency
@patch("myapp.services.requests.get")
def test_fetch_user(mock_get):
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: {"id": 1, "name": "Alice"},
)
user = fetch_user(1)
assert user["name"] == "Alice"
mock_get.assert_called_once_with("https://api.example.com/users/1")
# Context manager style
def test_with_mock():
with patch("builtins.open", MagicMock()) as mock_file:
mock_file.return_value.__enter__.return_value.read.return_value = "data"
# ... test code that reads a file
Other Essentials
json and csv
import json, csv
# JSON — serialize/deserialize
data = {"name": "Gyan", "scores": [95, 87, 92]}
json_str = json.dumps(data, indent=2)
parsed = json.loads(json_str)
# Read/write JSON files
with open("data.json", "w") as f:
json.dump(data, f)
with open("data.json") as f:
loaded = json.load(f)
# CSV — DictReader is the way to go
with open("report.csv") as f:
for row in csv.DictReader(f):
print(row["name"], row["score"]) # dict per row, headers as keys
# Write CSV
with open("output.csv", "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=["name", "score"])
writer.writeheader()
writer.writerow({"name": "Alice", "score": 95})
os, sys, and pathlib
import os, sys
from pathlib import Path
# pathlib — modern, object-oriented path handling (prefer over os.path)
p = Path("/var/log/app")
config = Path.home() / ".config" / "myapp" / "settings.json"
config.parent.mkdir(parents=True, exist_ok=True) # create dirs recursively
# Read/write
config.write_text(json.dumps({"debug": True}))
data = json.loads(config.read_text())
# Globbing
for py_file in Path("src").rglob("*.py"):
print(py_file)
# os/sys essentials
os.getenv("DATABASE_URL", "sqlite:///default.db") # env vars with fallback
sys.argv # command-line arguments
sys.exit(1) # exit with error code
typing patterns
from typing import Optional, Union, TypeAlias, Literal
from collections.abc import Callable, Iterator
# Type aliases (3.10+ syntax)
JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
UserId: TypeAlias = int
# Callable types
Handler: TypeAlias = Callable[[str, int], bool]
# Literal for constrained values
def set_log_level(level: Literal["DEBUG", "INFO", "WARNING", "ERROR"]) -> None: ...
# Optional = Union[X, None]
def find_user(user_id: int) -> Optional[dict]:
...
# Generic functions (3.12+ syntax)
def first[T](items: list[T]) -> T | None:
return items[0] if items else None
Pythonic Patterns
These built-in patterns separate idiomatic Python from “Java written in Python.”
# enumerate — never use range(len(...))
names = ["Alice", "Bob", "Charlie"]
for i, name in enumerate(names, start=1):
print(f"{i}. {name}")
# zip — iterate multiple sequences in parallel
keys = ["name", "age", "role"]
values = ["Alice", 30, "admin"]
user = dict(zip(keys, values)) # {"name": "Alice", "age": 30, "role": "admin"}
# any / all — short-circuit boolean checks on iterables
nums = [2, 4, 6, 8, 10]
all(x % 2 == 0 for x in nums) # True — all even
any(x > 100 for x in nums) # False — none over 100
# Walrus operator := (3.8+) — assign inside expressions
import re
if m := re.search(r"error (\d+)", "error 404 occurred"):
print(f"Error code: {m.group(1)}")
# Useful in while loops
while chunk := f.read(8192):
process(chunk)
# match/case — structural pattern matching (3.10+)
def handle_command(command: dict):
match command:
case {"action": "create", "name": str(name)}:
create_item(name)
case {"action": "delete", "id": int(item_id)}:
delete_item(item_id)
case {"action": "list", "filter": {"status": status}}:
list_items(status=status)
case _:
raise ValueError(f"Unknown command: {command}")
# Unpacking and starred expressions
first, *middle, last = [1, 2, 3, 4, 5]
# first=1, middle=[2, 3, 4], last=5
# Dictionary merge (3.9+)
defaults = {"timeout": 30, "retries": 3}
overrides = {"timeout": 10, "verbose": True}
config = defaults | overrides # {"timeout": 10, "retries": 3, "verbose": True}
Key Takeaways
- collections: `defaultdict` eliminates key-existence checks, `Counter` handles frequency counting, `deque` gives O(1) operations on both ends.
- itertools: Think in terms of lazy pipelines. `chain`, `islice`, and `groupby` replace most manual loop accumulation patterns. Always sort before `groupby`.
- functools: `lru_cache` is free memoization. `partial` is cleaner than lambdas for freezing arguments. `singledispatch` gives you function overloading.
- datetime: Always use timezone-aware datetimes. Use `zoneinfo` (3.9+) instead of `pytz`. Prefer ISO 8601 format for serialization.
- re: Compile patterns you reuse. Use raw strings. Prefer named groups for readability. `match` anchors at start; `search` scans the whole string.
- logging: One logger per module, lazy formatting, configure only in entry points.
- subprocess: Use `run()` with `capture_output=True` and `text=True`. Avoid `shell=True`.
- Testing: pytest over unittest for new projects. Use fixtures for setup/teardown, `parametrize` for data-driven tests, `mock.patch` for external dependencies.
- pathlib: Prefer `Path` over `os.path` for all file operations. The `/` operator for path joining is cleaner and less error-prone.
- Pythonic patterns: `enumerate` over `range(len(...))`, `zip` for parallel iteration, walrus operator for assign-and-test, `match`/`case` for complex dispatch.
