Python ships with an absurdly complete standard library — “batteries included” is not marketing fluff. This lesson covers the modules and patterns you will reach for daily. If you are coming from a language where you npm install or cargo add for every small task, prepare to be surprised by how much Python gives you out of the box.
collections
The collections module provides specialized container types that solve problems dict and list handle poorly.
defaultdict
from collections import defaultdict
# Group items without checking key existence
logs = [("web", "200 OK"), ("db", "timeout"), ("web", "404"), ("db", "connected")]
by_service = defaultdict(list)
for service, msg in logs:
by_service[service].append(msg)
# {"web": ["200 OK", "404"], "db": ["timeout", "connected"]}
# Nested defaultdict for multi-level grouping
tree = lambda: defaultdict(tree)
config = tree()
config["database"]["primary"]["host"] = "10.0.0.1"
config["database"]["primary"]["port"] = 5432
Counter
from collections import Counter
words = "the cat sat on the mat the cat".split()
freq = Counter(words)
freq.most_common(2) # [("the", 3), ("cat", 2)]
# Arithmetic on counters
a = Counter(x=4, y=2)
b = Counter(x=1, y=3)
a - b # Counter({"x": 3}) — drops zero/negative
a & b # Counter({"x": 1, "y": 2}) — min per key
a | b # Counter({"x": 4, "y": 3}) — max per key
deque
from collections import deque
# O(1) append/pop from both ends — unlike list which is O(n) for left ops
buf = deque(maxlen=3)
for i in range(5):
buf.append(i)
print(buf) # deque([2, 3, 4], maxlen=3) — oldest items evicted
# Rotate
d = deque([1, 2, 3, 4, 5])
d.rotate(2) # deque([4, 5, 1, 2, 3]) — positive = rotate right
d.rotate(-1) # deque([5, 1, 2, 3, 4])
namedtuple and ChainMap
from collections import namedtuple, ChainMap
# namedtuple — lightweight immutable record (prefer dataclasses for new code)
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
print(p.x, p[0]) # 3, 3 — attribute and index access
# ChainMap — layer multiple dicts, first match wins
defaults = {"theme": "light", "lang": "en", "timeout": 30}
user_prefs = {"theme": "dark"}
request = {"lang": "fr"}
settings = ChainMap(request, user_prefs, defaults)
settings["theme"] # "dark" — user_prefs layer
settings["lang"] # "fr" — request layer
settings["timeout"] # 30 — defaults layer
itertools
These are composable building blocks for iteration. They return lazy iterators — zero memory overhead regardless of input size.
from itertools import chain, islice, groupby, combinations, permutations, product, zip_longest, starmap
# chain — flatten multiple iterables into one
all_items = chain([1, 2], [3, 4], [5])
list(all_items) # [1, 2, 3, 4, 5]
# chain.from_iterable — flatten a list of lists
nested = [[1, 2], [3], [4, 5, 6]]
list(chain.from_iterable(nested)) # [1, 2, 3, 4, 5, 6]
# islice — slice an iterator without materializing it
from itertools import count
first_10_evens = islice((x for x in count() if x % 2 == 0), 10)
list(first_10_evens) # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
# groupby — group consecutive items (data MUST be sorted by key first)
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
data.sort(key=lambda x: x[0]) # sort first!
for key, group in groupby(data, key=lambda x: x[0]):
print(key, list(group))
# A [("A", 1), ("A", 2), ("A", 5)]
# B [("B", 3), ("B", 4)]
# combinations and permutations
list(combinations("ABCD", 2)) # [("A","B"), ("A","C"), ("A","D"), ("B","C"), ...]
list(permutations("ABC", 2)) # [("A","B"), ("A","C"), ("B","A"), ("B","C"), ...]
# product — cartesian product (replaces nested loops)
sizes = ["S", "M", "L"]
colors = ["red", "blue"]
list(product(sizes, colors))
# [("S","red"), ("S","blue"), ("M","red"), ("M","blue"), ("L","red"), ("L","blue")]
# zip_longest — zip without truncation
list(zip_longest([1, 2, 3], ["a", "b"], fillvalue="-"))
# [(1, "a"), (2, "b"), (3, "-")]
# starmap — apply function to pre-grouped argument tuples
list(starmap(pow, [(2, 10), (3, 5), (10, 3)])) # [1024, 243, 1000]
Gotcha: groupby groups consecutive elements. If your data is not sorted by the grouping key, you will get multiple groups for the same key. Always sort first, or use defaultdict(list) instead.
functools
Higher-order function utilities that modify or compose callable behavior.
from functools import lru_cache, partial, reduce, total_ordering, singledispatch
# lru_cache — memoization with LRU eviction
@lru_cache(maxsize=256)
def fib(n):
    """Return the n-th Fibonacci number.

    The naive double recursion would be exponential; lru_cache memoizes
    each n, so every value 0..n is computed exactly once.
    """
    return n if n < 2 else fib(n - 1) + fib(n - 2)
fib(100) # instant — 21-digit result, computed in microseconds
fib.cache_info() # CacheInfo(hits=98, misses=101, maxsize=256, currsize=101)
fib.cache_clear() # reset cache
# partial — freeze some arguments
import json
pretty_json = partial(json.dumps, indent=2, sort_keys=True, ensure_ascii=False)
pretty_json({"name": "Gyan", "items": [1, 2, 3]})
# reduce — fold a sequence into a single value
from operator import mul
factorial_10 = reduce(mul, range(1, 11)) # 3628800
# total_ordering — define __eq__ and one comparison, get the rest free
@total_ordering
class Version:
    """A major.minor version number.

    Only __eq__ and __lt__ are defined by hand; @total_ordering
    derives __le__, __gt__, and __ge__ from them.
    """

    def __init__(self, major, minor):
        self.major = major
        self.minor = minor

    def __eq__(self, other):
        return self.major == other.major and self.minor == other.minor

    def __lt__(self, other):
        # Compare major first; fall back to minor only on a tie —
        # the same ordering a (major, minor) tuple comparison gives.
        if self.major != other.major:
            return self.major < other.major
        return self.minor < other.minor
Version(1, 2) <= Version(1, 3) # True — __le__ auto-generated
# singledispatch — function overloading by first argument type
@singledispatch
def serialize(obj):
    """Serialize obj to a string, dispatching on its type.

    This base implementation is the fallback for unregistered types.
    """
    raise TypeError(f"Cannot serialize {type(obj)}")


@serialize.register(str)
def _serialize_str(obj):
    # Strings get wrapped in double quotes.
    return f'"{obj}"'


@serialize.register(int)
@serialize.register(float)
def _serialize_number(obj):
    # Both numeric types share one handler via stacked register calls.
    return str(obj)
serialize("hello") # '"hello"'
serialize(42) # "42"
Tip: In Python 3.9+, use @cache instead of @lru_cache(maxsize=None) for unbounded memoization.
datetime
Date/time handling is notoriously tricky. Python’s datetime module plus zoneinfo (3.9+) covers most needs.
from datetime import datetime, date, time, timedelta, timezone
from zoneinfo import ZoneInfo
# Current time — always prefer timezone-aware
now_utc = datetime.now(timezone.utc)
now_tokyo = datetime.now(ZoneInfo("Asia/Tokyo"))
# Parsing and formatting
ts = datetime.strptime("2026-03-28T14:30:00", "%Y-%m-%dT%H:%M:%S")
ts.strftime("%B %d, %Y at %I:%M %p") # "March 28, 2026 at 02:30 PM"
# ISO 8601 (preferred for APIs)
dt = datetime.fromisoformat("2026-03-28T14:30:00+05:30")
dt.isoformat() # "2026-03-28T14:30:00+05:30"
# Arithmetic with timedelta
tomorrow = date.today() + timedelta(days=1)
two_hours_later = now_utc + timedelta(hours=2)
delta = datetime(2026, 12, 31) - datetime(2026, 1, 1)
delta.days # 364
# Convert between timezones
utc_time = datetime(2026, 3, 28, 12, 0, tzinfo=timezone.utc)
eastern = utc_time.astimezone(ZoneInfo("America/New_York"))
Gotcha: Never use datetime.now() without a timezone argument. Naive datetimes (no tz info) are a constant source of bugs. Always work in UTC internally and convert to local time only for display.
Regular Expressions
The re module gives you Perl-style regex. Compile patterns you reuse for performance.
import re
# compile for reuse — avoids recompilation overhead
email_re = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")
# match vs search — match anchors at start, search finds anywhere
re.match(r"\d+", "abc123") # None — no digits at start
re.search(r"\d+", "abc123") # <Match "123">
# findall — all non-overlapping matches
text = "Call 555-1234 or 555-5678 for info"
re.findall(r"\d{3}-\d{4}", text) # ["555-1234", "555-5678"]
# Groups — capture parts of the match
m = re.search(r"(\d{4})-(\d{2})-(\d{2})", "Date: 2026-03-28")
m.group(0) # "2026-03-28" — full match
m.group(1) # "2026"
m.groups() # ("2026", "03", "28")
# Named groups
pattern = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})")
m = pattern.search("Date: 2026-03-28")
m.group("year") # "2026"
m.groupdict() # {"year": "2026", "month": "03", "day": "28"}
# sub — find and replace
cleaned = re.sub(r"\s+", " ", "too   many   spaces") # "too many spaces"
# Split on pattern
re.split(r"[,;\s]+", "one, two; three four") # ["one", "two", "three", "four"]
Tip: Use raw strings (r"...") for all regex patterns to avoid backslash escaping confusion. For anything more complex than simple patterns, consider the regex third-party library which supports Unicode categories and atomic groups.
logging
print() debugging does not scale. The logging module is built in and production-ready.
import logging
# Basic setup — usually in your entry point / main
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Per-module logger — always use __name__
logger = logging.getLogger(__name__)
logger.debug("Verbose detail, hidden at INFO level")
logger.info("Server started on port %d", 8080) # lazy formatting
logger.warning("Disk usage at %d%%", 92)
logger.error("Connection failed: %s", "timeout")
logger.exception("Unhandled error") # includes traceback automatically
# File handler with rotation
from logging.handlers import RotatingFileHandler
handler = RotatingFileHandler("app.log", maxBytes=5_000_000, backupCount=3)
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)
Best practices:
- Use `logger.info("msg %s", val)`, not `logger.info(f"msg {val}")` — lazy formatting avoids string construction when the level is filtered out.
- One logger per module via `logging.getLogger(__name__)`.
- Never call `logging.basicConfig()` in library code — only in application entry points.
subprocess
Run external commands from Python. The run() function (Python 3.5+) is the only API you need for most cases.
import subprocess
# Simple command
result = subprocess.run(["ls", "-la", "/tmp"], capture_output=True, text=True)
print(result.stdout)
print(result.returncode) # 0 on success
# Check for errors — raises CalledProcessError on non-zero exit
subprocess.run(["git", "status"], check=True, capture_output=True, text=True)
# Pass input via stdin
result = subprocess.run(
["grep", "error"],
input="line1\nerror found\nline3\n",
capture_output=True, text=True,
)
print(result.stdout) # "error found\n"
# Pipe between commands (prefer this over shell=True)
ps = subprocess.run(["ps", "aux"], capture_output=True, text=True)
grep = subprocess.run(
["grep", "python"],
input=ps.stdout,
capture_output=True, text=True,
)
# Timeout
try:
subprocess.run(["sleep", "30"], timeout=5)
except subprocess.TimeoutExpired:
print("Command timed out")
Gotcha: Avoid shell=True unless you genuinely need shell features (globbing, pipes). It introduces shell injection vulnerabilities and platform-dependent behavior.
Testing
Python has unittest built in, but pytest is the de facto standard. Both work together — pytest can run unittest-style tests.
unittest basics
import unittest
class TestStringMethods(unittest.TestCase):
def setUp(self):
self.data = ["alice", "bob", "charlie"]
def test_upper(self):
self.assertEqual("foo".upper(), "FOO")
def test_contains(self):
self.assertIn("alice", self.data)
def test_raises(self):
with self.assertRaises(ValueError):
int("not_a_number")
pytest — less boilerplate, more power
# test_users.py — just functions, no classes needed
import pytest
def test_addition():
assert 1 + 1 == 2
def test_exception():
with pytest.raises(ZeroDivisionError):
1 / 0
# Fixtures — dependency injection for tests
@pytest.fixture
def db_connection():
conn = create_connection("test.db")
yield conn # test runs here
conn.close() # teardown after test
def test_query(db_connection):
result = db_connection.execute("SELECT 1")
assert result.fetchone()[0] == 1
# Parametrize — run same test with different inputs
@pytest.mark.parametrize("input,expected", [
("hello", 5),
("", 0),
("world!", 6),
])
def test_string_length(input, expected):
assert len(input) == expected
Mocking
from unittest.mock import patch, MagicMock
# Patch an external dependency
@patch("myapp.services.requests.get")
def test_fetch_user(mock_get):
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: {"id": 1, "name": "Alice"},
)
user = fetch_user(1)
assert user["name"] == "Alice"
mock_get.assert_called_once_with("https://api.example.com/users/1")
# Context manager style
def test_with_mock():
with patch("builtins.open", MagicMock()) as mock_file:
mock_file.return_value.__enter__.return_value.read.return_value = "data"
# ... test code that reads a file
Other Essentials
json and csv
import json, csv
# JSON — serialize/deserialize
data = {"name": "Gyan", "scores": [95, 87, 92]}
json_str = json.dumps(data, indent=2)
parsed = json.loads(json_str)
# Read/write JSON files
with open("data.json", "w") as f:
json.dump(data, f)
with open("data.json") as f:
loaded = json.load(f)
# CSV — DictReader is the way to go
with open("report.csv") as f:
for row in csv.DictReader(f):
print(row["name"], row["score"]) # dict per row, headers as keys
# Write CSV
with open("output.csv", "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=["name", "score"])
writer.writeheader()
writer.writerow({"name": "Alice", "score": 95})
os, sys, and pathlib
import os, sys
from pathlib import Path
# pathlib — modern, object-oriented path handling (prefer over os.path)
p = Path("/var/log/app")
config = Path.home() / ".config" / "myapp" / "settings.json"
config.parent.mkdir(parents=True, exist_ok=True) # create dirs recursively
# Read/write
config.write_text(json.dumps({"debug": True}))
data = json.loads(config.read_text())
# Globbing
for py_file in Path("src").rglob("*.py"):
print(py_file)
# os/sys essentials
os.getenv("DATABASE_URL", "sqlite:///default.db") # env vars with fallback
sys.argv # command-line arguments
sys.exit(1) # exit with error code
typing patterns
from typing import Optional, Union, TypeAlias, Literal
from collections.abc import Callable, Iterator
# Type aliases (3.10+ syntax)
JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
UserId: TypeAlias = int
# Callable types
Handler: TypeAlias = Callable[[str, int], bool]
# Literal for constrained values
def set_log_level(level: Literal["DEBUG", "INFO", "WARNING", "ERROR"]) -> None: ...
# Optional = Union[X, None]
def find_user(user_id: int) -> Optional[dict]:
...
# Generic functions (3.12+ syntax)
def first[T](items: list[T]) -> T | None:
return items[0] if items else None
Pythonic Patterns
These built-in patterns separate idiomatic Python from “Java written in Python.”
# enumerate — never use range(len(...))
names = ["Alice", "Bob", "Charlie"]
for i, name in enumerate(names, start=1):
print(f"{i}. {name}")
# zip — iterate multiple sequences in parallel
keys = ["name", "age", "role"]
values = ["Alice", 30, "admin"]
user = dict(zip(keys, values)) # {"name": "Alice", "age": 30, "role": "admin"}
# any / all — short-circuit boolean checks on iterables
nums = [2, 4, 6, 8, 10]
all(x % 2 == 0 for x in nums) # True — all even
any(x > 100 for x in nums) # False — none over 100
# Walrus operator := (3.8+) — assign inside expressions
import re
if m := re.search(r"error (\d+)", "error 404 occurred"):
print(f"Error code: {m.group(1)}")
# Useful in while loops
while chunk := f.read(8192):
process(chunk)
# match/case — structural pattern matching (3.10+)
def handle_command(command: dict):
match command:
case {"action": "create", "name": str(name)}:
create_item(name)
case {"action": "delete", "id": int(item_id)}:
delete_item(item_id)
case {"action": "list", "filter": {"status": status}}:
list_items(status=status)
case _:
raise ValueError(f"Unknown command: {command}")
# Unpacking and starred expressions
first, *middle, last = [1, 2, 3, 4, 5]
# first=1, middle=[2, 3, 4], last=5
# Dictionary merge (3.9+)
defaults = {"timeout": 30, "retries": 3}
overrides = {"timeout": 10, "verbose": True}
config = defaults | overrides # {"timeout": 10, "retries": 3, "verbose": True}
Key Takeaways
- collections: `defaultdict` eliminates key-existence checks, `Counter` handles frequency counting, `deque` gives O(1) operations on both ends.
- itertools: Think in terms of lazy pipelines. `chain`, `islice`, and `groupby` replace most manual loop accumulation patterns. Always sort before `groupby`.
- functools: `lru_cache` is free memoization. `partial` is cleaner than lambdas for freezing arguments. `singledispatch` gives you function overloading.
- datetime: Always use timezone-aware datetimes. Use `zoneinfo` (3.9+) instead of `pytz`. Prefer ISO 8601 format for serialization.
- re: Compile patterns you reuse. Use raw strings. Prefer named groups for readability. `match` anchors at start; `search` scans the whole string.
- logging: One logger per module, lazy formatting, configure only in entry points.
- subprocess: Use `run()` with `capture_output=True` and `text=True`. Avoid `shell=True`.
- Testing: pytest over unittest for new projects. Use fixtures for setup/teardown, `parametrize` for data-driven tests, `mock.patch` for external dependencies.
- pathlib: Prefer `Path` over `os.path` for all file operations. The `/` operator for path joining is cleaner and less error-prone.
- Pythonic patterns: `enumerate` over `range(len(...))`, `zip` for parallel iteration, walrus operator for assign-and-test, `match`/`case` for complex dispatch.
