The recipe book (copy-paste NumPy)

A reference card of the small, sharp building blocks you reach for in every AI project. Each recipe is self-contained apart from the single `import numpy as np` shown once below (and `cosine_matrix`, which builds on `normalize_rows`), vectorized (no Python loops where NumPy can do it), and shown with exact input and output. The similarity/activation/encoding recipes also live, runnable, in code/recipes.py. Copy, paste, ship.

Convention: X is a feature matrix of shape (n_samples, n_features) — axis 0 = samples, axis 1 = features (Chapter 2).

Similarity & distance

import numpy as np

def cosine_sim(a, b, eps=1e-12):
    """Cosine similarity of two vectors: compares direction only, in [-1, 1].

    `eps` is added to the denominator, so a zero-length vector yields 0.0
    instead of a division by zero.
    """
    u = np.asarray(a, float)
    v = np.asarray(b, float)
    scale = np.linalg.norm(u) * np.linalg.norm(v) + eps
    return float(u @ v / scale)

def euclidean(a, b):
    """Straight-line (L2) distance between two points; always >= 0."""
    delta = np.asarray(a, float) - np.asarray(b, float)
    return float(np.sqrt(np.sum(delta ** 2)))

def rbf(a, b, gamma=1.0):
    """RBF kernel exp(-gamma * ||a - b||^2): maps a distance to a similarity.

    Identical points score 1.0; the score decays towards 0 as the points
    move apart (for gamma > 0). Larger `gamma` makes the decay sharper.
    """
    gap = np.asarray(a, float) - np.asarray(b, float)
    sq_dist = np.sum(gap ** 2)
    return float(np.exp(-gamma * sq_dist))

# Tiny inputs whose answers can be checked by hand.
cos = cosine_sim([1, 0, 1], [1, 0, 0])           # 1 / sqrt(2)
dist = euclidean([0, 0], [3, 4])                 # 3-4-5 triangle
kern = rbf([0, 0], [1, 0], gamma=0.5)            # exp(-0.5)
print(round(cos, 3), round(dist, 3), round(kern, 3))

Output:

0.707 5.0 0.607

All-pairs (the matrix versions you actually ship)

def normalize_rows(X, eps=1e-12):
    """Scale every row of X to unit L2 length.

    `eps` is added to each row norm, so an all-zero row stays all-zero
    instead of dividing by zero.
    """
    row_norms = np.linalg.norm(X, axis=1, keepdims=True)   # one norm per row, kept as a column
    return X / (row_norms + eps)

def cosine_matrix(X):
    """All-pairs cosine similarity: entry (i, j) compares row i with row j.

    For X with n rows the result is an (n, n) matrix.
    """
    unit_rows = normalize_rows(X)    # once rows are unit length, a dot product is the cosine
    return unit_rows @ unit_rows.T

def pairwise_sq_dists(X, Y=None):
    """Squared Euclidean distance between every row of X and every row of Y.

    Uses ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y so the whole (n, m) table
    comes from one matrix product. With Y omitted, X is compared to itself.
    """
    if Y is None:
        Y = X
    x_sq = np.sum(X**2, 1)[:, None]              # column of squared row norms
    y_sq = np.sum(Y**2, 1)[None, :]              # row of squared row norms
    cross = X @ Y.T
    # Round-off can push a true zero slightly negative; clamp it back.
    return np.maximum(x_sq + y_sq - 2.0*cross, 0.0)

# Rows 0 and 1 point almost the same way; row 2 is orthogonal to row 0.
X = np.array([
    [1.0, 0.0],
    [0.9, 0.1],
    [0.0, 1.0],
])
sims = cosine_matrix(X)
print(np.round(sims, 3))

Output:

[[1.    0.994 0.   ]
 [0.994 1.    0.11 ]
 [0.    0.11  1.   ]]

Ranking: top-k (the retrieval primitive)

def top_k(scores, k):
    """Return the indices of the k largest scores, best-first.

    `k` is clamped to len(scores). An empty `scores` or a non-positive `k`
    yields an empty index array rather than an error or a wrong-length result.
    """
    scores = np.asarray(scores)
    k = min(k, len(scores))
    if k <= 0:
        # argpartition has no valid pivot on an empty array, and a negative k
        # would slice from the wrong end and return the wrong indices.
        return np.empty(0, dtype=np.intp)
    part = np.argpartition(-scores, k-1)[:k]     # O(n) partial select
    return part[np.argsort(-scores[part])]       # then sort just those k

scores = [0.1, 0.9, 0.3, 0.7, 0.2]
print(top_k(scores, 3))                          # positions of 0.9, 0.7, 0.3

Output:

[1 3 2]

Activations

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)); squashes any real into (0, 1).

    Computed in an overflow-safe form: exp() is only ever taken of a
    non-positive number, so large-magnitude inputs saturate to 0.0 / 1.0
    without an overflow warning. Accepts a scalar or any array-like.
    """
    x = np.asarray(x, float)
    z = np.exp(-np.abs(x))                       # in (0, 1]: cannot overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value, rewritten
    return np.where(x >= 0, 1.0, z) / (1.0 + z)
def softmax(x, axis=None):
    """Numerically stable softmax: exponentiate, then normalise to sum to 1.

    The maximum is subtracted before exp() so large scores cannot overflow;
    the result is unchanged because softmax is shift-invariant.

    axis=None (default) normalises over the whole array, which is what a
    single score vector needs. For a batch of shape (n, classes) pass
    axis=1 (or -1) so that each row sums to 1 on its own.
    """
    x = np.asarray(x, float)
    shifted = x - x.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)
def relu(x):
    """Rectified linear unit: negatives become 0, everything else passes through."""
    arr = np.asarray(x, float)
    return np.maximum(0.0, arr)

sm = softmax([1, 2, 3])                          # compute once, show it and its sum
print("sigmoid:", np.round(sigmoid([-2, 0, 2]), 3))
print("softmax:", np.round(sm, 3), "sums to", round(sm.sum(), 3))

Output:

sigmoid: [0.119 0.5   0.881]
softmax: [0.09  0.245 0.665] sums to 1.0

Encodings & scaling

def one_hot(labels, n=None):
    """Turn integer class labels into one-hot rows.

    Returns a float array of shape (len(labels), n) with a single 1.0 per
    row, in the column named by that row's label. When `n` is omitted it is
    inferred as max(labels) + 1. Empty `labels` gives an empty (0, n) array
    (n counts as 0 if it was not supplied).
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        # max() of an empty array raises, and an empty float array is not a
        # valid index, so there is nothing to fill in.
        return np.zeros((0, n or 0))
    n = n or labels.max() + 1                    # n=None -> infer from the largest label
    rows = np.arange(len(labels))
    out = np.zeros((len(labels), n))
    out[rows, labels] = 1.0                      # one fancy-indexed write, no loop
    return out

def standardize(X, eps=1e-12):
    """Z-score every column: subtract its mean, divide by its std.

    `eps` is added to the std, so a constant column becomes all zeros
    instead of dividing by zero.
    """
    X = np.asarray(X, float)
    mu = X.mean(0)
    sigma = X.std(0)
    return (X - mu) / (sigma + eps)

def min_max(X, eps=1e-12):
    """Rescale every column linearly so its minimum maps to 0 and its maximum to ~1.

    `eps` is added to the column range, so a constant column becomes all
    zeros instead of dividing by zero.
    """
    X = np.asarray(X, float)
    lo = X.min(0)
    hi = X.max(0)
    span = hi - lo + eps
    return (X - lo) / span

encoded = one_hot([0, 2, 1])
scaled = standardize([[1., 10], [2, 20], [3, 30]])   # columns differ only by scale
print(encoded.tolist())
print(np.round(scaled, 3).tolist())

Output:

[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]]
[[-1.225, -1.225], [0.0, 0.0], [1.225, 1.225]]

L2-normalize a single vector

v = np.array([3., 4.])
length = np.linalg.norm(v)                       # compute the norm once, use it twice
print("unit:", v / length, " norm:", length)

Output:

unit: [0.6 0.8]  norm: 5.0

Train / test split (shuffle, then slice)

def train_test_split(X, y, test=0.25, seed=0):
    """Shuffle the row indices once, then slice them into test and train parts.

    The first int(len(X) * test) shuffled indices become the test set and
    the rest the training set. X and y are indexed with the same indices,
    so rows and labels stay paired. The same `seed` gives the same split.

    Returns (X_train, X_test, y_train, y_test).
    """
    order = np.random.default_rng(seed).permutation(len(X))
    n_test = int(len(X) * test)
    test_idx = order[:n_test]
    train_idx = order[n_test:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

X = np.arange(8).reshape(8, 1)                   # 8 samples, 1 feature
y = np.arange(8)                                 # label i belongs to row i
Xtr, Xte, ytr, yte = train_test_split(X, y, test=0.25)
print("train y:", ytr, " test y:", yte)

Output:

train y: [3 6 5 0 1 7]  test y: [2 4]

Mini-batch iterator

def batches(X, y, bs, seed=0):
    """Yield (X_batch, y_batch) pairs of up to `bs` rows, in shuffled order.

    The row order is shuffled once from `seed`; each row appears in exactly
    one batch, and the last batch may be shorter than `bs`.
    """
    order = np.random.default_rng(seed).permutation(len(X))
    for start in range(0, len(X), bs):
        stop = start + bs
        pick = order[start:stop]
        yield X[pick], y[pick]

features = np.arange(10).reshape(10, 1)
targets = np.arange(10)
for i, (xb, yb) in enumerate(batches(features, targets, 4)):
    print(f"batch {i}: y={yb}")

Output:

batch 0: y=[4 6 2 7]
batch 1: y=[3 5 9 0]
batch 2: y=[8 1]

Classification metrics from scratch

def accuracy(probs, y_true):
    """Fraction of rows whose highest-scoring class equals the true label.

    `probs` has shape (n, classes); `y_true` holds the n integer labels.
    """
    predicted = probs.argmax(1)                  # winning class per row
    hits = predicted == y_true
    return float(hits.mean())

probs = np.array([
    [0.7, 0.2, 0.1],                             # argmax -> class 0
    [0.1, 0.8, 0.1],                             # argmax -> class 1
    [0.2, 0.2, 0.6],                             # argmax -> class 2 (true label 0: the one miss)
])
labels = np.array([0, 1, 0])
print("accuracy:", round(accuracy(probs, labels), 3))

def precision_recall(y_pred, y_true):
    """Precision and recall for binary 0/1 predictions.

    precision = TP / (TP + FP): of everything predicted positive, how much was right.
    recall    = TP / (TP + FN): of everything truly positive, how much was found.

    When a denominator is zero (nothing predicted positive, or no actual
    positives) the ratio is undefined; 0.0 is returned for it instead of
    nan plus a divide warning. Inputs may be any array-likes of equal length.

    Returns (precision, recall) as Python floats.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    tp = int(((y_pred == 1) & (y_true == 1)).sum())
    fp = int(((y_pred == 1) & (y_true == 0)).sum())
    fn = int(((y_pred == 0) & (y_true == 1)).sum())
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return precision, recall

predicted = np.array([1, 1, 0, 0, 1])
actual = np.array([1, 0, 0, 1, 1])               # 2 TP, 1 FP, 1 FN
p, r = precision_recall(predicted, actual)
print(f"precision={p:.3f} recall={r:.3f}")

Output:

accuracy: 0.667
precision=0.667 recall=0.667

Smoothing a loss curve: exponential moving average

def ema(values, alpha=0.3):
    """Exponential moving average, for smoothing noisy training curves.

    out[0] = values[0]; afterwards
    out[t] = alpha * values[t] + (1 - alpha) * out[t-1].
    A smaller `alpha` smooths more heavily. Accepts any array-like and
    always returns a float array; empty input gives an empty result.
    """
    values = np.asarray(values, float)
    if values.size == 0:
        return values.copy()                     # nothing to seed the average with
    out = [values[0]]
    # Each step depends on the previous output, so this loop is inherently sequential.
    for v in values[1:]:
        out.append(alpha*v + (1-alpha)*out[-1])
    return np.array(out)

noisy = np.array([1., 2, 3, 10, 3, 2, 1])        # one spike, at index 3
print(np.round(ema(noisy, alpha=0.3), 3))

Output:

[1.    1.3   1.81  4.267 3.887 3.321 2.625]

The spike at 10 is smoothed to 4.267 instead of dominating — exactly what you want when reading a jittery loss curve.

Numerical gradient check (verify any hand-derived gradient)

def grad_check(f, x, eps=1e-6):
    """Estimate the gradient of scalar function `f` at array `x` numerically.

    Uses a central difference per coordinate,
    (f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps), which costs two calls to
    `f` per element of `x`. Returns a float array with the shape of `x`.
    """
    grad = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        step = np.zeros_like(x, dtype=float)
        step.flat[i] = eps                       # nudge coordinate i only
        forward = f(x + step)
        backward = f(x - step)
        grad.flat[i] = (forward - backward) / (2 * eps)
    return grad

def f(v):                                        # analytic grad: [2x+3y, 3x]
    return v[0]**2 + 3*v[0]*v[1]

point = np.array([2., 1.])                       # analytic value here: [2*2+3*1, 3*2] = [7, 6]
print("numeric:", np.round(grad_check(f, point), 5))

Output:

numeric: [7. 6.]

Two more one-liners worth memorizing

clipped = np.clip([-2., 0.5, 3.], 0, 1)                      # clamp / saturate
running = np.cumsum([2., 4, 6, 8]) / np.arange(1, 5)         # sum of first k items / k
print("clip to [0,1]:", clipped)
print("running mean :", running)

Output:

clip to [0,1]: [0.  0.5 1. ]
running mean : [2. 3. 4. 5.]

The takeaway

These are the verbs of practical AI: normalize, similarity, top-k, softmax, one-hot, standardize, split, batch, score, smooth, grad-check. They show up in every project in this series. Keep this page open in a tab. One chapter left — the words an interviewer will assume you already know. 👉