The recipe book (copy-paste NumPy)
A reference card of the small, sharp building blocks you reach for in every AI
project. Each recipe is self-contained, vectorized (no Python loops where NumPy can
do it), and shown with exact input and output. The similarity/activation/encoding
recipes also live, runnable, in code/recipes.py. Copy, paste,
ship.
Convention:
X is a feature matrix of shape (n_samples, n_features) — axis 0 = samples, axis 1 = features (Chapter 2).
Similarity & distance
import numpy as np
def cosine_sim(a, b, eps=1e-12):
    """Return the cosine similarity of vectors ``a`` and ``b`` as a Python float.

    Only direction matters, not magnitude; the result lies in [-1, 1].
    ``eps`` is added to the product of the two norms so that an all-zero
    vector yields 0.0 instead of a division by zero.
    """
    a = np.asarray(a, float)
    b = np.asarray(b, float)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return float((a @ b) / (norm_product + eps))
def euclidean(a, b):
    """Return the straight-line (L2) distance between ``a`` and ``b``.

    Both inputs are converted to float arrays; the result is a Python float
    that is always >= 0.
    """
    diff = np.asarray(a, float) - np.asarray(b, float)
    squared_total = np.sum(diff ** 2)
    return float(np.sqrt(squared_total))
def rbf(a, b, gamma=1.0):
    """Return the RBF (Gaussian) kernel ``exp(-gamma * ||a - b||^2)``.

    Turns a distance into a similarity: identical vectors give 1.0 and the
    value decays towards 0 as the squared distance grows. Returns a Python
    float.
    """
    diff = np.asarray(a, float) - np.asarray(b, float)
    sq_dist = np.sum(diff ** 2)
    return float(np.exp(-gamma * sq_dist))
# Demo: one call per recipe, each rounded to three decimals for display.
print(
    round(cosine_sim(a=[1, 0, 1], b=[1, 0, 0]), 3),
    round(euclidean(a=[0, 0], b=[3, 4]), 3),
    round(rbf(a=[0, 0], b=[1, 0], gamma=0.5), 3),
)
Output:
0.707 5.0 0.607
All-pairs (the matrix versions you actually ship)
def normalize_rows(X, eps=1e-12):
    """Scale every row of ``X`` to (approximately) unit L2 length.

    ``eps`` is added to each row norm so an all-zero row stays all-zero
    instead of triggering a division by zero.
    """
    # keepdims=True keeps the norms as a column so they broadcast per row.
    row_norms = np.linalg.norm(X, axis=1, keepdims=True)
    return X / (row_norms + eps)
def cosine_matrix(X):
    """Return the (n, n) matrix of cosine similarities between all rows of ``X``.

    Rows are first scaled to unit length with ``normalize_rows``; the dot
    product of two unit rows is then their cosine.
    """
    unit_rows = normalize_rows(X)
    return unit_rows @ unit_rows.T
def pairwise_sq_dists(X, Y=None):
    """Return the (n, m) matrix of squared Euclidean distances, without loops.

    Uses the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 x.y``. When ``Y`` is
    omitted, distances are computed between the rows of ``X`` itself.
    """
    if Y is None:
        Y = X
    x_sq = np.sum(X ** 2, axis=1)[:, np.newaxis]  # column of |x|^2, shape (n, 1)
    y_sq = np.sum(Y ** 2, axis=1)[np.newaxis, :]  # row of |y|^2, shape (1, m)
    cross = X @ Y.T
    # Round-off can push a true zero slightly negative; clamp at 0.
    return np.maximum(x_sq + y_sq - 2.0 * cross, 0.0)
# Demo: rows 0 and 1 point almost the same way; row 2 is orthogonal to row 0.
X = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
print(np.round(cosine_matrix(X), decimals=3))
Output:
[[1.    0.994 0.   ]
 [0.994 1.    0.11 ]
 [0.    0.11  1.   ]]
Ranking: top-k (the retrieval primitive)
def top_k(scores, k):
    """Return the indices of the ``k`` largest scores, best first.

    ``k`` is capped at ``len(scores)``. If there is nothing to return
    (``scores`` is empty or ``k <= 0``) an empty index array is returned
    rather than letting ``argpartition`` fail on an out-of-range ``kth``.
    """
    scores = np.asarray(scores)
    k = min(k, len(scores))
    if k <= 0:
        return np.empty(0, dtype=np.intp)
    negated = -scores  # negate once so ascending order means best-first
    part = np.argpartition(negated, k - 1)[:k]  # O(n) partial select
    return part[np.argsort(negated[part])]      # then sort just those k
# Demo: the three best scores are at indices 1 (0.9), 3 (0.7) and 2 (0.3).
print(top_k(scores=[0.1, 0.9, 0.3, 0.7, 0.2], k=3))
Output:
[1 3 2]
Activations
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Evaluated in a numerically stable form: ``exp`` is only ever applied to
    ``-|x|``, which is non-positive, so it cannot overflow for large negative
    inputs (the naive formula emits an overflow RuntimeWarning there).
    """
    x = np.asarray(x, float)
    e = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    return np.where(x >= 0, 1.0, e) / (1.0 + e)
def softmax(x, axis=None):
    """Return the softmax of ``x``, computed in a numerically stable way.

    The maximum is subtracted before exponentiating so ``exp`` never sees a
    large positive argument.

    With ``axis=None`` (the default) the whole input is treated as a single
    distribution. Pass an axis (e.g. ``axis=1`` for a ``(n, classes)`` matrix
    of logits) to normalise independently along that axis.
    """
    x = np.asarray(x, float)
    if axis is None:
        e = np.exp(x - x.max())
        return e / e.sum()
    # keepdims=True keeps the reduced axis so the results broadcast back.
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)
def relu(x):
    """Return ``max(0, x)`` element-wise as a float array."""
    values = np.asarray(x, float)
    return np.maximum(0.0, values)
# Demo: sigmoid is symmetric around 0.5; softmax outputs sum to 1.
print("sigmoid:", np.round(sigmoid([-2, 0, 2]), 3))
print(
    "softmax:",
    np.round(softmax([1, 2, 3]), 3),
    "sums to",
    round(softmax([1, 2, 3]).sum(), 3),
)
Output:
sigmoid: [0.119 0.5   0.881]
softmax: [0.09  0.245 0.665] sums to 1.0
Encodings & scaling
def one_hot(labels, n=None):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: 1-D sequence of integer class ids.
        n: number of columns (classes). When omitted (or 0) it is inferred
            as ``max(labels) + 1``.

    Returns:
        Float array of shape ``(len(labels), n)`` with a single 1.0 per row.
        Empty ``labels`` gives an empty ``(0, n)`` array (``(0, 0)`` when
        ``n`` is not given) instead of failing on ``max()`` of nothing.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        return np.zeros((0, n or 0))
    n = n or labels.max() + 1
    out = np.zeros((len(labels), n))
    # Fancy indexing: row i, column labels[i].
    out[np.arange(len(labels)), labels] = 1.0
    return out
def standardize(X, eps=1e-12):
    """Standardize each column of ``X`` to mean 0 and standard deviation 1.

    ``eps`` is added to each column's standard deviation so a constant
    column maps to zeros instead of dividing by zero.
    """
    X = np.asarray(X, float)
    centered = X - X.mean(axis=0)
    spread = X.std(axis=0) + eps
    return centered / spread
def min_max(X, eps=1e-12):
    """Rescale each column of ``X`` to the range [0, 1].

    ``eps`` is added to each column's range so a constant column maps to
    zeros instead of dividing by zero.
    """
    X = np.asarray(X, float)
    col_min = X.min(axis=0)
    col_max = X.max(axis=0)
    return (X - col_min) / (col_max - col_min + eps)
# Demo: three labels -> three one-hot rows; two columns standardized at once.
print(one_hot(labels=[0, 2, 1]).tolist())
print(np.round(standardize([[1.0, 10], [2, 20], [3, 30]]), 3).tolist())
Output:
[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]]
[[-1.225, -1.225], [0.0, 0.0], [1.225, 1.225]]
L2-normalize a single vector
# Demo: dividing a vector by its own L2 norm gives a unit-length vector.
v = np.array([3.0, 4.0])
print(
    "unit:", v / np.linalg.norm(v),
    " norm:", np.linalg.norm(v),
)
Output:
unit: [0.6 0.8]  norm: 5.0
Train / test split (shuffle, then slice)
def train_test_split(X, y, test=0.25, seed=0):
    """Shuffle the sample indices, then slice them into train and test parts.

    Args:
        X, y: arrays indexed along axis 0 by the same sample order.
        test: fraction of samples for the test part (rounded down).
        seed: seed for the shuffle, so the split is reproducible.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.
    """
    order = np.random.default_rng(seed).permutation(len(X))
    n_test = int(len(X) * test)
    # The first n_test shuffled indices form the test set, the rest train.
    test_idx = order[:n_test]
    train_idx = order[n_test:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]
# Demo: 8 samples with a 25% test fraction -> 6 train, 2 test.
X = np.arange(8).reshape(8, 1)
y = np.arange(8)
Xtr, Xte, ytr, yte = train_test_split(X, y, test=0.25)
print("train y:", ytr, " test y:", yte)
Output:
train y: [3 6 5 0 1 7]  test y: [2 4]
Mini-batch iterator
def batches(X, y, bs, seed=0):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` of up to ``bs`` samples.

    The sample order is shuffled once using ``seed``; the final batch is
    smaller when ``len(X)`` is not a multiple of ``bs``.
    """
    order = np.random.default_rng(seed).permutation(len(X))
    for start in range(0, len(X), bs):
        chunk = order[start:start + bs]
        yield X[chunk], y[chunk]
# Demo: 10 samples in batches of 4 -> sizes 4, 4, 2.
for i, (xb, yb) in enumerate(
    batches(np.arange(10).reshape(10, 1), np.arange(10), 4)
):
    print(f"batch {i}: y={yb}")
Output:
batch 0: y=[4 6 2 7]
batch 1: y=[3 5 9 0]
batch 2: y=[8 1]
Classification metrics from scratch
def accuracy(probs, y_true):
    """Return the fraction of rows whose arg-max class equals ``y_true``.

    ``probs`` has shape ``(n, classes)``; the predicted class of a row is
    the index of its largest entry. Returns a Python float.
    """
    predicted = probs.argmax(axis=1)
    hits = predicted == y_true
    return float(hits.mean())
# Demo: rows 0 and 1 are predicted correctly, row 2 (arg-max 2, truth 0) is not.
probs = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.8, 0.1],
    [0.2, 0.2, 0.6],
])
print("accuracy:", round(accuracy(probs, np.array([0, 1, 0])), 3))
def precision_recall(y_pred, y_true):
    """Return ``(precision, recall)`` for binary 0/1 predictions.

    precision = TP / (TP + FP), recall = TP / (TP + FN).

    Inputs are converted with ``np.asarray`` so plain lists work. When a
    denominator is zero (nothing predicted positive, or no actual positives)
    the corresponding metric is reported as 0.0 instead of NaN.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))
    precision = float(tp / (tp + fp)) if tp + fp else 0.0
    recall = float(tp / (tp + fn)) if tp + fn else 0.0
    return precision, recall
# Demo: 2 true positives, 1 false positive, 1 false negative.
p, r = precision_recall(
    np.array([1, 1, 0, 0, 1]),
    np.array([1, 0, 0, 1, 1]),
)
print(f"precision={p:.3f} recall={r:.3f}")
Output:
accuracy: 0.667
precision=0.667 recall=0.667
Smoothing a loss curve: exponential moving average
def ema(values, alpha=0.3):
    """Return the exponential moving average of ``values``.

    ``out[0] = values[0]`` and
    ``out[t] = alpha * values[t] + (1 - alpha) * out[t - 1]``.
    A smaller ``alpha`` smooths more. Handy for noisy training curves.

    An empty input returns an empty float array instead of raising on
    ``values[0]``.
    """
    if len(values) == 0:
        return np.array([], dtype=float)
    out = [values[0]]
    # Each step depends on the previous output, so this loop stays sequential.
    for v in values[1:]:
        out.append(alpha * v + (1 - alpha) * out[-1])
    return np.array(out)
# Demo: a single spike of 10 in an otherwise small series.
print(
    np.round(
        ema(np.array([1.0, 2, 3, 10, 3, 2, 1]), alpha=0.3),
        3,
    )
)
Output:
[1.    1.3   1.81  4.267 3.887 3.321 2.625]
The spike at 10 is smoothed to 4.267 instead of dominating — exactly what you
want when reading a jittery loss curve.
Numerical gradient check (verify any hand-derived gradient)
def grad_check(f, x, eps=1e-6):
    """Estimate the gradient of scalar function ``f`` at ``x`` numerically.

    Uses a central difference per coordinate:
    ``(f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps)``. Returns a float array
    with the same shape as ``x``; compare it against a hand-derived gradient.
    """
    grad = np.zeros_like(x, dtype=float)
    step = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        step.flat[i] = eps          # perturb coordinate i only
        grad.flat[i] = (f(x + step) - f(x - step)) / (2 * eps)
        step.flat[i] = 0.0          # reset before the next coordinate
    return grad
# Demo: f(x, y) = x^2 + 3xy has analytic gradient [2x + 3y, 3x] = [7, 6] at (2, 1).
f = lambda v: v[0] ** 2 + 3 * v[0] * v[1]
print("numeric:", np.round(grad_check(f, np.array([2.0, 1.0])), 5))
Output:
numeric: [7. 6.]
Two more one-liners worth memorizing
# Clamp / saturate: values below 0 become 0, values above 1 become 1.
print("clip to [0,1]:", np.clip([-2.0, 0.5, 3.0], 0, 1))
# Running mean: cumulative sum divided by the count of items seen so far.
print("running mean :", np.cumsum([2.0, 4, 6, 8]) / np.arange(1, 5))
Output:
clip to [0,1]: [0.  0.5 1. ]
running mean : [2. 3. 4. 5.]
The takeaway
These are the verbs of practical AI: normalize, similarity, top-k, softmax, one-hot, standardize, split, batch, score, smooth, grad-check. They show up in every project in this series. Keep this page open in a tab. One chapter left — the words an interviewer will assume you already know. 👉