The full implementation
Here is the consolidated module — synthetic data, metrics, and every recommender
(Random, Popularity, Trending, ContentBased, ItemItemCF, ImplicitALS, BPR) — in
one file, code/recsys.py, reproduced in full.
"""
Recommendation algorithms, from scratch in NumPy.
Implements, from simplest to most powerful:
metrics: recall@k, precision@k, ndcg@k, map@k
Random / Popularity / Trending (time-decayed popularity) -- baselines
ContentBased (decayed user profile + cosine kNN) -- the "article" method
ItemItemCF (neighborhood collaborative filtering)
ImplicitALS (matrix factorization for implicit feedback)
BPR (Bayesian Personalized Ranking — pairwise learning-to-rank)
Plus make_synthetic() to generate an evaluable interaction dataset, and
train_test_split_last() for leave-last-out evaluation.
Only NumPy is used.
"""
from __future__ import annotations
import numpy as np
# ===========================================================================
# Data: a synthetic but realistic interaction log
# ===========================================================================
def make_synthetic(n_users=300, n_items=150, n_genres=8, n_inter=18000,
pop_weight=0.8, taste_pow=2.5, seed=0):
"""
Returns:
interactions : list of (user, item, time) tuples, time increasing
item_feats : (n_items, n_genres) content vectors (the "embeddings")
n_users, n_items
Each item belongs to a genre; each user has a peaky genre taste; item
popularity is skewed. An interaction's probability mixes the user's taste with
global popularity (pop_weight controls the blend), so popularity is a real
baseline and personalized methods can still win by matching taste.
"""
rng = np.random.default_rng(seed)
item_genre = rng.integers(0, n_genres, size=n_items)
item_feats = np.full((n_items, n_genres), 0.05)
item_feats[np.arange(n_items), item_genre] = 1.0 # mostly one genre
item_feats += rng.random((n_items, n_genres)) * 0.05
pop = rng.power(0.5, size=n_items) # skewed popularity
pop = pop / pop.sum()
taste = rng.random((n_users, n_genres)) ** taste_pow # peaky tastes
taste /= taste.sum(1, keepdims=True)
interactions = []
for t in range(n_inter):
u = int(rng.integers(n_users))
score = taste[u, item_genre] * (pop_weight * pop + (1 - pop_weight) / n_items)
score /= score.sum()
i = int(rng.choice(n_items, p=score))
interactions.append((u, i, t))
return interactions, item_feats, n_users, n_items
def train_test_split_last(interactions, n_users):
"""Leave-last-out: each user's chronologically last interaction -> test."""
by_user = {}
for u, i, t in interactions:
by_user.setdefault(u, []).append((t, i))
train, test = [], {}
for u, lst in by_user.items():
lst.sort()
if len(lst) >= 2:
*rest, last = lst
test[u] = last[1]
for t, i in rest:
train.append((u, i, t))
else:
for t, i in lst:
train.append((u, i, t))
return train, test
# ===========================================================================
# Metrics
# ===========================================================================
def recall_at_k(recs, truth, k):
"""Fraction of users whose held-out item appears in their top-k (hit rate)."""
hits = sum(1 for u, item in truth.items() if item in recs.get(u, [])[:k])
return hits / len(truth)
def precision_at_k(recs, truth, k):
"""With one held-out item per user, this is recall/k — reported for completeness."""
return recall_at_k(recs, truth, k) / k
def ndcg_at_k(recs, truth, k):
"""Normalized Discounted Cumulative Gain: rewards ranking the hit higher."""
total = 0.0
for u, item in truth.items():
lst = recs.get(u, [])[:k]
if item in lst:
rank = lst.index(item) # 0-based
total += 1.0 / np.log2(rank + 2) # ideal DCG is 1 here
return total / len(truth)
def map_at_k(recs, truth, k):
"""Mean Average Precision (single relevant item => 1/rank)."""
total = 0.0
for u, item in truth.items():
lst = recs.get(u, [])[:k]
if item in lst:
total += 1.0 / (lst.index(item) + 1)
return total / len(truth)
# ===========================================================================
# Helpers
# ===========================================================================
def build_matrix(train, n_users, n_items):
"""Binary user-item interaction matrix R."""
R = np.zeros((n_users, n_items))
for u, i, t in train:
R[u, i] = 1.0
return R
def _topk_excluding(scores, seen, k):
"""Top-k item ids by score, skipping items the user already has."""
order = np.argsort(-scores)
out = []
for i in order:
if i not in seen:
out.append(int(i))
if len(out) == k:
break
return out
def user_seen(train, n_users):
seen = {u: set() for u in range(n_users)}
for u, i, t in train:
seen[u].add(i)
return seen
# ===========================================================================
# Baselines
# ===========================================================================
class Random:
"""Recommend random unseen items — the sanity-check floor."""
def __init__(self, seed=0):
self.seed = seed
def fit(self, train, n_users, n_items):
self.n_items = n_items
self.rng = np.random.default_rng(self.seed)
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
return _topk_excluding(self.rng.random(self.n_items), self.seen.get(u, set()), k)
class Popularity:
"""Recommend the globally most-interacted items to everyone."""
def fit(self, train, n_users, n_items):
self.counts = np.zeros(n_items)
for u, i, t in train:
self.counts[i] += 1
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
return _topk_excluding(self.counts, self.seen.get(u, set()), k)
class Trending:
"""Time-decayed popularity: recent interactions count more (a 'trending' list)."""
def __init__(self, half_life=2000.0):
self.half_life = half_life
def fit(self, train, n_users, n_items):
lam = np.log(2) / self.half_life
now = max((t for u, i, t in train), default=0)
self.scores = np.zeros(n_items)
for u, i, t in train:
self.scores[i] += np.exp(-lam * (now - t))
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
return _topk_excluding(self.scores, self.seen.get(u, set()), k)
# ===========================================================================
# Content-based: decayed user profile + cosine kNN (the "article" method)
# ===========================================================================
class ContentBased:
"""
Build each user's profile as a TIME-DECAYED average of the content vectors of
items they interacted with, then score all items by cosine similarity to it.
This is the 'average article embedding -> kNN over the catalog' approach.
"""
def __init__(self, half_life=2000.0):
self.half_life = half_life
def fit(self, train, item_feats, n_users, n_items):
self.item_feats = item_feats
norm = np.linalg.norm(item_feats, axis=1, keepdims=True)
self.feat_unit = item_feats / np.maximum(norm, 1e-9) # for cosine
self.hist = {u: [] for u in range(n_users)}
for u, i, t in train:
self.hist[u].append((t, i))
self.now = max((t for u, i, t in train), default=0)
self.seen = user_seen(train, n_users)
return self
def profile(self, u):
h = self.hist.get(u, [])
if not h:
return None
lam = np.log(2) / self.half_life
items = [i for t, i in h]
w = np.array([np.exp(-lam * (self.now - t)) for t, i in h])
return (w[:, None] * self.item_feats[items]).sum(0) / w.sum()
def recommend(self, u, k=10):
p = self.profile(u)
if p is None:
return []
pu = p / max(np.linalg.norm(p), 1e-9)
scores = self.feat_unit @ pu # cosine similarity to every item
return _topk_excluding(scores, self.seen.get(u, set()), k)
# ===========================================================================
# Neighborhood collaborative filtering: item-item
# ===========================================================================
class ItemItemCF:
"""
'Users who interacted with X also interacted with Y.' Similarity between items
is the cosine of their interaction columns; a user's score for an item is the
summed similarity to items they already have.
"""
def fit(self, train, n_users, n_items):
R = build_matrix(train, n_users, n_items)
self.R = R
norm = np.linalg.norm(R, axis=0, keepdims=True)
Rn = R / np.maximum(norm, 1e-9)
self.S = Rn.T @ Rn # item-item cosine similarity
np.fill_diagonal(self.S, 0.0)
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
scores = self.R[u] @ self.S # spread from items the user has
return _topk_excluding(scores, self.seen.get(u, set()), k)
# ===========================================================================
# Matrix factorization for implicit feedback (Hu, Koren, Volinsky 2008)
# ===========================================================================
class ImplicitALS:
"""
Factor R ~ X Y^T with confidence-weighted Alternating Least Squares.
Confidence c_ui = 1 + alpha * R_ui; preference p_ui = 1 if interacted.
"""
def __init__(self, factors=32, reg=0.1, alpha=40.0, iters=15, seed=0):
self.f, self.reg, self.alpha, self.iters, self.seed = factors, reg, alpha, iters, seed
def fit(self, train, n_users, n_items):
R = build_matrix(train, n_users, n_items)
rng = np.random.default_rng(self.seed)
X = rng.normal(scale=0.01, size=(n_users, self.f))
Y = rng.normal(scale=0.01, size=(n_items, self.f))
C = 1.0 + self.alpha * R # confidence
P = (R > 0).astype(float) # preference
I = self.reg * np.eye(self.f)
for _ in range(self.iters):
YtY = Y.T @ Y
for u in range(n_users):
cu = C[u]
Yw = Y * (cu - 1.0)[:, None]
A = YtY + Y.T @ Yw + I
b = (Y * (cu * P[u])[:, None]).sum(0)
X[u] = np.linalg.solve(A, b)
XtX = X.T @ X
for i in range(n_items):
ci = C[:, i]
Xw = X * (ci - 1.0)[:, None]
A = XtX + X.T @ Xw + I
b = (X * (ci * P[:, i])[:, None]).sum(0)
Y[i] = np.linalg.solve(A, b)
self.X, self.Y = X, Y
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
scores = self.Y @ self.X[u]
return _topk_excluding(scores, self.seen.get(u, set()), k)
# ===========================================================================
# Bayesian Personalized Ranking (pairwise learning-to-rank)
# ===========================================================================
class BPR:
"""
Optimize the RANKING directly: for each (user, positive item), sample a
negative item and push the positive's score above the negative's.
"""
def __init__(self, factors=32, lr=0.05, reg=0.01, epochs=30, seed=0):
self.f, self.lr, self.reg, self.epochs, self.seed = factors, lr, reg, epochs, seed
def fit(self, train, n_users, n_items):
rng = np.random.default_rng(self.seed)
X = rng.normal(scale=0.1, size=(n_users, self.f))
Y = rng.normal(scale=0.1, size=(n_items, self.f))
ui = {u: set() for u in range(n_users)}
for u, i, t in train:
ui[u].add(i)
pos = [(u, i) for u in ui for i in ui[u]]
for _ in range(self.epochs):
rng.shuffle(pos)
for u, i in pos:
j = int(rng.integers(n_items))
while j in ui[u]:
j = int(rng.integers(n_items))
diff = X[u] @ (Y[i] - Y[j])
sig = 1.0 / (1.0 + np.exp(diff)) # gradient weight
xu = X[u].copy()
X[u] += self.lr * (sig * (Y[i] - Y[j]) - self.reg * X[u])
Y[i] += self.lr * (sig * xu - self.reg * Y[i])
Y[j] += self.lr * (-sig * xu - self.reg * Y[j])
self.X, self.Y = X, Y
self.seen = user_seen(train, n_users)
return self
def recommend(self, u, k=10):
scores = self.Y @ self.X[u]
return _topk_excluding(scores, self.seen.get(u, set()), k)
__all__ = [
"make_synthetic", "train_test_split_last",
"recall_at_k", "precision_at_k", "ndcg_at_k", "map_at_k",
"build_matrix", "user_seen",
"Random", "Popularity", "Trending", "ContentBased", "ItemItemCF",
"ImplicitALS", "BPR",
]
The
{{#include}}directive splices in the realcode/recsys.pyat build time, so the documentation can never drift from the actual implementation.
API at a glance
Every model follows the same tiny interface — .fit(...) then .recommend(u, k) —
so they're interchangeable in the leaderboard:
| Call | Purpose |
|---|---|
make_synthetic(...) | generate an evaluable interaction log + item features |
train_test_split_last(...) | leave-last-out split (test = each user's last item) |
recall_at_k / ndcg_at_k / map_at_k | offline metrics |
Popularity().fit(train, nu, ni) | popularity baseline |
Trending(half_life=…).fit(…) | time-decayed popularity |
ContentBased(half_life=…).fit(train, feats, nu, ni) | embeddings + decayed profile + kNN |
ItemItemCF().fit(…) | neighborhood collaborative filtering |
ImplicitALS(factors=…).fit(…) | matrix factorization (implicit ALS) |
BPR(factors=…).fit(…) | pairwise learning-to-rank |
Typical usage
from recsys import make_synthetic, train_test_split_last, ImplicitALS, recall_at_k
inter, feats, nu, ni = make_synthetic(seed=0)
train, test = train_test_split_last(inter, nu)
model = ImplicitALS(factors=32, iters=15).fit(train, nu, ni)
recs = {u: model.recommend(u, k=10) for u in test}
print("recall@10:", recall_at_k(recs, test, 10))
What this is and isn't
These implementations are written for clarity — they show exactly how each algorithm works and produce correct, measurable rankings on small data. Production systems differ in scale, not in concept:
- Sparse math (we used dense matrices; real systems use sparse formats and only store interactions).
- Optimized libraries —
implicit(ALS/BPR in Cython),LightFM(hybrid WARP/BPR), TensorFlow/PyTorch for two-tower models, gradient-boosted trees for ranking. - ANN serving —
.recommendhere scans all items; at scale that dot-product scan becomes an HNSW/IVF-PQ query. - Feature stores, batch/stream pipelines, A/B testing around the model (serving).
Same algorithms; the rest is engineering. Let's measure them head-to-head. 👉