The full implementation

Here is the consolidated module — synthetic data, metrics, and every recommender (Random, Popularity, Trending, ContentBased, ItemItemCF, ImplicitALS, BPR) — in one file, code/recsys.py, reproduced in full.

"""
Recommendation algorithms, from scratch in NumPy.

Implements, from simplest to most powerful:
    metrics: recall@k, precision@k, ndcg@k, map@k
    Random / Popularity / Trending (time-decayed popularity)   -- baselines
    ContentBased (decayed user profile + cosine kNN)           -- the "article" method
    ItemItemCF (neighborhood collaborative filtering)
    ImplicitALS (matrix factorization for implicit feedback)
    BPR (Bayesian Personalized Ranking — pairwise learning-to-rank)

Plus make_synthetic() to generate an evaluable interaction dataset, and
train_test_split_last() for leave-last-out evaluation.

Only NumPy is used.
"""

from __future__ import annotations

import numpy as np


# ===========================================================================
# Data: a synthetic but realistic interaction log
# ===========================================================================
def make_synthetic(n_users=300, n_items=150, n_genres=8, n_inter=18000,
                   pop_weight=0.8, taste_pow=2.5, seed=0):
    """
    Returns:
        interactions : list of (user, item, time) tuples, time increasing
        item_feats   : (n_items, n_genres) content vectors (the "embeddings")
        n_users, n_items
    Each item belongs to a genre; each user has a peaky genre taste; item
    popularity is skewed. An interaction's probability mixes the user's taste with
    global popularity (pop_weight controls the blend), so popularity is a real
    baseline and personalized methods can still win by matching taste.
    """
    rng = np.random.default_rng(seed)
    item_genre = rng.integers(0, n_genres, size=n_items)
    item_feats = np.full((n_items, n_genres), 0.05)
    item_feats[np.arange(n_items), item_genre] = 1.0          # mostly one genre
    item_feats += rng.random((n_items, n_genres)) * 0.05

    pop = rng.power(0.5, size=n_items)                        # skewed popularity
    pop = pop / pop.sum()

    taste = rng.random((n_users, n_genres)) ** taste_pow      # peaky tastes
    taste /= taste.sum(1, keepdims=True)

    interactions = []
    for t in range(n_inter):
        u = int(rng.integers(n_users))
        score = taste[u, item_genre] * (pop_weight * pop + (1 - pop_weight) / n_items)
        score /= score.sum()
        i = int(rng.choice(n_items, p=score))
        interactions.append((u, i, t))
    return interactions, item_feats, n_users, n_items


def train_test_split_last(interactions, n_users):
    """Leave-last-out: each user's chronologically last interaction -> test."""
    by_user = {}
    for u, i, t in interactions:
        by_user.setdefault(u, []).append((t, i))
    train, test = [], {}
    for u, lst in by_user.items():
        lst.sort()
        if len(lst) >= 2:
            *rest, last = lst
            test[u] = last[1]
            for t, i in rest:
                train.append((u, i, t))
        else:
            for t, i in lst:
                train.append((u, i, t))
    return train, test


# ===========================================================================
# Metrics
# ===========================================================================
def recall_at_k(recs, truth, k):
    """Fraction of users whose held-out item appears in their top-k (hit rate)."""
    hits = sum(1 for u, item in truth.items() if item in recs.get(u, [])[:k])
    return hits / len(truth)


def precision_at_k(recs, truth, k):
    """With one held-out item per user, this is recall/k — reported for completeness."""
    return recall_at_k(recs, truth, k) / k


def ndcg_at_k(recs, truth, k):
    """Normalized Discounted Cumulative Gain: rewards ranking the hit higher."""
    total = 0.0
    for u, item in truth.items():
        lst = recs.get(u, [])[:k]
        if item in lst:
            rank = lst.index(item)              # 0-based
            total += 1.0 / np.log2(rank + 2)    # ideal DCG is 1 here
    return total / len(truth)


def map_at_k(recs, truth, k):
    """Mean Average Precision (single relevant item => 1/rank)."""
    total = 0.0
    for u, item in truth.items():
        lst = recs.get(u, [])[:k]
        if item in lst:
            total += 1.0 / (lst.index(item) + 1)
    return total / len(truth)


# ===========================================================================
# Helpers
# ===========================================================================
def build_matrix(train, n_users, n_items):
    """Binary user-item interaction matrix R."""
    R = np.zeros((n_users, n_items))
    for u, i, t in train:
        R[u, i] = 1.0
    return R


def _topk_excluding(scores, seen, k):
    """Top-k item ids by score, skipping items the user already has."""
    order = np.argsort(-scores)
    out = []
    for i in order:
        if i not in seen:
            out.append(int(i))
            if len(out) == k:
                break
    return out


def user_seen(train, n_users):
    seen = {u: set() for u in range(n_users)}
    for u, i, t in train:
        seen[u].add(i)
    return seen


# ===========================================================================
# Baselines
# ===========================================================================
class Random:
    """Recommend random unseen items — the sanity-check floor."""
    def __init__(self, seed=0):
        self.seed = seed

    def fit(self, train, n_users, n_items):
        self.n_items = n_items
        self.rng = np.random.default_rng(self.seed)
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        return _topk_excluding(self.rng.random(self.n_items), self.seen.get(u, set()), k)


class Popularity:
    """Recommend the globally most-interacted items to everyone."""
    def fit(self, train, n_users, n_items):
        self.counts = np.zeros(n_items)
        for u, i, t in train:
            self.counts[i] += 1
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        return _topk_excluding(self.counts, self.seen.get(u, set()), k)


class Trending:
    """Time-decayed popularity: recent interactions count more (a 'trending' list)."""
    def __init__(self, half_life=2000.0):
        self.half_life = half_life

    def fit(self, train, n_users, n_items):
        lam = np.log(2) / self.half_life
        now = max((t for u, i, t in train), default=0)
        self.scores = np.zeros(n_items)
        for u, i, t in train:
            self.scores[i] += np.exp(-lam * (now - t))
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        return _topk_excluding(self.scores, self.seen.get(u, set()), k)


# ===========================================================================
# Content-based: decayed user profile + cosine kNN (the "article" method)
# ===========================================================================
class ContentBased:
    """
    Build each user's profile as a TIME-DECAYED average of the content vectors of
    items they interacted with, then score all items by cosine similarity to it.
    This is the 'average article embedding -> kNN over the catalog' approach.
    """
    def __init__(self, half_life=2000.0):
        self.half_life = half_life

    def fit(self, train, item_feats, n_users, n_items):
        self.item_feats = item_feats
        norm = np.linalg.norm(item_feats, axis=1, keepdims=True)
        self.feat_unit = item_feats / np.maximum(norm, 1e-9)   # for cosine
        self.hist = {u: [] for u in range(n_users)}
        for u, i, t in train:
            self.hist[u].append((t, i))
        self.now = max((t for u, i, t in train), default=0)
        self.seen = user_seen(train, n_users)
        return self

    def profile(self, u):
        h = self.hist.get(u, [])
        if not h:
            return None
        lam = np.log(2) / self.half_life
        items = [i for t, i in h]
        w = np.array([np.exp(-lam * (self.now - t)) for t, i in h])
        return (w[:, None] * self.item_feats[items]).sum(0) / w.sum()

    def recommend(self, u, k=10):
        p = self.profile(u)
        if p is None:
            return []
        pu = p / max(np.linalg.norm(p), 1e-9)
        scores = self.feat_unit @ pu                  # cosine similarity to every item
        return _topk_excluding(scores, self.seen.get(u, set()), k)


# ===========================================================================
# Neighborhood collaborative filtering: item-item
# ===========================================================================
class ItemItemCF:
    """
    'Users who interacted with X also interacted with Y.' Similarity between items
    is the cosine of their interaction columns; a user's score for an item is the
    summed similarity to items they already have.
    """
    def fit(self, train, n_users, n_items):
        R = build_matrix(train, n_users, n_items)
        self.R = R
        norm = np.linalg.norm(R, axis=0, keepdims=True)
        Rn = R / np.maximum(norm, 1e-9)
        self.S = Rn.T @ Rn                            # item-item cosine similarity
        np.fill_diagonal(self.S, 0.0)
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        scores = self.R[u] @ self.S                   # spread from items the user has
        return _topk_excluding(scores, self.seen.get(u, set()), k)


# ===========================================================================
# Matrix factorization for implicit feedback (Hu, Koren, Volinsky 2008)
# ===========================================================================
class ImplicitALS:
    """
    Factor R ~ X Y^T with confidence-weighted Alternating Least Squares.
    Confidence c_ui = 1 + alpha * R_ui; preference p_ui = 1 if interacted.
    """
    def __init__(self, factors=32, reg=0.1, alpha=40.0, iters=15, seed=0):
        self.f, self.reg, self.alpha, self.iters, self.seed = factors, reg, alpha, iters, seed

    def fit(self, train, n_users, n_items):
        R = build_matrix(train, n_users, n_items)
        rng = np.random.default_rng(self.seed)
        X = rng.normal(scale=0.01, size=(n_users, self.f))
        Y = rng.normal(scale=0.01, size=(n_items, self.f))
        C = 1.0 + self.alpha * R                       # confidence
        P = (R > 0).astype(float)                      # preference
        I = self.reg * np.eye(self.f)

        for _ in range(self.iters):
            YtY = Y.T @ Y
            for u in range(n_users):
                cu = C[u]
                Yw = Y * (cu - 1.0)[:, None]
                A = YtY + Y.T @ Yw + I
                b = (Y * (cu * P[u])[:, None]).sum(0)
                X[u] = np.linalg.solve(A, b)
            XtX = X.T @ X
            for i in range(n_items):
                ci = C[:, i]
                Xw = X * (ci - 1.0)[:, None]
                A = XtX + X.T @ Xw + I
                b = (X * (ci * P[:, i])[:, None]).sum(0)
                Y[i] = np.linalg.solve(A, b)
        self.X, self.Y = X, Y
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        scores = self.Y @ self.X[u]
        return _topk_excluding(scores, self.seen.get(u, set()), k)


# ===========================================================================
# Bayesian Personalized Ranking (pairwise learning-to-rank)
# ===========================================================================
class BPR:
    """
    Optimize the RANKING directly: for each (user, positive item), sample a
    negative item and push the positive's score above the negative's.
    """
    def __init__(self, factors=32, lr=0.05, reg=0.01, epochs=30, seed=0):
        self.f, self.lr, self.reg, self.epochs, self.seed = factors, lr, reg, epochs, seed

    def fit(self, train, n_users, n_items):
        rng = np.random.default_rng(self.seed)
        X = rng.normal(scale=0.1, size=(n_users, self.f))
        Y = rng.normal(scale=0.1, size=(n_items, self.f))
        ui = {u: set() for u in range(n_users)}
        for u, i, t in train:
            ui[u].add(i)
        pos = [(u, i) for u in ui for i in ui[u]]

        for _ in range(self.epochs):
            rng.shuffle(pos)
            for u, i in pos:
                j = int(rng.integers(n_items))
                while j in ui[u]:
                    j = int(rng.integers(n_items))
                diff = X[u] @ (Y[i] - Y[j])
                sig = 1.0 / (1.0 + np.exp(diff))        # gradient weight
                xu = X[u].copy()
                X[u] += self.lr * (sig * (Y[i] - Y[j]) - self.reg * X[u])
                Y[i] += self.lr * (sig * xu - self.reg * Y[i])
                Y[j] += self.lr * (-sig * xu - self.reg * Y[j])
        self.X, self.Y = X, Y
        self.seen = user_seen(train, n_users)
        return self

    def recommend(self, u, k=10):
        scores = self.Y @ self.X[u]
        return _topk_excluding(scores, self.seen.get(u, set()), k)


__all__ = [
    "make_synthetic", "train_test_split_last",
    "recall_at_k", "precision_at_k", "ndcg_at_k", "map_at_k",
    "build_matrix", "user_seen",
    "Random", "Popularity", "Trending", "ContentBased", "ItemItemCF",
    "ImplicitALS", "BPR",
]

The {{#include}} directive splices in the real code/recsys.py at build time, so the documentation can never drift from the actual implementation.

API at a glance

Every model follows the same tiny interface — .fit(...) then .recommend(u, k) — so they're interchangeable in the leaderboard:

CallPurpose
make_synthetic(...)generate an evaluable interaction log + item features
train_test_split_last(...)leave-last-out split (test = each user's last item)
recall_at_k / ndcg_at_k / map_at_koffline metrics
Popularity().fit(train, nu, ni)popularity baseline
Trending(half_life=…).fit(…)time-decayed popularity
ContentBased(half_life=…).fit(train, feats, nu, ni)embeddings + decayed profile + kNN
ItemItemCF().fit(…)neighborhood collaborative filtering
ImplicitALS(factors=…).fit(…)matrix factorization (implicit ALS)
BPR(factors=…).fit(…)pairwise learning-to-rank

Typical usage

from recsys import make_synthetic, train_test_split_last, ImplicitALS, recall_at_k

inter, feats, nu, ni = make_synthetic(seed=0)
train, test = train_test_split_last(inter, nu)

model = ImplicitALS(factors=32, iters=15).fit(train, nu, ni)
recs = {u: model.recommend(u, k=10) for u in test}
print("recall@10:", recall_at_k(recs, test, 10))

What this is and isn't

These implementations are written for clarity — they show exactly how each algorithm works and produce correct, measurable rankings on small data. Production systems differ in scale, not in concept:

  • Sparse math (we used dense matrices; real systems use sparse formats and only store interactions).
  • Optimized librariesimplicit (ALS/BPR in Cython), LightFM (hybrid WARP/BPR), TensorFlow/PyTorch for two-tower models, gradient-boosted trees for ranking.
  • ANN serving.recommend here scans all items; at scale that dot-product scan becomes an HNSW/IVF-PQ query.
  • Feature stores, batch/stream pipelines, A/B testing around the model (serving).

Same algorithms; the rest is engineering. Let's measure them head-to-head. 👉