One-file CLI: the trade-off estimator

Everything in this book, distilled into one tool that answers the practical question every engineer asks before compressing vectors:

If I compress my embeddings with these settings, how much memory do I save, and what recall do I keep?

pqtool.py builds an IVFPQ index over your vectors (or synthetic data), measures recall against exact search, and reports the memory savings — so you can pick parameters with evidence instead of guesswork.

The steps

Load vectors from an .npy file (shape [n, d]), or generate synthetic clustered data with --synthetic.
Sample some vectors, perturb them with a little Gaussian noise to form queries, and compute their exact neighbors in the full dataset (ground truth).
Build an IVFPQ index with your chosen nlist, m, nprobe.
Measure recall@k — both raw and with exact re-ranking.
Report recall, memory (float32 vs PQ codes), and the fraction of cells scanned.

Install & run

pip install numpy        # the only third-party dependency (ivfpq.py must sit next to pqtool.py)

# try it on synthetic data (no files required)
python pqtool.py --synthetic --m 16 --nlist 128 --nprobe 8

# estimate for your own embeddings
python pqtool.py --npy embeddings.npy --m 16 --nlist 256 --nprobe 16 --rerank 100

It works — real output

$ python pqtool.py --synthetic --m 16 --nlist 128 --nprobe 8
synthetic: 10000 vectors x 64 dims
building IVFPQ (nlist=128, m=16) ...

--- results ---
  recall@10 (raw)      : 0.711
  recall@10 (+rerank 100): 1.000
  memory (float32 vectors) : 2.6 MB
  memory (PQ codes)        : 0.16 MB  (16x smaller)
  per query, IVF scans ~6.2% of cells

The read-out is the whole decision in one place: 16× less memory, recall 1.0 after re-ranking, scanning only 6% of cells. Change --m to trade memory for raw recall, --nprobe to trade speed for recall, and --rerank to trade a little query time for accuracy.

The complete script

#!/usr/bin/env python3
"""
pqtool.py — estimate the memory / recall / speed trade-off of compressing your
vectors with IVF + Product Quantization, using the from-scratch index in ivfpq.py.

Answers the practical question: "If I compress my embeddings with these settings,
how much memory do I save and what recall do I keep?"

Usage:
    # try it on synthetic clustered data (no files needed)
    python pqtool.py --synthetic

    # estimate for your own embeddings (an .npy array of shape [n, d])
    python pqtool.py --npy embeddings.npy --m 16 --nlist 256 --nprobe 16

Requirements: numpy (and the local ivfpq.py).
"""

from __future__ import annotations

import argparse

import numpy as np

from ivfpq import flat_knn, IVFPQ


def make_synthetic(n: int = 10000, d: int = 64, seed: int = 0) -> np.ndarray:
    """Generate ``n`` clustered ``d``-dimensional vectors for a file-free demo.

    Cluster centers are drawn from a normal distribution with standard
    deviation 5; there is one center per 250 points, with a minimum of two.
    Row ``i`` is center ``i % n_centers`` plus unit-variance Gaussian noise.

    Args:
        n: number of vectors to generate (0 yields an empty ``(0, d)`` array).
        d: dimensionality of each vector.
        seed: seed for the NumPy random generator, so runs are reproducible.

    Returns:
        A float64 array of shape ``(n, d)``.
    """
    rng = np.random.default_rng(seed)
    centers = rng.normal(scale=5, size=(max(n // 250, 2), d))
    # Round-robin assignment of points to centers.
    assignment = np.arange(n) % len(centers)
    # One vectorized draw instead of n single-row draws: it consumes the
    # generator in the same order, and unlike np.vstack over an empty list
    # it also works when n == 0.
    return centers[assignment] + rng.normal(size=(n, d))


def main(argv=None) -> None:
    """Run the estimator: get vectors, build an IVFPQ index, print the trade-off.

    Vectors come from ``--npy`` when given; otherwise synthetic clustered data
    is generated (``--npy`` takes precedence if both options are passed).
    Queries are noisy copies of sampled dataset vectors, and recall is measured
    against ``flat_knn`` on the full dataset.

    Args:
        argv: argument list to parse; ``None`` lets argparse use ``sys.argv[1:]``.

    Raises:
        SystemExit: on invalid option values, input that is not a non-empty
            2-D array, or a dimensionality not divisible by ``--m``.
    """
    p = argparse.ArgumentParser(description="Estimate IVF-PQ memory/recall trade-off.")
    p.add_argument("--npy", help="path to an .npy array of shape [n, d]")
    p.add_argument("--synthetic", action="store_true",
                   help="use generated clustered data instead of a file")
    p.add_argument("--n", type=int, default=10000, help="synthetic dataset size")
    p.add_argument("--d", type=int, default=64, help="synthetic dimensionality")
    p.add_argument("--m", type=int, default=16, help="PQ sub-vectors / bytes per code")
    p.add_argument("--nlist", type=int, default=128, help="IVF partitions")
    p.add_argument("--nprobe", type=int, default=8, help="IVF cells probed per query")
    p.add_argument("--rerank", type=int, default=100,
                   help="candidates to re-rank exactly (0 = no re-rank)")
    p.add_argument("-k", type=int, default=10, help="neighbors to return")
    p.add_argument("--queries", type=int, default=100, help="evaluation queries")
    args = p.parse_args(argv)

    # Reject values that would otherwise surface much later as a
    # ZeroDivisionError traceback (modulo by m, recall denominator, cell ratio).
    for flag, value in (("--m", args.m), ("--nlist", args.nlist),
                        ("--nprobe", args.nprobe), ("-k", args.k),
                        ("--queries", args.queries)):
        if value < 1:
            raise SystemExit(f"error: {flag} must be >= 1 (got {value})")
    if args.rerank < 0:
        raise SystemExit(f"error: --rerank must be >= 0 (got {args.rerank})")

    if args.npy:
        data = np.load(args.npy).astype(np.float64)
        # Check the rank before touching shape[1] so a bad file gives a clear message.
        if data.ndim != 2:
            raise SystemExit(f"error: {args.npy} has shape {data.shape}; "
                             f"expected a 2-D array [n, d]")
        print(f"loaded {data.shape[0]} vectors x {data.shape[1]} dims from {args.npy}")
    else:
        data = make_synthetic(args.n, args.d)
        print(f"synthetic: {data.shape[0]} vectors x {data.shape[1]} dims")

    N, D = data.shape
    if N == 0 or D == 0:
        raise SystemExit("error: dataset is empty")
    if D % args.m != 0:
        raise SystemExit(f"error: dim {D} not divisible by --m {args.m}; "
                         f"pick m in {[x for x in (4,8,16,32) if D % x == 0]}")

    # Queries are noisy copies of sampled dataset rows; the fixed seed keeps
    # the evaluation reproducible across runs.
    rng = np.random.default_rng(123)
    qidx = rng.choice(N, size=min(args.queries, N), replace=False)
    queries = data[qidx] + rng.normal(scale=0.5, size=(len(qidx), D))
    exact = [set(flat_knn(data, q, args.k)[0].tolist()) for q in queries]

    print(f"building IVFPQ (nlist={args.nlist}, m={args.m}) ...")
    index = IVFPQ(nlist=args.nlist, m=args.m, ksub=256, seed=1).train(data)
    index.add(data)

    # Always fetch at least k candidates: with --rerank < k the candidate list
    # would be shorter than k and re-ranked recall could never reach 1.0.
    n_candidates = max(args.rerank, args.k)

    def recall(rerank):
        """Return mean recall@k over the queries, optionally with exact re-ranking."""
        use_rerank = bool(rerank and args.rerank)
        want = n_candidates if use_rerank else args.k
        hits = 0
        for q, ex in zip(queries, exact):
            ids, _ = index.search(q, want, nprobe=args.nprobe)
            if use_rerank and len(ids):
                # Re-score the candidates with exact squared L2 distance.
                dist = ((data[ids] - q) ** 2).sum(1)
                ids = ids[np.argsort(dist)[:args.k]]
            else:
                ids = ids[:args.k]
            hits += len(set(np.asarray(ids).tolist()) & ex)
        return hits / (len(queries) * args.k)

    flat_bytes = N * D * 4                     # realistic float32 baseline
    # PQ codes only (1 byte per sub-vector, since ksub=256); codebooks and
    # IVF bookkeeping are not included in this figure.
    code_bytes = N * args.m
    raw = recall(False)
    rr = recall(True) if args.rerank else None

    # A query cannot visit more cells than exist, so cap the reported fraction.
    scanned_pct = min(args.nprobe, args.nlist) / args.nlist * 100

    print("\n--- results ---")
    print(f"  recall@{args.k} (raw)      : {raw:.3f}")
    if rr is not None:
        print(f"  recall@{args.k} (+rerank {args.rerank}): {rr:.3f}")
    print(f"  memory (float32 vectors) : {flat_bytes / 1e6:.1f} MB")
    print(f"  memory (PQ codes)        : {code_bytes / 1e6:.2f} MB  "
          f"({flat_bytes // code_bytes}x smaller)")
    print(f"  per query, IVF scans ~{scanned_pct:.1f}% of cells")


# Script entry point: run the CLI only when this file is executed directly,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()

Point it at your real embeddings and you have an evidence-based way to choose IVFPQ parameters — the same decision FAISS users make every day, now with a tool you fully understand.

IVF & Product Quantization from Scratch

One-file CLI: the trade-off estimator

The steps

Install & run

It works — real output

The complete script