ReservoirClassifier on MNIST and Fashion-MNIST

This notebook benchmarks merlin.models.ReservoirClassifier on MNIST and Fashion-MNIST with the current built-in API.

Requested experiment grid:

3 photons with 12 PCA components
3 photons with 16 PCA components

The photonic reservoir is frozen after fit_reservoir(). Only the linear readout is trained.

A second section adds focused MNIST + PCA(12) demos for grouped measurements, changing layer.n_modes, and setting layer.noise_model.

The first execution downloads the datasets through Merlin’s dataset loaders. Full MNIST-sized runs can be slow for a photonic reservoir, so the notebook uses configurable stratified subsets by default.

[1]:

import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import perceval as pcvl
import torch
import torch.nn as nn
from sklearn.decomposition import PCA
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from merlin import MeasurementStrategy, MerlinProcessor, ModGrouping
from merlin.datasets import fashion_mnist, mnist_digits
from merlin.models import ReservoirClassifier

[2]:

SEED = 7
TRAIN_SAMPLES = 60000
TEST_SAMPLES = 10000
BATCH_SIZE = 128
EPOCHS = 100
LEARNING_RATE = 1e-3
DEVICE = torch.device("cpu")

EXPERIMENTS = [
    {"n_photons": 3, "n_components": 12},
    {"n_photons": 3, "n_components": 16},
]


def set_seed(seed: int = SEED) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)


set_seed()
print(
    f"train_samples={TRAIN_SAMPLES}, test_samples={TEST_SAMPLES}, "
    f"epochs={EPOCHS}, batch_size={BATCH_SIZE}"
)

train_samples=60000, test_samples=10000, epochs=100, batch_size=128

[3]:

DATASET_LOADERS = {
    "MNIST": (
        mnist_digits.get_data_train_original,
        mnist_digits.get_data_test_original,
    ),
    "Fashion-MNIST": (
        fashion_mnist.get_data_train,
        fashion_mnist.get_data_test,
    ),
}


def stratified_subset(X, y, n_samples, seed):
    if n_samples is None or n_samples >= len(X):
        return X, y
    X_sub, _, y_sub, _ = train_test_split(
        X,
        y,
        train_size=n_samples,
        stratify=y,
        random_state=seed,
    )
    return X_sub, y_sub


def subset_per_class(X, y, n_samples_per_class, seed):
    if n_samples_per_class is None:
        return X, y

    rng = np.random.default_rng(seed)
    selected_indices = []
    for class_label in np.unique(y):
        class_indices = np.flatnonzero(y == class_label)
        if n_samples_per_class > len(class_indices):
            raise ValueError(
                f"Requested {n_samples_per_class} samples for class {class_label}, "
                f"but only {len(class_indices)} are available."
            )
        selected_indices.append(
            rng.choice(class_indices, size=n_samples_per_class, replace=False)
        )

    selected_indices = np.concatenate(selected_indices)
    rng.shuffle(selected_indices)
    return X[selected_indices], y[selected_indices]


def load_dataset(
    name,
    train_samples=TRAIN_SAMPLES,
    test_samples=TEST_SAMPLES,
    train_samples_per_class=None,
    test_samples_per_class=None,
    seed=SEED,
):
    if train_samples is not None and train_samples_per_class is not None:
        raise ValueError("Use either train_samples or train_samples_per_class, not both.")
    if test_samples is not None and test_samples_per_class is not None:
        raise ValueError("Use either test_samples or test_samples_per_class, not both.")

    train_loader, test_loader = DATASET_LOADERS[name]
    X_train, y_train, _ = train_loader()
    X_test, y_test, _ = test_loader()
    print(f"Orignal {name}: train={X_train.shape}, test={X_test.shape}, classes={len(np.unique(y_train))}")

    X_train = X_train.reshape(len(X_train), -1).astype(np.float32) / 255.0
    X_test = X_test.reshape(len(X_test), -1).astype(np.float32) / 255.0
    y_train = np.asarray(y_train, dtype=np.int64)
    y_test = np.asarray(y_test, dtype=np.int64)

    if train_samples_per_class is not None:
        X_train, y_train = subset_per_class(
            X_train,
            y_train,
            train_samples_per_class,
            seed,
        )
    else:
        X_train, y_train = stratified_subset(X_train, y_train, train_samples, seed)

    if test_samples_per_class is not None:
        X_test, y_test = subset_per_class(
            X_test,
            y_test,
            test_samples_per_class,
            seed + 1,
        )
    else:
        X_test, y_test = stratified_subset(X_test, y_test, test_samples, seed)

    return X_train, y_train, X_test, y_test


for dataset_name in DATASET_LOADERS:
    X_train, y_train, X_test, y_test = load_dataset(dataset_name)
    print(
        f"{dataset_name}: train={X_train.shape}, test={X_test.shape}, "
        f"classes={len(np.unique(y_train))}"
    )

Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
MNIST: train=(60000, 784), test=(10000, 784), classes=10
Orignal Fashion-MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Fashion-MNIST: train=(60000, 784), test=(10000, 784), classes=10

Training Helpers

fit_reservoir() computes the frozen quantum features. The training loop below only optimizes the linear readout parameters returned by model.parameters().

[4]:

def train_readout(model, dataset, epochs=EPOCHS, batch_size=BATCH_SIZE, lr=LEARNING_RATE):
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    history = []

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        for features, targets in loader:
            features = features.to(model.device, dtype=model.dtype)
            targets = targets.to(model.device)

            optimizer.zero_grad()
            logits = model(features)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * targets.shape[0]
            total_correct += (logits.argmax(dim=1) == targets).sum().item()
            total_examples += targets.shape[0]

        history.append(
            {
                "epoch": epoch,
                "loss": total_loss / total_examples,
                "train_accuracy": total_correct / total_examples,
            }
        )

    return pd.DataFrame(history)


def accuracy_with_predictions(logits, y_true):
    predictions = logits.argmax(dim=1).numpy()
    accuracy = accuracy_score(y_true, predictions)
    return accuracy, predictions


def build_reservoir_model(
    in_features,
    out_features,
    n_components,
    n_photons=3,
    seed=SEED,
    measurement_strategy=None,
):
    model = ReservoirClassifier(
        in_features=in_features,
        out_features=out_features,
        n_photons=n_photons,
        reduction=PCA(
            n_components=n_components,
            svd_solver="randomized",
            random_state=seed,
        ),
        concatenate=True,
        cache=True,
        seed=seed,
        device=DEVICE,
        dtype=torch.float32,
    )
    if measurement_strategy is not None:
        model.layer.measurement_strategy = measurement_strategy
    return model


def run_experiment(
    dataset_name,
    n_components,
    n_photons=3,
    seed=SEED,
    processor=None,
    train_samples=None,
    test_samples=None,
    train_samples_per_class=None,
    test_samples_per_class=None,
    measurement_strategy=None,
    model_setup=None,
    run_name_suffix=None,
    epochs=EPOCHS,
):
    set_seed(seed)
    X_train, y_train, X_test, y_test = load_dataset(
        dataset_name,
        train_samples=(
            None
            if train_samples_per_class is not None
            else (TRAIN_SAMPLES if train_samples is None else train_samples)
        ),
        test_samples=(
            None
            if test_samples_per_class is not None
            else (TEST_SAMPLES if test_samples is None else test_samples)
        ),
        train_samples_per_class=train_samples_per_class,
        test_samples_per_class=test_samples_per_class,
        seed=seed,
    )

    model = build_reservoir_model(
        in_features=X_train.shape[1],
        out_features=len(np.unique(y_train)),
        n_components=n_components,
        n_photons=n_photons,
        seed=seed,
        measurement_strategy=measurement_strategy,
    )
    if model_setup is not None:
        model_setup(model)
    if processor is not None:
        model.layer.processor = processor

    run_name = f"{dataset_name} | photons={n_photons} | pca={n_components}"
    if run_name_suffix is not None:
        run_name = f"{run_name} | {run_name_suffix}"

    start = time.perf_counter()
    model.fit_reservoir(X_train)
    train_dataset = model.make_dataset(X_train, y_train)
    history = train_readout(model, train_dataset, epochs=epochs)
    elapsed = time.perf_counter() - start

    train_logits = model.predict(X_train)
    test_logits = model.predict(X_test)
    train_accuracy, train_predictions = accuracy_with_predictions(train_logits, y_train)
    test_accuracy, test_predictions = accuracy_with_predictions(test_logits, y_test)

    result = {
        "run_name": run_name,
        "dataset": dataset_name,
        "n_photons": n_photons,
        "n_components": n_components,
        "n_modes": model.layer.n_modes,
        "train_samples": len(X_train),
        "test_samples": len(X_test),
        "train_samples_per_class": train_samples_per_class,
        "test_samples_per_class": test_samples_per_class,
        "quantum_output_size": model.layer.output_size,
        "effective_quantum_output_size": (
            model.readout.in_features - model.in_features
            if model.concatenate
            else model.readout.in_features
        ),
        "readout_in_features": model.readout.in_features,
        "train_accuracy": train_accuracy,
        "test_accuracy": test_accuracy,
        "runtime_seconds": elapsed,
    }

    artifact = {
        "model": model,
        "history": history,
        "X_test": X_test,
        "y_test": y_test,
        "train_predictions": train_predictions,
        "test_predictions": test_predictions,
    }
    return result, artifact

[5]:

results = []
artifacts = {}

for dataset_name in DATASET_LOADERS:
    for config in EXPERIMENTS:
        run_name = (
            f"{dataset_name} | photons={config['n_photons']} | "
            f"pca={config['n_components']}"
        )
        print(f"Running {run_name}")
        result, artifact = run_experiment(
            dataset_name=dataset_name,
            n_components=config["n_components"],
            n_photons=config["n_photons"],
        )
        results.append(result)
        artifacts[result["run_name"]] = artifact

results_df = pd.DataFrame(results).sort_values(["dataset", "n_components"])
results_df

Running MNIST | photons=3 | pca=12
Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Running MNIST | photons=3 | pca=16
Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Running Fashion-MNIST | photons=3 | pca=12
Orignal Fashion-MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Running Fashion-MNIST | photons=3 | pca=16
Orignal Fashion-MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10

[5]:

	run_name	dataset	n_photons	n_components	n_modes	train_samples	test_samples	train_samples_per_class	test_samples_per_class	quantum_output_size	effective_quantum_output_size	readout_in_features	train_accuracy	test_accuracy	runtime_seconds
2	Fashion-MNIST \| photons=3 \| pca=12	Fashion-MNIST	3	12	12	60000	10000	None	None	220	220	1004	0.891133	0.8560	24.578810
3	Fashion-MNIST \| photons=3 \| pca=16	Fashion-MNIST	3	16	16	60000	10000	None	None	560	560	1344	0.892700	0.8582	29.953323
0	MNIST \| photons=3 \| pca=12	MNIST	3	12	12	60000	10000	None	None	220	220	1004	0.969533	0.9569	27.475793
1	MNIST \| photons=3 \| pca=16	MNIST	3	16	16	60000	10000	None	None	560	560	1344	0.974833	0.9619	27.126543

[6]:

fig, axes = plt.subplots(1, 2, figsize=(14, 4))

for run_name, artifact in artifacts.items():
    history = artifact["history"]
    axes[0].plot(history["epoch"], history["loss"], marker="o", label=run_name)
    axes[1].plot(
        history["epoch"],
        history["train_accuracy"],
        marker="o",
        label=run_name,
    )

axes[0].set_title("Readout Training Loss")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Cross-Entropy Loss")
axes[0].grid(alpha=0.3)

axes[1].set_title("Readout Training Accuracy")
axes[1].set_xlabel("Epoch")
axes[1].set_ylabel("Accuracy")
axes[1].grid(alpha=0.3)

handles, labels = axes[1].get_legend_handles_labels()
axes[1].legend(handles, labels, loc="lower right", fontsize=8)
plt.tight_layout()

../../../../../../../../../Users/cassandrenotton/Documents/projects/QML_project/fork-merlin/merlin/docs/build/html/0.4/.doctrees/nbsphinx/notebooks_ReservoirClassifier_7_0.png

[7]:

summary = results_df[[
    "dataset",
    "n_photons",
    "n_components",
    "n_modes",
    "quantum_output_size",
    "train_accuracy",
    "test_accuracy",
    "runtime_seconds",
]].copy()
display(summary)

pivot = results_df.pivot(index="dataset", columns="n_components", values="test_accuracy")
ax = pivot.plot(kind="bar", figsize=(8, 4), rot=0)
ax.set_title("ReservoirClassifier Test Accuracy")
ax.set_xlabel("Dataset")
ax.set_ylabel("Accuracy")
ax.grid(axis="y", alpha=0.3)
plt.tight_layout()

	dataset	n_photons	n_components	n_modes	quantum_output_size	train_accuracy	test_accuracy	runtime_seconds
2	Fashion-MNIST	3	12	12	220	0.891133	0.8560	24.578810
3	Fashion-MNIST	3	16	16	560	0.892700	0.8582	29.953323
0	MNIST	3	12	12	220	0.969533	0.9569	27.475793
1	MNIST	3	16	16	560	0.974833	0.9619	27.126543

[8]:

best_row = results_df.sort_values("test_accuracy", ascending=False).iloc[0]
best_run_name = best_row["run_name"]
best_artifact = artifacts[best_run_name]

print(best_row[["run_name", "test_accuracy", "runtime_seconds"]])
print(
    f"quantum_output_size={best_row['quantum_output_size']}, "
    f"readout_in_features={best_row['readout_in_features']}"
)

fig, ax = plt.subplots(figsize=(7, 6))
ConfusionMatrixDisplay.from_predictions(
    best_artifact["y_test"],
    best_artifact["test_predictions"],
    ax=ax,
    colorbar=False,
)
ax.set_title(f"Best Run Confusion Matrix\n{best_run_name}")
plt.tight_layout()

run_name           MNIST | photons=3 | pca=16
test_accuracy                          0.9619
runtime_seconds                     27.126543
Name: 1, dtype: object
quantum_output_size=560, readout_in_features=1344

Part 1b: MNIST API Demos

The benchmark above changes dataset and PCA width. The next cell keeps MNIST with PCA(n_components=12) and runs three smaller API demos:

change the probability grouping with ModGrouping(..., 10)
change model.layer.n_modes after construction
set model.layer.noise_model = pcvl.NoiseModel(brightness=0.9)

These runs use a small per-class subset and fewer epochs so you can inspect the API behavior quickly.

[9]:

DEMO_DATASET = "MNIST"
DEMO_N_PHOTONS = 3
DEMO_BASE_COMPONENTS = 12
DEMO_N_MODES = 20
DEMO_TRAIN_SAMPLES_PER_CLASS = 5000
DEMO_TEST_SAMPLES_PER_CLASS = 850
DEMO_EPOCHS = EPOCHS


def apply_grouping_demo(model):
    model.layer.measurement_strategy = MeasurementStrategy.probs(
        grouping=ModGrouping(model.layer.output_size, model.out_features)
    )


def apply_n_modes_demo(model):
    model.layer.n_modes = DEMO_N_MODES


def apply_noise_model_demo(model):
    model.layer.noise_model = pcvl.NoiseModel(brightness=0.9)



demo_runs = [
    {
        "demo": "Changed grouping",
        "run_name_suffix": "grouping=ModGrouping(output_size=10)",
        "model_setup": apply_grouping_demo,
    },
    {
        "demo": "Changed number of modes",
        "run_name_suffix": f"n_modes={DEMO_N_MODES}",
        "model_setup": apply_n_modes_demo,
    },
    {
        "demo": "With pcvl.NoiseModel",
        "run_name_suffix": "layer.noise_model=pcvl.NoiseModel(brightness=0.9)",
        "model_setup": apply_noise_model_demo,
    },
]

demo_results = []
for spec in demo_runs:
    print(f"Running demo: {spec['demo']}")
    result, _ = run_experiment(
        dataset_name=DEMO_DATASET,
        n_components=DEMO_BASE_COMPONENTS,
        n_photons=DEMO_N_PHOTONS,
        train_samples_per_class=DEMO_TRAIN_SAMPLES_PER_CLASS,
        test_samples_per_class=DEMO_TEST_SAMPLES_PER_CLASS,
        model_setup=spec.get("model_setup"),
        run_name_suffix=spec["run_name_suffix"],
        epochs=DEMO_EPOCHS,
    )
    result["demo"] = spec["demo"]
    demo_results.append(result)

demo_results_df = pd.DataFrame(demo_results)[[
    "demo",
    "run_name",
    "n_components",
    "n_modes",
    "quantum_output_size",
    "effective_quantum_output_size",
    "readout_in_features",
    "test_accuracy",
    "runtime_seconds",
]]
display(demo_results_df)

Running demo: Changed grouping
Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Running demo: Changed number of modes
Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10
Running demo: With pcvl.NoiseModel
Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10

	demo	run_name	n_components	n_modes	quantum_output_size	effective_quantum_output_size	readout_in_features	test_accuracy	runtime_seconds
0	Changed grouping	MNIST \| photons=3 \| pca=12 \| grouping=ModGroup...	12	12	220	10	794	0.930588	19.429218
1	Changed number of modes	MNIST \| photons=3 \| pca=12 \| n_modes=20	12	20	1140	1140	1924	0.956353	26.926721
2	With pcvl.NoiseModel	MNIST \| photons=3 \| pca=12 \| layer.noise_model...	12	12	299	299	1083	0.953294	19.965862

Part 2: Cloud Processing

The same ReservoirClassifier pipeline can offload the frozen quantum reservoir to Quandela Cloud by attaching a MerlinProcessor to model.layer.processor.

The experiment helper below stores the processor on the layer once, then calls fit_reservoir(), make_dataset(), and predict() without passing a processor argument. Only the quantum feature extraction is remote. The linear readout still trains locally in PyTorch.

Paste your token in the next cell or load it from token_utils.py if you prefer.

[10]:

# Option 1: paste your Quandela Cloud token directly.
#CLOUD_TOKEN = ""  # TODO: replace with your token

# Option 2: load it from token_utils.py or your environment.
from token_utils import load_cloud_token
CLOUD_TOKEN = load_cloud_token()

#CLOUD_PLATFORM = "sim:slos"  # exact probabilities on Quandela Cloud
CLOUD_PLATFORM = "sim:ascella"  # example QPU-like simulator

CLOUD_MICROBATCH_SIZE = 32
CLOUD_TIMEOUT = 3600.0
CLOUD_MAX_SHOTS_PER_CALL = None
CLOUD_CHUNK_CONCURRENCY = 1

[11]:

def build_cloud_processor(
    cloud_token,
    platform=CLOUD_PLATFORM,
    microbatch_size=CLOUD_MICROBATCH_SIZE,
    timeout=CLOUD_TIMEOUT,
    max_shots_per_call=CLOUD_MAX_SHOTS_PER_CALL,
    chunk_concurrency=CLOUD_CHUNK_CONCURRENCY,
):
    token = cloud_token.strip()
    if not token:
        raise ValueError("Add your Quandela Cloud token in the previous cell before building the processor.")

    pcvl.RemoteConfig.set_token(token)
    remote_processor = pcvl.RemoteProcessor(platform)
    return MerlinProcessor(
        remote_processor=remote_processor,
        microbatch_size=microbatch_size,
        timeout=timeout,
        max_shots_per_call=max_shots_per_call,
        chunk_concurrency=chunk_concurrency,
    )


cloud_processor = None
if CLOUD_TOKEN.strip():
    cloud_processor = build_cloud_processor(CLOUD_TOKEN)
    print(f"Cloud processor ready on {CLOUD_PLATFORM}")
else:
    print("Add your token above, then re-run this cell to enable cloud execution.")

Cloud processor ready on sim:ascella

Optional Cloud Experiment

Start with a smaller subset before launching the remote QPU sweep. The cloud cells below use n_train and n_test samples per class rather than one global subset size, and keep n_components=12 for QPU runs.

[12]:

if cloud_processor is None:
    raise RuntimeError("Build the cloud processor first by adding your token and running the previous cell.")

CLOUD_DATASET = "MNIST"
CLOUD_EXPERIMENT = {"n_photons": 3, "n_components": 12}
CLOUD_TRAIN_SAMPLES_PER_CLASS = 5
CLOUD_TEST_SAMPLES_PER_CLASS = 5

cloud_result, cloud_artifact = run_experiment(
    dataset_name=CLOUD_DATASET,
    n_components=CLOUD_EXPERIMENT["n_components"],
    n_photons=CLOUD_EXPERIMENT["n_photons"],
    processor=cloud_processor,
    train_samples_per_class=CLOUD_TRAIN_SAMPLES_PER_CLASS,
    test_samples_per_class=CLOUD_TEST_SAMPLES_PER_CLASS,
)

pd.DataFrame([cloud_result])

Orignal MNIST: train=(60000, 28, 28), test=(10000, 28, 28), classes=10

[12]:

	run_name	dataset	n_photons	n_components	n_modes	train_samples	test_samples	train_samples_per_class	test_samples_per_class	quantum_output_size	effective_quantum_output_size	readout_in_features	train_accuracy	test_accuracy	runtime_seconds
0	MNIST \| photons=3 \| pca=12	MNIST	3	12	12	50	50	5	5	220	220	1004	1.0	0.36	45.297649

[ ]:

# Optional: run the QPU sweep remotely with PCA(12) only.
# Increase the sample counts only after you are comfortable with the runtime and cloud cost.

if cloud_processor is None:
    raise RuntimeError("Build the cloud processor first by adding your token and running the previous cell.")

QPU_EXPERIMENTS = [
    {"n_photons": 3, "n_components": 12},
]

CLOUD_SWEEP_TRAIN_SAMPLES_PER_CLASS = 64
CLOUD_SWEEP_TEST_SAMPLES_PER_CLASS = 32

cloud_results = []
for dataset_name in DATASET_LOADERS:
    for config in QPU_EXPERIMENTS:
        print(
            f"Running cloud experiment: {dataset_name} | "
            f"photons={config['n_photons']} | pca={config['n_components']}"
        )
        result, _ = run_experiment(
            dataset_name=dataset_name,
            n_components=config["n_components"],
            n_photons=config["n_photons"],
            processor=cloud_processor,
            train_samples_per_class=CLOUD_SWEEP_TRAIN_SAMPLES_PER_CLASS,
            test_samples_per_class=CLOUD_SWEEP_TEST_SAMPLES_PER_CLASS,
        )
        cloud_results.append(result)

cloud_results_df = pd.DataFrame(cloud_results).sort_values(["dataset", "n_components"])
cloud_results_df

Notes

Increase TRAIN_SAMPLES, TEST_SAMPLES, or EPOCHS if you want a stronger benchmark.
The MNIST API demos use DEMO_* knobs so you can inspect grouping, layer.n_modes, and noise_model changes without rerunning the full benchmark.
For cloud execution, paste your token in CLOUD_TOKEN, build cloud_processor, and let run_experiment() attach it to model.layer.processor. Tune CLOUD_*_SAMPLES_PER_CLASS to control the remote subset size per class.
Set concatenate=False inside run_experiment() if you want to test a pure quantum-feature readout instead of concatenating raw pixels with the reservoir output.
model.layer.output_size shows how the photonic feature dimension grows with the number of PCA modes and photons.