"""Parametric simulation and bootstrap for CrossedLMEResult.
Implements:
- simulate(): draw response vectors from the fitted mixed model
- bootMer(): parametric bootstrap with arbitrary user-defined statistic
- BootResult: container for bootstrap estimates with CI computation
"""
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING
import numpy as np
import scipy.sparse as sp
if TYPE_CHECKING:
from interlace.result import CrossedLMEResult
# ---------------------------------------------------------------------------
# BootResult
# ---------------------------------------------------------------------------
[docs]
@dataclass
class BootResult:
"""Container for parametric bootstrap estimates.
Attributes
----------
estimates:
Array of shape ``(B, p)`` where ``B`` is the number of bootstrap
replicates and ``p`` is the length of the statistic vector.
Examples
--------
>>> boot = result.bootMer(B=100, seed=42)
>>> boot.estimates.shape # (100, p + 1 + n_theta)
>>> boot.ci(level=0.95)
"""
estimates: np.ndarray
[docs]
def ci(
self,
method: str = "perc",
level: float = 0.95,
) -> np.ndarray:
"""Compute bootstrap confidence intervals.
Parameters
----------
method:
``"perc"`` (percentile) is currently supported.
level:
Confidence level, e.g. ``0.95`` for a 95% CI.
Returns
-------
np.ndarray of shape ``(p, 2)`` with columns ``[lower, upper]``.
"""
if method != "perc":
raise ValueError(
f"method={method!r} is not supported; only 'perc' is available"
)
alpha = 1.0 - level
lo = 100.0 * (alpha / 2.0)
hi = 100.0 * (1.0 - alpha / 2.0)
lower = np.percentile(self.estimates, lo, axis=0)
upper = np.percentile(self.estimates, hi, axis=0)
return np.column_stack([lower, upper])
# ---------------------------------------------------------------------------
# simulate()
# ---------------------------------------------------------------------------
[docs]
def simulate(
result: CrossedLMEResult,
nsim: int = 1,
seed: int | np.random.Generator | None = None,
) -> np.ndarray:
"""Draw response vectors from the fitted conditional model.
Simulates from:
.. math::
y^* = X\\beta + Z u^* + \\varepsilon^*
where :math:`u^* \\sim N(0,\\, \\sigma^2 \\Lambda\\Lambda^\\top)` and
:math:`\\varepsilon^* \\sim N(0,\\, \\sigma^2 I_n)`.
Equivalently: :math:`u^* = \\sigma \\Lambda z_u` with
:math:`z_u \\sim N(0, I_q)`.
Parameters
----------
result:
Fitted :class:`~interlace.result.CrossedLMEResult`.
nsim:
Number of response vectors to simulate.
seed:
Seed for the random number generator (int, Generator, or None).
Returns
-------
np.ndarray of shape ``(n, nsim)`` — each column is one simulated
response vector.
Examples
--------
>>> y_sim = interlace.simulate(result, nsim=10, seed=42)
>>> y_sim.shape # (n, 10)
"""
from interlace.profiled_reml import make_lambda
rng = np.random.default_rng(seed)
X = result.model.exog # (n, p)
Z = result._Z # (n, q) sparse csc_matrix
beta = np.asarray(result.fe_params)
sigma = float(np.sqrt(result.scale))
Lambda = make_lambda(result.theta, result._random_specs, result._n_levels)
q = Lambda.shape[0]
n = X.shape[0]
# Fixed-effect mean: (n,)
mu = X @ beta
# Random effect draws: u* = sigma * Lambda @ z_u, z_u ~ N(0, I_q)
z_u = rng.standard_normal((q, nsim)) # (q, nsim)
u_star = sigma * Lambda.dot(sp.csc_matrix(z_u))
u_star_dense = u_star.toarray() if sp.issparse(u_star) else np.asarray(u_star)
re_contrib = Z.dot(u_star_dense) # (n, nsim)
# Residual noise
eps = sigma * rng.standard_normal((n, nsim)) # (n, nsim)
return np.asarray(mu[:, np.newaxis] + re_contrib + eps)
# ---------------------------------------------------------------------------
# Default statistic for bootMer
# ---------------------------------------------------------------------------
def _default_statistic(result: CrossedLMEResult) -> np.ndarray:
"""Collect fixed effects, sqrt(sigma^2), and theta into a single vector."""
fe = np.asarray(result.fe_params).ravel()
sigma = np.array([float(np.sqrt(result.scale))])
theta = np.asarray(result.theta).ravel()
return np.concatenate([fe, sigma, theta])
# ---------------------------------------------------------------------------
# bootMer()
# ---------------------------------------------------------------------------
def _replace_col_in_frame(frame, col_name: str, values: np.ndarray): # type: ignore[no-untyped-def]
"""Replace *col_name* in *frame* with *values*, returning the same native type."""
import pandas as pd
if isinstance(frame, pd.DataFrame):
return frame.assign(**{col_name: values})
try:
import polars as pl
if isinstance(frame, pl.DataFrame):
return frame.with_columns(pl.Series(col_name, values))
except ImportError:
pass
# Generic fallback: convert to pandas, replace, return pandas
import narwhals as nw
import pandas as pd
pdf = nw.from_native(frame, eager_only=True).to_pandas()
return pdf.assign(**{col_name: values})
[docs]
def bootMer(
result: CrossedLMEResult,
statistic: Callable[..., np.ndarray] | None = None,
B: int = 500,
seed: int | np.random.Generator | None = None,
show_progress: bool = False,
) -> BootResult:
"""Parametric bootstrap for a fitted mixed model.
For each of ``B`` iterations:
1. Simulate a new response vector ``y*`` from the fitted model.
2. Refit the model on ``(X, Z, y*)``.
3. Collect ``statistic(refit)`` into ``estimates``.
Parameters
----------
result:
Fitted :class:`~interlace.result.CrossedLMEResult`.
statistic:
Callable ``(CrossedLMEResult) → np.ndarray`` (1-D).
Defaults to :func:`_default_statistic` which collects
``[fe_params..., sqrt(sigma^2), theta...]``.
B:
Number of bootstrap replicates.
seed:
Seed for the random number generator.
show_progress:
If True and ``tqdm`` is installed, display a progress bar.
Returns
-------
BootResult
``.estimates`` has shape ``(B, p)``.
Examples
--------
>>> boot = interlace.bootMer(result, B=200, seed=42)
>>> boot.ci(level=0.95)
"""
from interlace import fit as _fit
if statistic is None:
statistic = _default_statistic
rng = np.random.default_rng(seed)
# Pre-generate seeds for each replicate (ensures reproducibility even if
# the loop is parallelised in the future)
rep_seeds = rng.integers(0, 2**31, size=B)
# Reconstruct random= spec strings for refitting
random_strs = [_spec_to_str(s) for s in result._random_specs]
# Original data (native frame) and response column name
native_data = result.model.data.frame
endog_name = result.model.endog_names
formula = result.model.formula
method = result.method
_iter: range | object = range(B)
if show_progress:
try:
from tqdm import tqdm
_iter = tqdm(range(B), desc="bootMer")
except ImportError:
pass
estimates: list[np.ndarray] = []
for i in _iter: # type: ignore[union-attr]
y_star = simulate(result, nsim=1, seed=int(rep_seeds[i])).ravel()
data_b = _replace_col_in_frame(native_data, endog_name, y_star)
refit = _fit(formula, data_b, random=random_strs, method=method)
estimates.append(np.asarray(statistic(refit)).ravel())
return BootResult(estimates=np.array(estimates))
# ---------------------------------------------------------------------------
# Helper: RandomEffectSpec → lme4-style string
# ---------------------------------------------------------------------------
def _spec_to_str(spec) -> str: # type: ignore[no-untyped-def]
"""Convert a RandomEffectSpec back to an lme4-style string for refitting."""
terms: list[str] = []
if spec.intercept:
terms.append("1")
terms.extend(spec.predictors)
effects = " + ".join(terms) if terms else "0"
pipe = "|" if spec.correlated else "||"
return f"({effects} {pipe} {spec.group})"