Source code for interlace.simulate

"""Parametric simulation and bootstrap for CrossedLMEResult.

Implements:
- simulate(): draw response vectors from the fitted mixed model
- bootMer(): parametric bootstrap with arbitrary user-defined statistic
- BootResult: container for bootstrap estimates with CI computation
"""

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np
import scipy.sparse as sp

if TYPE_CHECKING:
    from interlace.result import CrossedLMEResult


# ---------------------------------------------------------------------------
# BootResult
# ---------------------------------------------------------------------------


[docs] @dataclass class BootResult: """Container for parametric bootstrap estimates. Attributes ---------- estimates: Array of shape ``(B, p)`` where ``B`` is the number of bootstrap replicates and ``p`` is the length of the statistic vector. Examples -------- >>> boot = result.bootMer(B=100, seed=42) >>> boot.estimates.shape # (100, p + 1 + n_theta) >>> boot.ci(level=0.95) """ estimates: np.ndarray
[docs] def ci( self, method: str = "perc", level: float = 0.95, ) -> np.ndarray: """Compute bootstrap confidence intervals. Parameters ---------- method: ``"perc"`` (percentile) is currently supported. level: Confidence level, e.g. ``0.95`` for a 95% CI. Returns ------- np.ndarray of shape ``(p, 2)`` with columns ``[lower, upper]``. """ if method != "perc": raise ValueError( f"method={method!r} is not supported; only 'perc' is available" ) alpha = 1.0 - level lo = 100.0 * (alpha / 2.0) hi = 100.0 * (1.0 - alpha / 2.0) lower = np.percentile(self.estimates, lo, axis=0) upper = np.percentile(self.estimates, hi, axis=0) return np.column_stack([lower, upper])
# --------------------------------------------------------------------------- # simulate() # ---------------------------------------------------------------------------
[docs] def simulate( result: CrossedLMEResult, nsim: int = 1, seed: int | np.random.Generator | None = None, ) -> np.ndarray: """Draw response vectors from the fitted conditional model. Simulates from: .. math:: y^* = X\\beta + Z u^* + \\varepsilon^* where :math:`u^* \\sim N(0,\\, \\sigma^2 \\Lambda\\Lambda^\\top)` and :math:`\\varepsilon^* \\sim N(0,\\, \\sigma^2 I_n)`. Equivalently: :math:`u^* = \\sigma \\Lambda z_u` with :math:`z_u \\sim N(0, I_q)`. Parameters ---------- result: Fitted :class:`~interlace.result.CrossedLMEResult`. nsim: Number of response vectors to simulate. seed: Seed for the random number generator (int, Generator, or None). Returns ------- np.ndarray of shape ``(n, nsim)`` — each column is one simulated response vector. Examples -------- >>> y_sim = interlace.simulate(result, nsim=10, seed=42) >>> y_sim.shape # (n, 10) """ from interlace.profiled_reml import make_lambda rng = np.random.default_rng(seed) X = result.model.exog # (n, p) Z = result._Z # (n, q) sparse csc_matrix beta = np.asarray(result.fe_params) sigma = float(np.sqrt(result.scale)) Lambda = make_lambda(result.theta, result._random_specs, result._n_levels) q = Lambda.shape[0] n = X.shape[0] # Fixed-effect mean: (n,) mu = X @ beta # Random effect draws: u* = sigma * Lambda @ z_u, z_u ~ N(0, I_q) z_u = rng.standard_normal((q, nsim)) # (q, nsim) u_star = sigma * Lambda.dot(sp.csc_matrix(z_u)) u_star_dense = u_star.toarray() if sp.issparse(u_star) else np.asarray(u_star) re_contrib = Z.dot(u_star_dense) # (n, nsim) # Residual noise eps = sigma * rng.standard_normal((n, nsim)) # (n, nsim) return np.asarray(mu[:, np.newaxis] + re_contrib + eps)
# --------------------------------------------------------------------------- # Default statistic for bootMer # --------------------------------------------------------------------------- def _default_statistic(result: CrossedLMEResult) -> np.ndarray: """Collect fixed effects, sqrt(sigma^2), and theta into a single vector.""" fe = np.asarray(result.fe_params).ravel() sigma = np.array([float(np.sqrt(result.scale))]) theta = np.asarray(result.theta).ravel() return np.concatenate([fe, sigma, theta]) # --------------------------------------------------------------------------- # bootMer() # --------------------------------------------------------------------------- def _replace_col_in_frame(frame, col_name: str, values: np.ndarray): # type: ignore[no-untyped-def] """Replace *col_name* in *frame* with *values*, returning the same native type.""" import pandas as pd if isinstance(frame, pd.DataFrame): return frame.assign(**{col_name: values}) try: import polars as pl if isinstance(frame, pl.DataFrame): return frame.with_columns(pl.Series(col_name, values)) except ImportError: pass # Generic fallback: convert to pandas, replace, return pandas import narwhals as nw import pandas as pd pdf = nw.from_native(frame, eager_only=True).to_pandas() return pdf.assign(**{col_name: values})
[docs] def bootMer( result: CrossedLMEResult, statistic: Callable[..., np.ndarray] | None = None, B: int = 500, seed: int | np.random.Generator | None = None, show_progress: bool = False, ) -> BootResult: """Parametric bootstrap for a fitted mixed model. For each of ``B`` iterations: 1. Simulate a new response vector ``y*`` from the fitted model. 2. Refit the model on ``(X, Z, y*)``. 3. Collect ``statistic(refit)`` into ``estimates``. Parameters ---------- result: Fitted :class:`~interlace.result.CrossedLMEResult`. statistic: Callable ``(CrossedLMEResult) → np.ndarray`` (1-D). Defaults to :func:`_default_statistic` which collects ``[fe_params..., sqrt(sigma^2), theta...]``. B: Number of bootstrap replicates. seed: Seed for the random number generator. show_progress: If True and ``tqdm`` is installed, display a progress bar. Returns ------- BootResult ``.estimates`` has shape ``(B, p)``. Examples -------- >>> boot = interlace.bootMer(result, B=200, seed=42) >>> boot.ci(level=0.95) """ from interlace import fit as _fit if statistic is None: statistic = _default_statistic rng = np.random.default_rng(seed) # Pre-generate seeds for each replicate (ensures reproducibility even if # the loop is parallelised in the future) rep_seeds = rng.integers(0, 2**31, size=B) # Reconstruct random= spec strings for refitting random_strs = [_spec_to_str(s) for s in result._random_specs] # Original data (native frame) and response column name native_data = result.model.data.frame endog_name = result.model.endog_names formula = result.model.formula method = result.method _iter: range | object = range(B) if show_progress: try: from tqdm import tqdm _iter = tqdm(range(B), desc="bootMer") except ImportError: pass estimates: list[np.ndarray] = [] for i in _iter: # type: ignore[union-attr] y_star = simulate(result, nsim=1, seed=int(rep_seeds[i])).ravel() data_b = _replace_col_in_frame(native_data, endog_name, y_star) refit = _fit(formula, data_b, random=random_strs, method=method) estimates.append(np.asarray(statistic(refit)).ravel()) return BootResult(estimates=np.array(estimates))
# --------------------------------------------------------------------------- # Helper: RandomEffectSpec → lme4-style string # --------------------------------------------------------------------------- def _spec_to_str(spec) -> str: # type: ignore[no-untyped-def] """Convert a RandomEffectSpec back to an lme4-style string for refitting.""" terms: list[str] = [] if spec.intercept: terms.append("1") terms.extend(spec.predictors) effects = " + ".join(terms) if terms else "0" pipe = "|" if spec.correlated else "||" return f"({effects} {pipe} {spec.group})"