Python API reference¶

Generated with mkdocstrings from package docstrings. Install the package and docs extras: pip install -e ".[docs]".

Runner¶

`lrdbench.runner.BenchmarkRunner` ¶

Orchestrate a complete benchmark run.

The runner implements the full benchmark loop:

1. Load and validate the manifest.
2. Materialise records from generators or observational sources.
3. Optionally train data-driven estimators.
4. Fit every enrolled estimator to every record.
5. Evaluate mode-appropriate metrics.
6. Build leaderboards.
7. Persist results to a `CsvResultStore`.
8. Generate HTML/CSV/LaTeX report artefacts.

Example:

```python
from lrdbench.runner import BenchmarkRunner
from lrdbench.manifest import load_manifest

runner = BenchmarkRunner()
manifest = load_manifest("my_suite.yaml")
output = runner.run(manifest)
print(output.run_id)
```

Source code in src/lrdbench/runner.py

class BenchmarkRunner:
    """Orchestrate a complete benchmark run.

    The runner implements the full benchmark loop:

    1. Load and validate the manifest.
    2. Materialise records from generators or observational sources.
    3. Optionally train data-driven estimators.
    4. Fit every enrolled estimator to every record.
    5. Evaluate mode-appropriate metrics.
    6. Build leaderboards.
    7. Persist results to a ``CsvResultStore``.
    8. Generate HTML/CSV/LaTeX report artefacts.

    Example::

        from lrdbench.runner import BenchmarkRunner
        from lrdbench.manifest import load_manifest

        runner = BenchmarkRunner()
        manifest = load_manifest("my_suite.yaml")
        output = runner.run(manifest)
        print(output.run_id)
    """

    def __init__(
        self,
        *,
        generators: GeneratorRegistry | None = None,
        estimators: EstimatorRegistry | None = None,
        contaminations: ContaminationRegistry | None = None,
        discover_plugins: bool = True,
    ) -> None:
        """Wire up the registries, evaluator, leaderboard builder and reporter.

        Args:
            generators: Generator registry to use. The default registry is
                built when this is ``None``.
            estimators: Estimator registry to use. When given, no plugin
                discovery takes place.
            contaminations: Contamination registry to use. The default
                registry is built when this is ``None``.
            discover_plugins: Only consulted when ``estimators`` is ``None``.
                If true, the estimator registry is built through the plugin
                loader and the loader's per-plugin results are kept as
                provenance records; otherwise the default estimator registry
                is used.
        """
        # Test against ``None`` explicitly (as already done for ``estimators``)
        # so that a caller-supplied registry that happens to be falsy, e.g. an
        # empty one, is respected instead of being swapped for the defaults.
        self.generators = (
            generators if generators is not None else build_default_generator_registry()
        )
        self.contaminations = (
            contaminations
            if contaminations is not None
            else build_default_contamination_registry()
        )
        self._plugin_provenance: list[PluginProvenanceRecord] = []
        if estimators is not None:
            self.estimators = estimators
        elif discover_plugins:
            # Imported lazily, only on the code path that needs the loader.
            from lrdbench.plugin_loader import build_estimator_registry_with_plugins

            reg, results = build_estimator_registry_with_plugins()
            self.estimators = reg
            self._plugin_provenance = [
                PluginProvenanceRecord(
                    plugin_name=r.plugin_name,
                    module_name_or_path=r.module_name_or_path,
                    entry_point_name=r.entry_point_name,
                    version=r.version,
                    status=r.status,
                    failure_reason=r.failure_reason,
                    source_hash=r.source_hash,
                )
                for r in results
            ]
        else:
            self.estimators = build_default_estimator_registry()
        self._gt_evaluator = GroundTruthEvaluator()
        self._leaderboard = WeightedRankLeaderboardBuilder()
        self._reporter = SimpleHtmlCsvReporter()

    @staticmethod
    def _require_supported_mode(manifest: BenchmarkManifest) -> None:
        """Raise ``NotImplementedError`` unless the manifest mode is supported."""
        if manifest.mode not in (
            BenchmarkMode.GROUND_TRUTH,
            BenchmarkMode.STRESS_TEST,
            BenchmarkMode.OBSERVATIONAL,
        ):
            raise NotImplementedError(
                f"mode {manifest.mode.value!r} is not implemented in this release "
                f"(supported: ground_truth, stress_test, observational)"
            )

    @staticmethod
    def _resolve_dir(manifest_path: Path | None, base_dir: Path | None) -> Path:
        """Pick the directory for relative paths: base_dir, manifest dir, then cwd."""
        if base_dir is not None:
            return base_dir
        if manifest_path is not None:
            return manifest_path.parent
        return Path.cwd()

    def _materialise_records(
        self, manifest: BenchmarkManifest, global_seed: int, resolve_dir: Path
    ) -> list[SeriesRecord]:
        """Produce the records for the manifest's (already validated) mode."""
        if manifest.mode is BenchmarkMode.GROUND_TRUTH:
            return self._generate_records_ground_truth(manifest, global_seed)
        if manifest.mode is BenchmarkMode.STRESS_TEST:
            return self._generate_records_stress_test(manifest, global_seed)
        return load_observational_records(
            manifest, base_dir=resolve_dir, global_seed=global_seed
        )

    def run(
        self,
        manifest: BenchmarkManifest,
        *,
        manifest_path: Path | None = None,
        base_dir: Path | None = None,
    ) -> BenchmarkRunOutput:
        """Execute the full benchmark loop for ``manifest``.

        Args:
            manifest: A validated ``BenchmarkManifest``.
            manifest_path: Path to the manifest file, used to resolve
                relative paths (e.g. observational CSV files).
            base_dir: Alternative directory for relative path resolution.
                If ``None``, defaults to ``manifest_path.parent`` or
                ``Path.cwd()``.

        Returns:
            Output object of type ``BenchmarkRunOutput`` holding the run ID,
            records, estimates, metrics, leaderboards, report bundle, result
            store path and plugin provenance.

        Raises:
            NotImplementedError: If the manifest mode is not supported.
            ValueError: If the manifest requests unsupported generator
                parameters (e.g. ARFIMA with ``p != 0`` or ``q != 0``).
        """
        self._require_supported_mode(manifest)
        run_id = str(uuid.uuid4())
        global_seed = int(manifest.seed_spec.get("global_seed", 0))
        resolve_dir = self._resolve_dir(manifest_path, base_dir)

        records = self._materialise_records(manifest, global_seed, resolve_dir)
        # Observational runs get their own evaluator; both synthetic modes
        # share the ground-truth evaluator created in ``__init__``.
        evaluator: BaseEvaluator = (
            ObservationalEvaluator(self.estimators)
            if manifest.mode is BenchmarkMode.OBSERVATIONAL
            else self._gt_evaluator
        )

        report_spec = manifest.report_spec or ReportSpec(
            formats=("html", "csv"),
            leaderboards=tuple(manifest.leaderboard_specs),
        )
        # A report spec without leaderboards inherits the manifest's ones.
        if not report_spec.leaderboards and manifest.leaderboard_specs:
            report_spec = replace(report_spec, leaderboards=tuple(manifest.leaderboard_specs))
        export_root = Path(report_spec.export_root)

        # The preparation step returns the manifest that is used from here on.
        manifest = prepare_data_driven_estimators(
            manifest,
            generators=self.generators,
            contaminations=self.contaminations,
            run_id=run_id,
            artefact_root=export_root,
            global_seed=global_seed,
        )

        estimates = run_fit_jobs(
            collect_fit_jobs(records, manifest.estimator_specs),
            estimators=self.estimators,
            execution_spec=dict(manifest.execution_spec),
            cwd=resolve_dir,
        )

        metrics = evaluator.evaluate(manifest, records, estimates)
        boards = self._leaderboard.build(manifest, metrics)

        # Each run is stored in its own ``<export_root>/<run_id>`` directory.
        store_root = export_root / run_id
        store = CsvResultStore(store_root)
        store.write_run_metadata(manifest, run_id)
        store.write_records(records)
        store.write_estimates(estimates)
        store.write_metrics(metrics)
        store.write_leaderboards(boards)

        bundle = self._reporter.build(
            manifest,
            metrics,
            boards,
            report_spec=report_spec,
            run_id=run_id,
        )
        model_artefacts = _ml_model_artefacts(run_id, export_root)
        if model_artefacts:
            bundle = replace(bundle, artefacts=tuple(bundle.artefacts) + model_artefacts)

        store.write_plugin_provenance(self._plugin_provenance)
        store.write_artefacts(bundle.artefacts)
        store_path = store.finalise()
        bundle = replace(bundle, result_store_path=store_path)

        return BenchmarkRunOutput(
            run_id=run_id,
            records=tuple(records),
            estimates=tuple(estimates),
            metrics=metrics,
            leaderboards=boards,
            report_bundle=bundle,
            result_store_path=store_path,
            plugin_provenance=tuple(self._plugin_provenance),
        )

    def preview(
        self,
        manifest: BenchmarkManifest,
        *,
        manifest_path: Path | None = None,
        base_dir: Path | None = None,
    ) -> dict[str, object]:
        """Dry-run preview: materialise records and report grid size without fitting.

        Args:
            manifest: Manifest to preview.
            manifest_path: Path to the manifest file; its parent directory is
                used for relative paths when ``base_dir`` is ``None``.
            base_dir: Directory for relative path resolution; falls back to
                ``manifest_path.parent`` and then ``Path.cwd()``.

        Returns:
            Dictionary with ``mode``, ``n_records``, ``n_estimators``,
            ``n_fit_jobs``, ``n_clean``, ``n_contaminated``, and ``global_seed``.

        Raises:
            NotImplementedError: If the manifest mode is not supported.
        """
        self._require_supported_mode(manifest)
        global_seed = int(manifest.seed_spec.get("global_seed", 0))
        resolve_dir = self._resolve_dir(manifest_path, base_dir)
        records = self._materialise_records(manifest, global_seed, resolve_dir)

        n_records = len(records)
        n_estimators = len(manifest.estimator_specs)
        # Anything not explicitly annotated as contaminated counts as clean,
        # including records that carry no ``stress_role`` annotation at all.
        n_clean = sum(1 for r in records if r.annotations.get("stress_role") != "contaminated")
        n_contaminated = n_records - n_clean
        return {
            "mode": manifest.mode.value,
            "n_records": n_records,
            "n_estimators": n_estimators,
            "n_fit_jobs": n_records * n_estimators,
            "n_clean": n_clean,
            "n_contaminated": n_contaminated,
            "global_seed": global_seed,
        }

    def _generate_clean_record(
        self,
        manifest: BenchmarkManifest,
        global_seed: int,
        family: str,
        params: Any,
        rep: Any,
    ) -> SeriesRecord:
        """Generate one record for a single (family, params, rep) grid entry."""
        gen = self.generators.get(family)
        rid = _record_id(manifest.manifest_id, family, params, rep)
        seed = _stable_seed(global_seed, manifest.manifest_id, family, params, rep)
        return gen.generate(
            record_id=rid,
            params=params,
            seed=seed,
            manifest_id=manifest.manifest_id,
        )

    def _generate_records_ground_truth(
        self, manifest: BenchmarkManifest, global_seed: int
    ) -> list[SeriesRecord]:
        """Generate one record per entry of the expanded generator grid."""
        return [
            self._generate_clean_record(manifest, global_seed, family, params, rep)
            for family, params, rep in _expand_generator_grid(dict(manifest.source_spec))
        ]

    def _generate_records_stress_test(
        self, manifest: BenchmarkManifest, global_seed: int
    ) -> list[SeriesRecord]:
        """Generate each clean record followed by its contaminated variants.

        For every grid entry the clean record is emitted first, annotated with
        ``stress_role="clean"`` and a ``pair_group_id`` equal to its own record
        ID, then one record per contamination scenario applied to it.
        """
        triples = _expand_generator_grid(dict(manifest.source_spec))
        scenarios = _expand_contamination_grid(dict(manifest.contamination_spec))
        records: list[SeriesRecord] = []
        for family, params, rep in triples:
            rec = self._generate_clean_record(manifest, global_seed, family, params, rep)
            clean = replace(
                rec,
                annotations={
                    **dict(rec.annotations),
                    "stress_role": "clean",
                    "pair_group_id": rec.record_id,
                    "contamination_operator": "clean",
                    "contamination_family": "clean",
                    "contamination_severity": "clean",
                },
            )
            records.append(clean)
            for op_name, op_params in scenarios:
                op = self.contaminations.get(op_name)
                nid = _contam_record_id(manifest.manifest_id, clean.record_id, op_name, op_params)
                # Parameters are sorted so the seed input does not depend on
                # the ordering of the parameter mapping.
                cseed = _stable_seed(
                    global_seed,
                    manifest.manifest_id,
                    "contam",
                    clean.record_id,
                    op_name,
                    tuple(sorted(op_params.items())),
                )
                contaminated = op.apply(
                    clean,
                    params=op_params,
                    seed=cseed,
                    manifest_id=manifest.manifest_id,
                    new_record_id=nid,
                )
                records.append(contaminated)
        return records

`run(manifest, *, manifest_path=None, base_dir=None)` ¶

Execute the full benchmark loop for `manifest`.

Parameters:

Name	Type	Description	Default
`manifest`	`BenchmarkManifest`	A validated `BenchmarkManifest`.	required
`manifest_path`	`Path \| None`	Path to the manifest file, used to resolve relative paths (e.g. observational CSV files).	`None`
`base_dir`	`Path \| None`	Alternative directory for relative path resolution. If `None`, defaults to `manifest_path.parent` or `Path.cwd()`.	`None`

Returns:

Type	Description
`BenchmarkRunOutput`	A `BenchmarkRunOutput` containing the run ID, records, estimates, metrics, leaderboards, and report bundle.

Raises:

Type	Description
`NotImplementedError`	If the manifest mode is not supported.
`ValueError`	If the manifest requests unsupported generator parameters (e.g. ARFIMA with `p != 0` or `q != 0`).

Source code in src/lrdbench/runner.py

def run(
    self,
    manifest: BenchmarkManifest,
    *,
    manifest_path: Path | None = None,
    base_dir: Path | None = None,
) -> BenchmarkRunOutput:
    """Run every stage of the benchmark described by ``manifest``.

    Records are materialised for the manifest's mode, data-driven estimators
    are prepared, fit jobs are collected and run, metrics and leaderboards
    are computed, and the results are written to a ``CsvResultStore`` rooted
    at ``<export_root>/<run_id>``.

    Args:
        manifest: A validated ``BenchmarkManifest``.
        manifest_path: Location of the manifest file. Its parent directory
            is used to resolve relative paths (e.g. observational CSV
            files) when ``base_dir`` is not given.
        base_dir: Directory that takes precedence for relative path
            resolution. When ``None``, ``manifest_path.parent`` is used,
            or ``Path.cwd()`` if that is ``None`` as well.

    Returns:
        Output object of type ``BenchmarkRunOutput`` with the run ID,
        records, estimates, metrics, leaderboards and report bundle.

    Raises:
        NotImplementedError: If the manifest mode is not supported.
        ValueError: If the manifest requests unsupported generator
            parameters (e.g. ARFIMA with ``p != 0`` or ``q != 0``).
    """
    supported_modes = (
        BenchmarkMode.GROUND_TRUTH,
        BenchmarkMode.STRESS_TEST,
        BenchmarkMode.OBSERVATIONAL,
    )
    if manifest.mode not in supported_modes:
        raise NotImplementedError(
            f"mode {manifest.mode.value!r} is not implemented in this release "
            f"(supported: ground_truth, stress_test, observational)"
        )
    run_id = str(uuid.uuid4())
    global_seed = int(manifest.seed_spec.get("global_seed", 0))

    # Precedence for relative paths: explicit base_dir, manifest dir, cwd.
    if base_dir is not None:
        resolve_dir = base_dir
    elif manifest_path is not None:
        resolve_dir = manifest_path.parent
    else:
        resolve_dir = Path.cwd()

    # After the guard above the mode is one of exactly three values, so
    # "not observational" means one of the two synthetic modes.
    evaluator: BaseEvaluator
    if manifest.mode is BenchmarkMode.OBSERVATIONAL:
        records = load_observational_records(
            manifest, base_dir=resolve_dir, global_seed=global_seed
        )
        evaluator = ObservationalEvaluator(self.estimators)
    else:
        if manifest.mode is BenchmarkMode.GROUND_TRUTH:
            make_records = self._generate_records_ground_truth
        else:
            make_records = self._generate_records_stress_test
        records = make_records(manifest, global_seed)
        evaluator = self._gt_evaluator

    report_spec = manifest.report_spec
    if not report_spec:
        report_spec = ReportSpec(
            formats=("html", "csv"),
            leaderboards=tuple(manifest.leaderboard_specs),
        )
    # A report spec without leaderboards inherits the manifest's ones.
    if not report_spec.leaderboards and manifest.leaderboard_specs:
        inherited_boards = tuple(manifest.leaderboard_specs)
        report_spec = replace(report_spec, leaderboards=inherited_boards)
    export_root = Path(report_spec.export_root)

    # The preparation step returns the manifest that is used from here on.
    manifest = prepare_data_driven_estimators(
        manifest,
        generators=self.generators,
        contaminations=self.contaminations,
        run_id=run_id,
        artefact_root=export_root,
        global_seed=global_seed,
    )

    fit_jobs = collect_fit_jobs(records, manifest.estimator_specs)
    estimates = run_fit_jobs(
        fit_jobs,
        estimators=self.estimators,
        execution_spec=dict(manifest.execution_spec),
        cwd=resolve_dir,
    )

    metrics = evaluator.evaluate(manifest, records, estimates)
    boards = self._leaderboard.build(manifest, metrics)

    # Each run is stored in its own directory below the export root.
    store = CsvResultStore(export_root / run_id)
    store.write_run_metadata(manifest, run_id)
    store.write_records(records)
    store.write_estimates(estimates)
    store.write_metrics(metrics)
    store.write_leaderboards(boards)

    bundle = self._reporter.build(
        manifest,
        metrics,
        boards,
        report_spec=report_spec,
        run_id=run_id,
    )
    extra_artefacts = _ml_model_artefacts(run_id, export_root)
    if extra_artefacts:
        combined = tuple(bundle.artefacts) + extra_artefacts
        bundle = replace(bundle, artefacts=combined)

    store.write_plugin_provenance(self._plugin_provenance)
    store.write_artefacts(bundle.artefacts)
    store_path = store.finalise()
    bundle = replace(bundle, result_store_path=store_path)

    return BenchmarkRunOutput(
        run_id=run_id,
        records=tuple(records),
        estimates=tuple(estimates),
        metrics=metrics,
        leaderboards=boards,
        report_bundle=bundle,
        result_store_path=store_path,
        plugin_provenance=tuple(self._plugin_provenance),
    )

`preview(manifest, *, manifest_path=None, base_dir=None)` ¶

Dry-run preview: materialise records and report grid size without fitting.

Returns:

Type	Description
`dict[str, object]`	Dictionary with `mode`, `n_records`, `n_estimators`, `n_fit_jobs`, `n_clean`, `n_contaminated`, and `global_seed`.

Source code in src/lrdbench/runner.py

def preview(
    self,
    manifest: BenchmarkManifest,
    *,
    manifest_path: Path | None = None,
    base_dir: Path | None = None,
) -> dict[str, object]:
    """Materialise the records and summarise the job grid without fitting.

    Args:
        manifest: Manifest to preview.
        manifest_path: Location of the manifest file; its parent directory
            is used for relative paths when ``base_dir`` is ``None``.
        base_dir: Directory for relative path resolution; falls back to
            ``manifest_path.parent`` and then ``Path.cwd()``.

    Returns:
        Dictionary with ``mode``, ``n_records``, ``n_estimators``,
        ``n_fit_jobs``, ``n_clean``, ``n_contaminated``, and ``global_seed``.

    Raises:
        NotImplementedError: If the manifest mode is not supported.
    """
    supported_modes = (
        BenchmarkMode.GROUND_TRUTH,
        BenchmarkMode.STRESS_TEST,
        BenchmarkMode.OBSERVATIONAL,
    )
    if manifest.mode not in supported_modes:
        raise NotImplementedError(
            f"mode {manifest.mode.value!r} is not implemented in this release "
            f"(supported: ground_truth, stress_test, observational)"
        )
    global_seed = int(manifest.seed_spec.get("global_seed", 0))

    # Precedence for relative paths: explicit base_dir, manifest dir, cwd.
    if base_dir is not None:
        resolve_dir = base_dir
    elif manifest_path is not None:
        resolve_dir = manifest_path.parent
    else:
        resolve_dir = Path.cwd()

    if manifest.mode is BenchmarkMode.GROUND_TRUTH:
        records = self._generate_records_ground_truth(manifest, global_seed)
    elif manifest.mode is BenchmarkMode.STRESS_TEST:
        records = self._generate_records_stress_test(manifest, global_seed)
    else:
        records = load_observational_records(
            manifest, base_dir=resolve_dir, global_seed=global_seed
        )

    total_records = len(records)
    total_estimators = len(manifest.estimator_specs)
    # Anything not explicitly annotated as contaminated counts as clean,
    # including records that carry no ``stress_role`` annotation at all.
    clean_records = [
        rec for rec in records if rec.annotations.get("stress_role") != "contaminated"
    ]
    clean_count = len(clean_records)

    summary: dict[str, object] = {}
    summary["mode"] = manifest.mode.value
    summary["n_records"] = total_records
    summary["n_estimators"] = total_estimators
    summary["n_fit_jobs"] = total_records * total_estimators
    summary["n_clean"] = clean_count
    summary["n_contaminated"] = total_records - clean_count
    summary["global_seed"] = global_seed
    return summary

`__init__(*, generators=None, estimators=None, contaminations=None, discover_plugins=True)` ¶

Source code in src/lrdbench/runner.py

def __init__(
    self,
    *,
    generators: GeneratorRegistry | None = None,
    estimators: EstimatorRegistry | None = None,
    contaminations: ContaminationRegistry | None = None,
    discover_plugins: bool = True,
) -> None:
    """Wire up the registries, evaluator, leaderboard builder and reporter.

    Args:
        generators: Generator registry to use. The default registry is
            built when this is ``None``.
        estimators: Estimator registry to use. When given, no plugin
            discovery takes place.
        contaminations: Contamination registry to use. The default
            registry is built when this is ``None``.
        discover_plugins: Only consulted when ``estimators`` is ``None``.
            If true, the estimator registry is built through the plugin
            loader and the loader's per-plugin results are kept as
            provenance records; otherwise the default estimator registry
            is used.
    """
    # Test against ``None`` explicitly (as already done for ``estimators``)
    # so that a caller-supplied registry that happens to be falsy, e.g. an
    # empty one, is respected instead of being swapped for the defaults.
    self.generators = (
        generators if generators is not None else build_default_generator_registry()
    )
    self.contaminations = (
        contaminations
        if contaminations is not None
        else build_default_contamination_registry()
    )
    self._plugin_provenance: list[PluginProvenanceRecord] = []
    if estimators is not None:
        self.estimators = estimators
    elif discover_plugins:
        # Imported lazily, only on the code path that needs the loader.
        from lrdbench.plugin_loader import build_estimator_registry_with_plugins

        reg, results = build_estimator_registry_with_plugins()
        self.estimators = reg
        self._plugin_provenance = [
            PluginProvenanceRecord(
                plugin_name=r.plugin_name,
                module_name_or_path=r.module_name_or_path,
                entry_point_name=r.entry_point_name,
                version=r.version,
                status=r.status,
                failure_reason=r.failure_reason,
                source_hash=r.source_hash,
            )
            for r in results
        ]
    else:
        self.estimators = build_default_estimator_registry()
    self._gt_evaluator = GroundTruthEvaluator()
    self._leaderboard = WeightedRankLeaderboardBuilder()
    self._reporter = SimpleHtmlCsvReporter()

`lrdbench.runner.run_manifest_path(path, *, discover_plugins=True)` ¶

Convenience entry-point: load a manifest from disk and run it.

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Filesystem path to a YAML manifest.	required
`discover_plugins`	`bool`	Whether to auto-discover third-party estimator plugins via environment variables.	`True`

Returns:

Type	Description
`BenchmarkRunOutput`	The completed benchmark run output.

Source code in src/lrdbench/runner.py

def run_manifest_path(path: str | Path, *, discover_plugins: bool = True) -> BenchmarkRunOutput:
    """Load the manifest stored at ``path`` and execute it with a fresh runner.

    Args:
        path: Filesystem location of the manifest file.
        discover_plugins: Forwarded to ``BenchmarkRunner``; controls whether
            the runner auto-discovers estimator plugins.

    Returns:
        The output of ``BenchmarkRunner.run`` for the loaded manifest.
    """
    manifest_file = Path(path)
    # Load first so that a broken manifest fails before a runner is built.
    loaded = load_manifest(manifest_file)
    runner = BenchmarkRunner(discover_plugins=discover_plugins)
    return runner.run(loaded, manifest_path=manifest_file)

`lrdbench.runner.run_manifest_mapping(data, *, base_dir=None, discover_plugins=True)` ¶

Convenience entry-point: run a benchmark from an in-memory dictionary.

This is useful for programmatic benchmark construction or testing.

Parameters:

Name	Type	Description	Default
`data`	`dict[str, Any]`	Dictionary matching the manifest schema.	required
`base_dir`	`Path \| None`	Directory used to resolve relative paths (e.g. CSV files).	`None`
`discover_plugins`	`bool`	Whether to auto-discover third-party estimator plugins via environment variables.	`True`

Returns:

Type	Description
`BenchmarkRunOutput`	The completed benchmark run output.

Source code in src/lrdbench/runner.py

def run_manifest_mapping(
    data: dict[str, Any], *, base_dir: Path | None = None, discover_plugins: bool = True
) -> BenchmarkRunOutput:
    """Build a manifest from an in-memory dictionary and execute it.

    Handy when a benchmark is assembled programmatically or inside tests.

    Args:
        data: Dictionary matching the manifest schema.
        base_dir: Directory used to resolve relative paths (e.g. CSV files).
            The current working directory is used when this is falsy.
        discover_plugins: Forwarded to ``BenchmarkRunner``; controls whether
            the runner auto-discovers estimator plugins.

    Returns:
        The output of ``BenchmarkRunner.run`` for the constructed manifest.
    """
    built = manifest_from_mapping(data)
    runner = BenchmarkRunner(discover_plugins=discover_plugins)
    resolve_from = base_dir if base_dir else Path.cwd()
    return runner.run(built, base_dir=resolve_from)

Manifest¶

`lrdbench.manifest.load_manifest(path)` ¶

Source code in src/lrdbench/manifest.py

def load_manifest(path: str | Path) -> BenchmarkManifest:
    """Read the manifest file at ``path`` and turn it into a ``BenchmarkManifest``.

    The file is read with ``load_manifest_yaml`` and the resulting mapping is
    passed to ``manifest_from_mapping``, which builds and validates the manifest.
    """
    raw_mapping = load_manifest_yaml(path)
    return manifest_from_mapping(raw_mapping)

`lrdbench.manifest.manifest_from_mapping(data)` ¶

Source code in src/lrdbench/manifest.py

def manifest_from_mapping(data: Mapping[str, Any]) -> BenchmarkManifest:
    """Build and validate a ``BenchmarkManifest`` from a plain mapping.

    Required keys are ``mode``, ``estimators``, ``metrics``, ``manifest_id``,
    ``name`` and ``source``. Every other section is optional and becomes an
    empty dict (or ``None`` for ``report``) when missing or falsy.

    Args:
        data: Mapping with the manifest contents.

    Returns:
        The manifest, after ``validate_manifest`` has accepted it.

    Raises:
        ManifestValidationError: If the metric entries are rejected with a
            ``TypeError`` or ``ValueError``.
        KeyError: If a required key is missing from ``data``.
    """

    def optional_section(key: str) -> dict:
        # Missing and falsy sections both collapse to a fresh empty dict.
        return dict(data.get(key) or {})

    mode = BenchmarkMode(str(data["mode"]))
    estimators = _estimator_specs_from_manifest_entries(list(data["estimators"]))
    metric_entries: list[Any] = list(data["metrics"])
    try:
        metric_specs = metric_specs_from_manifest_entries(metric_entries)
    except (TypeError, ValueError) as exc:
        raise ManifestValidationError(str(exc)) from exc

    leaderboard_specs = tuple(
        leaderboard_spec_from_mapping(entry) for entry in (data.get("leaderboards") or [])
    )

    report_raw = data.get("report")
    report_spec = report_spec_from_mapping(report_raw) if report_raw is not None else None

    manifest = BenchmarkManifest(
        manifest_id=str(data["manifest_id"]),
        name=str(data["name"]),
        mode=mode,
        source_spec=dict(data["source"]),
        contamination_spec=optional_section("contamination"),
        segmentation_spec=optional_section("segmentation"),
        preprocessing_spec=optional_section("preprocessing"),
        estimator_specs=estimators,
        metric_specs=metric_specs,
        leaderboard_specs=leaderboard_specs,
        report_spec=report_spec,
        execution_spec=optional_section("execution"),
        uncertainty_spec=optional_section("uncertainty"),
        ml_training_spec=optional_section("ml_training"),
        seed_spec=optional_section("seeds"),
        raw_yaml=dict(data),
    )
    # Unknown keys are rejected unless the manifest opts out explicitly.
    validation_options = data.get("validation") or {}
    strict = bool(validation_options.get("reject_unknown_keys", True))
    validate_manifest(manifest, strict_unknown_keys=strict)
    return manifest

Estimator interface¶

`lrdbench.interfaces.BaseEstimator` ¶

Bases: ABC

Abstract base for long-range dependence estimators.

All estimators enrolled in a benchmark must implement this interface. The `fit` method receives a `SeriesRecord` and must return an `EstimateResult` containing at minimum a point estimate and a validity flag.

Source code in src/lrdbench/interfaces.py

class BaseEstimator(ABC):
    """Interface that every long-range dependence estimator implements.

    Subclasses provide two things: a ``spec`` property describing the
    estimator, and a ``fit`` method that takes a ``SeriesRecord`` and hands
    back an ``EstimateResult`` carrying at least a point estimate and a
    validity flag.
    """

    @property
    @abstractmethod
    def spec(self) -> Any:
        """Specification of this estimator, normally an ``EstimatorSpec``."""
        raise NotImplementedError()

    @abstractmethod
    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Estimate from a single record.

        Args:
            record: Time-series record to analyse.

        Returns:
            Result of type ``EstimateResult`` with ``point`` and ``valid``
            set, plus optional confidence intervals, diagnostics, and
            runtime.
        """
        raise NotImplementedError()

`spec` `abstractmethod` `property` ¶

The estimator's specification (normally an `EstimatorSpec`).

`fit(record)` `abstractmethod` ¶

Compute an estimate for the given record.

Parameters:

Name	Type	Description	Default
`record`	`SeriesRecord`	The time-series record to analyse.	required

Returns:

Type	Description
`EstimateResult`	An `EstimateResult` with `point`, `valid`, and optional confidence intervals, diagnostics, and runtime.

Source code in src/lrdbench/interfaces.py

@abstractmethod
def fit(self, record: SeriesRecord) -> EstimateResult:
    """Estimate from a single record.

    Args:
        record: Time-series record to analyse.

    Returns:
        Result of type ``EstimateResult`` with ``point`` and ``valid`` set,
        plus optional confidence intervals, diagnostics, and runtime.
    """
    raise NotImplementedError()

`lrdbench.interfaces.BaseGenerator` ¶

Bases: ABC

Abstract base for synthetic time-series generators.

Each generator produces a `SeriesRecord` from a parameter dictionary and an optional seed. The `family` property is the registry key used in manifest `source` blocks (e.g. `fGn`, `ARFIMA`).

Source code in src/lrdbench/interfaces.py

class BaseGenerator(ABC):
    """Interface that every synthetic time-series generator implements.

    A generator turns a parameter mapping and an optional seed into a
    ``SeriesRecord``. Its ``family`` property is the key under which it is
    registered and referenced from manifest ``source`` blocks (for example
    ``fGn`` or ``ARFIMA``).
    """

    @property
    @abstractmethod
    def family(self) -> str:
        """Key under which this generator is registered (e.g. ``'fGn'``, ``'ARFIMA'``)."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def version(self) -> str:
        """Version string, readable by humans, recorded for provenance."""
        raise NotImplementedError()

    @abstractmethod
    def generate(
        self,
        *,
        record_id: str,
        params: Mapping[str, Any],
        seed: int | None,
        manifest_id: str | None,
    ) -> SeriesRecord:
        """Produce one synthetic record.

        Args:
            record_id: Stable identifier to give the record.
            params: Parameters specific to the generator
                (e.g. ``{'H': 0.75, 'n': 1024}``).
            seed: RNG seed for reproducibility, or ``None``.
            manifest_id: Identifier of the manifest, embedded in provenance.

        Returns:
            Fully populated ``SeriesRecord`` including truth and provenance.
        """
        raise NotImplementedError()

`family` `abstractmethod` `property` ¶

Registry key for this generator (e.g. 'fGn', 'ARFIMA').

`version` `abstractmethod` `property` ¶

Human-readable version string for provenance tracking.

`generate(*, record_id, params, seed, manifest_id)` `abstractmethod` ¶

Generate a single synthetic record.

Parameters:

Name	Type	Description	Default
`record_id`	`str`	Stable identifier for the record.	required
`params`	`Mapping[str, Any]`	Generator-specific parameters (e.g. `{'H': 0.75, 'n': 1024}`).	required
`seed`	`int \| None`	Optional RNG seed for reproducibility.	required
`manifest_id`	`str \| None`	Manifest identifier to embed in provenance.	required

Returns:

Type	Description
`SeriesRecord`	A fully populated `SeriesRecord` including truth and provenance.

Source code in src/lrdbench/interfaces.py

@abstractmethod
def generate(
    self,
    *,
    record_id: str,
    params: Mapping[str, Any],
    seed: int | None,
    manifest_id: str | None,
) -> SeriesRecord:
    """Produce one synthetic record.

    Args:
        record_id: Stable identifier to give the record.
        params: Parameters specific to the generator
            (e.g. ``{'H': 0.75, 'n': 1024}``).
        seed: RNG seed for reproducibility, or ``None``.
        manifest_id: Identifier of the manifest, embedded in provenance.

    Returns:
        Fully populated ``SeriesRecord`` including truth and provenance.
    """
    raise NotImplementedError()

Bundled temporal estimators¶

`lrdbench.estimators.temporal.RSEstimator` ¶

Bases: BaseEstimator

Rescaled-range Hurst proxy with optional block-bootstrap CIs.

Parameters read from `params`:

- `n_bootstrap` (int, default 200) – number of bootstrap replicates.
- `bootstrap_block_len` (int, default `max(4, n//10)`) – block length.
- `ci_levels` (list, default `[0.95]`) – nominal coverage levels.
- `min_scale` (int, default 8) – minimum R/S subseries length.
- `max_scale` (int, optional) – maximum R/S subseries length.
- `scale_ratio` (float, default 1.5) – geometric scale spacing.
- `use_anis_lloyd_correction` (bool, default `False`) – if `True`, divide each scale's average R/S value by the Anis-Lloyd white-noise expectation before fitting the slope, then add the 0.5 white-noise baseline back to the fitted slope.

Source code in src/lrdbench/estimators/temporal.py

class RSEstimator(BaseEstimator):
    """Rescaled-range Hurst proxy with optional block-bootstrap CIs.

    Parameters read from ``params``:

    - ``n_bootstrap`` (int, default 200) – number of bootstrap replicates.
    - ``bootstrap_block_len`` (int, default ``max(4, n//10)``) – block length.
    - ``ci_levels`` (list, default ``[0.95]``) – nominal coverage levels.
    - ``min_scale`` (int, default 8) – minimum R/S subseries length.
    - ``max_scale`` (int, optional) – maximum R/S subseries length.
    - ``scale_ratio`` (float, default 1.5) – geometric scale spacing.
    - ``use_anis_lloyd_correction`` (bool, default ``False``) – if ``True``,
      divide each scale's average R/S value by the Anis-Lloyd white-noise
      expectation before fitting the slope, then add the 0.5 white-noise
      baseline back to the fitted slope.
    """

    VERSION = "0.3.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Return the specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Estimate the R/S Hurst proxy for ``record`` and attach bootstrap CIs.

        Failures are reported through the returned result instead of being
        raised: a ``None`` point estimate yields ``valid=False`` with
        ``failure_reason="insufficient_signal_for_rs"``, and any exception
        (including a malformed parameter value) yields ``valid=False`` with
        an ``exception:<Type>:<message>`` reason.

        Args:
            record: Series to analyse. ``record.values`` is the data;
                ``record.provenance.seed``, when present, seeds the
                bootstrap RNG (otherwise seed ``0`` is used).

        Returns:
            The estimate. On success ``runtime_seconds`` is measured up to
            the point estimate only, i.e. it excludes the bootstrap; on an
            exception it covers the time elapsed until the failure.
        """
        t0 = time.perf_counter()
        try:
            # Parse *all* options inside the guard so that a malformed
            # bootstrap option is reported as an invalid result, exactly like
            # a malformed ``min_scale`` or ``scale_ratio``.
            params = dict(self._spec.parameter_schema)
            n_boot = int(params.get("n_bootstrap", 200))
            # A block length of 0 (or an absent key) selects the size-based default.
            block_len = int(params.get("bootstrap_block_len", 0)) or max(
                4, record.values.size // 10
            )
            levels_raw = params.get("ci_levels")
            ci_levels = (
                tuple(float(x) for x in levels_raw) if levels_raw is not None else (0.95,)
            )
            seed = 0
            if record.provenance is not None and record.provenance.seed is not None:
                seed = int(record.provenance.seed)
            # Mask to 32 bits so negative or oversized seeds remain acceptable.
            rng = np.random.default_rng(seed & (2**32 - 1))

            use_corr = bool(params.get("use_anis_lloyd_correction", False))
            min_scale = int(params.get("min_scale", 8))
            max_scale = int(params["max_scale"]) if params.get("max_scale") is not None else None
            scale_ratio = float(params.get("scale_ratio", 1.5))

            def _rs_stat(z: np.ndarray) -> float | None:
                return _rs_hurst_proxy(
                    z,
                    use_correction=use_corr,
                    min_scale=min_scale,
                    max_scale=max_scale,
                    scale_ratio=scale_ratio,
                )

            h = _rs_stat(record.values)
            dt = time.perf_counter() - t0
            if h is None:
                return EstimateResult(
                    record_id=record.record_id,
                    estimator_name=self._spec.name,
                    point=None,
                    runtime_seconds=dt,
                    valid=False,
                    failure_reason="insufficient_signal_for_rs",
                    estimator_version=self.VERSION,
                )

            samples = bootstrap_statistic_distribution(
                record.values,
                rng,
                _rs_stat,
                n_boot=n_boot,
                block_len=block_len,
            )
            # Too few usable replicates: report no interval / no spread.
            cis = symmetric_percentile_cis(samples, ci_levels) if samples.size >= 5 else ()
            bstd = float(np.std(samples)) if samples.size >= 2 else None

            # The headline interval is the 95% one when it was requested,
            # otherwise the last requested level.
            ci_low = ci_high = None
            for a, lo, hi in cis:
                if abs(a - 0.95) < 1e-9:
                    ci_low, ci_high = lo, hi
                    break
            if cis and (ci_low is None):
                ci_low, ci_high = cis[-1][1], cis[-1][2]

            diag: dict[str, object] = {
                "ci_method": "circular_block_bootstrap",
                "n_bootstrap": n_boot,
                "bootstrap_block_len": block_len,
                "bootstrap_replicates_used": int(samples.size),
                "bootstrap_point_std": bstd,
                "min_scale": min_scale,
                "max_scale": max_scale,
                "scale_ratio": scale_ratio,
                "use_anis_lloyd_correction": use_corr,
            }
            return EstimateResult(
                record_id=record.record_id,
                estimator_name=self._spec.name,
                point=h,
                ci_low=ci_low,
                ci_high=ci_high,
                runtime_seconds=dt,
                valid=True,
                estimator_version=self.VERSION,
                diagnostics=diag,
                bootstrap_cis=cis,
            )
        except Exception as exc:  # noqa: BLE001
            dt = time.perf_counter() - t0
            return EstimateResult(
                record_id=record.record_id,
                estimator_name=self._spec.name,
                point=None,
                runtime_seconds=dt,
                valid=False,
                failure_reason=f"exception:{type(exc).__name__}:{exc}",
                estimator_version=self.VERSION,
            )

`lrdbench.estimators.temporal.DFAEstimator` ¶

Bases: BaseEstimator

Detrended fluctuation analysis (DFA) scaling exponent as a Hurst proxy.

Source code in src/lrdbench/estimators/temporal.py

class DFAEstimator(BaseEstimator):
    """Detrended fluctuation analysis (DFA) scaling exponent as a Hurst proxy."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit DFA to ``record`` via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``detrend_order``
        (default 1), ``min_scale`` (default 16) and the optional ``max_scale``.
        """
        options = dict(self._spec.parameter_schema)

        def _dfa_statistic(series: np.ndarray) -> float | None:
            # Options are converted on every call, in this order.
            order = int(options.get("detrend_order", 1))
            smallest = int(options.get("min_scale", 16))
            raw_largest = options.get("max_scale")
            largest = None if raw_largest is None else int(raw_largest)
            return _dfa_hurst(
                series,
                detrend_order=order,
                min_scale=smallest,
                max_scale=largest,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_dfa_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_dfa",
            seed_offset=0,
        )

`lrdbench.estimators.temporal.DMAEstimator` ¶

Bases: BaseEstimator

Detrended moving-average fluctuation scaling (Hurst proxy).

Source code in src/lrdbench/estimators/temporal.py

class DMAEstimator(BaseEstimator):
    """Detrended moving-average fluctuation scaling (Hurst proxy)."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit DMA to ``record`` via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``min_scale``
        (default 8) and the optional ``max_scale``.
        """
        settings = dict(self._spec.parameter_schema)

        def _dma_statistic(series: np.ndarray) -> float | None:
            lower = int(settings.get("min_scale", 8))
            raw_upper = settings.get("max_scale")
            if raw_upper is None:
                upper = None
            else:
                upper = int(raw_upper)
            return _dma_hurst(series, min_scale=lower, max_scale=upper)

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_dma_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_dma",
            seed_offset=17,
        )

`lrdbench.estimators.temporal.AbsoluteMomentEstimator` ¶

Bases: BaseEstimator

Absolute first moment of aggregated series as a Hurst proxy.

Source code in src/lrdbench/estimators/temporal.py

class AbsoluteMomentEstimator(BaseEstimator):
    """Absolute first moment of aggregated series as a Hurst proxy."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the absolute-moment estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``min_scale``
        (default 2), the optional ``max_scale`` and ``scale_ratio``
        (default 1.5).
        """
        cfg = dict(self._spec.parameter_schema)

        def _abs_moment_statistic(series: np.ndarray) -> float | None:
            # Conversion order matches the keyword order of the helper call.
            lower = int(cfg.get("min_scale", 2))
            raw_upper = cfg.get("max_scale")
            upper = None if raw_upper is None else int(raw_upper)
            ratio = float(cfg.get("scale_ratio", 1.5))
            return _absolute_moment_hurst(
                series,
                min_scale=lower,
                max_scale=upper,
                scale_ratio=ratio,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_abs_moment_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_absolute_moment",
            seed_offset=29,
        )

`lrdbench.estimators.temporal.VarianceEstimator` ¶

Bases: BaseEstimator

Variance of aggregated series as a Hurst proxy.

Source code in src/lrdbench/estimators/temporal.py

class VarianceEstimator(BaseEstimator):
    """Variance of aggregated series as a Hurst proxy."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the aggregated-variance estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``min_scale``
        (default 2), the optional ``max_scale`` and ``scale_ratio``
        (default 1.5).
        """
        schema = dict(self._spec.parameter_schema)

        def _variance_statistic(series: np.ndarray) -> float | None:
            scale_floor = int(schema.get("min_scale", 2))
            ceiling_raw = schema.get("max_scale")
            scale_ceiling = int(ceiling_raw) if ceiling_raw is not None else None
            spacing = float(schema.get("scale_ratio", 1.5))
            return _variance_aggregation_hurst(
                series,
                min_scale=scale_floor,
                max_scale=scale_ceiling,
                scale_ratio=spacing,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_variance_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_variance",
            seed_offset=31,
        )

`lrdbench.estimators.temporal.VarianceResidualEstimator` ¶

Bases: BaseEstimator

Variance of block residuals as a Hurst proxy.

Source code in src/lrdbench/estimators/temporal.py

class VarianceResidualEstimator(BaseEstimator):
    """Variance of block residuals as a Hurst proxy."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the variance-of-residuals estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``min_scale``
        (default 8), the optional ``max_scale``, ``scale_ratio`` (default 1.5)
        and ``detrend_order`` (default 1).
        """
        opts = dict(self._spec.parameter_schema)

        def _residual_statistic(series: np.ndarray) -> float | None:
            # Conversion order matches the keyword order of the helper call.
            lower = int(opts.get("min_scale", 8))
            raw_upper = opts.get("max_scale")
            upper = None if raw_upper is None else int(raw_upper)
            ratio = float(opts.get("scale_ratio", 1.5))
            order = int(opts.get("detrend_order", 1))
            return _variance_residual_hurst(
                series,
                min_scale=lower,
                max_scale=upper,
                scale_ratio=ratio,
                detrend_order=order,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_residual_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_variance_residual",
            seed_offset=37,
        )

Bundled spectral estimators¶

`lrdbench.estimators.spectral.GPHEstimator` ¶

Bases: BaseEstimator

Geweke–Porter–Hudak log-periodogram regression for long-memory parameter d.

Source code in src/lrdbench/estimators/spectral.py

class GPHEstimator(BaseEstimator):
    """Geweke–Porter–Hudak log-periodogram regression for long-memory parameter d.

    Parameters read from ``params``:

    - ``n_bootstrap`` (int, default 200) – number of bootstrap replicates.
    - ``bootstrap_block_len`` (int, default ``max(4, n//10)``) – block length.
    - ``ci_levels`` (list, default ``[0.95]``) – nominal coverage levels.
    - ``m`` (int, optional) – forwarded to the log-periodogram regression.
    - ``taper`` (str, default ``"none"``) – forwarded to the regression; an
      empty string is treated as ``"none"``.
    """

    VERSION = "0.3.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Return the specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Estimate ``d`` for ``record`` and attach block-bootstrap CIs.

        Failures are reported through the returned result instead of being
        raised: a ``None`` point estimate yields ``valid=False`` with
        ``failure_reason="insufficient_signal_for_gph"``, and any exception
        (including a malformed parameter value) yields ``valid=False`` with
        an ``exception:<Type>:<message>`` reason.

        Args:
            record: Series to analyse. ``record.values`` is the data;
                ``record.provenance.seed``, when present, seeds the
                bootstrap RNG after adding 7919 (otherwise seed ``0`` is used).

        Returns:
            The estimate. On success ``runtime_seconds`` is measured up to
            the point estimate only, i.e. it excludes the bootstrap; on an
            exception it covers the time elapsed until the failure.
        """
        t0 = time.perf_counter()
        try:
            # Parse *all* options inside the guard so that a malformed
            # bootstrap option is reported as an invalid result, exactly like
            # a malformed ``m``.
            params = dict(self._spec.parameter_schema)
            n_boot = int(params.get("n_bootstrap", 200))
            # A block length of 0 (or an absent key) selects the size-based default.
            block_len = int(params.get("bootstrap_block_len", 0)) or max(
                4, record.values.size // 10
            )
            levels_raw = params.get("ci_levels")
            ci_levels = (
                tuple(float(x) for x in levels_raw) if levels_raw is not None else (0.95,)
            )
            seed = 0
            if record.provenance is not None and record.provenance.seed is not None:
                seed = int(record.provenance.seed) + 7919
            # Mask to 32 bits so negative or oversized seeds remain acceptable.
            rng = np.random.default_rng(seed & (2**32 - 1))

            taper = str(params.get("taper", "none")) or "none"
            m = int(params["m"]) if params.get("m") is not None else None

            def _gph_stat(z: np.ndarray) -> float | None:
                return _log_periodogram_regression_d(z, m=m, taper=taper)

            d = _gph_stat(record.values)
            dt = time.perf_counter() - t0
            if d is None:
                return EstimateResult(
                    record_id=record.record_id,
                    estimator_name=self._spec.name,
                    point=None,
                    runtime_seconds=dt,
                    valid=False,
                    failure_reason="insufficient_signal_for_gph",
                    estimator_version=self.VERSION,
                )

            samples = bootstrap_statistic_distribution(
                record.values,
                rng,
                _gph_stat,
                n_boot=n_boot,
                block_len=block_len,
            )
            # Too few usable replicates: report no interval / no spread.
            cis = symmetric_percentile_cis(samples, ci_levels) if samples.size >= 5 else ()
            bstd = float(np.std(samples)) if samples.size >= 2 else None

            # The headline interval is the 95% one when it was requested,
            # otherwise the last requested level.
            ci_low = ci_high = None
            for a, lo, hi in cis:
                if abs(a - 0.95) < 1e-9:
                    ci_low, ci_high = lo, hi
                    break
            if cis and ci_low is None:
                ci_low, ci_high = cis[-1][1], cis[-1][2]

            diag: dict[str, object] = {
                "ci_method": "circular_block_bootstrap",
                "n_bootstrap": n_boot,
                "bootstrap_block_len": block_len,
                "bootstrap_replicates_used": int(samples.size),
                "bootstrap_point_std": bstd,
                "m": m,
                "taper": taper,
            }
            return EstimateResult(
                record_id=record.record_id,
                estimator_name=self._spec.name,
                point=d,
                ci_low=ci_low,
                ci_high=ci_high,
                runtime_seconds=dt,
                valid=True,
                estimator_version=self.VERSION,
                diagnostics=diag,
                bootstrap_cis=cis,
            )
        except Exception as exc:  # noqa: BLE001
            dt = time.perf_counter() - t0
            return EstimateResult(
                record_id=record.record_id,
                estimator_name=self._spec.name,
                point=None,
                runtime_seconds=dt,
                valid=False,
                failure_reason=f"exception:{type(exc).__name__}:{exc}",
                estimator_version=self.VERSION,
            )

`lrdbench.estimators.spectral.PeriodogramRegressionEstimator` ¶

Bases: BaseEstimator

Log-periodogram regression (memory parameter d, GPH-type).

Source code in src/lrdbench/estimators/spectral.py

class PeriodogramRegressionEstimator(BaseEstimator):
    """Log-periodogram regression (memory parameter d, GPH-type)."""

    VERSION = "0.2.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the log-periodogram regression via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``taper`` (default
        ``"none"``; an empty string also maps to ``"none"``) and the
        optional ``m``.
        """
        options = dict(self._spec.parameter_schema)
        # ``taper`` is resolved once up front; ``m`` is converted per call.
        taper_name = str(options.get("taper", "none")) or "none"

        def _periodogram_statistic(series: np.ndarray) -> float | None:
            raw_m = options.get("m")
            bandwidth = None if raw_m is None else int(raw_m)
            return _log_periodogram_regression_d(series, m=bandwidth, taper=taper_name)

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_periodogram_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_periodogram",
            seed_offset=101,
        )

`lrdbench.estimators.spectral.WhittleMLEEstimator` ¶

Bases: BaseEstimator

Gaussian Whittle likelihood for ARFIMA(0,d,0) spectral density.

Source code in src/lrdbench/estimators/spectral.py

class WhittleMLEEstimator(BaseEstimator):
    """Gaussian Whittle likelihood for ARFIMA(0,d,0) spectral density."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the Whittle estimator via ``fit_with_block_bootstrap``.

        The only option read from the spec's parameter schema is the
        optional ``m``.
        """
        cfg = dict(self._spec.parameter_schema)

        def _whittle_statistic(series: np.ndarray) -> float | None:
            raw_m = cfg.get("m")
            if raw_m is None:
                return _whittle_arfima_d(series, m=None)
            return _whittle_arfima_d(series, m=int(raw_m))

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_whittle_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_whittle",
            seed_offset=203,
        )

`lrdbench.estimators.spectral.ModifiedLocalWhittleEstimator` ¶

Bases: BaseEstimator

Modified (Gaussian) local Whittle estimator of long-memory parameter d.

Source code in src/lrdbench/estimators/spectral.py

class ModifiedLocalWhittleEstimator(BaseEstimator):
    """Modified (Gaussian) local Whittle estimator of long-memory parameter d."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the modified local Whittle estimator via ``fit_with_block_bootstrap``.

        The only option read from the spec's parameter schema is the
        optional ``m``.
        """
        settings = dict(self._spec.parameter_schema)

        def _mlw_statistic(series: np.ndarray) -> float | None:
            raw_m = settings.get("m")
            bandwidth = int(raw_m) if raw_m is not None else None
            return _modified_local_whittle_d(series, m=bandwidth)

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_mlw_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_mlw",
            seed_offset=307,
        )

Bundled geometric estimators¶

`lrdbench.estimators.geometric.HiguchiEstimator` ¶

Bases: BaseEstimator

Higuchi fractal length curve; Hurst proxy H ≈ 2 − D for the time-series graph.

Source code in src/lrdbench/estimators/geometric.py

class HiguchiEstimator(BaseEstimator):
    """Higuchi fractal length curve; Hurst proxy H ≈ 2 − D for the time-series graph."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the Higuchi estimator via ``fit_with_block_bootstrap``.

        The only option read from the spec's parameter schema is the
        optional ``k_max``.
        """
        options = dict(self._spec.parameter_schema)

        def _higuchi_statistic(series: np.ndarray) -> float | None:
            raw_k = options.get("k_max")
            largest_k = None if raw_k is None else int(raw_k)
            return _higuchi_hurst_proxy(series, k_max=largest_k)

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_higuchi_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_higuchi",
            seed_offset=919,
        )

`lrdbench.estimators.geometric.GHEEstimator` ¶

Bases: BaseEstimator

Geometric Hurst estimator: multiscale variance scaling of lagged increments.

Parameters read from params:

n_scales (int, default 16) – number of geometric lags.
h_min (int, default 1) – minimum lag in samples.
flat_slope_tol (float, default 0.08) – pragmatic threshold below which the log-log slope is treated as flat and the estimate is clamped to 0.5. This is an empirical finite-sample guard, not a theoretically derived bound; set to 0.0 to disable it.

Source code in src/lrdbench/estimators/geometric.py

class GHEEstimator(BaseEstimator):
    """Geometric Hurst estimator: multiscale variance scaling of lagged increments.

    Parameters read from ``params``:

    - ``n_scales`` (int, default 16) – number of geometric lags.
    - ``h_min`` (int, default 1) – minimum lag in samples.
    - ``flat_slope_tol`` (float, default 0.08) – pragmatic threshold below which
      the log-log slope is treated as flat and the estimate is clamped to ``0.5``.
      This is an empirical finite-sample guard, not a theoretically derived
      bound; set to ``0.0`` to disable it.
    """

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the geometric Hurst estimator via ``fit_with_block_bootstrap``."""
        cfg = dict(self._spec.parameter_schema)

        def _ghe_statistic(series: np.ndarray) -> float | None:
            # Conversion order matches the keyword order of the helper call.
            lag_count = int(cfg.get("n_scales", 16))
            first_lag = int(cfg.get("h_min", 1))
            flat_tol = float(cfg.get("flat_slope_tol", 0.08))
            return _ghe_hurst(
                series,
                n_scales=lag_count,
                h_min=first_lag,
                flat_slope_tol=flat_tol,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_ghe_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_ghe",
            seed_offset=1021,
        )

Bundled wavelet estimators¶

`lrdbench.estimators.wavelet.WaveletOLSEstimator` ¶

Bases: BaseEstimator

Plain OLS on log2 wavelet detail variances vs scale index (log-scale regression).

Source code in src/lrdbench/estimators/wavelet.py

class WaveletOLSEstimator(BaseEstimator):
    """Plain OLS on log2 wavelet detail variances vs scale index (log-scale regression)."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the wavelet OLS estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``wavelet`` (default
        ``"db4"``), ``j_drop_high`` (default 1) and ``j_drop_low`` (default 1).
        """
        options = dict(self._spec.parameter_schema)
        wavelet_name = str(options.get("wavelet", "db4"))
        drop_high = int(options.get("j_drop_high", 1))
        drop_low = int(options.get("j_drop_low", 1))

        def _ols_statistic(series: np.ndarray) -> float | None:
            scales = _collect_detail_scales(
                series,
                wavelet=wavelet_name,
                j_drop_high=drop_high,
                j_drop_low=drop_low,
            )
            if scales is not None:
                # The third element is not needed for the unweighted fit.
                levels, variances, _unused = scales
                return _hurst_from_log2_slope(_ols_slope_log2(levels, variances))
            return None

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_ols_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_wavelet_ols",
            seed_offset=607,
        )

`lrdbench.estimators.wavelet.WaveletAbryVeitchEstimator` ¶

Bases: BaseEstimator

Abry–Veitch-type log-scale regression on wavelet detail variances (Hurst proxy).

Source code in src/lrdbench/estimators/wavelet.py

class WaveletAbryVeitchEstimator(BaseEstimator):
    """Abry–Veitch-type log-scale regression on wavelet detail variances (Hurst proxy)."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the Abry–Veitch-type estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``wavelet`` (default
        ``"db4"``), ``j_drop_high`` (default 2) and ``j_drop_low`` (default 2).
        """
        cfg = dict(self._spec.parameter_schema)
        wavelet_name = str(cfg.get("wavelet", "db4"))
        drop_high = int(cfg.get("j_drop_high", 2))
        drop_low = int(cfg.get("j_drop_low", 2))

        def _av_statistic(series: np.ndarray) -> float | None:
            scales = _collect_detail_scales(
                series,
                wavelet=wavelet_name,
                j_drop_high=drop_high,
                j_drop_low=drop_low,
            )
            if scales is None:
                return None
            levels, variances = scales[0], scales[1]
            slope = _ols_slope_log2(levels, variances)
            return _hurst_from_log2_slope(slope)

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_av_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_wavelet_av",
            seed_offset=401,
        )

`lrdbench.estimators.wavelet.WaveletBardetEstimator` ¶

Bases: BaseEstimator

Weighted log-scale regression (Bardet-type wavelet Hurst proxy).

Source code in src/lrdbench/estimators/wavelet.py

class WaveletBardetEstimator(BaseEstimator):
    """Weighted log-scale regression (Bardet-type wavelet Hurst proxy)."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the Bardet-type estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``wavelet`` (default
        ``"db4"``), ``j_drop_high`` (default 1) and ``j_drop_low`` (default 2).
        """
        settings = dict(self._spec.parameter_schema)
        wavelet_name = str(settings.get("wavelet", "db4"))
        drop_high = int(settings.get("j_drop_high", 1))
        drop_low = int(settings.get("j_drop_low", 2))

        def _bardet_statistic(series: np.ndarray) -> float | None:
            scales = _collect_detail_scales(
                series,
                wavelet=wavelet_name,
                j_drop_high=drop_high,
                j_drop_low=drop_low,
            )
            if scales is not None:
                # All three packed elements feed the weighted slope fit.
                levels, variances, weights_arg = scales
                weighted_slope = _wls_slope_log2(levels, variances, weights_arg)
                return _hurst_from_log2_slope(weighted_slope)
            return None

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_bardet_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_wavelet_bardet",
            seed_offset=503,
        )

`lrdbench.estimators.wavelet.WaveletJensenEstimator` ¶

Bases: BaseEstimator

Two-band wavelet slope extrapolation (Jensen-style bias reduction).

Source code in src/lrdbench/estimators/wavelet.py

class WaveletJensenEstimator(BaseEstimator):
    """Two-band wavelet slope extrapolation (Jensen-style bias reduction)."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the two-band wavelet estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``wavelet`` (default
        ``"db4"``), ``fine_band`` (default ``(2, 4)``) and ``coarse_band``
        (default ``(4, 6)``). Only the first two entries of each band are used.
        """
        options = dict(self._spec.parameter_schema)
        wavelet_name = str(options.get("wavelet", "db4"))

        # Normalise each band to a pair of ints, fine band first.
        raw_fine = options.get("fine_band", (2, 4))
        fine = (int(raw_fine[0]), int(raw_fine[1]))
        raw_coarse = options.get("coarse_band", (4, 6))
        coarse = (int(raw_coarse[0]), int(raw_coarse[1]))

        def _jensen_statistic(series: np.ndarray) -> float | None:
            return _wavelet_jensen_h(
                series,
                wavelet=wavelet_name,
                fine_band=fine,
                coarse_band=coarse,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_jensen_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_wavelet_jensen",
            seed_offset=709,
        )

`lrdbench.estimators.wavelet.WaveletWhittleEstimator` ¶

Bases: BaseEstimator

Wavelet-domain Gaussian Whittle-type fit to detail variances across scales.

Source code in src/lrdbench/estimators/wavelet.py

class WaveletWhittleEstimator(BaseEstimator):
    """Wavelet-domain Gaussian Whittle-type fit to detail variances across scales."""

    VERSION = "0.1.0"

    def __init__(self, spec: EstimatorSpec) -> None:
        self._spec = spec

    @property
    def spec(self) -> EstimatorSpec:
        """Specification this estimator was constructed with."""
        return self._spec

    def fit(self, record: SeriesRecord) -> EstimateResult:
        """Fit the wavelet Whittle estimator via ``fit_with_block_bootstrap``.

        Options read from the spec's parameter schema: ``wavelet`` (default
        ``"db4"``), ``j_drop_high`` (default 1) and ``j_drop_low`` (default 1).
        """
        cfg = dict(self._spec.parameter_schema)
        wavelet_name = str(cfg.get("wavelet", "db4"))
        drop_high = int(cfg.get("j_drop_high", 1))
        drop_low = int(cfg.get("j_drop_low", 1))

        def _wavelet_whittle_statistic(series: np.ndarray) -> float | None:
            return _wavelet_whittle_h(
                series,
                wavelet=wavelet_name,
                j_drop_high=drop_high,
                j_drop_low=drop_low,
            )

        return fit_with_block_bootstrap(
            record,
            self._spec,
            statistic=_wavelet_whittle_statistic,
            estimator_version=self.VERSION,
            failure_reason="insufficient_signal_for_wavelet_whittle",
            seed_offset=811,
        )

Bundled data-driven estimators¶

`lrdbench.estimators.data_driven.MLRandomForestEstimator` ¶

Bases: _SklearnEstimator

Source code in src/lrdbench/estimators/data_driven.py

class MLRandomForestEstimator(_SklearnEstimator):
    """``_SklearnEstimator`` subclass with ``MODEL_KIND = "random_forest_regressor"``."""

    # NOTE(review): presumably read by the base class to choose the model to
    # build; confirm against _SklearnEstimator.
    MODEL_KIND = "random_forest_regressor"

`lrdbench.estimators.data_driven.MLSVREstimator` ¶

Bases: _SklearnEstimator

Source code in src/lrdbench/estimators/data_driven.py

class MLSVREstimator(_SklearnEstimator):
    """``_SklearnEstimator`` subclass with ``MODEL_KIND = "support_vector_regressor"``."""

    # NOTE(review): presumably read by the base class to choose the model to
    # build; confirm against _SklearnEstimator.
    MODEL_KIND = "support_vector_regressor"

`lrdbench.estimators.data_driven.MLCNNEstimator` ¶

Bases: _TorchSequenceEstimator

Source code in src/lrdbench/estimators/data_driven.py

class MLCNNEstimator(_TorchSequenceEstimator):
    """``_TorchSequenceEstimator`` subclass with ``MODEL_KIND = "cnn_1d"``."""

    # NOTE(review): presumably read by the base class to choose the network to
    # build; confirm against _TorchSequenceEstimator.
    MODEL_KIND = "cnn_1d"

`lrdbench.estimators.data_driven.MLLSTMEstimator` ¶

Bases: _TorchSequenceEstimator

Source code in src/lrdbench/estimators/data_driven.py

class MLLSTMEstimator(_TorchSequenceEstimator):
    """``_TorchSequenceEstimator`` subclass with ``MODEL_KIND = "lstm"``."""

    # NOTE(review): presumably read by the base class to choose the network to
    # build; confirm against _TorchSequenceEstimator.
    MODEL_KIND = "lstm"

Registries¶

`lrdbench.registries.EstimatorRegistry` ¶

Bases: Registry[EstimatorBuilder]

Callable(spec: EstimatorSpec) -> BaseEstimator.

Source code in src/lrdbench/registries.py

class EstimatorRegistry(Registry[EstimatorBuilder]):
    """Callable(spec: EstimatorSpec) -> BaseEstimator.

    Registry specialisation whose entries are estimator builders with the
    signature above; it adds no behaviour of its own.
    """

`lrdbench.registries.GeneratorRegistry` ¶

Bases: Registry[BaseGenerator]

Source code in src/lrdbench/registries.py

class GeneratorRegistry(Registry[BaseGenerator]):
    """Registry specialised to :class:`BaseGenerator` entries; adds no behaviour of its own."""

`lrdbench.registries.ContaminationRegistry` ¶

Bases: Registry[BaseContamination]

Source code in src/lrdbench/registries.py

class ContaminationRegistry(Registry[BaseContamination]):
    """Registry specialised to :class:`BaseContamination` entries; adds no behaviour of its own."""

Defaults¶

`lrdbench.defaults.build_default_estimator_registry()` ¶

Source code in src/lrdbench/defaults.py

def build_default_estimator_registry() -> EstimatorRegistry:
    """Create an :class:`EstimatorRegistry` holding the bundled estimators.

    Every entry is a builder taking an :class:`EstimatorSpec` and returning
    the matching estimator instance. Each builder is registered immediately
    after it is defined, so the registration order is the reading order.

    Returns:
        A new registry with all 21 bundled estimator builders registered.
    """
    registry = EstimatorRegistry()

    # --- RS / GPH ----------------------------------------------------------
    def rs_builder(spec: EstimatorSpec) -> BaseEstimator:
        return RSEstimator(spec)

    registry.register("RS", rs_builder)

    def gph_builder(spec: EstimatorSpec) -> BaseEstimator:
        return GPHEstimator(spec)

    registry.register("GPH", gph_builder)

    # --- temporal ----------------------------------------------------------
    def dfa_builder(spec: EstimatorSpec) -> BaseEstimator:
        return DFAEstimator(spec)

    registry.register("DFA", dfa_builder)

    def dma_builder(spec: EstimatorSpec) -> BaseEstimator:
        return DMAEstimator(spec)

    registry.register("DMA", dma_builder)

    def absolute_moment_builder(spec: EstimatorSpec) -> BaseEstimator:
        return AbsoluteMomentEstimator(spec)

    registry.register("AbsoluteMoment", absolute_moment_builder)

    def variance_builder(spec: EstimatorSpec) -> BaseEstimator:
        return VarianceEstimator(spec)

    registry.register("Variance", variance_builder)

    def variance_residual_builder(spec: EstimatorSpec) -> BaseEstimator:
        return VarianceResidualEstimator(spec)

    registry.register("VarianceResidual", variance_residual_builder)

    # --- geometric ---------------------------------------------------------
    def higuchi_builder(spec: EstimatorSpec) -> BaseEstimator:
        return HiguchiEstimator(spec)

    registry.register("Higuchi", higuchi_builder)

    def ghe_builder(spec: EstimatorSpec) -> BaseEstimator:
        return GHEEstimator(spec)

    registry.register("GHE", ghe_builder)

    # --- spectral ----------------------------------------------------------
    def periodogram_builder(spec: EstimatorSpec) -> BaseEstimator:
        return PeriodogramRegressionEstimator(spec)

    registry.register("Periodogram", periodogram_builder)

    def whittle_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WhittleMLEEstimator(spec)

    registry.register("WhittleMLE", whittle_builder)

    def mlw_builder(spec: EstimatorSpec) -> BaseEstimator:
        return ModifiedLocalWhittleEstimator(spec)

    registry.register("ModifiedLocalWhittle", mlw_builder)

    # --- wavelet -----------------------------------------------------------
    def w_av_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WaveletAbryVeitchEstimator(spec)

    registry.register("WaveletAbryVeitch", w_av_builder)

    def w_bardet_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WaveletBardetEstimator(spec)

    registry.register("WaveletBardet", w_bardet_builder)

    def w_ols_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WaveletOLSEstimator(spec)

    registry.register("WaveletOLS", w_ols_builder)

    def w_jensen_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WaveletJensenEstimator(spec)

    registry.register("WaveletJensen", w_jensen_builder)

    def w_whittle_builder(spec: EstimatorSpec) -> BaseEstimator:
        return WaveletWhittleEstimator(spec)

    registry.register("WaveletWhittle", w_whittle_builder)

    # --- data-driven -------------------------------------------------------
    def ml_rf_builder(spec: EstimatorSpec) -> BaseEstimator:
        return MLRandomForestEstimator(spec)

    registry.register("MLRandomForest", ml_rf_builder)

    def ml_svr_builder(spec: EstimatorSpec) -> BaseEstimator:
        return MLSVREstimator(spec)

    registry.register("MLSVR", ml_svr_builder)

    def ml_cnn_builder(spec: EstimatorSpec) -> BaseEstimator:
        return MLCNNEstimator(spec)

    registry.register("MLCNN", ml_cnn_builder)

    def ml_lstm_builder(spec: EstimatorSpec) -> BaseEstimator:
        return MLLSTMEstimator(spec)

    registry.register("MLLSTM", ml_lstm_builder)

    return registry

`lrdbench.defaults.build_default_generator_registry()` ¶

Source code in src/lrdbench/defaults.py

def build_default_generator_registry() -> GeneratorRegistry:
    """Create a :class:`GeneratorRegistry` holding one instance of each bundled generator.

    Returns:
        A new registry with ``fGn``, ``fBm``, ``ARFIMA``, ``MRW`` and ``fOU``
        registered, in that order.
    """
    bundled = (
        ("fGn", FGNGenerator),
        ("fBm", FBMGenerator),
        ("ARFIMA", ARFIMAGenerator),
        ("MRW", MRWGenerator),
        ("fOU", FOUGenerator),
    )
    registry = GeneratorRegistry()
    for key, generator_cls in bundled:
        # Instantiate right before registering, one generator at a time.
        registry.register(key, generator_cls())
    return registry

`lrdbench.defaults.build_default_contamination_registry()` ¶

Source code in src/lrdbench/defaults.py

def build_default_contamination_registry() -> ContaminationRegistry:
    """Create a :class:`ContaminationRegistry` holding one instance of each bundled contamination.

    Returns:
        A new registry with ``polynomial_trend``, ``outliers``, ``level_shift``
        and ``heavy_tail_noise`` registered, in that order.
    """
    bundled = (
        ("polynomial_trend", PolynomialTrendContamination),
        ("outliers", OutliersContamination),
        ("level_shift", LevelShiftContamination),
        ("heavy_tail_noise", HeavyTailNoiseContamination),
    )
    registry = ContaminationRegistry()
    for key, contamination_cls in bundled:
        # Instantiate right before registering, one contamination at a time.
        registry.register(key, contamination_cls())
    return registry