Pipeline API

The main radiomics pipeline for configurable feature extraction.

Pipeline Classes

`pictologics.pipeline.RadiomicsPipeline`

A flexible, configurable pipeline for radiomic feature extraction. Allows defining multiple processing configurations (sequences of steps) to be run on data.

Parameters:

Name	Type	Description	Default
`deduplicate`	`bool`	Whether to enable feature deduplication across configurations. When True (default), features that would be identical due to shared preprocessing are computed once and reused.	`True`
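`deduplication_rules`	`DeduplicationRules | str | None`	Specific DeduplicationRules to use, or a version string to look up from the registry. If None, uses current default.	`None`
`load_standard`	`bool`	Whether to load the standard predefined configurations (e.g., `standard_fbn_32`, `standard_fbs_16`). Set to False when loading configurations from files or strings to avoid mixing standard configs with user-defined ones.	`True`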

Source code in pictologics/pipeline.py

class RadiomicsPipeline:
    """
    A flexible, configurable pipeline for radiomic feature extraction.
    Allows defining multiple processing configurations (sequences of steps) to be run on data.

    Args:
        deduplicate: Whether to enable feature deduplication across configurations.
            When True (default), features that would be identical due to shared
            preprocessing are computed once and reused.
        deduplication_rules: Specific DeduplicationRules to use, or a version
            string to look up from the registry. If None, uses current default.
    """

    def __init__(
        self,
        deduplicate: bool = True,
        deduplication_rules: DeduplicationRules | str | None = None,
        load_standard: bool = True,
    ) -> None:
        """Initialize pipeline with empty config registry.

        Args:
            deduplicate: Whether to enable feature deduplication across configurations.
            deduplication_rules: Specific DeduplicationRules to use, or a version
                string to look up from the registry. If None, uses current default.
            load_standard: Whether to load standard predefined configurations
                (e.g., ``standard_fbn_32``, ``standard_fbs_16``). Defaults to True
                for direct instantiation. Set to False when loading configurations
                from files or strings to avoid mixing standard configs with
                user-defined ones.
        """
        self._configs: dict[str, list[dict[str, Any]]] = {}
        self._config_metadata: dict[str, dict[str, Any]] = {}  # Stores source_mode, etc.
        self._log: list[dict[str, Any]] = []

        # Deduplication settings
        self._deduplication_enabled = deduplicate

        if deduplication_rules is None:
            self._deduplication_rules = get_default_rules()
        elif isinstance(deduplication_rules, str):
            self._deduplication_rules = DeduplicationRules.get_version(deduplication_rules)
        else:
            self._deduplication_rules = deduplication_rules

        self._last_deduplication_plan: DeduplicationPlan | None = None
        self._configs_modified_since_plan: bool = False

        # Deduplication statistics (reset on each run)
        self._dedup_reused_count: int = 0
        self._dedup_computed_count: int = 0

        if load_standard:
            self._load_predefined_configs()

    def _load_predefined_configs(self) -> None:
        """
        Register the standard template configurations on this pipeline.

        Loading is best-effort: any exception raised while fetching or
        converting the templates is reported as a ``UserWarning`` rather than
        propagated. Configurations registered before the failure remain
        registered.
        """
        try:
            templates = get_standard_templates()
            for template_name, template_steps in templates.items():
                # YAML yields lists; the internal format expects tuples for
                # some parameters (e.g., new_spacing).
                self._configs[template_name] = self._convert_yaml_steps(template_steps)
        except Exception as exc:
            # Not fatal: the user can still add their own configurations.
            warnings.warn(
                f"Failed to load standard templates: {exc}",
                UserWarning,
                stacklevel=2,
            )

    def _convert_yaml_steps(self, steps: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """
        Convert YAML-loaded steps to internal format.

        YAML loads lists, but some parameters expect tuples (e.g., new_spacing).
        """
        converted = []
        for step in steps:
            new_step = {"step": step["step"]}
            if "params" in step:
                params = copy.deepcopy(step["params"])
                # Convert new_spacing list to tuple
                if "new_spacing" in params and isinstance(params["new_spacing"], list):
                    params["new_spacing"] = tuple(params["new_spacing"])
                new_step["params"] = params
            converted.append(new_step)
        return converted

    def get_all_standard_config_names(self) -> list[str]:
        """
        Returns the list of all standard configuration names.

        Returns names from loaded templates that start with 'standard_'.
        """
        return sorted([name for name in self._configs.keys() if name.startswith("standard_")])

    # -------------------------------------------------------------------------
    # Deduplication Properties
    # -------------------------------------------------------------------------

    @property
    def deduplication_enabled(self) -> bool:
        """Whether feature deduplication is enabled."""
        return self._deduplication_enabled

    @deduplication_enabled.setter
    def deduplication_enabled(self, value: bool) -> None:
        """Enable or disable feature deduplication."""
        self._deduplication_enabled = value

    @property
    def deduplication_rules(self) -> DeduplicationRules:
        """Current deduplication rules."""
        return self._deduplication_rules

    @deduplication_rules.setter
    def deduplication_rules(self, value: DeduplicationRules | str) -> None:
        """Set deduplication rules (by version string or DeduplicationRules)."""
        if isinstance(value, str):
            self._deduplication_rules = DeduplicationRules.get_version(value)
        else:
            self._deduplication_rules = value
        # Invalidate existing plan when rules change
        self._configs_modified_since_plan = True

    @property
    def last_deduplication_plan(self) -> DeduplicationPlan | None:
        """The last computed deduplication plan, if any."""
        return self._last_deduplication_plan

    @property
    def deduplication_stats(self) -> dict[str, int | float]:
        """
        Statistics from the last pipeline run with deduplication enabled.

        Returns a dictionary with:
            - 'reused_families': Number of feature families reused from cache
            - 'computed_families': Number of feature families freshly computed
            - 'cache_hit_rate': Fraction of families reused (0.0 to 1.0)

        Returns an empty dict if no features were extracted (with a warning),
        or if deduplication was not enabled during the last run.

        Note:
            Statistics are valid because pipeline configurations run sequentially.
            Parallelization occurs within Numba-accelerated functions, not across configs.
        """
        total = self._dedup_reused_count + self._dedup_computed_count
        if total == 0:
            warnings.warn(
                "No features were extracted with deduplication enabled. "
                "Ensure deduplication is enabled and run() has been called with multiple configs.",
                UserWarning,
                stacklevel=2,
            )
            return {}

        return {
            "reused_families": self._dedup_reused_count,
            "computed_families": self._dedup_computed_count,
            "cache_hit_rate": self._dedup_reused_count / total,
        }

    def add_config(
        self,
        name: str,
        steps: list[dict[str, Any]],
        source_mode: str = "full_image",
        sentinel_value: Optional[float] = None,
    ) -> "RadiomicsPipeline":
        """
        Add a processing configuration.

        Args:
            name: Unique name for this configuration.
            steps: List of steps. Each step is a dict with 'step' (name) and 'params' (dict).
                   Supported steps:
                   - 'resample': params: new_spacing (required), interpolation (optional)
                   - 'resegment': params: range_min, range_max, apply_to
                   - 'filter_outliers': params: sigma, apply_to
                   - 'binarize_mask': params: threshold (float, default 0.5),
                       mask_values (int | list[int] | tuple[int, int]), apply_to ('morph'|'intensity'|'both')
                   - 'keep_largest_component': params: None
                   - 'round_intensities': params: None
                   - 'discretise': params: method, n_bins/bin_width, etc.
                   - 'filter': params: type (required), plus filter-specific params
                   - 'extract_features': params: families (list), etc.
            source_mode: How to handle source voxel validity for resampling/filtering:
                - "full_image" (default): All voxels contain real data. Traditional behavior.
                - "roi_only": Only ROI voxels contain real data; others have sentinel values.
                - "auto": Auto-detect sentinel values; emit warning if found.
            sentinel_value: If specified, explicitly set the sentinel value instead
                of auto-detecting. Only used when source_mode is "roi_only" or "auto".

        Note:
            - Texture features require a prior 'discretise' step.
            - IVH features are configured via 'ivh_params' dict.
            - The source_mode setting affects resampling and filtering operations.
              In 'roi_only' mode, boundary regions use normalized interpolation to
              exclude sentinel voxels.

        Example:
            ```python
            pipeline = RadiomicsPipeline()

            # Standard configuration (all voxels valid)
            pipeline.add_config(
                name="standard",
                steps=[...],
            )

            # Configuration for sentinel-masked images
            pipeline.add_config(
                name="sentinel_aware",
                source_mode="roi_only",
                    steps=[
                        {"step": "resample", "params": {"new_spacing": (1, 1, 1)}},
                        {
                            "step": "extract_features",
                            "params": {
                                "families": [
                                    "intensity",
                                    "morphology",
                                    "texture",
                                    "histogram",
                                    "ivh",
                                ]
                            },
                        },
                    ],
                )
            ```
        """
        if not isinstance(steps, list):
            raise ValueError("Configuration must be a list of steps")

        # Validate source_mode
        valid_modes = {"full_image", "roi_only", "auto"}
        if source_mode not in valid_modes:
            raise ValueError(f"Invalid source_mode '{source_mode}'. Must be one of: {valid_modes}")

        for step in steps:
            if not isinstance(step, dict):
                raise ValueError("Each step must be a dictionary")
            if "step" not in step:
                raise ValueError("Each step must have a 'step' key")

        self._configs[name] = steps
        self._config_metadata[name] = {
            "source_mode": source_mode,
            "sentinel_value": sentinel_value,
        }
        self._configs_modified_since_plan = True
        return self

    def run(
        self,
        image: str | Image,
        mask: str | Image | None = None,
        subject_id: Optional[str] = None,
        config_names: Optional[list[str]] = None,
        mask_subvoxel_tolerance: float = 0.5,
        mask_subvoxel_warning_threshold: float = 0.01,
        mask_min_overlap_fraction: float = 0.5,
    ) -> dict[str, pd.Series]:
        """
        Run configurations on the provided image and mask.

        For every selected configuration a new pipeline state is built from
        the original image and mask, the configuration's steps are executed in
        order, and one entry describing the run is appended to the in-memory
        processing log. Deduplication counters are reset at the start of each
        call; when deduplication is enabled and more than one configuration is
        selected, a deduplication plan is computed and stored as the last plan.

        Args:
            image: Path to image or Image object.
            mask: Optional path to mask or Image object.
                If omitted (or passed as `None` / empty string), the pipeline will
                treat the **entire image** as the ROI by generating a full (all-ones)
                mask matching the input image geometry.
            subject_id: Optional identifier for the subject (used in the
                processing log only; not included in the returned feature Series).
            config_names: List of specific configuration names to run.
                          If None, runs all registered configurations.
                          The special name "all_standard" expands to every
                          registered configuration whose name starts with
                          'standard_'.
            mask_subvoxel_tolerance: Maximum permitted fractional-voxel offset when
                repositioning a mask path (default: 0.5). Has no effect when mask is
                a pre-loaded Image object. See ``load_image`` for full description.
            mask_subvoxel_warning_threshold: Fractional-voxel drift above which a
                ``UserWarning`` is emitted during mask repositioning (default: 0.01).
                Has no effect when mask is a pre-loaded Image object.
            mask_min_overlap_fraction: Minimum fraction of the mask volume that must
                intersect with the image space when loading from a path (default: 0.5).
                Has no effect when mask is a pre-loaded Image object.

        Returns:
            Dictionary mapping config names to pandas Series of features.
            Every Series contains the **complete set of expected feature names**
            for its configuration, regardless of whether extraction succeeded:

            - If extraction succeeds, values are the computed feature values.
            - If individual features fail (e.g., mesh error, PCA with ≤3 voxels),
                those features are ``NaN``; successfully computed features are preserved.
            - If the ROI is (or becomes) empty, all values are ``NaN``.
            - If an unexpected error interrupts the configuration, features
                computed before the error are preserved and every remaining
                expected feature is ``NaN``.

        Raises:
            ValueError: If ``config_names`` contains a name that is neither
                "all_standard" nor a registered configuration.

            Errors raised while loading the inputs or validating the mask
            geometry happen before the per-configuration error handling and
            are therefore not converted into NaN results.

        Example:
            Run standard pipeline components:

            ```python
            from pictologics.pipeline import RadiomicsPipeline

            # Initialize
            pipeline = RadiomicsPipeline()

            # Run on image and mask
            results = pipeline.run(
                image="data/image.nii.gz",
                mask="data/mask.nii.gz",
                subject_id="subject_001",
                config_names=["standard_fbn_32"]
            )

            # Access results
            print(results["standard_fbn_32"].head())
            ```
        """
        # 1. Load Data
        if isinstance(image, str):
            orig_img = load_image(image)
            img_source = image
        else:
            orig_img = image
            img_source = "InMemory"

        # Resolve the mask: generated full mask, loaded from path (aligned to
        # the image as reference), or used as given.
        mask_was_generated = False
        if mask is None or (isinstance(mask, str) and mask.strip() == ""):
            orig_mask = create_full_mask(orig_img)
            mask_source = "GeneratedFullMask"
            mask_was_generated = True
        elif isinstance(mask, str):
            orig_mask = load_image(
                mask,
                reference_image=orig_img,
                subvoxel_tolerance=mask_subvoxel_tolerance,
                subvoxel_warning_threshold=mask_subvoxel_warning_threshold,
                min_overlap_fraction=mask_min_overlap_fraction,
            )
            mask_source = mask
        else:
            orig_mask = mask
            mask_source = "InMemory"

        _validate_geometry(orig_mask, orig_img, "mask", "image")

        all_results = {}

        # Determine which configs to run
        if config_names is None:
            target_configs = list(self._configs.keys())
        else:
            target_configs = []
            for name in config_names:
                if name == "all_standard":
                    target_configs.extend(self.get_all_standard_config_names())
                elif name in self._configs:
                    target_configs.append(name)
                else:
                    raise ValueError(f"Configuration '{name}' not found.")

        # Create or regenerate deduplication plan if enabled
        dedup_plan: DeduplicationPlan | None = None
        # Shared across all configs of this run so families can be reused.
        family_cache: dict[tuple[str, str], dict[str, Any]] = {}

        # Reset deduplication statistics for this run
        self._dedup_reused_count = 0
        self._dedup_computed_count = 0

        # A plan is only worthwhile when at least two configs can share work.
        if self._deduplication_enabled and len(target_configs) > 1:
            # Get configs for analysis
            configs_to_analyze = {name: self._configs[name] for name in target_configs}
            analyzer = ConfigurationAnalyzer(configs_to_analyze, self._deduplication_rules)
            dedup_plan = analyzer.analyze()
            self._last_deduplication_plan = dedup_plan
            self._configs_modified_since_plan = False

        # Run each configuration
        for config_name in target_configs:
            steps = self._configs[config_name]
            # Configs without stored metadata (e.g. the predefined ones) fall
            # back to the defaults below.
            metadata = self._config_metadata.get(config_name, {})

            # Determine source mode for this config
            source_mode_str = metadata.get("source_mode", "full_image")
            source_mode = SourceMode(source_mode_str)
            explicit_sentinel = metadata.get("sentinel_value")

            # Determine source mask based on source_mode
            source_mask: Optional[Image] = None
            sentinel_detected = False
            detected_sentinel_value: Optional[float] = None

            if source_mode == SourceMode.FULL_IMAGE:
                # Default: all voxels valid, no source_mask needed
                pass

            elif source_mode == SourceMode.ROI_ONLY:
                # Use ROI mask as source mask
                source_mask = Image(
                    array=(orig_mask.array > 0).astype(np.uint8),
                    spacing=orig_mask.spacing,
                    origin=orig_mask.origin,
                    direction=orig_mask.direction,
                    modality="SOURCE_MASK",
                )

            elif source_mode == SourceMode.AUTO:
                # Auto-detect sentinel values
                if explicit_sentinel is not None:
                    # User provided explicit sentinel value
                    detected_sentinel_value = explicit_sentinel
                    sentinel_detected = True
                else:
                    # If mask was auto-generated (full mask), do not use it for
                    # "outside-ness" check in detection, as everything is "inside".
                    mask_for_detection = orig_mask if not mask_was_generated else None
                    detected = detect_sentinel_value(orig_img, roi_mask=mask_for_detection)
                    if detected is not None:
                        detected_sentinel_value = detected
                        sentinel_detected = True

                        # Reported at DEBUG level (not as a warning) so that
                        # default logging configurations are not spammed.
                        logging.debug(
                            f"Auto-detected sentinel value {detected} in image. "
                            f"Using source validity mask for config '{config_name}'. "
                            f"Voxels with value {detected} will be excluded from "
                            f"resampling/filtering."
                        )

                if sentinel_detected and detected_sentinel_value is not None:
                    source_mask = create_source_mask_from_sentinel(
                        orig_img, detected_sentinel_value
                    )

            # Initialize State with source tracking
            # A new state object is built for each config. Only the intensity
            # mask array is copied here; the image and the morphological mask
            # are passed as references to the originals.
            state = PipelineState(
                image=orig_img,
                raw_image=orig_img,  # Track non-discretised image
                morph_mask=orig_mask,
                intensity_mask=Image(
                    array=orig_mask.array.copy(),
                    spacing=orig_mask.spacing,
                    origin=orig_mask.origin,
                    direction=orig_mask.direction,
                    modality=orig_mask.modality,
                ),
                mask_was_generated=mask_was_generated,
                source_mode=source_mode,
                source_mask=source_mask,
                sentinel_detected=sentinel_detected,
                sentinel_value=detected_sentinel_value,
            )

            # Log entry for this config; "status" and "steps_executed" are
            # updated as the run progresses.
            config_log: dict[str, Any] = {
                "timestamp": datetime.datetime.now().isoformat(),
                "schema_version": CONFIG_SCHEMA_VERSION,
                "pictologics_version": _get_package_version(),
                "subject_id": subject_id,
                "config_name": config_name,
                "image_source": img_source,
                "mask_source": mask_source,
                "source_mode": source_mode.value,
                "sentinel_detected": sentinel_detected,
                "sentinel_value": detected_sentinel_value,
                "mask_roi_semantics": "nonzero_values_are_roi_membership",
                "config_snapshot": self._make_serializable(
                    {
                        "source_mode": source_mode.value,
                        "sentinel_value": explicit_sentinel,
                        "effective_sentinel_value": detected_sentinel_value,
                        "steps": steps,
                    }
                ),
                "deduplication": {
                    "enabled": self._deduplication_enabled,
                    "rules_version": self._deduplication_rules.version,
                    "plan_used": dedup_plan is not None,
                },
                "run_parameters": {
                    "requested_config_names": config_names,
                    "target_configs": target_configs,
                },
                "mask_repositioning_settings": {
                    "subvoxel_tolerance": mask_subvoxel_tolerance,
                    "subvoxel_warning_threshold": mask_subvoxel_warning_threshold,
                    "min_overlap_fraction": mask_min_overlap_fraction,
                },
                "status": "started",
                "steps_executed": [],
            }

            config_features: dict[str, Any] = {}
            # Tracks the step being executed so failures can be attributed;
            # stays None while the initial ROI check runs.
            current_step: dict[str, Any] | None = None

            try:
                self._ensure_nonempty_roi(state, context="initialization")

                for step_def in steps:
                    current_step = step_def
                    step_name = step_def["step"]
                    params = step_def.get("params", {})

                    # Execute Step
                    if step_name == "extract_features":
                        # Use deduplication if plan exists
                        if dedup_plan is not None:
                            features = self._extract_features_with_dedup(
                                state, params, config_name, dedup_plan, family_cache
                            )
                        else:
                            features = self._extract_features(state, params)
                        config_features.update(features)
                    else:
                        self._execute_preprocessing_step(state, step_name, params)

                    # Log
                    config_log["steps_executed"].append(
                        {
                            "step": step_name,
                            "params": self._make_serializable(params),
                            "status": "completed",
                        }
                    )
                config_log["status"] = "completed"
                config_log["result_feature_count"] = len(config_features)

            except EmptyROIMaskError as e:
                config_log["status"] = "empty_roi"
                config_log["error"] = str(e)
                config_log["failed_step"] = (
                    current_step if current_step is not None else "initialization"
                )
                # The dict is appended by reference, so the feature count set
                # below still ends up in the logged entry.
                self._log.append(config_log)

                # Build a NaN-filled Series with the expected feature names so
                # that downstream formatting/concatenation always sees a
                # complete, predictable set of columns.
                nan_names = self._get_expected_feature_names(steps)
                all_results[config_name] = pd.Series({name: float("nan") for name in nan_names})
                config_log["result_feature_count"] = len(nan_names)
                logging.debug(
                    "Config '%s' produced an empty ROI: %s. Returning NaN for %d features.",
                    config_name,
                    e,
                    len(nan_names),
                )
                # Result and log entry are already recorded for this config.
                continue

            except Exception as e:
                config_log["status"] = "error"
                config_log["error"] = str(e)
                config_log["failed_step"] = current_step
                # Backfill with NaN so the result always has a complete set of
                # feature columns, even when extraction was interrupted.
                # Features computed before the error keep their values.
                nan_names = self._get_expected_feature_names(steps)
                for name in nan_names:
                    config_features.setdefault(name, float("nan"))
                config_log["result_feature_count"] = len(config_features)

            # Reached on success and on unexpected errors (not on empty ROI).
            self._log.append(config_log)

            # Create Series
            series = pd.Series(config_features)
            all_results[config_name] = series

        return all_results

    def clear_log(self) -> None:
        """Clear the in-memory processing log."""
        self._log.clear()

    def _ensure_nonempty_roi(self, state: PipelineState, context: str) -> None:
        """Raise a clear error if the ROI is empty.

        The pipeline treats any nonzero mask value as ROI membership unless a
        step explicitly binarizes/selects labels first.
        """
        has_intensity_roi = bool(np.any(state.intensity_mask.array != 0))
        if not has_intensity_roi:
            raise EmptyROIMaskError(
                "ROI is empty after preprocessing "
                f"({context}). Ensure your mask contains at least one nonzero voxel, "
                "or relax resegmentation/outlier filtering thresholds."
            )
        has_morph_roi = bool(np.any(state.morph_mask.array != 0))
        if not has_morph_roi:
            raise EmptyROIMaskError(
                "ROI is empty after preprocessing "
                f"({context}). Ensure your mask contains at least one nonzero voxel, "
                "or relax resegmentation/outlier filtering thresholds."
            )

    def _execute_preprocessing_step(
        self, state: PipelineState, step_name: str, params: dict[str, Any]
    ) -> None:
        """
        Execute a single preprocessing step and update the state in-place.
        """
        if step_name == "resample":
            # Params
            if "new_spacing" not in params:
                raise ValueError("Resample step requires 'new_spacing' parameter.")

            spacing = params["new_spacing"]
            interp_img = params.get("interpolation", "linear")
            interp_mask = params.get("mask_interpolation", "nearest")
            mask_thresh = params.get("mask_threshold", 0.5)
            round_intensities_flag = params.get("round_intensities", False)

            # Determine source_mask for resampling (if not FULL_IMAGE mode)
            source_mask_arg = None
            if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                source_mask_arg = state.source_mask

            # Update Image and raw_image
            state.image = resample_image(
                state.image,
                spacing,
                interpolation=interp_img,
                round_intensities=round_intensities_flag,
                source_mask=source_mask_arg,
            )
            state.raw_image = state.image  # Keep raw_image in sync before discretisation

            # Propagate source_mask from resampled image if it was used
            if state.image.has_source_mask and state.image.source_mask is not None:
                state.source_mask = Image(
                    array=state.image.source_mask.astype(np.uint8),
                    spacing=state.image.spacing,
                    origin=state.image.origin,
                    direction=state.image.direction,
                    modality="SOURCE_MASK",
                )

            # Update Masks
            thresh_arg = mask_thresh if interp_mask != "nearest" else None
            state.morph_mask = resample_image(
                state.morph_mask,
                spacing,
                interpolation=interp_mask,
                mask_threshold=thresh_arg,
            )
            state.intensity_mask = resample_image(
                state.intensity_mask,
                spacing,
                interpolation=interp_mask,
                mask_threshold=thresh_arg,
            )

            # CRITICAL: If valid source mask exists, apply it to both masks.
            # This prevents background (often 0 after resampling) from being
            # considered part of the ROI if the resegmentation range includes 0.
            if state.source_mask is not None:
                # Ensure binary mask semantics
                valid_mask = state.source_mask.array > 0
                state.morph_mask = _intersect_mask(state.morph_mask, valid_mask)
                state.intensity_mask = _intersect_mask(state.intensity_mask, valid_mask)

            self._ensure_nonempty_roi(state, context="resample")

        elif step_name == "resegment":
            range_min = params.get("range_min")
            range_max = params.get("range_max")
            apply_to = _get_apply_to(params, "resegment")

            if apply_to in ("intensity", "both"):
                state.intensity_mask = resegment_mask(
                    state.image, state.intensity_mask, range_min, range_max
                )

            if apply_to in ("morph", "both"):
                state.morph_mask = resegment_mask(
                    state.image, state.morph_mask, range_min, range_max
                )

            self._ensure_nonempty_roi(state, context="resegment")

        elif step_name == "filter_outliers":
            sigma = params.get("sigma", 3.0)
            apply_to = _get_apply_to(params, "filter_outliers")

            if apply_to in ("intensity", "both"):
                state.intensity_mask = filter_outliers(state.image, state.intensity_mask, sigma)
            if apply_to in ("morph", "both"):
                state.morph_mask = filter_outliers(state.image, state.morph_mask, sigma)

            self._ensure_nonempty_roi(state, context="filter_outliers")

        elif step_name == "round_intensities":
            state.image = round_intensities(state.image)
            state.raw_image = state.image  # Keep raw_image in sync before discretisation

        elif step_name == "keep_largest_component":
            # apply_to: "morph", "intensity", or "both" (default)
            apply_to = _get_apply_to(params, "keep_largest_component")
            if apply_to in ("morph", "both"):
                state.morph_mask = keep_largest_component(state.morph_mask)
            if apply_to in ("intensity", "both"):
                state.intensity_mask = keep_largest_component(state.intensity_mask)

            self._ensure_nonempty_roi(state, context="keep_largest_component")

        elif step_name == "binarize_mask":
            apply_to = _get_apply_to(params, "binarize_mask")
            threshold = params.get("threshold", 0.5)
            mask_values = params.get("mask_values")

            def _binarize(image: Image) -> Image:
                if mask_values is not None:
                    if isinstance(mask_values, tuple) and len(mask_values) == 2:
                        lo, hi = mask_values
                        mask_arr = (image.array >= lo) & (image.array <= hi)
                    else:
                        values = mask_values
                        if isinstance(values, int):
                            values = [values]
                        mask_arr = np.isin(image.array, values)
                else:
                    if threshold is None:
                        raise ValueError(
                            "binarize_mask requires 'threshold' unless mask_values is provided"
                        )
                    mask_arr = image.array >= float(threshold)

                return Image(
                    array=mask_arr.astype(np.uint8),
                    spacing=image.spacing,
                    origin=image.origin,
                    direction=image.direction,
                    modality=image.modality,
                )

            if apply_to in ("morph", "both"):
                state.morph_mask = _binarize(state.morph_mask)
            if apply_to in ("intensity", "both"):
                state.intensity_mask = _binarize(state.intensity_mask)

            self._ensure_nonempty_roi(state, context="binarize_mask")

        elif step_name == "discretise":
            self._ensure_nonempty_roi(state, context="discretise")
            method = params.get("method", "FBN")

            # Avoid passing 'method' twice
            disc_params = params.copy()
            if "method" in disc_params:
                del disc_params["method"]

            state.image = cast(
                Image,
                discretise_image(
                    state.image,
                    method=method,
                    roi_mask=state.intensity_mask,
                    **disc_params,
                ),
            )

            state.is_discretised = True
            state.discretisation_method = method
            state.n_bins = params.get("n_bins")
            state.bin_width = params.get("bin_width")

            # If FBS, n_bins is dynamic. We can estimate it from the result.
            if method == "FBS":
                masked_vals = apply_mask(state.image, state.intensity_mask)
                if len(masked_vals) > 0:
                    state.n_bins = int(np.max(masked_vals))
                else:
                    raise EmptyROIMaskError(
                        "ROI is empty after preprocessing (discretise). "
                        "Cannot infer FBS bin count from an empty ROI."
                    )

        elif step_name == "filter":
            # Apply image filter
            filter_type = params.get("type")
            if not filter_type:
                raise ValueError("Filter step requires 'type' parameter.")

            # Get boundary condition (default: mirror per IBSI 2)
            boundary_str = params.get("boundary", "mirror")
            boundary_map = {
                "mirror": BoundaryCondition.MIRROR,
                "nearest": BoundaryCondition.NEAREST,
                "constant": BoundaryCondition.ZERO,
                "wrap": BoundaryCondition.PERIODIC,
                "zero": BoundaryCondition.ZERO,
                "periodic": BoundaryCondition.PERIODIC,
            }
            boundary = boundary_map.get(boundary_str, BoundaryCondition.MIRROR)

            # Extract filter-specific params (exclude type and boundary)
            filter_params = {k: v for k, v in params.items() if k not in ("type", "boundary")}

            # Apply filter based on type
            filtered_array: npt.NDArray[np.floating[Any]]

            if filter_type == "mean":
                filter_params["boundary"] = boundary
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    result_tuple = mean_filter(
                        state.image.array, source_mask=source_arr, **filter_params
                    )
                    # mean_filter returns tuple[NDArray, NDArray] when source_mask is used
                    if isinstance(result_tuple, tuple):
                        filtered_array = result_tuple[0]
                    else:
                        filtered_array = result_tuple
                else:
                    filtered_array = mean_filter(state.image.array, **filter_params)

            elif filter_type == "log":
                filter_params["boundary"] = boundary
                # Use image spacing if not provided
                if "spacing_mm" not in filter_params:
                    filter_params["spacing_mm"] = state.image.spacing
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    result_tuple_log = laplacian_of_gaussian(
                        state.image.array, source_mask=source_arr, **filter_params
                    )
                    if isinstance(result_tuple_log, tuple):
                        filtered_array = result_tuple_log[0]
                    else:
                        filtered_array = result_tuple_log
                else:
                    filtered_array = laplacian_of_gaussian(state.image.array, **filter_params)

            elif filter_type == "laws":
                filter_params["boundary"] = boundary
                # 'kernel' param maps to first positional arg
                kernel = filter_params.pop("kernel", "L5E5E5")
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    result_laws = laws_filter(
                        state.image.array,
                        kernel,
                        source_mask=source_arr,
                        **filter_params,
                    )
                    if isinstance(result_laws, tuple):
                        filtered_array = result_laws[0]
                    else:
                        filtered_array = result_laws
                else:
                    filtered_array = laws_filter(state.image.array, kernel, **filter_params)

            elif filter_type == "gabor":
                filter_params["boundary"] = boundary
                if "spacing_mm" not in filter_params:
                    filter_params["spacing_mm"] = state.image.spacing
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    filter_params["source_mask"] = source_arr
                filtered_array = gabor_filter(state.image.array, **filter_params)

            elif filter_type == "wavelet":
                filter_params["boundary"] = boundary
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    filter_params["source_mask"] = source_arr
                filtered_array = wavelet_transform(state.image.array, **filter_params)

            elif filter_type == "simoncelli":
                # Simoncelli doesn't use boundary param
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    filter_params["source_mask"] = source_arr
                filtered_array = simoncelli_wavelet(state.image.array, **filter_params)

            elif filter_type == "riesz":
                # Riesz transform variants
                variant = filter_params.pop("variant", "base")
                # Pass source_mask if not in FULL_IMAGE mode
                if state.source_mode != SourceMode.FULL_IMAGE and state.source_mask is not None:
                    source_arr = state.source_mask.array > 0
                    filter_params["source_mask"] = source_arr
                if variant == "log":
                    if "spacing_mm" not in filter_params:
                        filter_params["spacing_mm"] = state.image.spacing
                    filtered_array = riesz_log(state.image.array, **filter_params)
                elif variant == "simoncelli":
                    filtered_array = riesz_simoncelli(state.image.array, **filter_params)
                else:
                    filtered_array = riesz_transform(state.image.array, **filter_params)

            else:
                raise ValueError(
                    f"Unknown filter type: {filter_type}. "
                    "Supported: mean, log, laws, gabor, wavelet, simoncelli, riesz"
                )

            # Update state with filtered image
            state.image = Image(
                array=filtered_array,
                spacing=state.image.spacing,
                origin=state.image.origin,
                direction=state.image.direction,
                modality=state.image.modality,
            )
            state.raw_image = state.image  # Update raw_image post-filter
            state.is_filtered = True
            state.filter_type = filter_type

        else:
            raise ValueError(f"Unknown preprocessing step: {step_name}")

    @staticmethod
    def _get_expected_feature_names(
        steps: list[dict[str, Any]],
    ) -> list[str]:
        """Return the ordered list of feature names a config would produce.

        Inspects all ``extract_features`` steps in *steps*, expanding family
        names via :data:`FEATURE_NAMES`.  The result is used to build a
        NaN-filled Series when a configuration fails entirely (empty ROI or
        unexpected error), guaranteeing that the returned Series always has the
        same set of feature names as a successful extraction.
        """
        names: list[str] = []
        seen: set[str] = set()

        for step_def in steps:
            if step_def.get("step") != "extract_features":
                continue
            params = step_def.get("params", {})
            families: list[str] = params.get(
                "families",
                _DEFAULT_FEATURE_FAMILIES,
            )

            for family in families:
                for fam in _feature_name_families(family):
                    if fam in FEATURE_NAMES and fam not in seen:
                        names.extend(FEATURE_NAMES[fam])
                        seen.add(fam)

            # intensity family may include spatial/local sub-families
            if "intensity" in families:
                if params.get("include_spatial_intensity", False):
                    if "spatial_intensity" not in seen:
                        names.extend(FEATURE_NAMES["spatial_intensity"])
                        seen.add("spatial_intensity")
                if params.get("include_local_intensity", False):
                    if "local_intensity" not in seen:
                        names.extend(FEATURE_NAMES["local_intensity"])
                        seen.add("local_intensity")

        return names

    @staticmethod
    def _fill_missing_features(
        results: dict[str, Any],
        families: list[str],
        params: dict[str, Any] | None = None,
    ) -> None:
        """Backfill any missing feature keys with ``NaN``.

        After a ``calculate_*`` function returns, some keys may be absent due
        to partial failures (e.g. mesh generation failure in morphology, or an
        empty texture matrix).  This method ensures every expected key is
        present – computed values are preserved and only truly missing keys are
        set to ``NaN``.
        """
        if params is None:
            params = {}
        nan = float("nan")
        for family in families:
            for fam in _feature_name_families(family):
                if fam in FEATURE_NAMES:
                    for key in FEATURE_NAMES[fam]:
                        if key not in results:
                            results[key] = nan

        # spatial/local intensity sub-families that are gated by params
        if "intensity" in families:
            if params.get("include_spatial_intensity", False):
                for key in FEATURE_NAMES.get("spatial_intensity", ()):
                    if key not in results:
                        results[key] = nan
            if params.get("include_local_intensity", False):
                for key in FEATURE_NAMES.get("local_intensity", ()):
                    if key not in results:
                        results[key] = nan

    def _extract_features(self, state: PipelineState, params: dict[str, Any]) -> dict[str, Any]:
        """Compute every requested feature family for the current pipeline state.

        Args:
            state: Current pipeline state.
            params: Feature extraction parameters.  ``families`` selects what
                to compute (falling back to ``_DEFAULT_FEATURE_FAMILIES``).

        Returns:
            Dictionary of all extracted features, with any expected but
            missing key backfilled with ``NaN``.

        Raises:
            ValueError: If one of the ``*_params`` pass-through options is
                neither ``None`` nor a dict.
        """
        requested = params.get("families", _DEFAULT_FEATURE_FAMILIES)

        # Optional kwargs pass-through (advanced usage).  Only the types are
        # checked here; _extract_single_family reads the options itself.
        for option_name in (
            "spatial_intensity_params",
            "local_intensity_params",
            "ivh_params",
            "texture_matrix_params",
        ):
            option_value = params.get(option_name, {})
            if option_value is not None and not isinstance(option_value, dict):
                raise ValueError(f"{option_name} must be a dict")

        collected: dict[str, Any] = {}
        for requested_family in requested:
            family_features = self._extract_single_family(state, requested_family, params)
            collected.update(family_features)

        # Ensure every expected feature key is present (NaN for partial failures)
        self._fill_missing_features(collected, requested, params)
        return collected

    def _extract_features_with_dedup(
        self,
        state: PipelineState,
        params: dict[str, Any],
        config_name: str,
        plan: DeduplicationPlan,
        family_cache: dict[tuple[str, str], dict[str, Any]],
    ) -> dict[str, Any]:
        """
        Extract features, reusing per-family results that were already computed.

        Each requested family is looked up in the deduplication plan. When the
        plan holds a signature for it and that signature is already in the
        cache, the cached features are reused; otherwise the family is
        computed and, if it has a signature, stored for later configurations.
        The reuse/compute counters on the pipeline are updated accordingly.

        Args:
            state: Current pipeline state.
            params: Feature extraction parameters.
            config_name: Name of the current configuration.
            plan: Deduplication plan mapping families to signatures.
            family_cache: Cache of computed family features by family and
                signature hash; updated in place.

        Returns:
            Dictionary of all extracted features.
        """
        collected: dict[str, Any] = {}
        requested = params.get("families", _DEFAULT_FEATURE_FAMILIES)

        for requested_family in requested:
            # Texture aliases are normalised so that raw sub-family names and
            # their texture_* spellings resolve to the same signature/cache slot.
            canonical = _normalize_texture_family(requested_family) or requested_family

            # The plan is keyed by (config_name, family).
            signature = plan.signatures.get((config_name, canonical))
            slot = None if not signature else (canonical, signature.hash)

            if slot is not None and slot in family_cache:
                # Cache hit: reuse what an earlier configuration computed.
                collected.update(family_cache[slot])
                self._dedup_reused_count += 1
                continue

            # Cache miss (or no signature): compute this family now.
            computed = self._extract_single_family(state, requested_family, params)
            collected.update(computed)
            if slot is not None:
                family_cache[slot] = computed
            self._dedup_computed_count += 1

        # Ensure every expected feature key is present (NaN for partial failures)
        self._fill_missing_features(collected, requested, params)
        return collected

    def _extract_single_family(
        self,
        state: PipelineState,
        family: str,
        params: dict[str, Any],
    ) -> dict[str, Any]:
        """
        Compute the features of exactly one family.

        Kept as a separate helper so that results can be deduplicated per
        family. A family name that matches none of the known families (and is
        not recognised by ``_normalize_texture_family``) yields an empty dict.

        Args:
            state: Current pipeline state.
            family: Name of the feature family to compute.
            params: Feature extraction parameters, including the optional
                ``*_params`` pass-through dictionaries.

        Returns:
            Dictionary with the features of the requested family.
        """
        # Optional kwargs pass-through; a missing or falsy option becomes {}.
        spatial_kwargs = params.get("spatial_intensity_params", {}) or {}
        local_kwargs = params.get("local_intensity_params", {}) or {}
        ivh_kwargs = params.get("ivh_params", {}) or {}
        matrix_kwargs = params.get("texture_matrix_params", {}) or {}

        features: dict[str, Any] = {}

        if family == "morphology":
            morphology = calculate_morphology_features(
                state.morph_mask,
                state.raw_image,
                intensity_mask=state.intensity_mask,
            )
            features.update(morphology)
            return features

        if family == "intensity":
            roi_values = apply_mask(state.raw_image, state.intensity_mask)
            features.update(calculate_intensity_features(roi_values))

            # The spatial and local sub-families are opt-in for "intensity".
            want_spatial = bool(params.get("include_spatial_intensity", False))
            want_local = bool(params.get("include_local_intensity", False))

            if want_spatial:
                spatial = calculate_spatial_intensity_features(
                    state.raw_image,
                    state.intensity_mask,
                    **spatial_kwargs,
                )
                features.update(spatial)
            if want_local:
                local = calculate_local_intensity_features(
                    state.raw_image, state.intensity_mask, **local_kwargs
                )
                features.update(local)
            return features

        if family == "spatial_intensity":
            spatial = calculate_spatial_intensity_features(
                state.raw_image, state.intensity_mask, **spatial_kwargs
            )
            features.update(spatial)
            return features

        if family == "local_intensity":
            local = calculate_local_intensity_features(
                state.raw_image, state.intensity_mask, **local_kwargs
            )
            features.update(local)
            return features

        if family == "histogram":
            if not state.is_discretised:
                # Warn but still compute; the warning stays in this frame so
                # that stacklevel=2 keeps pointing at this method's caller.
                warnings.warn(
                    "Histogram features requested but image is not discretised. "
                    "Features may be unreliable.",
                    UserWarning,
                    stacklevel=2,
                )
            roi_values = apply_mask(state.image, state.intensity_mask)
            features.update(calculate_intensity_histogram_features(roi_values))
            return features

        if family == "ivh":
            features.update(self._compute_ivh_features(state, params, ivh_kwargs))
            return features

        # Anything else is a texture family only if the normaliser knows it.
        texture_family = _normalize_texture_family(family)
        if texture_family is not None:
            features.update(self._compute_texture_features(state, texture_family, matrix_kwargs))

        return features

    def _compute_ivh_features(
        self,
        state: PipelineState,
        params: dict[str, Any],
        ivh_params: dict[str, Any],
    ) -> dict[str, Any]:
        """Compute IVH features (helper for _extract_single_family).

        The input values come from one of three sources: the masked raw image
        when ``ivh_use_continuous`` is set; a temporary discretisation of the
        raw image when ``ivh_discretisation`` is given; otherwise the masked
        current image.

        Args:
            state: Current pipeline state.
            params: Feature extraction parameters; ``ivh_use_continuous`` and
                ``ivh_discretisation`` are read from it.
            ivh_params: Pass-through options; ``bin_width``, ``min_val``,
                ``max_val``, ``target_range_min`` and ``target_range_max``
                are forwarded and override values taken from
                ``ivh_discretisation``.

        Returns:
            The dictionary returned by ``calculate_ivh_features``.
        """
        use_continuous = params.get("ivh_use_continuous", False)
        dedicated_disc = params.get("ivh_discretisation", None)

        call_kwargs: dict[str, Any] = {}

        if use_continuous:
            values = apply_mask(state.raw_image, state.intensity_mask)
        elif dedicated_disc:
            disc_options = dedicated_disc.copy()
            disc_method = disc_options.pop("method", "FBS")
            # bin_width / min_val of the dedicated discretisation are also
            # handed on to the IVH computation when they are set.
            for inherited_key in ("bin_width", "min_val"):
                inherited_value = disc_options.get(inherited_key)
                if inherited_value is not None:
                    call_kwargs[inherited_key] = inherited_value
            ivh_image = discretise_image(
                state.raw_image,
                method=disc_method,
                roi_mask=state.intensity_mask,
                **disc_options,
            )
            values = apply_mask(ivh_image, state.intensity_mask)
        else:
            values = apply_mask(state.image, state.intensity_mask)

        # Explicit pass-through options take precedence over inherited ones.
        forwarded_keys = (
            "bin_width",
            "min_val",
            "max_val",
            "target_range_min",
            "target_range_max",
        )
        call_kwargs.update(
            {option: ivh_params[option] for option in forwarded_keys if option in ivh_params}
        )

        # When the already-discretised image is used and no bin width was
        # supplied, a bin width of 1.0 is passed.
        needs_unit_bin_width = (
            not use_continuous
            and state.is_discretised
            and call_kwargs.get("bin_width") is None
            and not dedicated_disc
        )
        if needs_unit_bin_width:
            call_kwargs["bin_width"] = 1.0

        # Options explicitly set to None are not forwarded at all.
        effective_kwargs = {
            option: value for option, value in call_kwargs.items() if value is not None
        }
        return calculate_ivh_features(values, **effective_kwargs)

    def _compute_texture_features(
        self,
        state: PipelineState,
        family: str,
        texture_matrix_params: dict[str, Any],
    ) -> dict[str, Any]:
        """Compute texture features (helper for _extract_single_family).

        All texture matrices are built in one ``calculate_all_texture_matrices``
        call, then the feature sets selected by *family* are derived from
        them. ``"texture"`` selects every sub-family; a sub-family can be
        named either bare (``"glcm"``) or prefixed (``"texture_glcm"``).

        Args:
            state: Current pipeline state; its image must be discretised.
            family: Texture family selector.
            texture_matrix_params: Pass-through options; only ``ngldm_alpha``
                is forwarded to the matrix computation.

        Returns:
            Dictionary with the features of the selected texture sub-families.

        Raises:
            ValueError: If the image has not been discretised.
        """
        if not state.is_discretised:
            raise ValueError(
                "Texture features requested but image is not discretised. "
                "You must include a 'discretise' step before extracting texture features."
            )

        levels = state.image.array
        roi = state.intensity_mask.array
        distance_roi = state.morph_mask.array
        # Falls back to 32 bins when the state carries no (or a zero) bin count.
        level_count = state.n_bins or 32

        extra_matrix_kwargs: dict[str, Any] = (
            {"ngldm_alpha": texture_matrix_params["ngldm_alpha"]}
            if "ngldm_alpha" in texture_matrix_params
            else {}
        )

        matrices = calculate_all_texture_matrices(
            levels,
            roi,
            level_count,
            distance_mask=distance_roi,
            **extra_matrix_kwargs,
        )

        def _selected(sub_family: str) -> bool:
            # "texture" means all sub-families; otherwise match either spelling.
            return family in ("texture", f"texture_{sub_family}", sub_family)

        features: dict[str, Any] = {}

        if _selected("glcm"):
            features.update(
                calculate_glcm_features(levels, roi, level_count, glcm_matrix=matrices["glcm"])
            )
        if _selected("glrlm"):
            features.update(
                calculate_glrlm_features(levels, roi, level_count, glrlm_matrix=matrices["glrlm"])
            )
        if _selected("glszm"):
            features.update(
                calculate_glszm_features(levels, roi, level_count, glszm_matrix=matrices["glszm"])
            )
        if _selected("gldzm"):
            features.update(
                calculate_gldzm_features(
                    levels,
                    roi,
                    level_count,
                    gldzm_matrix=matrices["gldzm"],
                    distance_mask=distance_roi,
                )
            )
        if _selected("ngtdm"):
            ngtdm_pair = (matrices["ngtdm_s"], matrices["ngtdm_n"])
            features.update(
                calculate_ngtdm_features(levels, roi, level_count, ngtdm_matrices=ngtdm_pair)
            )
        if _selected("ngldm"):
            features.update(
                calculate_ngldm_features(levels, roi, level_count, ngldm_matrix=matrices["ngldm"])
            )

        return features

    def save_log(self, output_path: str | Path) -> None:
        """
        Write the processing log to disk as a self-describing JSON document.

        ``.json`` is appended to *output_path* when its string form does not
        already end with it, and missing parent directories are created.

        Args:
            output_path: Destination file path.
        """
        destination = Path(output_path)
        destination = (
            destination if str(destination).endswith(".json") else Path(f"{destination}.json")
        )

        # Header fields first, then the log entries themselves.
        document = {
            "log_schema_version": "1.0",
            "pipeline_schema_version": CONFIG_SCHEMA_VERSION,
            "pictologics_version": _get_package_version(),
            "exported_at": datetime.datetime.now().isoformat(),
            "mask_roi_semantics": "nonzero_values_are_roi_membership",
            "entry_count": len(self._log),
            "entries": self._make_serializable(self._log),
        }

        destination.parent.mkdir(parents=True, exist_ok=True)
        # default=str stringifies anything json cannot encode natively.
        serialized = json.dumps(document, indent=4, default=str)
        destination.write_text(serialized, encoding="utf-8")

    # -------------------------------------------------------------------------
    # Configuration Serialization Methods
    # -------------------------------------------------------------------------

    def list_configs(self) -> list[str]:
        """
        List all registered configuration names.

        Returns:
            List of configuration names.
        """
        return list(self._configs.keys())

    def get_config(self, name: str) -> list[dict[str, Any]]:
        """
        Get a copy of a configuration by name.

        Args:
            name: Configuration name.

        Returns:
            Deep copy of the configuration steps.

        Raises:
            KeyError: If configuration not found.
        """
        if name not in self._configs:
            raise KeyError(f"Configuration '{name}' not found")
        return copy.deepcopy(self._configs[name])

    def remove_config(self, name: str) -> "RadiomicsPipeline":
        """
        Remove a configuration by name.

        Args:
            name: Configuration name to remove.

        Returns:
            Self for method chaining.

        Raises:
            KeyError: If configuration not found.
        """
        if name not in self._configs:
            raise KeyError(f"Configuration '{name}' not found")
        del self._configs[name]
        self._configs_modified_since_plan = True
        return self

    # ------------------------------------------------------------------
    # Feature catalog
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_feature_key(key: str) -> tuple[str, str]:
        """Split a feature key into its human-readable name and IBSI code.

        Feature keys follow the pattern ``descriptive_name_CODE`` where
        ``CODE`` is a 3-4 character uppercase-alphanumeric IBSI identifier.
        Some IVH features carry an additional numeric suffix (e.g.
        ``volume_at_intensity_fraction_0.10_BC2M_10``).

        Returns:
            ``(stripped_name, ibsi_code)``, e.g. ``("joint_entropy", "TU9B")``.
            When no code is found the key is returned unchanged together with
            an empty string.
        """
        found = _IBSI_CODE_RE.search(key)
        if found is not None:
            # The name is everything that precedes the start of the match.
            return key[: found.start()], found.group(1)
        return key, ""

    @staticmethod
    def _extract_config_metadata(
        steps: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Parse a config’s step list into preprocessing metadata.

        Returns a flat dict with keys used by :meth:`describe_features`.
        """
        records: list[dict[str, Any]] = []
        for i, step_def in enumerate(steps, start=1):
            step_name = step_def.get("step", "")
            if step_name not in _PREPROCESSING_STEPS:
                continue
            step_index = step_def.get("step_index", i)
            records.append(
                {
                    "step_index": step_index,
                    "step": step_name,
                    "params": copy.deepcopy(step_def.get("params", {})),
                }
            )

        meta: dict[str, Any] = {
            "preprocessing_sequence": None,
            "preprocessing_steps": None,
            "is_resampled": False,
            "resampling_spacing": None,
            "interpolation": None,
            "resample_params": None,
            "is_resegmented": False,
            "resegment_apply_to": None,
            "resegment_params": None,
            "is_outlier_filtered": False,
            "filter_outliers_apply_to": None,
            "filter_outliers_params": None,
            "is_intensity_rounded": False,
            "round_intensities_params": None,
            "keeps_largest_component": False,
            "keep_largest_component_apply_to": None,
            "keep_largest_component_params": None,
            "is_mask_binarized": False,
            "binarize_mask_apply_to": None,
            "binarize_mask_params": None,
            "is_discretised": False,
            "discretisation_method": None,
            "discretisation_param": None,
            "discretise_params": None,
            "is_filtered": False,
            "filter_type": None,
            "filter_params": None,
        }

        if not records:
            return meta

        meta["preprocessing_sequence"] = " > ".join(
            f"{record['step_index']}:{record['step']}" for record in records
        )
        meta["preprocessing_steps"] = RadiomicsPipeline._catalog_json(records)

        records_by_step: dict[str, list[dict[str, Any]]] = {
            step_name: [record for record in records if record.get("step") == step_name]
            for step_name in _PREPROCESSING_STEPS
        }

        for step_name, column in _PREPROCESSING_PARAM_COLUMNS.items():
            step_records = records_by_step[step_name]
            if step_records:
                meta[column] = RadiomicsPipeline._catalog_json(
                    [
                        {
                            "step_index": record["step_index"],
                            "params": record["params"],
                        }
                        for record in step_records
                    ]
                )

        resample_records = records_by_step["resample"]
        if resample_records:
            meta["is_resampled"] = True
            spacings = [record["params"].get("new_spacing") for record in resample_records]
            meta["resampling_spacing"] = RadiomicsPipeline._catalog_value(spacings)
            interpolations = [
                record["params"].get("interpolation", "linear") for record in resample_records
            ]
            meta["interpolation"] = RadiomicsPipeline._catalog_value(interpolations)

        resegment_records = records_by_step["resegment"]
        if resegment_records:
            meta["is_resegmented"] = True
            meta["resegment_apply_to"] = RadiomicsPipeline._catalog_value(
                [record["params"].get("apply_to", "both") for record in resegment_records]
            )

        filter_outlier_records = records_by_step["filter_outliers"]
        if filter_outlier_records:
            meta["is_outlier_filtered"] = True
            meta["filter_outliers_apply_to"] = RadiomicsPipeline._catalog_value(
                [record["params"].get("apply_to", "both") for record in filter_outlier_records]
            )

        round_records = records_by_step["round_intensities"]
        if round_records:
            meta["is_intensity_rounded"] = True

        largest_component_records = records_by_step["keep_largest_component"]
        if largest_component_records:
            meta["keeps_largest_component"] = True
            meta["keep_largest_component_apply_to"] = RadiomicsPipeline._catalog_value(
                [record["params"].get("apply_to", "both") for record in largest_component_records]
            )

        binarize_records = records_by_step["binarize_mask"]
        if binarize_records:
            meta["is_mask_binarized"] = True
            meta["binarize_mask_apply_to"] = RadiomicsPipeline._catalog_value(
                [record["params"].get("apply_to", "both") for record in binarize_records]
            )

        discretise_records = records_by_step["discretise"]
        if discretise_records:
            meta["is_discretised"] = True
            methods = [record["params"].get("method", "FBN") for record in discretise_records]
            meta["discretisation_method"] = RadiomicsPipeline._catalog_value(methods)
            disc_params: list[Any] = []
            for method, record in zip(methods, discretise_records, strict=True):
                params = record["params"]
                if method == "FBN":
                    disc_params.append(params.get("n_bins"))
                elif method == "FBS":
                    disc_params.append(params.get("bin_width"))
                elif method == "FIXED_CUTOFFS":
                    disc_params.append(params.get("cutoffs"))
                else:
                    disc_params.append(None)
            meta["discretisation_param"] = RadiomicsPipeline._catalog_value(disc_params)

        filter_records = records_by_step["filter"]
        if filter_records:
            meta["is_filtered"] = True
            filter_types = [record["params"].get("type") for record in filter_records]
            meta["filter_type"] = RadiomicsPipeline._catalog_value(filter_types)

        return meta

    @staticmethod
    def _catalog_serializable(obj: Any) -> Any:
        """Convert catalog metadata values to JSON-serializable objects."""
        if isinstance(obj, tuple):
            return [RadiomicsPipeline._catalog_serializable(item) for item in obj]
        if isinstance(obj, dict):
            return {
                str(key): RadiomicsPipeline._catalog_serializable(value)
                for key, value in obj.items()
            }
        if isinstance(obj, list):
            return [RadiomicsPipeline._catalog_serializable(item) for item in obj]
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (np.integer, np.floating, np.bool_)):
            return obj.item()
        return obj

    @staticmethod
    def _catalog_json(obj: Any) -> str:
        """Encode structured catalog metadata as compact JSON."""
        return json.dumps(
            RadiomicsPipeline._catalog_serializable(obj),
            sort_keys=True,
            separators=(",", ":"),
        )

    @staticmethod
    def _catalog_value(values: list[Any]) -> Any:
        """Return one scalar value or a JSON list for repeated preprocessing steps."""
        serializable = cast(list[Any], RadiomicsPipeline._catalog_serializable(values))
        if len(serializable) == 1:
            value = serializable[0]
            if isinstance(value, list):
                return str(tuple(value))
            if isinstance(value, dict):
                return RadiomicsPipeline._catalog_json(value)
            return value
        return RadiomicsPipeline._catalog_json(serializable)

    def describe_features(self) -> pd.DataFrame:
        """Return a DataFrame cataloguing every feature the pipeline will produce.

        Each row represents one (configuration, feature) pair. The columns
        describe the feature identity, its family membership, and the
        preprocessing state at the ``extract_features`` step that produces it.
        Repeated preprocessing steps are represented as compact JSON arrays in
        the corresponding ``*_params`` cells.

        When several ``extract_features`` steps in one configuration yield the
        same feature key, only the row from the last such step is kept, at the
        position where the key was first seen.

        This is useful for:

        * Inspecting the full set of features before running the pipeline.
        * Filtering or subsetting features by family, discretisation method,
          filter type, etc.
        * Exporting a data dictionary (``describe_features().to_csv(...)``)
          alongside study results for documentation and reproducibility.

        Returns:
            A `pandas.DataFrame` with columns:

            - **config** – Configuration name.
            - **feature_key** – Full feature key as it appears in the output Series.
            - **feature_name** – Human-readable name (IBSI code stripped).
            - **ibsi_code** – 3–4 character IBSI identifier.
            - **family** – Granular feature family (e.g. ``glcm``, ``ivh``), or
                ``unknown`` when the key is not listed in any family.
            - **family_group** – Broad category: *Intensity*, *Morphology*, or
                *Texture* (``Unknown`` for unmapped families).
            - **requires_discretisation** – Whether the family needs discretised
                input.
            - **uses_morph_mask** / **uses_intensity_mask** – Which runtime ROI
                mask(s) the feature row depends on.
            - **source_mode** – Source voxel handling mode for the configuration.
            - **sentinel_value** – Explicit sentinel value, if configured.
            - **feature_extraction_step_index** – 1-based position of the
                ``extract_features`` step that produced the row.
            - **feature_extraction_params** – Parameters on that
                ``extract_features`` step as compact JSON, or ``None``.
            - **preprocessing_sequence** – Ordered preprocessing step sequence
                applied before feature extraction.
            - **preprocessing_steps** – Full ordered preprocessing step records
                as compact JSON.
            - **is_discretised** – Whether a ``discretise`` step precedes the
                extraction step.
            - **discretisation_method** – ``FBN``, ``FBS``, ``FIXED_CUTOFFS``,
                or ``None``.
            - **discretisation_param** – Bin count (FBN), bin width (FBS), or
                cutoffs (FIXED_CUTOFFS).
            - **is_resampled** – Whether a ``resample`` step precedes the
                extraction step.
            - **resampling_spacing** – Target spacing as a string, e.g.
                ``"(0.5, 0.5, 0.5)"``.
            - **interpolation** – Resampling interpolation method.
            - **is_resegmented** / **resegment_apply_to** – Whether a
                ``resegment`` step is present and which mask it applies to.
            - **is_outlier_filtered** / **filter_outliers_apply_to** – Same for
                ``filter_outliers``.
            - **is_intensity_rounded** – Whether a ``round_intensities`` step is
                present.
            - **keeps_largest_component** / **keep_largest_component_apply_to** –
                Same for ``keep_largest_component``.
            - **is_mask_binarized** / **binarize_mask_apply_to** – Same for
                ``binarize_mask``.
            - **is_filtered** – Whether a ``filter`` step is present.
            - **filter_type** – Filter type (``log``, ``gabor``, …) or ``None``.
            - **discretise_params**, **resample_params**, **resegment_params**,
                **filter_outliers_params**, **round_intensities_params**,
                **keep_largest_component_params**, **binarize_mask_params**,
                **filter_params** – Ordered parameters of the corresponding
                preprocessing steps as compact JSON, or ``None``.

        Example:
            ```python
            pipeline = RadiomicsPipeline()
            catalog = pipeline.describe_features()

            # Export as CSV data dictionary
            catalog.to_csv("feature_catalog.csv", index=False)

            # Filter: only texture features from FBN configs
            texture_fbn = catalog[
                (catalog["family_group"] == "Texture")
                & (catalog["discretisation_method"] == "FBN")
            ]
            ```
        """
        # Build reverse lookup: feature_key -> family
        key_to_family: dict[str, str] = {}
        for family, keys in FEATURE_NAMES.items():
            for key in keys:
                key_to_family[key] = family

        rows: list[dict[str, Any]] = []

        for config_name, steps in self._configs.items():
            # Preprocessing records accumulated so far; each extraction step
            # sees only the preprocessing steps that precede it.
            active_preprocessing: list[dict[str, Any]] = []
            row_by_key: dict[str, dict[str, Any]] = {}
            key_order: list[str] = []
            config_metadata = self._config_metadata.get(config_name, {})
            source_mode = config_metadata.get("source_mode", "full_image")
            sentinel_value = config_metadata.get("sentinel_value")

            for step_index, step_def in enumerate(steps, start=1):
                step_name = step_def.get("step", "")
                # A step may carry an explicit ``"params": None`` (e.g. an empty
                # ``params:`` key in YAML); treat it as "no parameters" so the
                # metadata helper can safely call ``.get`` on the record.
                params = step_def.get("params") or {}

                if step_name == "extract_features":
                    feature_keys = self._get_expected_feature_names([step_def])
                    config_meta = self._extract_config_metadata(active_preprocessing)
                    extraction_params = self._catalog_json(params) if params else None

                    for fkey in feature_keys:
                        fname, code = self._parse_feature_key(fkey)
                        family = key_to_family.get(fkey, "unknown")

                        row: dict[str, Any] = {
                            "config": config_name,
                            "feature_key": fkey,
                            "feature_name": fname,
                            "ibsi_code": code,
                            "family": family,
                            "family_group": _FAMILY_GROUP.get(family, "Unknown"),
                            "requires_discretisation": _REQUIRES_DISCRETISATION.get(family, False),
                            "uses_morph_mask": _family_uses_morph_mask(family),
                            "uses_intensity_mask": _feature_uses_intensity_mask(fkey, family),
                            "source_mode": source_mode,
                            "sentinel_value": sentinel_value,
                            "feature_extraction_step_index": step_index,
                            "feature_extraction_params": extraction_params,
                        }
                        row.update(config_meta)
                        # Keep first-seen ordering, but let a later extraction
                        # step replace the row for a key it produces again.
                        if fkey not in row_by_key:
                            key_order.append(fkey)
                        row_by_key[fkey] = row

                elif step_name in _PREPROCESSING_STEPS:
                    active_preprocessing.append(
                        {
                            "step_index": step_index,
                            "step": step_name,
                            # Copy so later catalog handling cannot alias the
                            # stored configuration.
                            "params": copy.deepcopy(params),
                        }
                    )

            rows.extend(row_by_key[fkey] for fkey in key_order)

        columns = [
            "config",
            "feature_key",
            "feature_name",
            "ibsi_code",
            "family",
            "family_group",
            "requires_discretisation",
            "uses_morph_mask",
            "uses_intensity_mask",
            "source_mode",
            "sentinel_value",
            "feature_extraction_step_index",
            "feature_extraction_params",
            "preprocessing_sequence",
            "preprocessing_steps",
            "is_discretised",
            "discretisation_method",
            "discretisation_param",
            "discretise_params",
            "is_resampled",
            "resampling_spacing",
            "interpolation",
            "resample_params",
            "is_resegmented",
            "resegment_apply_to",
            "resegment_params",
            "is_outlier_filtered",
            "filter_outliers_apply_to",
            "filter_outliers_params",
            "is_intensity_rounded",
            "round_intensities_params",
            "keeps_largest_component",
            "keep_largest_component_apply_to",
            "keep_largest_component_params",
            "is_mask_binarized",
            "binarize_mask_apply_to",
            "binarize_mask_params",
            "is_filtered",
            "filter_type",
            "filter_params",
        ]
        return pd.DataFrame(rows, columns=columns) if rows else pd.DataFrame(columns=columns)

    def to_dict(
        self,
        config_names: Optional[list[str]] = None,
        include_metadata: bool = True,
        include_deduplication: bool = True,
    ) -> dict[str, Any]:
        """
        Export configurations to a dictionary.

        Args:
            config_names: Specific configs to export. If None, exports all.
            include_metadata: Whether to include schema version and metadata.
            include_deduplication: Whether to include deduplication settings.

        Returns:
            Dictionary with configs and optional metadata.
        """
        if config_names is None:
            configs_to_export = self._configs
        else:
            configs_to_export = {
                name: self._configs[name] for name in config_names if name in self._configs
            }

        # Convert tuples to lists for serialization
        serializable_configs: dict[str, Any] = {}
        for name, steps in configs_to_export.items():
            conf_data = {"steps": self._make_serializable(steps)}
            # Include metadata if present
            if name in self._config_metadata:
                meta = self._config_metadata[name]
                if "source_mode" in meta:
                    conf_data["source_mode"] = meta["source_mode"]
                if "sentinel_value" in meta and meta["sentinel_value"] is not None:
                    conf_data["sentinel_value"] = meta["sentinel_value"]
            serializable_configs[name] = conf_data

        result: dict[str, Any] = {}

        if include_metadata:
            result["schema_version"] = CONFIG_SCHEMA_VERSION
            result["pictologics_version"] = _get_package_version()
            result["exported_at"] = datetime.datetime.now().isoformat()
            result["mask_roi_semantics"] = "nonzero_values_are_roi_membership"

        result["configs"] = serializable_configs

        if include_deduplication:
            result["deduplication"] = {
                "enabled": self._deduplication_enabled,
                "rules_version": self._deduplication_rules.version,
            }
            # Include last plan if available and not stale
            if self._last_deduplication_plan and not self._configs_modified_since_plan:
                result["deduplication"]["last_plan"] = self._last_deduplication_plan.to_dict()

        return result

    def _make_serializable(self, obj: Any) -> Any:
        """Convert tuples and other non-serializable types to serializable forms."""
        if isinstance(obj, tuple):
            return list(obj)
        elif isinstance(obj, dict):
            return {k: self._make_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self._make_serializable(item) for item in obj]
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, (np.integer, np.floating, np.bool_)):
            return obj.item()
        elif isinstance(obj, Path):
            return str(obj)
        elif isinstance(obj, Enum):
            return obj.value
        return obj

    def to_json(
        self,
        config_names: Optional[list[str]] = None,
        indent: int = 2,
    ) -> str:
        """
        Serialise configurations to JSON text.

        Args:
            config_names: Names of the configs to export; ``None`` exports all.
            indent: Number of spaces used for JSON indentation.

        Returns:
            The JSON document as a string.
        """
        # Objects json cannot encode natively are written via ``str``.
        return json.dumps(
            self.to_dict(config_names=config_names),
            default=str,
            indent=indent,
        )

    def to_yaml(
        self,
        config_names: Optional[list[str]] = None,
    ) -> str:
        """
        Serialise configurations to YAML text.

        Args:
            config_names: Names of the configs to export; ``None`` exports all.

        Returns:
            The YAML document as a string, in block style with the key order
            of the exported dictionary preserved.
        """
        exported = self.to_dict(config_names=config_names)
        yaml_text: str = yaml.dump(exported, sort_keys=False, default_flow_style=False)
        return yaml_text

    def save_configs(
        self,
        output_path: str | Path,
        config_names: Optional[list[str]] = None,
    ) -> None:
        """
        Save configurations to a file (JSON or YAML based on extension).

        Args:
            output_path: Path to output file. Extension determines format.
            config_names: Specific configs to export. If None, exports all.

        Raises:
            ValueError: If file extension is not .json, .yaml, or .yml.
        """
        path = Path(output_path)
        suffix = path.suffix.lower()

        if suffix == ".json":
            content = self.to_json(config_names=config_names)
        elif suffix in (".yaml", ".yml"):
            content = self.to_yaml(config_names=config_names)
        else:
            raise ValueError(f"Unsupported file extension: {suffix}. Use .json, .yaml, or .yml")

        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")

    @classmethod
    def from_dict(
        cls,
        data: dict[str, Any],
        validate: bool = False,
        load_standard: bool = False,
    ) -> "RadiomicsPipeline":
        """
        Create a new pipeline instance from a configuration dictionary.

        The resulting pipeline contains only the configurations defined in the
        dictionary by default. Standard configurations are not loaded unless
        explicitly requested.

        Each entry under ``configs`` may be either a dict with a ``steps`` list
        (plus optional ``source_mode`` / ``sentinel_value``) or a bare list of
        steps. Entries in any other shape are skipped with a ``UserWarning``.
        A ``configs`` or ``deduplication`` section that is present but null is
        treated as empty.

        Args:
            data: Configuration dictionary with 'configs' key.
            validate: Whether to validate parameters (logs warnings for issues).
            load_standard: Whether to also load standard predefined configurations.
                Defaults to False so that only the provided configs are loaded.

        Returns:
            New RadiomicsPipeline instance with loaded configs.
        """
        # Handle schema version migration if needed
        schema_version = data.get("schema_version", "1.0")
        migrated_data = cls._migrate_config(data, schema_version)

        # ``or {}`` tolerates a section that exists but is null, e.g. a bare
        # ``deduplication:`` key in a hand-written YAML file parses to None.
        dedup_settings = migrated_data.get("deduplication") or {}
        deduplicate = dedup_settings.get("enabled", True)
        dedup_rules_version = dedup_settings.get("rules_version", None)

        # Create pipeline with deduplication settings (no standard configs by default)
        pipeline = cls(
            deduplicate=deduplicate,
            deduplication_rules=dedup_rules_version,
            load_standard=load_standard,
        )

        configs = migrated_data.get("configs") or {}
        for name, config_data in configs.items():
            if isinstance(config_data, dict) and "steps" in config_data:
                steps = config_data["steps"]
                source_mode = config_data.get("source_mode", "full_image")
                sentinel_value = config_data.get("sentinel_value")
            elif isinstance(config_data, list):
                # Bare step list: no per-config metadata available, use defaults.
                steps = config_data
                source_mode = "full_image"
                sentinel_value = None
            else:
                warnings.warn(
                    f"Invalid config format for '{name}', skipping",
                    UserWarning,
                    stacklevel=2,
                )
                continue

            # Convert YAML lists to tuples where needed
            converted_steps = pipeline._convert_yaml_steps(steps)

            # Validation only warns; the config is registered either way.
            if validate:
                cls._validate_config(name, converted_steps)

            pipeline._configs[name] = converted_steps
            pipeline._config_metadata[name] = {
                "source_mode": source_mode,
                "sentinel_value": sentinel_value,
            }

        # Mark configs as loaded (not modified) so dedup plan from serialized data is valid
        pipeline._configs_modified_since_plan = False

        # Restore last_plan if present and valid. This is deliberately
        # best-effort: a plan that cannot be restored only triggers a warning.
        if "last_plan" in dedup_settings:
            try:
                pipeline._last_deduplication_plan = DeduplicationPlan.from_dict(
                    dedup_settings["last_plan"]
                )
            except Exception as e:
                warnings.warn(
                    f"Failed to restore deduplication plan: {e}",
                    RuntimeWarning,
                    stacklevel=2,
                )

        return pipeline

    @classmethod
    def from_json(
        cls,
        json_string: str,
        validate: bool = False,
        load_standard: bool = False,
    ) -> "RadiomicsPipeline":
        """
        Create a new pipeline instance from a JSON string.

        The resulting pipeline contains only the configurations defined in the
        JSON string by default.

        Args:
            json_string: JSON configuration string.
            validate: Whether to validate parameters.
            load_standard: Whether to also load standard predefined configurations.
                Defaults to False so that only the provided configs are loaded.

        Returns:
            New RadiomicsPipeline instance.
        """
        data = json.loads(json_string)
        return cls.from_dict(data, validate=validate, load_standard=load_standard)

    @classmethod
    def from_yaml(
        cls,
        yaml_string: str,
        validate: bool = False,
        load_standard: bool = False,
    ) -> "RadiomicsPipeline":
        """
        Build a pipeline from YAML text.

        The text is parsed with ``yaml.safe_load`` and the result is handed to
        ``from_dict``; by default only the configurations found in the YAML
        are present in the new pipeline.

        Args:
            yaml_string: YAML configuration string.
            validate: Whether to validate parameters.
            load_standard: Whether to also load standard predefined configurations.
                Defaults to False so that only the provided configs are loaded.

        Returns:
            New RadiomicsPipeline instance.
        """
        return cls.from_dict(
            yaml.safe_load(yaml_string),
            validate=validate,
            load_standard=load_standard,
        )

    @classmethod
    def load_configs(
        cls,
        file_path: str | Path,
        validate: bool = False,
        load_standard: bool = False,
    ) -> "RadiomicsPipeline":
        """
        Load configurations from a file (JSON or YAML).

        The resulting pipeline contains only the configurations defined in the
        file by default. Standard configurations (e.g., ``standard_fbn_32``) are
        not loaded unless ``load_standard=True`` is passed.

        Args:
            file_path: Path to configuration file.
            validate: Whether to validate parameters.
            load_standard: Whether to also load standard predefined configurations.
                Defaults to False so that only the file's configs are loaded.
                Pass True to include standard configs alongside the loaded ones.

        Returns:
            New RadiomicsPipeline instance.

        Raises:
            FileNotFoundError: If file doesn't exist.
            ValueError: If file extension is unsupported.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Configuration file not found: {path}")

        suffix = path.suffix.lower()
        content = path.read_text(encoding="utf-8")

        if suffix == ".json":
            return cls.from_json(content, validate=validate, load_standard=load_standard)
        elif suffix in (".yaml", ".yml"):
            return cls.from_yaml(content, validate=validate, load_standard=load_standard)
        else:
            raise ValueError(f"Unsupported file extension: {suffix}. Use .json, .yaml, or .yml")

    def merge_configs(
        self,
        other: "RadiomicsPipeline",
        overwrite: bool = False,
    ) -> "RadiomicsPipeline":
        """
        Merge configurations from another pipeline instance.

        Args:
            other: Another RadiomicsPipeline to merge from.
            overwrite: Whether to overwrite existing configs with same name.

        Returns:
            Self for method chaining.
        """
        for name, steps in other._configs.items():
            if name in self._configs and not overwrite:
                warnings.warn(
                    f"Config '{name}' already exists, skipping (use overwrite=True)",
                    UserWarning,
                    stacklevel=2,
                )
                continue
            self._configs[name] = copy.deepcopy(steps)
            if name in other._config_metadata:
                self._config_metadata[name] = copy.deepcopy(other._config_metadata[name])
            else:
                self._config_metadata.pop(name, None)
        return self

    # -------------------------------------------------------------------------
    # Schema Migration
    # -------------------------------------------------------------------------

    @staticmethod
    def _migrate_config(data: dict[str, Any], from_version: str) -> dict[str, Any]:
        """
        Bring configuration data from an older schema version up to date.

        No migrations are defined yet, so the data is always returned as-is;
        a ``UserWarning`` is issued when the source version is neither the
        current one nor a known one.

        Args:
            data: Configuration data to migrate.
            from_version: Source schema version.

        Returns:
            Migrated configuration data (currently the same object as ``data``).
        """
        is_current = from_version == CONFIG_SCHEMA_VERSION
        if not is_current and from_version not in _VALID_SCHEMA_VERSIONS:
            warnings.warn(
                f"Unknown schema version '{from_version}', proceeding cautiously",
                UserWarning,
                stacklevel=2,
            )

        # Future migrations would go here
        # Example: if from_version == "1.0" and target is "2.0": ...
        return data

    # -------------------------------------------------------------------------
    # Validation
    # -------------------------------------------------------------------------

    # Known step types and their valid parameters.
    # Maps each recognised step name to the set of parameter names accepted for
    # it. ``_validate_config`` warns about step types that are missing from this
    # table and about parameter names that are missing from the matching set.
    # An empty set means no parameter names are recognised for that step.
    _VALID_STEPS: dict[str, set[str]] = {
        "resample": {
            "new_spacing",
            "interpolation",
            "mask_interpolation",
            "mask_threshold",
            "round_intensities",
        },
        "resegment": {"range_min", "range_max", "apply_to"},
        "filter_outliers": {"sigma", "apply_to"},
        "binarize_mask": {"threshold", "mask_values", "apply_to"},
        "keep_largest_component": {"apply_to"},
        "round_intensities": set(),
        "discretise": {
            "method",
            "n_bins",
            "bin_width",
            "min_val",
            "max_val",
            "cutoffs",
        },
        # All filter types share one flat set; parameters are grouped below by
        # the filter they belong to.
        "filter": {
            # Shared / dispatch
            "type",
            "boundary",
            # Mean filter
            "support",
            # LoG filter
            "sigma_mm",
            "spacing_mm",
            "truncate",
            # Laws filter
            "kernel",
            "compute_energy",
            "energy_distance",
            # Gabor filter
            "lambda_mm",
            "gamma",
            "theta",
            "delta_theta",
            "average_over_planes",
            # Wavelet filter
            "wavelet",
            "decomposition",
            "level",
            # Riesz transform
            "order",
            "variant",
            # Shared across filters
            "rotation_invariant",
            "pooling",
            "use_parallel",
        },
        "extract_features": {
            "families",
            "include_spatial_intensity",
            "include_local_intensity",
            "texture_matrix_params",
            "ivh_params",
            "ivh_use_continuous",
            "ivh_discretisation",
        },
    }

    @classmethod
    def _validate_config(cls, name: str, steps: list[dict[str, Any]]) -> bool:
        """
        Validate a configuration, issuing warnings for issues.

        Args:
            name: Configuration name (for warning messages).
            steps: List of step dictionaries.

        Returns:
            True if valid, False if issues found (warnings are issued).
        """
        is_valid = True

        if not isinstance(steps, list):
            warnings.warn(
                f"Config '{name}': steps must be a list",
                UserWarning,
                stacklevel=2,
            )
            return False

        for i, step in enumerate(steps):
            if not isinstance(step, dict):
                warnings.warn(
                    f"Config '{name}' step {i}: must be a dictionary",
                    UserWarning,
                    stacklevel=2,
                )
                is_valid = False
                continue

            step_type = step.get("step")
            if not step_type:
                warnings.warn(
                    f"Config '{name}' step {i}: missing 'step' key",
                    UserWarning,
                    stacklevel=2,
                )
                is_valid = False
                continue

            if step_type not in cls._VALID_STEPS:
                warnings.warn(
                    f"Config '{name}' step {i}: unknown step type '{step_type}'",
                    UserWarning,
                    stacklevel=2,
                )
                is_valid = False
                continue

            # Check for unknown parameters
            params = step.get("params", {})
            if params:
                valid_params = cls._VALID_STEPS[step_type]
                for param_name in params.keys():
                    if param_name not in valid_params:
                        warnings.warn(
                            f"Config '{name}' step {i} ({step_type}): "
                            f"unknown parameter '{param_name}'",
                            UserWarning,
                            stacklevel=2,
                        )

        return is_valid

`deduplication_enabled` `property` `writable`

Whether feature deduplication is enabled.

`deduplication_rules` `property` `writable`

Current deduplication rules.

`deduplication_stats` `property`

Statistics from the last pipeline run with deduplication enabled.

Returns a dictionary with the following keys:

- `reused_families`: Number of feature families reused from cache
- `computed_families`: Number of feature families freshly computed
- `cache_hit_rate`: Fraction of families reused (0.0 to 1.0)

Returns an empty dict if no features were extracted (with a warning), or if deduplication was not enabled during the last run.

Note

Statistics are valid because pipeline configurations run sequentially. Parallelization occurs within Numba-accelerated functions, not across configs.

`last_deduplication_plan` `property`

The last computed deduplication plan, if any.

`__init__(deduplicate=True, deduplication_rules=None, load_standard=True)`

Initialize pipeline with empty config registry.

Parameters:

Name	Type	Description	Default
`deduplicate`	`bool`	Whether to enable feature deduplication across configurations.	`True`
`deduplication_rules`	`DeduplicationRules \| str \| None`	Specific DeduplicationRules to use, or a version string to look up from the registry. If None, uses current default.	`None`
`load_standard`	`bool`	Whether to load standard predefined configurations (e.g., `standard_fbn_32`, `standard_fbs_16`). Defaults to True for direct instantiation. Set to False when loading configurations from files or strings to avoid mixing standard configs with user-defined ones.	`True`

Source code in pictologics/pipeline.py

def __init__(
    self,
    deduplicate: bool = True,
    deduplication_rules: DeduplicationRules | str | None = None,
    load_standard: bool = True,
) -> None:
    """Set up the configuration registry and the deduplication state.

    Args:
        deduplicate: Enable feature deduplication across configurations.
        deduplication_rules: A ``DeduplicationRules`` instance to use as-is,
            a version string resolved through
            ``DeduplicationRules.get_version``, or ``None`` to take the
            result of ``get_default_rules()``.
        load_standard: When True, finish by calling
            ``_load_predefined_configs()`` so the standard predefined
            configurations (e.g., ``standard_fbn_32``, ``standard_fbs_16``)
            are registered. Pass False when the configurations come from a
            file or string, to avoid mixing standard configs with
            user-defined ones.
    """
    # Registry state: config name -> ordered steps, per-config metadata
    # (source_mode, etc.) and the in-memory processing log.
    self._configs: dict[str, list[dict[str, Any]]] = {}
    self._config_metadata: dict[str, dict[str, Any]] = {}
    self._log: list[dict[str, Any]] = []

    self._deduplication_enabled = deduplicate

    # Normalise the three accepted forms of ``deduplication_rules``.
    if isinstance(deduplication_rules, str):
        resolved_rules = DeduplicationRules.get_version(deduplication_rules)
    elif deduplication_rules is None:
        resolved_rules = get_default_rules()
    else:
        resolved_rules = deduplication_rules
    self._deduplication_rules = resolved_rules

    # No plan has been computed yet and nothing has invalidated one.
    self._last_deduplication_plan: DeduplicationPlan | None = None
    self._configs_modified_since_plan: bool = False

    # Per-run deduplication counters.
    self._dedup_reused_count: int = 0
    self._dedup_computed_count: int = 0

    # Must come last: the loader works on the attributes created above.
    if load_standard:
        self._load_predefined_configs()

`add_config(name, steps, source_mode='full_image', sentinel_value=None)`

Add a processing configuration.

Parameters:

Name	Type	Description	Default
`name`	`str`	Unique name for this configuration.	required
`steps`	`list[dict[str, Any]]`	List of steps. Each step is a dict with 'step' (name) and 'params' (dict). Supported steps: - 'resample': params: new_spacing (required), interpolation (optional) - 'resegment': params: range_min, range_max, apply_to - 'filter_outliers': params: sigma, apply_to - 'binarize_mask': params: threshold (float, default 0.5), mask_values (int \| list[int] \| tuple[int, int]), apply_to ('morph'\|'intensity'\|'both') - 'keep_largest_component': params: None - 'round_intensities': params: None - 'discretise': params: method, n_bins/bin_width, etc. - 'filter': params: type (required), plus filter-specific params - 'extract_features': params: families (list), etc.	required
`source_mode`	`str`	How to handle source voxel validity for resampling/filtering: - "full_image" (default): All voxels contain real data. Traditional behavior. - "roi_only": Only ROI voxels contain real data; others have sentinel values. - "auto": Auto-detect sentinel values; emit warning if found.	`'full_image'`
`sentinel_value`	`Optional[float]`	If specified, explicitly set the sentinel value instead of auto-detecting. Only used when source_mode is "roi_only" or "auto".	`None`

Note

Texture features require a prior 'discretise' step.
IVH features are configured via 'ivh_params' dict.
The source_mode setting affects resampling and filtering operations. In 'roi_only' mode, boundary regions use normalized interpolation to exclude sentinel voxels.

Example

pipeline = RadiomicsPipeline()

# Standard configuration (all voxels valid)
pipeline.add_config(
    name="standard",
    steps=[...],
)

# Configuration for sentinel-masked images
pipeline.add_config(
    name="sentinel_aware",
    source_mode="roi_only",
    steps=[
        {"step": "resample", "params": {"new_spacing": (1, 1, 1)}},
        {
            "step": "extract_features",
            "params": {
                "families": [
                    "intensity",
                    "morphology",
                    "texture",
                    "histogram",
                    "ivh",
                ]
            },
        },
    ],
)

Source code in pictologics/pipeline.py

def add_config(
    self,
    name: str,
    steps: list[dict[str, Any]],
    source_mode: str = "full_image",
    sentinel_value: Optional[float] = None,
) -> "RadiomicsPipeline":
    """
    Add a processing configuration.

    The pipeline stores a deep copy of ``steps``, so mutating the list (or
    the dicts inside it) after this call does not alter the registered
    configuration. Registering under an existing ``name`` replaces that
    configuration and its metadata.

    Args:
        name: Unique name for this configuration.
        steps: List of steps. Each step is a dict with 'step' (name) and 'params' (dict).
               Supported steps:
               - 'resample': params: new_spacing (required), interpolation (optional)
               - 'resegment': params: range_min, range_max, apply_to
               - 'filter_outliers': params: sigma, apply_to
               - 'binarize_mask': params: threshold (float, default 0.5),
                   mask_values (int | list[int] | tuple[int, int]), apply_to ('morph'|'intensity'|'both')
               - 'keep_largest_component': params: None
               - 'round_intensities': params: None
               - 'discretise': params: method, n_bins/bin_width, etc.
               - 'filter': params: type (required), plus filter-specific params
               - 'extract_features': params: families (list), etc.
        source_mode: How to handle source voxel validity for resampling/filtering:
            - "full_image" (default): All voxels contain real data. Traditional behavior.
            - "roi_only": Only ROI voxels contain real data; others have sentinel values.
            - "auto": Auto-detect sentinel values; emit warning if found.
        sentinel_value: If specified, explicitly set the sentinel value instead
            of auto-detecting. Only used when source_mode is "roi_only" or "auto".

    Returns:
        Self for method chaining.

    Raises:
        ValueError: If ``steps`` is not a list, ``source_mode`` is not one of
            the supported modes, or a step is not a dict with a 'step' key.

    Note:
        - Texture features require a prior 'discretise' step.
        - IVH features are configured via 'ivh_params' dict.
        - The source_mode setting affects resampling and filtering operations.
          In 'roi_only' mode, boundary regions use normalized interpolation to
          exclude sentinel voxels.

    Example:
        ```python
        pipeline = RadiomicsPipeline()

        # Standard configuration (all voxels valid)
        pipeline.add_config(
            name="standard",
            steps=[...],
        )

        # Configuration for sentinel-masked images
        pipeline.add_config(
            name="sentinel_aware",
            source_mode="roi_only",
            steps=[
                {"step": "resample", "params": {"new_spacing": (1, 1, 1)}},
                {
                    "step": "extract_features",
                    "params": {
                        "families": [
                            "intensity",
                            "morphology",
                            "texture",
                            "histogram",
                            "ivh",
                        ]
                    },
                },
            ],
        )
        ```
    """
    if not isinstance(steps, list):
        raise ValueError("Configuration must be a list of steps")

    # Validate source_mode
    valid_modes = {"full_image", "roi_only", "auto"}
    if source_mode not in valid_modes:
        raise ValueError(f"Invalid source_mode '{source_mode}'. Must be one of: {valid_modes}")

    for step in steps:
        if not isinstance(step, dict):
            raise ValueError("Each step must be a dictionary")
        if "step" not in step:
            raise ValueError("Each step must have a 'step' key")

    # Store a private copy: keeping the caller's list by reference would let
    # later mutations change the registered config without flagging the
    # deduplication plan as stale.
    self._configs[name] = copy.deepcopy(steps)
    self._config_metadata[name] = {
        "source_mode": source_mode,
        "sentinel_value": sentinel_value,
    }
    self._configs_modified_since_plan = True
    return self

`clear_log()`

Clear the in-memory processing log.

Source code in pictologics/pipeline.py

def clear_log(self) -> None:
    """Empty the in-memory processing log in place."""
    # Slice deletion empties the existing list object rather than rebinding it.
    del self._log[:]

`describe_features()`

Return a DataFrame cataloguing every feature the pipeline will produce.

Each row represents one (configuration, feature) pair. The columns describe the feature identity, its family membership, and the preprocessing state at the extract_features step that produces it. Repeated preprocessing steps are represented as compact JSON arrays in the corresponding *_params cells.

This is useful for:

Inspecting the full set of features before running the pipeline.
Filtering or subsetting features by family, discretisation method, filter type, etc.
Exporting a data dictionary (describe_features().to_csv(...)) alongside study results for documentation and reproducibility.

Returns:

Type	Description
`DataFrame`	A `pandas.DataFrame` with the columns listed below.

- config – Configuration name.
- feature_key – Full feature key as it appears in the output Series.
- feature_name – Human-readable name (IBSI code stripped).
- ibsi_code – 3–4 character IBSI identifier.
- family – Granular feature family (e.g. `glcm`, `ivh`).
- family_group – Broad category: Intensity, Morphology, or Texture.
- requires_discretisation – Whether the family needs discretised input.
- uses_morph_mask / uses_intensity_mask – Which runtime ROI mask(s) the feature row depends on.
- source_mode – Source voxel handling mode for the configuration.
- sentinel_value – Explicit sentinel value, if configured.
- feature_extraction_step_index – 1-based position of the `extract_features` step that produced the row.
- feature_extraction_params – Parameters on that `extract_features` step as compact JSON.
- preprocessing_sequence – Ordered preprocessing step sequence applied before feature extraction.
- preprocessing_steps – Full ordered preprocessing step records as compact JSON.
- is_discretised – Whether the configuration includes a `discretise` step.
- discretisation_method – `FBN`, `FBS`, or `None`.
- discretisation_param – Bin count (FBN) or bin width (FBS).
- is_resampled – Whether the configuration includes a `resample` step.
- resampling_spacing – Target spacing as a string, e.g. `"(0.5, 0.5, 0.5)"`.
- interpolation – Resampling interpolation method.
- is_filtered – Whether a `filter` step is present.
- filter_type – Filter type (`log`, `gabor`, …) or `None`.
- filter_params – Ordered filter step parameters as compact JSON, or `None`.

Example

pipeline = RadiomicsPipeline()
catalog = pipeline.describe_features()

# Export as CSV data dictionary
catalog.to_csv("feature_catalog.csv", index=False)

# Filter: only texture features from FBN configs
texture_fbn = catalog[
    (catalog["family_group"] == "Texture")
    & (catalog["discretisation_method"] == "FBN")
]

Source code in pictologics/pipeline.py

def describe_features(self) -> pd.DataFrame:
    """Build a catalogue of every feature the registered configs will emit.

    The result has one row per (configuration, feature) pair. Each row
    carries the feature's identity and family, plus the preprocessing state
    in effect at the ``extract_features`` step that produces it. When a
    configuration has several ``extract_features`` steps yielding the same
    feature key, the row from the last such step is kept, at the position
    where the key first appeared. Repeated preprocessing steps are
    represented as compact JSON arrays in the matching ``*_params`` cells.

    Typical uses:

    * Reviewing the full feature set before running the pipeline.
    * Subsetting features by family, discretisation method, filter type,
      etc.
    * Writing a data dictionary (``describe_features().to_csv(...)``) to
      ship with study results for documentation and reproducibility.

    Returns:
        A `pandas.DataFrame` (empty, but with all columns, when no features
        are expected) containing these columns:

        - **config** – Configuration name.
        - **feature_key** – Full feature key as it appears in the output
            Series.
        - **feature_name** – Human-readable name (IBSI code stripped).
        - **ibsi_code** – 3–4 character IBSI identifier.
        - **family** – Granular feature family (e.g. ``glcm``, ``ivh``);
            ``"unknown"`` when the key is not listed in ``FEATURE_NAMES``.
        - **family_group** – Broad category: *Intensity*, *Morphology*, or
            *Texture* (``"Unknown"`` for unmapped families).
        - **requires_discretisation** – Whether the family needs discretised
            input.
        - **uses_morph_mask** / **uses_intensity_mask** – Which runtime ROI
            mask(s) the feature row depends on.
        - **source_mode** – Source voxel handling mode for the configuration.
        - **sentinel_value** – Explicit sentinel value, if configured.
        - **feature_extraction_step_index** – 1-based position of the
            ``extract_features`` step that produced the row.
        - **feature_extraction_params** – Parameters on that
            ``extract_features`` step as compact JSON, or ``None`` when the
            step has no parameters.
        - **preprocessing_sequence** – Ordered preprocessing step sequence
            applied before feature extraction.
        - **preprocessing_steps** – Full ordered preprocessing step records
            as compact JSON.
        - **is_discretised** – Whether the configuration includes a
            ``discretise`` step.
        - **discretisation_method** – ``FBN``, ``FBS``, or ``None``.
        - **discretisation_param** – Bin count (FBN) or bin width (FBS).
        - **is_resampled** – Whether the configuration includes a ``resample``
            step.
        - **resampling_spacing** – Target spacing as a string, e.g.
            ``"(0.5, 0.5, 0.5)"``.
        - **interpolation** – Resampling interpolation method.
        - **is_filtered** – Whether a ``filter`` step is present.
        - **filter_type** – Filter type (``log``, ``gabor``, …) or ``None``.
        - **filter_params** – Ordered filter step parameters as compact JSON,
            or ``None``.

        The frame also has flag / ``*_apply_to`` / ``*_params`` columns for
        the discretise, resample, resegment, filter_outliers,
        round_intensities, keep_largest_component and binarize_mask steps;
        their values are whatever ``_extract_config_metadata`` supplies.

    Example:
        ```python
        pipeline = RadiomicsPipeline()
        catalog = pipeline.describe_features()

        # Export as CSV data dictionary
        catalog.to_csv("feature_catalog.csv", index=False)

        # Filter: only texture features from FBN configs
        texture_fbn = catalog[
            (catalog["family_group"] == "Texture")
            & (catalog["discretisation_method"] == "FBN")
        ]
        ```
    """
    # Reverse index: feature key -> owning family (a later family wins if a
    # key is listed under more than one).
    family_of: dict[str, str] = {
        feature_key: family_name
        for family_name, family_keys in FEATURE_NAMES.items()
        for feature_key in family_keys
    }

    catalog_rows: list[dict[str, Any]] = []

    for config_name, steps in self._configs.items():
        metadata = self._config_metadata.get(config_name, {})
        source_mode = metadata.get("source_mode", "full_image")
        sentinel_value = metadata.get("sentinel_value")

        preprocessing_so_far: list[dict[str, Any]] = []
        # A dict keeps first-insertion order and re-assigning an existing key
        # swaps the value without moving it, which gives "last extraction
        # wins, first position kept" per feature key.
        config_rows: dict[str, dict[str, Any]] = {}

        for position, step_def in enumerate(steps, start=1):
            step_name = step_def.get("step", "")
            params = step_def.get("params", {})

            if step_name != "extract_features":
                if step_name in _PREPROCESSING_STEPS:
                    # Snapshot the params so the record is independent of
                    # the stored configuration.
                    preprocessing_so_far.append(
                        {
                            "step_index": position,
                            "step": step_name,
                            "params": copy.deepcopy(params),
                        }
                    )
                continue

            expected_keys = self._get_expected_feature_names([step_def])
            preprocessing_columns = self._extract_config_metadata(preprocessing_so_far)
            extraction_params = self._catalog_json(params) if params else None

            for feature_key in expected_keys:
                feature_name, ibsi_code = self._parse_feature_key(feature_key)
                family = family_of.get(feature_key, "unknown")

                entry: dict[str, Any] = {
                    "config": config_name,
                    "feature_key": feature_key,
                    "feature_name": feature_name,
                    "ibsi_code": ibsi_code,
                    "family": family,
                    "family_group": _FAMILY_GROUP.get(family, "Unknown"),
                    "requires_discretisation": _REQUIRES_DISCRETISATION.get(family, False),
                    "uses_morph_mask": _family_uses_morph_mask(family),
                    "uses_intensity_mask": _feature_uses_intensity_mask(feature_key, family),
                    "source_mode": source_mode,
                    "sentinel_value": sentinel_value,
                    "feature_extraction_step_index": position,
                    "feature_extraction_params": extraction_params,
                }
                entry.update(preprocessing_columns)
                config_rows[feature_key] = entry

        catalog_rows.extend(config_rows.values())

    # Fixed column order of the catalogue.
    columns = [
        "config",
        "feature_key",
        "feature_name",
        "ibsi_code",
        "family",
        "family_group",
        "requires_discretisation",
        "uses_morph_mask",
        "uses_intensity_mask",
        "source_mode",
        "sentinel_value",
        "feature_extraction_step_index",
        "feature_extraction_params",
        "preprocessing_sequence",
        "preprocessing_steps",
        "is_discretised",
        "discretisation_method",
        "discretisation_param",
        "discretise_params",
        "is_resampled",
        "resampling_spacing",
        "interpolation",
        "resample_params",
        "is_resegmented",
        "resegment_apply_to",
        "resegment_params",
        "is_outlier_filtered",
        "filter_outliers_apply_to",
        "filter_outliers_params",
        "is_intensity_rounded",
        "round_intensities_params",
        "keeps_largest_component",
        "keep_largest_component_apply_to",
        "keep_largest_component_params",
        "is_mask_binarized",
        "binarize_mask_apply_to",
        "binarize_mask_params",
        "is_filtered",
        "filter_type",
        "filter_params",
    ]
    if not catalog_rows:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(catalog_rows, columns=columns)

`from_dict(data, validate=False, load_standard=False)` `classmethod`

Create a new pipeline instance from a configuration dictionary.

The resulting pipeline contains only the configurations defined in the dictionary by default. Standard configurations are not loaded unless explicitly requested.

Parameters:

Name	Type	Description	Default
`data`	`dict[str, Any]`	Configuration dictionary with 'configs' key.	required
`validate`	`bool`	Whether to validate parameters (logs warnings for issues).	`False`
`load_standard`	`bool`	Whether to also load standard predefined configurations. Defaults to False so that only the provided configs are loaded.	`False`

Returns:

Type	Description
`'RadiomicsPipeline'`	New RadiomicsPipeline instance with loaded configs.

Source code in pictologics/pipeline.py

@classmethod
def from_dict(
    cls,
    data: dict[str, Any],
    validate: bool = False,
    load_standard: bool = False,
) -> "RadiomicsPipeline":
    """
    Create a new pipeline instance from a configuration dictionary.

    The resulting pipeline contains only the configurations defined in the
    dictionary by default. Standard configurations are not loaded unless
    explicitly requested.

    Each entry under 'configs' may be either a dict with a 'steps' list
    (plus optional 'source_mode' / 'sentinel_value') or a bare list of
    steps. Entries in any other shape are skipped with a ``UserWarning``.
    A 'deduplication' or 'configs' section that is present but empty
    (``None``, as produced by a key with no value in YAML) is treated as
    an empty mapping.

    Args:
        data: Configuration dictionary with 'configs' key.
        validate: Whether to validate parameters (logs warnings for issues).
        load_standard: Whether to also load standard predefined configurations.
            Defaults to False so that only the provided configs are loaded.

    Returns:
        New RadiomicsPipeline instance with loaded configs.
    """
    # Handle schema version migration if needed
    schema_version = data.get("schema_version", "1.0")
    migrated_data = cls._migrate_config(data, schema_version)

    # Extract deduplication settings if present. ``or {}`` covers a section
    # that exists but is null, which would otherwise fail on ``.get``.
    dedup_settings = migrated_data.get("deduplication") or {}
    deduplicate = dedup_settings.get("enabled", True)
    dedup_rules_version = dedup_settings.get("rules_version", None)

    # Create pipeline with deduplication settings (no standard configs by default)
    pipeline = cls(
        deduplicate=deduplicate,
        deduplication_rules=dedup_rules_version,
        load_standard=load_standard,
    )

    # Same null-section guard as above, here protecting ``.items()``.
    configs = migrated_data.get("configs") or {}
    for name, config_data in configs.items():
        if isinstance(config_data, dict) and "steps" in config_data:
            steps = config_data["steps"]
        elif isinstance(config_data, list):
            steps = config_data
        else:
            warnings.warn(
                f"Invalid config format for '{name}', skipping",
                UserWarning,
                stacklevel=2,
            )
            continue

        # Extract metadata (only the dict form can carry it)
        source_mode = "full_image"
        sentinel_value = None

        if isinstance(config_data, dict):
            source_mode = config_data.get("source_mode", "full_image")
            sentinel_value = config_data.get("sentinel_value")

        # Convert YAML lists to tuples where needed
        converted_steps = pipeline._convert_yaml_steps(steps)

        if validate:
            cls._validate_config(name, converted_steps)

        pipeline._configs[name] = converted_steps
        pipeline._config_metadata[name] = {
            "source_mode": source_mode,
            "sentinel_value": sentinel_value,
        }

    # Mark configs as loaded (not modified) so dedup plan from serialized data is valid
    pipeline._configs_modified_since_plan = False

    # Restore last_plan if present and valid; a broken plan is reported and
    # dropped rather than aborting the load.
    if "last_plan" in dedup_settings:
        try:
            pipeline._last_deduplication_plan = DeduplicationPlan.from_dict(
                dedup_settings["last_plan"]
            )
        except Exception as e:
            warnings.warn(
                f"Failed to restore deduplication plan: {e}",
                RuntimeWarning,
                stacklevel=2,
            )

    return pipeline

`from_json(json_string, validate=False, load_standard=False)` `classmethod`

Create a new pipeline instance from a JSON string.

The resulting pipeline contains only the configurations defined in the JSON string by default.

Parameters:

Name	Type	Description	Default
`json_string`	`str`	JSON configuration string.	required
`validate`	`bool`	Whether to validate parameters.	`False`
`load_standard`	`bool`	Whether to also load standard predefined configurations. Defaults to False so that only the provided configs are loaded.	`False`

Returns:

Type	Description
`'RadiomicsPipeline'`	New RadiomicsPipeline instance.

Source code in pictologics/pipeline.py

@classmethod
def from_json(
    cls,
    json_string: str,
    validate: bool = False,
    load_standard: bool = False,
) -> "RadiomicsPipeline":
    """
    Build a new pipeline from a JSON document.

    The text is parsed with ``json.loads`` and the result is handed to
    ``from_dict``; by default the pipeline holds only the configurations
    found in the JSON.

    Args:
        json_string: JSON configuration string.
        validate: Whether to validate parameters.
        load_standard: Whether to also load standard predefined configurations.
            Defaults to False so that only the provided configs are loaded.

    Returns:
        New RadiomicsPipeline instance.
    """
    return cls.from_dict(
        json.loads(json_string),
        validate=validate,
        load_standard=load_standard,
    )

`from_yaml(yaml_string, validate=False, load_standard=False)` `classmethod`

Create a new pipeline instance from a YAML string.

The resulting pipeline contains only the configurations defined in the YAML string by default.

Parameters:

Name	Type	Description	Default
`yaml_string`	`str`	YAML configuration string.	required
`validate`	`bool`	Whether to validate parameters.	`False`
`load_standard`	`bool`	Whether to also load standard predefined configurations. Defaults to False so that only the provided configs are loaded.	`False`

Returns:

Type	Description
`'RadiomicsPipeline'`	New RadiomicsPipeline instance.

Source code in pictologics/pipeline.py

@classmethod
def from_yaml(
    cls,
    yaml_string: str,
    validate: bool = False,
    load_standard: bool = False,
) -> "RadiomicsPipeline":
    """
    Create a new pipeline instance from a YAML string.

    The resulting pipeline contains only the configurations defined in the
    YAML string by default. An empty document (blank text or comments only)
    is treated as an empty configuration mapping.

    Args:
        yaml_string: YAML configuration string.
        validate: Whether to validate parameters.
        load_standard: Whether to also load standard predefined configurations.
            Defaults to False so that only the provided configs are loaded.

    Returns:
        New RadiomicsPipeline instance.
    """
    data = yaml.safe_load(yaml_string)
    if data is None:
        # safe_load yields None for an empty document; from_dict calls
        # ``data.get`` and would otherwise fail with AttributeError.
        data = {}
    return cls.from_dict(data, validate=validate, load_standard=load_standard)

`get_all_standard_config_names()`

Returns the list of all standard configuration names.

Returns names from loaded templates that start with 'standard_'.

Source code in pictologics/pipeline.py

def get_all_standard_config_names(self) -> list[str]:
    """
    Return the sorted names of all registered standard configurations.

    A configuration counts as standard when its registered name starts with
    'standard_'.
    """
    standard_names = [name for name in self._configs if name.startswith("standard_")]
    standard_names.sort()
    return standard_names

`get_config(name)`

Get a copy of a configuration by name.

Parameters:

Name	Type	Description	Default
`name`	`str`	Configuration name.	required

Returns:

Type	Description
`list[dict[str, Any]]`	Deep copy of the configuration steps.

Raises:

Type	Description
`KeyError`	If configuration not found.

Source code in pictologics/pipeline.py

def get_config(self, name: str) -> list[dict[str, Any]]:
    """
    Return an independent copy of a registered configuration.

    Args:
        name: Configuration name.

    Returns:
        Deep copy of the configuration steps, so edits to the result do not
        touch the registry.

    Raises:
        KeyError: If configuration not found.
    """
    registry = self._configs
    if name in registry:
        return copy.deepcopy(registry[name])
    raise KeyError(f"Configuration '{name}' not found")

`list_configs()`

List all registered configuration names.

Returns:

Type	Description
`list[str]`	List of configuration names.

Source code in pictologics/pipeline.py

def list_configs(self) -> list[str]:
    """
    Return the names of all registered configurations.

    Returns:
        A new list of configuration names, in registry order.
    """
    return [*self._configs]

`load_configs(file_path, validate=False, load_standard=False)` `classmethod`

Load configurations from a file (JSON or YAML).

The resulting pipeline contains only the configurations defined in the file by default. Standard configurations (e.g., standard_fbn_32) are not loaded unless load_standard=True is passed.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to configuration file.	required
`validate`	`bool`	Whether to validate parameters.	`False`
`load_standard`	`bool`	Whether to also load standard predefined configurations. Defaults to False so that only the file's configs are loaded. Pass True to include standard configs alongside the loaded ones.	`False`

Returns:

Type	Description
`'RadiomicsPipeline'`	New RadiomicsPipeline instance.

Raises:

Type	Description
`FileNotFoundError`	If file doesn't exist.
`ValueError`	If file extension is unsupported.

Source code in pictologics/pipeline.py

@classmethod
def load_configs(
    cls,
    file_path: str | Path,
    validate: bool = False,
    load_standard: bool = False,
) -> "RadiomicsPipeline":
    """
    Load configurations from a file (JSON or YAML).

    The resulting pipeline contains only the configurations defined in the
    file by default. Standard configurations (e.g., ``standard_fbn_32``) are
    not loaded unless ``load_standard=True`` is passed.

    Args:
        file_path: Path to configuration file.
        validate: Whether to validate parameters.
        load_standard: Whether to also load standard predefined configurations.
            Defaults to False so that only the file's configs are loaded.
            Pass True to include standard configs alongside the loaded ones.

    Returns:
        New RadiomicsPipeline instance.

    Raises:
        FileNotFoundError: If file doesn't exist.
        ValueError: If file extension is unsupported.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Configuration file not found: {path}")

    suffix = path.suffix.lower()
    content = path.read_text(encoding="utf-8")

    if suffix == ".json":
        return cls.from_json(content, validate=validate, load_standard=load_standard)
    elif suffix in (".yaml", ".yml"):
        return cls.from_yaml(content, validate=validate, load_standard=load_standard)
    else:
        raise ValueError(f"Unsupported file extension: {suffix}. Use .json, .yaml, or .yml")

`merge_configs(other, overwrite=False)`

Merge configurations from another pipeline instance.

Parameters:

Name	Type	Description	Default
`other`	`'RadiomicsPipeline'`	Another RadiomicsPipeline to merge from.	required
`overwrite`	`bool`	Whether to overwrite existing configs with same name.	`False`

Returns:

Type	Description
`'RadiomicsPipeline'`	Self for method chaining.

Source code in pictologics/pipeline.py

def merge_configs(
    self,
    other: "RadiomicsPipeline",
    overwrite: bool = False,
) -> "RadiomicsPipeline":
    """
    Merge configurations from another pipeline instance.

    Each merged configuration is deep-copied together with its metadata, so
    the two pipelines do not share step objects afterwards. If the source
    has no metadata for a merged name, any metadata this pipeline held
    under that name is dropped. When at least one configuration is merged,
    the pipeline is flagged as modified since the last deduplication plan,
    as ``add_config`` and ``remove_config`` do.

    Args:
        other: Another RadiomicsPipeline to merge from.
        overwrite: Whether to overwrite existing configs with same name.
            When False, clashing names are skipped with a ``UserWarning``.

    Returns:
        Self for method chaining.
    """
    for name, steps in other._configs.items():
        if name in self._configs and not overwrite:
            warnings.warn(
                f"Config '{name}' already exists, skipping (use overwrite=True)",
                UserWarning,
                stacklevel=2,
            )
            continue
        self._configs[name] = copy.deepcopy(steps)
        if name in other._config_metadata:
            self._config_metadata[name] = copy.deepcopy(other._config_metadata[name])
        else:
            # Avoid pairing the new steps with metadata from the config
            # they replaced.
            self._config_metadata.pop(name, None)
        # The registry changed, so a previously computed plan is stale.
        self._configs_modified_since_plan = True
    return self

`remove_config(name)`

Remove a configuration by name.

Parameters:

Name	Type	Description	Default
`name`	`str`	Configuration name to remove.	required

Returns:

Type	Description
`'RadiomicsPipeline'`	Self for method chaining.

Raises:

Type	Description
`KeyError`	If configuration not found.

Source code in pictologics/pipeline.py

def remove_config(self, name: str) -> "RadiomicsPipeline":
    """
    Remove a configuration by name.

    Both the steps and the metadata stored for the configuration
    (source_mode, sentinel_value) are removed, and the pipeline is flagged
    as modified since the last deduplication plan.

    Args:
        name: Configuration name to remove.

    Returns:
        Self for method chaining.

    Raises:
        KeyError: If configuration not found.
    """
    if name not in self._configs:
        raise KeyError(f"Configuration '{name}' not found")
    del self._configs[name]
    # Drop the metadata too, so no stale entry outlives the configuration;
    # configs loaded without metadata are tolerated.
    self._config_metadata.pop(name, None)
    self._configs_modified_since_plan = True
    return self

`run(image, mask=None, subject_id=None, config_names=None, mask_subvoxel_tolerance=0.5, mask_subvoxel_warning_threshold=0.01, mask_min_overlap_fraction=0.5)`

Run configurations on the provided image and mask.

Parameters:

Name	Type	Description	Default
`image`	`str \| Image`	Path to image or Image object.	required
`mask`	`str \| Image \| None`	Optional path to mask or Image object. If omitted (or passed as `None` / empty string), the pipeline will treat the entire image as the ROI by generating a full (all-ones) mask matching the input image geometry.	`None`
`subject_id`	`Optional[str]`	Optional identifier for the subject (used in the processing log only; not included in the returned feature Series).	`None`
`config_names`	`Optional[list[str]]`	List of specific configuration names to run. If None, runs all registered configurations. Supports "all_standard" to run all 6 standard configs.	`None`
`mask_subvoxel_tolerance`	`float`	Maximum permitted fractional-voxel offset when repositioning a mask path (default: 0.5). Has no effect when mask is a pre-loaded Image object. See `load_image` for full description.	`0.5`
`mask_subvoxel_warning_threshold`	`float`	Fractional-voxel drift above which a `UserWarning` is emitted during mask repositioning (default: 0.01). Has no effect when mask is a pre-loaded Image object.	`0.01`
`mask_min_overlap_fraction`	`float`	Minimum fraction of the mask volume that must intersect with the image space when loading from a path (default: 0.5). Has no effect when mask is a pre-loaded Image object.	`0.5`

Returns:

Type	Description
`dict[str, Series]`	Dictionary mapping config names to pandas Series of features. Every Series contains the complete set of expected feature names for its configuration, regardless of whether extraction succeeded:

- If extraction succeeds, values are the computed feature values.
- If individual features fail (e.g., mesh error, PCA with ≤3 voxels), those features are `NaN`; successfully computed features are preserved.
- If the entire configuration fails (empty ROI or unexpected error), all values are `NaN`.

Example

Run standard pipeline components:

from pictologics.pipeline import RadiomicsPipeline

# Initialize
pipeline = RadiomicsPipeline()

# Run on image and mask
results = pipeline.run(
    image="data/image.nii.gz",
    mask="data/mask.nii.gz",
    subject_id="subject_001",
    config_names=["standard_fbn_32"]
)

# Access results
print(results["standard_fbn_32"].head())

Source code in pictologics/pipeline.py

def run(
    self,
    image: str | Image,
    mask: str | Image | None = None,
    subject_id: Optional[str] = None,
    config_names: Optional[list[str]] = None,
    mask_subvoxel_tolerance: float = 0.5,
    mask_subvoxel_warning_threshold: float = 0.01,
    mask_min_overlap_fraction: float = 0.5,
) -> dict[str, pd.Series]:
    """
    Run configurations on the provided image and mask.

    Every configuration starts from the same loaded input image and mask.
    For each configuration that is run, one entry is appended to the
    processing log (``self._log``), and the deduplication counters are reset
    at the start of every call.

    Args:
        image: Path to image or Image object.
        mask: Optional path to mask or Image object.
            If omitted (or passed as `None` / empty string), the pipeline will
            treat the **entire image** as the ROI by generating a full (all-ones)
            mask matching the input image geometry.
        subject_id: Optional identifier for the subject (used in the
            processing log only; not included in the returned feature Series).
        config_names: List of specific configuration names to run.
                      If None, runs all registered configurations.
                      Supports "all_standard" to run all 6 standard configs.
        mask_subvoxel_tolerance: Maximum permitted fractional-voxel offset when
            repositioning a mask path (default: 0.5). Has no effect when mask is
            a pre-loaded Image object. See ``load_image`` for full description.
        mask_subvoxel_warning_threshold: Fractional-voxel drift above which a
            ``UserWarning`` is emitted during mask repositioning (default: 0.01).
            Has no effect when mask is a pre-loaded Image object.
        mask_min_overlap_fraction: Minimum fraction of the mask volume that must
            intersect with the image space when loading from a path (default: 0.5).
            Has no effect when mask is a pre-loaded Image object.

    Returns:
        Dictionary mapping config names to pandas Series of features.
        Every Series contains the **complete set of expected feature names**
        for its configuration, regardless of whether extraction succeeded:

        - If extraction succeeds, values are the computed feature values.
        - If individual features fail (e.g., mesh error, PCA with ≤3 voxels),
            those features are ``NaN``; successfully computed features are preserved.
        - If the entire configuration fails (empty ROI or unexpected error),
            all values are ``NaN``.

    Raises:
        ValueError: If an entry of ``config_names`` is neither a registered
            configuration name nor ``"all_standard"``.

        Input loading, geometry validation, and the per-configuration source
        mode setup all run outside the per-configuration ``try`` block, so any
        exception they raise propagates to the caller instead of being turned
        into a NaN-filled result.

    Example:
        Run standard pipeline components:

        ```python
        from pictologics.pipeline import RadiomicsPipeline

        # Initialize
        pipeline = RadiomicsPipeline()

        # Run on image and mask
        results = pipeline.run(
            image="data/image.nii.gz",
            mask="data/mask.nii.gz",
            subject_id="subject_001",
            config_names=["standard_fbn_32"]
        )

        # Access results
        print(results["standard_fbn_32"].head())
        ```
    """
    # 1. Load Data
    # ``img_source`` / ``mask_source`` are only used as provenance strings in
    # the per-configuration log entry.
    if isinstance(image, str):
        orig_img = load_image(image)
        img_source = image
    else:
        orig_img = image
        img_source = "InMemory"

    mask_was_generated = False
    if mask is None or (isinstance(mask, str) and mask.strip() == ""):
        # No usable mask supplied: the whole image becomes the ROI.
        orig_mask = create_full_mask(orig_img)
        mask_source = "GeneratedFullMask"
        mask_was_generated = True
    elif isinstance(mask, str):
        # Mask paths are loaded against the image as reference; the three
        # mask_* tolerances only take effect on this branch.
        orig_mask = load_image(
            mask,
            reference_image=orig_img,
            subvoxel_tolerance=mask_subvoxel_tolerance,
            subvoxel_warning_threshold=mask_subvoxel_warning_threshold,
            min_overlap_fraction=mask_min_overlap_fraction,
        )
        mask_source = mask
    else:
        orig_mask = mask
        mask_source = "InMemory"

    _validate_geometry(orig_mask, orig_img, "mask", "image")

    all_results = {}

    # Determine which configs to run
    if config_names is None:
        target_configs = list(self._configs.keys())
    else:
        target_configs = []
        for name in config_names:
            if name == "all_standard":
                # Expand the alias in place, preserving the caller's ordering.
                target_configs.extend(self.get_all_standard_config_names())
            elif name in self._configs:
                target_configs.append(name)
            else:
                raise ValueError(f"Configuration '{name}' not found.")

    # Create or regenerate deduplication plan if enabled
    dedup_plan: DeduplicationPlan | None = None
    # Shared across all configurations of this call and handed to
    # _extract_features_with_dedup.
    family_cache: dict[tuple[str, str], dict[str, Any]] = {}

    # Reset deduplication statistics for this run
    self._dedup_reused_count = 0
    self._dedup_computed_count = 0

    # A plan is only built when at least two configurations are run; with a
    # single configuration the previously stored plan is left untouched.
    if self._deduplication_enabled and len(target_configs) > 1:
        # Get configs for analysis
        configs_to_analyze = {name: self._configs[name] for name in target_configs}
        analyzer = ConfigurationAnalyzer(configs_to_analyze, self._deduplication_rules)
        dedup_plan = analyzer.analyze()
        self._last_deduplication_plan = dedup_plan
        self._configs_modified_since_plan = False

    # Run each configuration
    for config_name in target_configs:
        steps = self._configs[config_name]
        metadata = self._config_metadata.get(config_name, {})

        # Determine source mode for this config
        source_mode_str = metadata.get("source_mode", "full_image")
        source_mode = SourceMode(source_mode_str)
        explicit_sentinel = metadata.get("sentinel_value")

        # Determine source mask based on source_mode
        source_mask: Optional[Image] = None
        sentinel_detected = False
        detected_sentinel_value: Optional[float] = None

        if source_mode == SourceMode.FULL_IMAGE:
            # Default: all voxels valid, no source_mask needed
            pass

        elif source_mode == SourceMode.ROI_ONLY:
            # Use the binarised ROI mask (values > 0) as the source mask
            source_mask = Image(
                array=(orig_mask.array > 0).astype(np.uint8),
                spacing=orig_mask.spacing,
                origin=orig_mask.origin,
                direction=orig_mask.direction,
                modality="SOURCE_MASK",
            )

        elif source_mode == SourceMode.AUTO:
            # Auto-detect sentinel values
            if explicit_sentinel is not None:
                # User provided explicit sentinel value; detection is skipped
                detected_sentinel_value = explicit_sentinel
                sentinel_detected = True
            else:
                # If mask was auto-generated (full mask), do not use it for
                # "outside-ness" check in detection, as everything is "inside".
                mask_for_detection = orig_mask if not mask_was_generated else None
                detected = detect_sentinel_value(orig_img, roi_mask=mask_for_detection)
                if detected is not None:
                    detected_sentinel_value = detected
                    sentinel_detected = True

                    # Reported at DEBUG level (not as a warning) to avoid
                    # console spam under the default logging configuration.
                    logging.debug(
                        f"Auto-detected sentinel value {detected} in image. "
                        f"Using source validity mask for config '{config_name}'. "
                        f"Voxels with value {detected} will be excluded from "
                        f"resampling/filtering."
                    )

            if sentinel_detected and detected_sentinel_value is not None:
                source_mask = create_source_mask_from_sentinel(
                    orig_img, detected_sentinel_value
                )

        # Initialize State with source tracking
        # Each config gets a new PipelineState. Note that only the intensity
        # mask is an array copy; ``image``, ``raw_image`` and ``morph_mask``
        # reference the original input objects.
        state = PipelineState(
            image=orig_img,
            raw_image=orig_img,  # Track non-discretised image
            morph_mask=orig_mask,
            intensity_mask=Image(
                array=orig_mask.array.copy(),
                spacing=orig_mask.spacing,
                origin=orig_mask.origin,
                direction=orig_mask.direction,
                modality=orig_mask.modality,
            ),
            mask_was_generated=mask_was_generated,
            source_mode=source_mode,
            source_mask=source_mask,
            sentinel_detected=sentinel_detected,
            sentinel_value=detected_sentinel_value,
        )

        # Log entry for this configuration; it is mutated as steps complete
        # and appended to self._log exactly once on every path below.
        config_log: dict[str, Any] = {
            "timestamp": datetime.datetime.now().isoformat(),
            "schema_version": CONFIG_SCHEMA_VERSION,
            "pictologics_version": _get_package_version(),
            "subject_id": subject_id,
            "config_name": config_name,
            "image_source": img_source,
            "mask_source": mask_source,
            "source_mode": source_mode.value,
            "sentinel_detected": sentinel_detected,
            "sentinel_value": detected_sentinel_value,
            "mask_roi_semantics": "nonzero_values_are_roi_membership",
            "config_snapshot": self._make_serializable(
                {
                    "source_mode": source_mode.value,
                    "sentinel_value": explicit_sentinel,
                    "effective_sentinel_value": detected_sentinel_value,
                    "steps": steps,
                }
            ),
            "deduplication": {
                "enabled": self._deduplication_enabled,
                "rules_version": self._deduplication_rules.version,
                "plan_used": dedup_plan is not None,
            },
            "run_parameters": {
                "requested_config_names": config_names,
                "target_configs": target_configs,
            },
            "mask_repositioning_settings": {
                "subvoxel_tolerance": mask_subvoxel_tolerance,
                "subvoxel_warning_threshold": mask_subvoxel_warning_threshold,
                "min_overlap_fraction": mask_min_overlap_fraction,
            },
            "status": "started",
            "steps_executed": [],
        }

        config_features: dict[str, Any] = {}
        # Tracks the step being executed so a failure can be attributed to it;
        # stays None if the failure happens before the first step.
        current_step: dict[str, Any] | None = None

        try:
            self._ensure_nonempty_roi(state, context="initialization")

            for step_def in steps:
                current_step = step_def
                step_name = step_def["step"]
                params = step_def.get("params", {})

                # Execute Step
                if step_name == "extract_features":
                    # Use deduplication if plan exists
                    if dedup_plan is not None:
                        features = self._extract_features_with_dedup(
                            state, params, config_name, dedup_plan, family_cache
                        )
                    else:
                        features = self._extract_features(state, params)
                    config_features.update(features)
                else:
                    self._execute_preprocessing_step(state, step_name, params)

                # Log
                config_log["steps_executed"].append(
                    {
                        "step": step_name,
                        "params": self._make_serializable(params),
                        "status": "completed",
                    }
                )
            config_log["status"] = "completed"
            config_log["result_feature_count"] = len(config_features)

        except EmptyROIMaskError as e:
            config_log["status"] = "empty_roi"
            config_log["error"] = str(e)
            config_log["failed_step"] = (
                current_step if current_step is not None else "initialization"
            )
            # The entry is appended here because ``continue`` below skips the
            # shared append after the try block. ``result_feature_count`` is
            # set afterwards on the same dict object, so the stored entry
            # still receives it.
            self._log.append(config_log)

            # Build a NaN-filled Series with the expected feature names so
            # that downstream formatting/concatenation always sees a
            # complete, predictable set of columns.
            nan_names = self._get_expected_feature_names(steps)
            all_results[config_name] = pd.Series({name: float("nan") for name in nan_names})
            config_log["result_feature_count"] = len(nan_names)
            logging.debug(
                "Config '%s' produced an empty ROI: %s. Returning NaN for %d features.",
                config_name,
                e,
                len(nan_names),
            )
            continue

        except Exception as e:
            config_log["status"] = "error"
            config_log["error"] = str(e)
            config_log["failed_step"] = current_step
            # Backfill with NaN so the result always has a complete set of
            # feature columns, even when extraction was interrupted.
            # ``setdefault`` keeps any values computed before the failure.
            nan_names = self._get_expected_feature_names(steps)
            for name in nan_names:
                config_features.setdefault(name, float("nan"))
            config_log["result_feature_count"] = len(config_features)

        # Reached on success and on the generic-error path.
        self._log.append(config_log)

        # Create Series
        series = pd.Series(config_features)
        all_results[config_name] = series

    return all_results

`save_configs(output_path, config_names=None)`

Save configurations to a file (JSON or YAML based on extension).

Parameters:

Name	Type	Description	Default
`output_path`	`str \| Path`	Path to output file. Extension determines format.	required
`config_names`	`Optional[list[str]]`	Specific configs to export. If None, exports all.	`None`

Raises:

Type	Description
`ValueError`	If file extension is not .json, .yaml, or .yml.

Source code in pictologics/pipeline.py

def save_configs(
    self,
    output_path: str | Path,
    config_names: Optional[list[str]] = None,
) -> None:
    """
    Save configurations to a file (JSON or YAML based on extension).

    Args:
        output_path: Path to output file. Extension determines format.
        config_names: Specific configs to export. If None, exports all.

    Raises:
        ValueError: If file extension is not .json, .yaml, or .yml.
    """
    path = Path(output_path)
    suffix = path.suffix.lower()

    if suffix == ".json":
        content = self.to_json(config_names=config_names)
    elif suffix in (".yaml", ".yml"):
        content = self.to_yaml(config_names=config_names)
    else:
        raise ValueError(f"Unsupported file extension: {suffix}. Use .json, .yaml, or .yml")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")

`save_log(output_path)`

Save the processing log to a self-describing JSON file.

Source code in pictologics/pipeline.py

def save_log(self, output_path: str | Path) -> None:
    """
    Write the accumulated processing log to a self-describing JSON file.

    The log entries are wrapped in an envelope that records the log schema
    version, the pipeline schema version, the package version, the export
    time, the mask semantics, and the number of entries.

    Args:
        output_path: Destination file. If the path does not end with
            ``.json`` (case-sensitive), ``.json`` is appended to it.
            Missing parent directories are created.
    """
    destination = Path(output_path)
    if not str(destination).endswith(".json"):
        destination = Path(str(destination) + ".json")

    entries = self._log
    document = {
        "log_schema_version": "1.0",
        "pipeline_schema_version": CONFIG_SCHEMA_VERSION,
        "pictologics_version": _get_package_version(),
        "exported_at": datetime.datetime.now().isoformat(),
        "mask_roi_semantics": "nonzero_values_are_roi_membership",
        "entry_count": len(entries),
        "entries": self._make_serializable(entries),
    }

    destination.parent.mkdir(parents=True, exist_ok=True)
    # default=str stringifies any value the JSON encoder cannot handle natively.
    serialized = json.dumps(document, indent=4, default=str)
    destination.write_text(serialized, encoding="utf-8")

`to_dict(config_names=None, include_metadata=True, include_deduplication=True)`

Export configurations to a dictionary.

Parameters:

Name	Type	Description	Default
`config_names`	`Optional[list[str]]`	Specific configs to export. If None, exports all.	`None`
`include_metadata`	`bool`	Whether to include schema version and metadata.	`True`
`include_deduplication`	`bool`	Whether to include deduplication settings.	`True`

Returns:

Type	Description
`dict[str, Any]`	Dictionary with configs and optional metadata.

Source code in pictologics/pipeline.py

def to_dict(
    self,
    config_names: Optional[list[str]] = None,
    include_metadata: bool = True,
    include_deduplication: bool = True,
) -> dict[str, Any]:
    """
    Build a plain-dictionary export of the registered configurations.

    Args:
        config_names: Specific configs to export. If None, exports all.
            Names that are not registered are silently skipped.
        include_metadata: Whether to include the schema version, package
            version, export timestamp and mask semantics.
        include_deduplication: Whether to include deduplication settings
            (and the last plan, when one exists and is not stale).

    Returns:
        Dictionary with a ``"configs"`` entry, optionally preceded by the
        metadata keys and followed by a ``"deduplication"`` entry.
    """
    # Pick the configurations to export, keeping the requested order.
    selected = self._configs
    if config_names is not None:
        selected = {}
        for key in config_names:
            if key in self._configs:
                selected[key] = self._configs[key]

    # Convert each config into a serialisable entry (tuples become lists).
    exported: dict[str, Any] = {}
    for cfg_name, cfg_steps in selected.items():
        entry: dict[str, Any] = {"steps": self._make_serializable(cfg_steps)}
        if cfg_name in self._config_metadata:
            cfg_meta = self._config_metadata[cfg_name]
            if "source_mode" in cfg_meta:
                entry["source_mode"] = cfg_meta["source_mode"]
            # A sentinel value of None is treated the same as "not set".
            if cfg_meta.get("sentinel_value") is not None:
                entry["sentinel_value"] = cfg_meta["sentinel_value"]
        exported[cfg_name] = entry

    header: dict[str, Any] = {}
    if include_metadata:
        header = {
            "schema_version": CONFIG_SCHEMA_VERSION,
            "pictologics_version": _get_package_version(),
            "exported_at": datetime.datetime.now().isoformat(),
            "mask_roi_semantics": "nonzero_values_are_roi_membership",
        }

    # Key order: metadata first, then configs, then deduplication.
    output: dict[str, Any] = {**header, "configs": exported}

    if include_deduplication:
        dedup_section: dict[str, Any] = {
            "enabled": self._deduplication_enabled,
            "rules_version": self._deduplication_rules.version,
        }
        # Include last plan if available and not stale
        if self._last_deduplication_plan and not self._configs_modified_since_plan:
            dedup_section["last_plan"] = self._last_deduplication_plan.to_dict()
        output["deduplication"] = dedup_section

    return output

`to_json(config_names=None, indent=2)`

Export configurations to a JSON string.

Parameters:

Name	Type	Description	Default
`config_names`	`Optional[list[str]]`	Specific configs to export. If None, exports all.	`None`
`indent`	`int`	JSON indentation level.	`2`

Returns:

Type	Description
`str`	JSON string representation.

Source code in pictologics/pipeline.py

def to_json(
    self,
    config_names: Optional[list[str]] = None,
    indent: int = 2,
) -> str:
    """
    Serialise the configuration export to JSON text.

    The export is produced by ``to_dict`` with its default options; values
    the JSON encoder cannot handle natively are converted with ``str``.

    Args:
        config_names: Specific configs to export. If None, exports all.
        indent: JSON indentation level.

    Returns:
        JSON string representation.
    """
    return json.dumps(
        self.to_dict(config_names=config_names),
        indent=indent,
        default=str,
    )

`to_yaml(config_names=None)`

Export configurations to a YAML string.

Parameters:

Name	Type	Description	Default
`config_names`	`Optional[list[str]]`	Specific configs to export. If None, exports all.	`None`

Returns:

Type	Description
`str`	YAML string representation.

Source code in pictologics/pipeline.py

def to_yaml(
    self,
    config_names: Optional[list[str]] = None,
) -> str:
    """
    Serialise the configuration export to YAML text.

    The export is produced by ``to_dict`` with its default options and is
    dumped in block style with the key order of the dictionary preserved.

    Args:
        config_names: Specific configs to export. If None, exports all.

    Returns:
        YAML string representation.
    """
    payload = self.to_dict(config_names=config_names)
    # The annotated local keeps the declared ``str`` return type explicit.
    yaml_text: str = yaml.dump(payload, default_flow_style=False, sort_keys=False)
    return yaml_text

`pictologics.pipeline.PipelineState` `dataclass`

Holds the current state of the image and masks during pipeline execution.

Attributes:

Name	Type	Description
`image`	`Image`	Current image (may be discretised after discretise step).
`raw_image`	`Image`	Always the non-discretised image (for intensity/morphology).
`morph_mask`	`Image`	Morphological mask for shape-based features.
`intensity_mask`	`Image`	Intensity mask for intensity-based features.
`is_discretised`	`bool`	Whether the image has been discretised.
`n_bins`	`Optional[int]`	Number of bins if discretised with FBN.
`bin_width`	`Optional[float]`	Bin width if discretised with FBS.
`discretisation_method`	`Optional[str]`	Discretisation method used ('FBN' or 'FBS').
`discretisation_min`	`Optional[float]`	Minimum value used for discretisation.
`discretisation_max`	`Optional[float]`	Maximum value used for discretisation.
`mask_was_generated`	`bool`	Whether the mask was auto-generated (no mask provided).
`is_filtered`	`bool`	Whether a filter has been applied.
`filter_type`	`Optional[str]`	Type of filter applied (if any).
`source_mode`	`SourceMode`	How source voxel validity is handled.
`source_mask`	`Optional[Image]`	Computed validity mask (where real data exists).
`sentinel_detected`	`bool`	True if AUTO mode detected sentinel values.
`sentinel_value`	`Optional[float]`	The detected sentinel value (if any).

Source code in pictologics/pipeline.py

@dataclass
class PipelineState:
    """
    Holds the current state of the image and masks during pipeline execution.

    The first four fields are required; every other field has a default.
    ``RadiomicsPipeline.run`` builds one instance per configuration, passing
    the same input image for both ``image`` and ``raw_image`` and an array
    copy of the input mask for ``intensity_mask``.

    Attributes:
        image: Current image (may be discretised after discretise step).
        raw_image: Always the non-discretised image (for intensity/morphology).
        morph_mask: Morphological mask for shape-based features.
        intensity_mask: Intensity mask for intensity-based features.
        is_discretised: Whether the image has been discretised.
        n_bins: Number of bins if discretised with FBN.
        bin_width: Bin width if discretised with FBS.
        discretisation_method: Discretisation method used ('FBN' or 'FBS').
        discretisation_min: Minimum value used for discretisation.
        discretisation_max: Maximum value used for discretisation.
        mask_was_generated: Whether the mask was auto-generated (no mask provided).
        is_filtered: Whether a filter has been applied.
        filter_type: Type of filter applied (if any).
        source_mode: How source voxel validity is handled.
        source_mask: Computed validity mask (where real data exists).
        sentinel_detected: True if AUTO mode detected sentinel values.
        sentinel_value: The detected sentinel value (if any).
    """

    # Required fields (no defaults); their order is part of the constructor.
    image: Image  # May be discretised after discretise step
    raw_image: Image  # Always the non-discretised image (for intensity/morphology)
    morph_mask: Image
    intensity_mask: Image
    # Discretisation bookkeeping; all unset until a discretise step records them.
    is_discretised: bool = False
    n_bins: Optional[int] = None
    bin_width: Optional[float] = None
    discretisation_method: Optional[str] = None
    discretisation_min: Optional[float] = None
    discretisation_max: Optional[float] = None
    # True when run() generated a full mask because none was supplied.
    mask_was_generated: bool = False
    # Filtering bookkeeping.
    is_filtered: bool = False
    filter_type: Optional[str] = None
    # Source tracking for sentinel value handling
    source_mode: SourceMode = SourceMode.FULL_IMAGE
    source_mask: Optional[Image] = None
    sentinel_detected: bool = False
    sentinel_value: Optional[float] = None

Enums

`pictologics.pipeline.SourceMode`

Bases: Enum

Determines how voxels outside the ROI mask are treated during spatial operations.

This setting affects resampling, filtering, and other operations that use neighboring voxels for interpolation or convolution.

Attributes:

Name	Type	Description
`FULL_IMAGE`		All voxels contain valid data. Use surrounding voxels for interpolation during resampling and filtering. This is the traditional behavior when a full CT/MR scan is provided.
`ROI_ONLY`		Only ROI mask voxels contain valid data. Voxels outside the mask contain sentinel values (-2048, etc.) that must be excluded from all spatial operations.
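`AUTO`		Automatically detect common sentinel values (-2048, -1024, etc.). If a sentinel value is detected (or one is configured explicitly), voxels with that value are excluded from spatial operations, similar to ROI_ONLY. Otherwise, behave like FULL_IMAGE. `run()` reports a detection with a DEBUG-level log message.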

Source code in pictologics/pipeline.py

class SourceMode(Enum):
    """
    Determines how voxels outside the ROI mask are treated during spatial operations.

    This setting affects resampling, filtering, and other operations that use
    neighboring voxels for interpolation or convolution.

    Attributes:
        FULL_IMAGE: All voxels contain valid data. Use surrounding voxels for
                    interpolation during resampling and filtering. This is the
                    traditional behavior when a full CT/MR scan is provided.
                    ``RadiomicsPipeline.run`` builds no source mask in this mode.
        ROI_ONLY: Only ROI mask voxels contain valid data. Voxels outside the
                  mask contain sentinel values (-2048, etc.) that must be excluded
                  from all spatial operations. ``RadiomicsPipeline.run`` uses the
                  binarised ROI mask (values > 0) as the source validity mask.
        AUTO: Automatically detect common sentinel values (-2048, -1024, etc.).
              If the configuration provides an explicit sentinel value, that
              value is used and detection is skipped. When a sentinel value is
              available, ``RadiomicsPipeline.run`` builds the source validity
              mask from that value (not from the ROI mask); otherwise it behaves
              like FULL_IMAGE. A detection is reported with a DEBUG-level log
              message rather than a warning.
    """

    FULL_IMAGE = "full_image"
    ROI_ONLY = "roi_only"
    AUTO = "auto"
    """
    Automatically detect sentinel values (e.g., -2048) and exclude them.
    This mode ensures that background voxels are not included in the ROI after
    resampling, even if their intensity (e.g., 0) is within the valid range.
    """

`AUTO = 'auto'` `class-attribute` `instance-attribute`

Automatically detect sentinel values (e.g., -2048) and exclude them. This mode ensures that background voxels are not included in the ROI after resampling, even if their intensity (e.g., 0) is within the valid range.

Exceptions

`pictologics.pipeline.EmptyROIMaskError`

Bases: ValueError

Raised internally when preprocessing yields an empty ROI mask.

When this error occurs during run(), it is not propagated to the caller. Instead, the pipeline returns, for the affected configuration, a pandas.Series of NaN values whose index matches the feature names that would have been produced by a successful extraction. Other configurations in the same run() call continue normally.

This is one of three mechanisms that guarantee a complete feature set for every configuration. The other two are the partial-failure backfill (for individual features that cannot be computed) and the general exception handler (for unexpected runtime errors). See the run() docstring and the Result Guarantees section of the user guide for details.

Source code in pictologics/pipeline.py

class EmptyROIMaskError(ValueError):
    """Signal that preprocessing left the ROI mask without any voxels.

    ``run()`` catches this error per configuration, so it does **not** reach
    the caller.  The affected configuration is logged with status
    ``"empty_roi"`` and its result is a ``pandas.Series`` of ``NaN`` values
    indexed by the feature names a successful extraction would have
    produced.  The remaining configurations of the same ``run()`` call are
    still processed.

    This is one of three mechanisms that guarantee a complete feature set for
    every configuration.  The other two are the partial-failure backfill (for
    individual features that cannot be computed) and the general exception
    handler (for unexpected runtime errors).  See the ``run()`` docstring and
    the *Result Guarantees* section of the user guide for details.
    """

Pipeline API

Pipeline Classes

pictologics.pipeline.RadiomicsPipeline

deduplication_enabled property writable

deduplication_rules property writable

deduplication_stats property

last_deduplication_plan property

__init__(deduplicate=True, deduplication_rules=None, load_standard=True)

add_config(name, steps, source_mode='full_image', sentinel_value=None)

clear_log()

describe_features()

from_dict(data, validate=False, load_standard=False) classmethod

from_json(json_string, validate=False, load_standard=False) classmethod

from_yaml(yaml_string, validate=False, load_standard=False) classmethod

get_all_standard_config_names()

get_config(name)

list_configs()

load_configs(file_path, validate=False, load_standard=False) classmethod

merge_configs(other, overwrite=False)

remove_config(name)

run(image, mask=None, subject_id=None, config_names=None, mask_subvoxel_tolerance=0.5, mask_subvoxel_warning_threshold=0.01, mask_min_overlap_fraction=0.5)

save_configs(output_path, config_names=None)

save_log(output_path)

to_dict(config_names=None, include_metadata=True, include_deduplication=True)

to_json(config_names=None, indent=2)

to_yaml(config_names=None)

pictologics.pipeline.PipelineState dataclass

Enums

pictologics.pipeline.SourceMode

AUTO = 'auto' class-attribute instance-attribute

Exceptions

pictologics.pipeline.EmptyROIMaskError

`pictologics.pipeline.RadiomicsPipeline`

`deduplication_enabled` `property` `writable`

`deduplication_rules` `property` `writable`

`deduplication_stats` `property`

`last_deduplication_plan` `property`

`__init__(deduplicate=True, deduplication_rules=None, load_standard=True)`

`add_config(name, steps, source_mode='full_image', sentinel_value=None)`

`clear_log()`

`describe_features()`

`from_dict(data, validate=False, load_standard=False)` `classmethod`

`from_json(json_string, validate=False, load_standard=False)` `classmethod`

`from_yaml(yaml_string, validate=False, load_standard=False)` `classmethod`

`get_all_standard_config_names()`

`get_config(name)`

`list_configs()`

`load_configs(file_path, validate=False, load_standard=False)` `classmethod`

`merge_configs(other, overwrite=False)`

`remove_config(name)`

`run(image, mask=None, subject_id=None, config_names=None, mask_subvoxel_tolerance=0.5, mask_subvoxel_warning_threshold=0.01, mask_min_overlap_fraction=0.5)`

`save_configs(output_path, config_names=None)`

`save_log(output_path)`

`to_dict(config_names=None, include_metadata=True, include_deduplication=True)`

`to_json(config_names=None, indent=2)`

`to_yaml(config_names=None)`

`pictologics.pipeline.PipelineState` `dataclass`

`pictologics.pipeline.SourceMode`

`AUTO = 'auto'` `class-attribute` `instance-attribute`

`pictologics.pipeline.EmptyROIMaskError`