Results API

`pictologics.results`

Results Module

This module provides utilities for formatting and saving radiomic feature extraction results. It supports multiple output formats (wide, long) and file formats (CSV, JSON).

Key Functions:

format_results: Convert pipeline output to various formats (dict, pandas DataFrame, JSON).
save_results: Save results to CSV or JSON files with automatic format detection.

`format_results(results, fmt='wide', meta=None, output_type='dict', config_col='config')`

Format the output of RadiomicsPipeline.run() into a structured format.

Parameters:

Name	Type	Description	Default
`results`	`dict[str, Series]`	Dictionary mapping configuration names to pandas Series of features (the standard output of RadiomicsPipeline.run).	required
`fmt`	`str`	"wide" or "long". - "wide": Flattens keys to '{config}__{feature}'. Returns 1 row (dict/df). - "long": Tidy format with columns for config, feature_name, and value.	`'wide'`
`meta`	`dict[str, Any] \| None`	Optional dictionary of metadata to prepend to the result (e.g., subject ID).	`None`
`output_type`	`str`	Format of the returned object: "dict", "pandas", or "json".	`'dict'`
`config_col`	`str`	Name of the column holding the configuration name (only used if fmt="long").	`'config'`

Returns:

Type	Description
`dict[str, Any] \| DataFrame \| str \| list[dict[str, Any]]`	Formatted data in the specified output_type.

Example

Format results as a single pandas DataFrame row (wide format):

from pictologics.results import format_results

# Assume 'results' is output from pipeline.run()
df = format_results(
    results,
    fmt="wide",
    meta={"custom_id": 123},
    output_type="pandas"
)

Source code in pictologics/results.py

def format_results(
    results: dict[str, pd.Series],
    fmt: str = "wide",
    meta: dict[str, Any] | None = None,
    output_type: str = "dict",
    config_col: str = "config",
) -> dict[str, Any] | pd.DataFrame | str | list[dict[str, Any]]:
    """
    Format the output of RadiomicsPipeline.run() into a structured format.

    Args:
        results: Dictionary mapping configuration names to pandas Series of features
                 (the standard output of RadiomicsPipeline.run).
        fmt: "wide" or "long".
             - "wide": Flattens keys to '{config}__{feature}'. Returns 1 row (dict/df).
             - "long": Tidy format with columns for config, feature_name, and value.
        meta: Optional dictionary of metadata to prepend to the result (e.g., subject ID).
        output_type: Format of the returned object: "dict", "pandas", or "json".
        config_col: Name of the column holding the configuration name (only used if fmt="long").

    Returns:
        Formatted data in the specified output_type.

    Example:
        Format results as a single pandas DataFrame row (wide format):

        ```python
        from pictologics.results import format_results

        # Assume 'results' is output from pipeline.run()
        df = format_results(
            results,
            fmt="wide",
            meta={"custom_id": 123},
            output_type="pandas"
        )
        ```
    """
    if meta is None:
        meta = {}

    if fmt == "wide":
        # Wide format: { "meta_key": val, "config__feature": val }
        formatted_data = meta.copy()
        for config_name, series in results.items():
            for feature_name, value in series.items():
                col_name = f"{config_name}__{feature_name}"
                formatted_data[col_name] = value

        if output_type == "dict":
            return formatted_data
        elif output_type == "pandas":
            return pd.DataFrame([formatted_data])
        elif output_type == "json":
            return json.dumps(formatted_data)
        else:
            raise ValueError(f"Unknown output_type: {output_type}")

    elif fmt == "long":
        # Long format: Rows of [meta_cols..., config, feature_name, value]
        rows = []
        for config_name, series in results.items():
            for feature_name, value in series.items():
                row = meta.copy()
                row[config_col] = config_name
                row["feature_name"] = feature_name
                row["value"] = value
                rows.append(row)

        if not rows:
            # Handle empty results case
            # For pure python output, empty list is fine.
            # For pandas, we need a dataframe with columns.
            if output_type != "pandas":
                if output_type == "dict":
                    return []
                elif output_type == "json":
                    return "[]"

            df = pd.DataFrame(
                columns=list(meta.keys()) + [config_col, "feature_name", "value"]
            )
            return df  # Columns are already in order

        # Reorder keys/columns
        # Determine strict order
        meta_keys = list(meta.keys())
        standard_cols = [config_col, "feature_name", "value"]
        # Ensure we don't duplicate keys
        cols_order = meta_keys + [c for c in standard_cols if c not in meta_keys]

        # If output is dict/json, reorder the dictionaries directly
        if output_type in ("dict", "json"):
            # Ensure each row has keys in the desired order
            ordered_rows = []
            for r in rows:
                new_r = {k: r.get(k) for k in cols_order if k in r}

                ordered_rows.append(new_r)

            if output_type == "dict":
                return ordered_rows
            else:  # json
                return json.dumps(ordered_rows)

        # Output type is 'pandas'
        df = pd.DataFrame(rows)
        # Verify which columns actually exist in the dataframe
        existing_cols = list(df.columns)
        final_order = [c for c in cols_order if c in existing_cols] + [
            c for c in existing_cols if c not in cols_order
        ]
        df = df.reindex(columns=final_order)

        return df

    else:
        raise ValueError(f"Unknown format: {fmt}. Use 'wide' or 'long'.")

`save_results(data, path, file_format=None)`

Save results to a file (CSV, JSON, etc.), automatically handling merging of lists.

Parameters:

Name	Type	Description	Default
`data`	`dict[str, Any] \| list[dict[str, Any]] \| DataFrame \| list[DataFrame] \| str \| list[str]`	The data to save. Supported types: - Dict or List[Dict] - DataFrame or List[DataFrame] - JSON string or List[JSON strings]	required
`path`	`str \| Path`	Output file path.	required
`file_format`	`str \| None`	"csv" or "json". If None, inferred from file extension.	`None`

Example

Save formatted results to JSON:

from pictologics.results import save_results

save_results(formatted_data, "output/features.json")

Source code in pictologics/results.py

def save_results(
    data: (
        dict[str, Any]
        | list[dict[str, Any]]
        | pd.DataFrame
        | list[pd.DataFrame]
        | str
        | list[str]
    ),
    path: str | Path,
    file_format: str | None = None,
) -> None:
    """
    Save results to a file (CSV, JSON, etc.), automatically handling merging of lists.

    Args:
        data: The data to save. Supported types:
              - Dict or List[Dict]
              - DataFrame or List[DataFrame]
              - JSON string or List[JSON strings]
        path: Output file path.
        file_format: "csv" or "json". If None, inferred from file extension.

    Example:
        Save formatted results to JSON:

        ```python
        from pictologics.results import save_results

        save_results(formatted_data, "output/features.json")
        ```
    """
    path = Path(path)
    if file_format is None:
        if path.suffix.lower() == ".csv":
            file_format = "csv"
        elif path.suffix.lower() == ".json":
            file_format = "json"
        else:
            file_format = "csv"

    # If format is JSON and data is already a dict or list of dicts, bypass pandas
    # to avoid overhead and potential C-extension conflicts in coverage/threading.
    if file_format == "json":
        # Check if data is already in a compatible format
        is_dict = isinstance(data, dict)
        is_list_of_dicts = isinstance(data, list) and (
            not data or isinstance(data[0], dict)
        )

        if is_dict:
            with open(path, "w") as f:
                json.dump([data], f, indent=2)
            return

        if is_list_of_dicts:
            with open(path, "w") as f:
                json.dump(data, f, indent=2)
            return

    # Normalize input to a single DataFrame
    try:
        final_df = _normalize_to_dataframe(data)
    except Exception as e:
        # If normalization fails but we want to debug, re-raise
        raise e

    # Export
    if file_format == "csv":
        final_df.to_csv(path, index=False)
    elif file_format == "json":
        # Use standard json library to avoid potential pandas C-extension issues during coverage
        with open(path, "w") as f:
            json.dump(final_df.to_dict(orient="records"), f, indent=2)
    else:
        raise ValueError(f"Unsupported export format: {file_format}")