Source code for sklearn.utils._set_output

from functools import wraps

from scipy.sparse import issparse

from . import check_pandas_support
from .._config import get_config
from ._available_if import available_if

def _wrap_in_pandas_container(
    """Create a Pandas DataFrame.

    If `data_to_wrap` is a DataFrame, then the `columns` and `index` will be changed
    inplace. If `data_to_wrap` is a ndarray, then a new DataFrame is created with
    `columns` and `index`.

    data_to_wrap : {ndarray, dataframe}
        Data to be wrapped as pandas dataframe.

    columns : callable, ndarray, or None
        The column names or a callable that returns the column names. The
        callable is useful if the column names require some computation.
        If `columns` is a callable that raises an error, `columns` will have
        the same semantics as `None`. If `None` and `data_to_wrap` is already a
        dataframe, then the column names are not changed. If `None` and
        `data_to_wrap` is **not** a dataframe, then columns are

    index : array-like, default=None
        Index for data. `index` is ignored if `data_to_wrap` is already a DataFrame.

    dataframe : DataFrame
        Container with column names or unchanged `output`.
    if issparse(data_to_wrap):
        raise ValueError("Pandas output does not support sparse data.")

    if callable(columns):
            columns = columns()
        except Exception:
            columns = None

    pd = check_pandas_support("Setting output container to 'pandas'")

    if isinstance(data_to_wrap, pd.DataFrame):
        if columns is not None:
            data_to_wrap.columns = columns
        return data_to_wrap

    return pd.DataFrame(data_to_wrap, index=index, columns=columns)

def _get_output_config(method, estimator=None):
    """Get output config based on estimator and global configuration.

    method : {"transform"}
        Estimator's method for which the output container is looked up.

    estimator : estimator instance or None
        Estimator to get the output configuration from. If `None`, check global
        configuration is used.

    config : dict
        Dictionary with keys:

        - "dense": specifies the dense container for `method`. This can be
          `"default"` or `"pandas"`.
    est_sklearn_output_config = getattr(estimator, "_sklearn_output_config", {})
    if method in est_sklearn_output_config:
        dense_config = est_sklearn_output_config[method]
        dense_config = get_config()[f"{method}_output"]

    if dense_config not in {"default", "pandas"}:
        raise ValueError(
            f"output config must be 'default' or 'pandas' got {dense_config}"

    return {"dense": dense_config}

def _wrap_data_with_container(method, data_to_wrap, original_input, estimator):
    """Wrap output with container based on an estimator's or global config.

    method : {"transform"}
        Estimator's method to get container output for.

    data_to_wrap : {ndarray, dataframe}
        Data to wrap with container.

    original_input : {ndarray, dataframe}
        Original input of function.

    estimator : estimator instance
        Estimator with to get the output configuration from.

    output : {ndarray, dataframe}
        If the output config is "default" or the estimator is not configured
        for wrapping return `data_to_wrap` unchanged.
        If the output config is "pandas", return `data_to_wrap` as a pandas
    output_config = _get_output_config(method, estimator)

    if output_config["dense"] == "default" or not _auto_wrap_is_configured(estimator):
        return data_to_wrap

    # dense_config == "pandas"
    return _wrap_in_pandas_container(
        index=getattr(original_input, "index", None),

def _wrap_method_output(f, method):
    """Wrapper used by `_SetOutputMixin` to automatically wrap methods."""

    def wrapped(self, X, *args, **kwargs):
        data_to_wrap = f(self, X, *args, **kwargs)
        if isinstance(data_to_wrap, tuple):
            # only wrap the first output for cross decomposition
            return (
                _wrap_data_with_container(method, data_to_wrap[0], X, self),

        return _wrap_data_with_container(method, data_to_wrap, X, self)

    return wrapped

def _auto_wrap_is_configured(estimator):
    """Return True if estimator is configured for auto-wrapping the transform method.

    `_SetOutputMixin` sets `_sklearn_auto_wrap_output_keys` to `set()` if auto wrapping
    is manually disabled.
    auto_wrap_output_keys = getattr(estimator, "_sklearn_auto_wrap_output_keys", set())
    return (
        hasattr(estimator, "get_feature_names_out")
        and "transform" in auto_wrap_output_keys

class _SetOutputMixin:
    """Mixin that dynamically wraps methods to return container based on config.

    Currently `_SetOutputMixin` wraps `transform` and `fit_transform` and configures
    it based on `set_output` of the global configuration.

    `set_output` is only defined if `get_feature_names_out` is defined and
    `auto_wrap_output_keys` is the default value.

    def __init_subclass__(cls, auto_wrap_output_keys=("transform",), **kwargs):

        # Dynamically wraps `transform` and `fit_transform` and configure it's
        # output based on `set_output`.
        if not (
            isinstance(auto_wrap_output_keys, tuple) or auto_wrap_output_keys is None
            raise ValueError("auto_wrap_output_keys must be None or a tuple of keys.")

        if auto_wrap_output_keys is None:
            cls._sklearn_auto_wrap_output_keys = set()

        # Mapping from method to key in configurations
        method_to_key = {
            "transform": "transform",
            "fit_transform": "transform",
        cls._sklearn_auto_wrap_output_keys = set()

        for method, key in method_to_key.items():
            if not hasattr(cls, method) or key not in auto_wrap_output_keys:

            # Only wrap methods defined by cls itself
            if method not in cls.__dict__:
            wrapped_method = _wrap_method_output(getattr(cls, method), key)
            setattr(cls, method, wrapped_method)

    def set_output(self, *, transform=None):
        """Set output container.

        See :ref:``
        for an example on how to use the API.

        transform : {"default", "pandas"}, default=None
            Configure output of `transform` and `fit_transform`.

            - `"default"`: Default output format of a transformer
            - `"pandas"`: DataFrame output
            - `None`: Transform configuration is unchanged

        self : estimator instance
            Estimator instance.
        if transform is None:
            return self

        if not hasattr(self, "_sklearn_output_config"):
            self._sklearn_output_config = {}

        self._sklearn_output_config["transform"] = transform
        return self

def _safe_set_output(estimator, *, transform=None):
    """Safely call estimator.set_output and error if it not available.

    This is used by meta-estimators to set the output for child estimators.

    estimator : estimator instance
        Estimator instance.

    transform : {"default", "pandas"}, default=None
        Configure output of the following estimator's methods:

        - `"transform"`
        - `"fit_transform"`

        If `None`, this operation is a no-op.

    estimator : estimator instance
        Estimator instance.
    set_output_for_transform = (
        hasattr(estimator, "transform")
        or hasattr(estimator, "fit_transform")
        and transform is not None
    if not set_output_for_transform:
        # If estimator can not transform, then `set_output` does not need to be
        # called.

    if not hasattr(estimator, "set_output"):
        raise ValueError(
            f"Unable to configure output for {estimator} because `set_output` "
            "is not available."
    return estimator.set_output(transform=transform)