# Public names exported by `from <this module> import *`.
__all__ = [
    'ClosestToAverage',
]
import crowdkit.aggregation.base
import numpy
import pandas
import typing


class ClosestToAverage(crowdkit.aggregation.base.BaseEmbeddingsAggregator):
    """Closest to Average - chooses the output with the embedding closest to the average embedding.

    This method takes a `DataFrame` containing four columns: `task`, `worker`, `output`, and `embedding`.
    Here the `embedding` is a vector containing a representation of the `output`. The `output` might be any
    type of data such as text, images, NumPy arrays, etc. As a result, the method returns the output whose
    embedding is the closest one to the average embedding of the task's responses.

    Args:
        distance: A callable that takes two NumPy arrays and returns a single `float` number — the distance
            between these two vectors.

    Attributes:
        embeddings_and_outputs_ (DataFrame): Tasks' embeddings and outputs.
            A pandas.DataFrame indexed by `task` with `embedding` and `output` columns.

        scores_ (DataFrame): Tasks' label scores.
            A pandas.DataFrame indexed by `task` such that `result.loc[task, label]`
            is the score of `label` for `task`.
    """

    def fit(
        self,
        data: pandas.DataFrame,
        aggregated_embeddings: typing.Optional[pandas.Series] = None,
        true_embeddings: typing.Optional[pandas.Series] = None
    ) -> 'ClosestToAverage':
        """Fits the model.

        Args:
            data (DataFrame): Workers' outputs with their embeddings.
                A pandas.DataFrame containing `task`, `worker`, `output` and `embedding` columns.
            aggregated_embeddings (Series, optional): Tasks' embeddings.
                A pandas.Series indexed by `task` and holding corresponding embeddings.
                Defaults to `None`.
                NOTE(review): presumably precomputed per-task reference embeddings used in place
                of the computed average — confirm against the implementation.
            true_embeddings (Series, optional): Tasks' embeddings.
                A pandas.Series indexed by `task` and holding corresponding embeddings.
                Defaults to `None`.
                NOTE(review): presumably known ground-truth embeddings for some tasks — confirm
                how they interact with `aggregated_embeddings`.

        Returns:
            ClosestToAverage: self.
        """
        ...

    def fit_predict_scores(
        self,
        data: pandas.DataFrame,
        aggregated_embeddings: typing.Optional[pandas.Series] = None
    ) -> pandas.DataFrame:
        """Fit the model and return the estimated scores.

        Args:
            data (DataFrame): Workers' outputs with their embeddings.
                A pandas.DataFrame containing `task`, `worker`, `output` and `embedding` columns.
            aggregated_embeddings (Series, optional): Tasks' embeddings.
                A pandas.Series indexed by `task` and holding corresponding embeddings.
                Defaults to `None`.

        Returns:
            DataFrame: Tasks' label scores.
                A pandas.DataFrame indexed by `task` such that `result.loc[task, label]`
                is the score of `label` for `task`.
                NOTE(review): presumably this is the fitted `scores_`. An earlier version of
                this docstring described the values as probabilities in [0, 1] summing to 1
                per task; nothing in this declaration guarantees that for an arbitrary
                `distance` callable — confirm against the implementation.
        """
        ...

    def fit_predict(
        self,
        data: pandas.DataFrame,
        aggregated_embeddings: typing.Optional[pandas.Series] = None
    ) -> pandas.DataFrame:
        """Fit the model and return the aggregated results.

        Args:
            data (DataFrame): Workers' outputs with their embeddings.
                A pandas.DataFrame containing `task`, `worker`, `output` and `embedding` columns.
            aggregated_embeddings (Series, optional): Tasks' embeddings.
                A pandas.Series indexed by `task` and holding corresponding embeddings.
                Defaults to `None`.

        Returns:
            DataFrame: Tasks' embeddings and outputs.
                A pandas.DataFrame indexed by `task` with `embedding` and `output` columns.
        """
        ...

    def __init__(self, distance: typing.Callable[[numpy.ndarray, numpy.ndarray], float]) -> None:
        """Method generated by attrs for class ClosestToAverage.

        Args:
            distance: A callable that takes two NumPy arrays and returns a single `float`
                number — the distance between these two vectors.
        """
        ...

    # Fitted result: per-task `embedding` and `output` columns, indexed by `task`.
    embeddings_and_outputs_: pandas.DataFrame
    # Fitted result: per-task label scores, indexed by `task`.
    scores_: pandas.DataFrame
    # Distance callable supplied to `__init__`.
    distance: typing.Callable[[numpy.ndarray, numpy.ndarray], float]
