import typing as tp

from nile.api.v1 import (
    aggregators as na,
    datetime as nd,
    extractors as ne,
    filters as nf,
    utils as nu,
    extended_schema,
    Record,
    Job,
    stream as nstream,
)
from qb2.api.v1 import (
    filters as qf,
    typing as qt,
)
from .common import WindowsConfig, convert_bytes_to_str
from .constants import FORMS_METRICS_DIMENSIONS, NULL_KEY


# Path components that select which cooked bebr-log tables are read.
BEBR_LOG_TYPE = "clean"
BEBR_SERVICE_TOUCH = "touch-maps"
BEBR_SERVICE_DESKTOP = "desktop-maps"

# Table path layout: <base path>/<service>/<logType>/<date>.
BEBR_LOG_BASE_PATH = "//home/maps/analytics/logs/cooked-bebr-log"
BEBR_LOG_PATH_FORMAT = "/".join((BEBR_LOG_BASE_PATH, "{service}", "{logType}", "{date}"))

# Same dimensions as FORMS_METRICS_DIMENSIONS, but without "window_days".
# A copy is taken first so that the imported list itself is not mutated.
FORMS_METRICS_DIMENSIONS_WITHOUT_WINDOW_DAYS: tp.List[str] = FORMS_METRICS_DIMENSIONS.copy()
FORMS_METRICS_DIMENSIONS_WITHOUT_WINDOW_DAYS.remove("window_days")

# Russian for "Earth"; not referenced anywhere in the visible part of this module.
EARTH_REGION_NAME = "Земля"


def bebr_tables_combined(from_date_iso_str: str, to_date_iso_str: str) -> str:
    """
    Builds one table path specifier that covers both services and the whole date range, e.g.:
    '//home/maps/analytics/logs/cooked-bebr-log/{desktop-maps,touch-maps}/clean/{2020-01-01..2020-12-31}'
    Such a specifier lets all the necessary bebr tables be read as a single concatenated input.
    """
    # Doubled braces are literal '{' / '}' in str.format templates.
    placeholders = {
        "service": "{{{},{}}}".format(BEBR_SERVICE_DESKTOP, BEBR_SERVICE_TOUCH),
        "logType": BEBR_LOG_TYPE,
        "date": "{{{}..{}}}".format(from_date_iso_str, to_date_iso_str),
    }
    return BEBR_LOG_PATH_FORMAT.format(**placeholders)


def prepare_bebr_data(job: Job, windows_config: WindowsConfig) -> nstream.Stream:
    """
    Reads all bebr log tables of the configured period as one stream.

    The fields path, log_date and event_type are stored in bebr logs as bytes.
    The path field sometimes contains a byte sequence that cannot be decoded
    as utf-8, so it is passed through as bytes here and is converted back
    to str only after the rows have been filtered
    * either with qf.startswith("path", b"maps_www.feedback.")
    * or with qf.startswith("path", b"maps_www.ugc_profile.")
    """
    first_day = windows_config.left_margin.date().isoformat()
    last_day = windows_config.right_margin.date().isoformat()

    # ignore_missing=True: presumably lets the job run when some of the
    # tables in the range do not exist -- confirm against the nile docs.
    raw_logs = job.table(
        bebr_tables_combined(first_day, last_day),
        ignore_missing=True,
    )

    prepared = raw_logs.project(
        "vars",
        "path",
        log_date=ne.custom(convert_bytes_to_str, "log_date").with_type(str),
        event_type=ne.custom(convert_bytes_to_str, "event_type").with_type(str),
    )
    return prepared.label("prepared_bebr_data")


def extract_form_type_from_path(path: str) -> str:
    """Derive the report-style form_type from a bebr path.

    Path format in bebr logs:
        maps_www.feedback.<form_type>[\\.\\w*]*
        maps_www.ugc_profile.<form_type>[\\.\\w*]*
    Inside the logged <form_type> both '/' and '-' (in "not-found", etc.)
    appear as '_'; this function restores them:
        address_add             address/add
        entrance_location_edit  entrance/location/edit
        object_not_found        object/not-found
    For types whose first component is "route" only the first two
    components are kept.

    Raises IndexError when path has fewer than three dot-separated parts.
    """
    logged_type = path.split(".")[2]

    # Restore the known hyphenated words first, so that their underscore
    # is not turned into '/' by the generic replacement below.
    for underscored, hyphenated in (
        ("not_found", "not-found"),
        ("edit_info", "edit-info"),
        ("edit_status", "edit-status"),
    ):
        logged_type = logged_type.replace(underscored, hyphenated)

    restored = logged_type.replace("_", "/")
    components = restored.split("/")

    if components[0] != "route":
        return restored
    return "/".join(components[:2])


def is_excess_form_type(form_type: str) -> bool:
    """
    Tells whether form_type is one of the irrelevant/intermediate forms
    that have to be filtered out.
    Feedback form types are described in the docs:
    https://github.yandex-team.ru/maps/maps/blob/dev/docs/feedback.md
    """
    excess_form_types = (
        "feedback/edit/route/panel",
        "collapse",
        "entrance/select",
        "object/add",
        "object/edit",
        "map/edit",
        "object/select",
    )
    return form_type in excess_form_types


def extract_vars_key(vars: tp.Dict[bytes, bytes], key: bytes, *, default: str = NULL_KEY) -> str:
    """Return vars[key] decoded as utf-8, or default when key is absent from vars."""
    if key not in vars:
        return default
    return vars[key].decode("utf-8")


def extract_form_id(vars: tp.Dict[bytes, bytes], path: str) -> str:
    """
    Extracts the form id of a log record.

    A record whose vars carries a non-None `formType` is treated as new-format:
    its `formId` key is returned via extract_vars_key (that helper's default
    is returned when `formId` itself is missing).
    According to the original note, `vars[formId]` appears in logs since 2021-02-08.

    For older records the form type is derived from `path`, and the id is
    its first token when that token is "organization" or "route",
    otherwise "toponym".
    """
    if vars.get(b"formType") is not None:
        return extract_vars_key(vars, b"formId")

    root_type = extract_form_type_from_path(path).split("/")[0]
    return root_type if root_type in ("organization", "route") else "toponym"


def extract_client_id(vars: tp.Dict[bytes, bytes]) -> str:
    """
    Extracts the `clientId` key from vars json.
    The original note dates `vars[clientId]` to 2020-08-14 -- presumably the
    date it first appeared in logs; confirm.
    When the key is absent, the default of extract_vars_key is returned.
    """
    client_id_key = b"clientId"
    return extract_vars_key(vars, key=client_id_key)


def extract_form_type(vars: tp.Dict[bytes, bytes], path: str) -> str:
    """
    Extracts the `formType` key from vars json.
    The `vars[formType]` appeared in logs since 2021-02-08.
    Before that date `formType` is extracted from `path` field.

    The path-based fallback is evaluated only when `formType` is absent:
    passing it as a default argument would parse `path` for every record
    (and fail on a malformed path) even when the result is never used.
    """
    if b"formType" in vars:
        return vars[b"formType"].decode("utf-8")

    return extract_form_type_from_path(path)


def extract_form_context_id(vars: tp.Dict[bytes, bytes]) -> str:
    """
    Extracts the form context id from vars json.
    The key was initially called `formContext` (since 2021-02-11)
    and was renamed to `formContextId` in 2021-06-24.
    The new name takes precedence when both are present.
    """
    current_key = b"formContextId"

    if current_key not in vars:
        # Backward compatibility: fall back to the key's old name.
        return extract_vars_key(vars, b"formContext")

    return vars[current_key].decode("utf-8")


def extract_client_context_id(vars: tp.Dict[bytes, bytes]) -> str:
    """
    Extracts the `clientContextId` key from vars json.
    `clientContextId` appeared in logs in 2021-06-24.
    When the key is absent, the default of extract_vars_key is returned.
    """
    client_context_key = b"clientContextId"
    return extract_vars_key(vars, key=client_context_key)


def forms_shows_filter(*, use_new_format: bool):
    """
    Builds the filter that selects form "show" events.

    Legacy format: event_type equals "show".
    New format: additionally vars[b"type"] must equal b"form".
    """
    if not use_new_format:
        return nf.equals("event_type", "show")

    # NOTE(review): the lambda's argument name is kept as `vars`; presumably
    # nile uses it to pick the record field -- confirm before renaming.
    is_form_element = nf.custom(lambda vars: vars.get(b"type") == b"form")
    is_show_event = nf.equals("event_type", "show")
    return nf.and_(is_form_element, is_show_event)


def forms_submits_filter(*, use_new_format: bool):
    """
    Builds the filter that selects form submit events.

    Legacy format: event_type equals "submit".
    New format: event_type equals "click" and vars[b"type"] equals b"submit_button".
    """
    if not use_new_format:
        return nf.equals("event_type", "submit")

    # NOTE(review): the lambda's argument name is kept as `vars`; presumably
    # nile uses it to pick the record field -- confirm before renaming.
    is_submit_button = nf.custom(lambda vars: vars.get(b"type") == b"submit_button")
    is_click_event = nf.equals("event_type", "click")
    return nf.and_(is_submit_button, is_click_event)


def calculate_daily_forms_metrics(bebr_data: nstream.Stream) -> nstream.Stream:
    """
    Counts form shows and submits grouped by FORMS_METRICS_DIMENSIONS_WITHOUT_WINDOW_DAYS.

    Steps:
    * keep only rows whose path starts with b"maps_www.feedback." or b"maps_www.ugc_profile.";
    * add fielddate (log_date rounded to a daily period), decode path and
      extract the form/client identifiers from vars and path;
    * drop rows without a client id and rows with excess form types;
    * count show and submit events, accepting both the legacy and the new log format.
    """
    is_forms_path = nf.or_(
        qf.startswith("path", b"maps_www.feedback."),
        qf.startswith("path", b"maps_www.ugc_profile."),
    )
    forms_events = bebr_data.filter(is_forms_path)

    enriched_events = forms_events.project(
        ne.all(),
        fielddate=ne.custom(
            lambda log_date: nd.round_period(log_date, scale="daily").decode("utf-8")
            if log_date else None
        ).with_type(str),
        path=ne.custom(convert_bytes_to_str, "path").with_type(str),

        # TODO: Remove legacy handling from the vars field extraction in functions:
        # * extract_form_id,
        # * extract_client_id,
        # * extract_form_type,
        # * extract_form_context_id,
        # * extract_client_context_id,
        # once the old bebr logs are deleted.
        form_id=ne.custom(extract_form_id, "vars", "path").with_type(str),
        client_id=ne.custom(extract_client_id, "vars").with_type(str),
        form_type=ne.custom(extract_form_type, "vars", "path").with_type(str),
        form_context_id=ne.custom(extract_form_context_id, "vars").with_type(str),
        client_context_id=ne.custom(extract_client_context_id, "vars").with_type(str),
    )

    forms_data = enriched_events.filter(
        nf.not_(nf.equals("client_id", NULL_KEY)),
        nf.custom(lambda form_type: not is_excess_form_type(form_type)),
    )

    # NOTE(review): the legacy show filter (event_type == "show") is a superset
    # of the new-format one, so this OR matches every "show" event regardless
    # of vars[b"type"] -- confirm that this is intended.
    shows_counter = na.count(nf.or_(
        forms_shows_filter(use_new_format=False),
        forms_shows_filter(use_new_format=True),
    ))
    submits_counter = na.count(nf.or_(
        forms_submits_filter(use_new_format=False),
        forms_submits_filter(use_new_format=True),
    ))

    daily_metrics = forms_data.groupby(
        *FORMS_METRICS_DIMENSIONS_WITHOUT_WINDOW_DAYS,
    ).aggregate(
        forms_shows=shows_counter,
        forms_submits=submits_counter,
    )
    return daily_metrics.label("forms_metrics")


def aggregate_forms_metrics_in_windows(forms_metrics: nstream.Stream, windows_config: WindowsConfig) -> nstream.Stream:
    """
    Re-aggregates forms metrics over the windows provided by windows_config.

    Every input record is emitted once per window that includes the record's
    fielddate; each copy gets the window's fielddate and window_days.
    The copies are then grouped by FORMS_METRICS_DIMENSIONS and their
    forms_shows / forms_submits are summed.
    """
    window_days_schema = extended_schema(window_days=qt.Int32)

    @nu.with_hints(output_schema=window_days_schema)
    def windows_mapper(records: tp.Iterable[Record]) -> tp.Generator[Record, None, None]:
        for record in records:
            record_date = nd.Datetime.from_iso(record.fielddate)
            for window_fielddate, window, window_days in windows_config.windows():
                if not window.includes(record_date):
                    continue
                yield Record(record, fielddate=window_fielddate, window_days=window_days)

    windowed_metrics = forms_metrics.map(windows_mapper)
    summed_metrics = windowed_metrics.groupby(
        *FORMS_METRICS_DIMENSIONS,
    ).aggregate(
        forms_shows=na.sum("forms_shows"),
        forms_submits=na.sum("forms_submits"),
    )
    return summed_metrics.label("forms_metrics_with_windows")
