import re
import typing as tp

from nile.api.v1 import (
    filters as nf,
    stream as nstream,
    Job,
)
from qb2.api.v1 import filters as qf
import yt.wrapper as yt


# Table read by make_job to collect the distinct values of REJECT_REASON.
NMAPS_FEEDBACK_DB_TABLE = "home/maps/core/nmaps/analytics/feedback/db/feedback_latest"
# One or more runs of lowercase ASCII letters joined by a single "-", "_" or ".".
DOTTED_WORDS_REGEX = "^[a-z]+([\\-_\\.][a-z]+)*$"
# Same shape as DOTTED_WORDS_REGEX, but the allowed separators are "-", "_" and "/".
SLASHED_WORDS_REGEX = "^[a-z]+([\\-_\\/][a-z]+)*$"

# Columns whose distinct values make_job exports, mapped to the regex a value
# must match to be kept. None means no regex filtering is applied to that column.
COLUMN_TO_REGEX = {
    "answer_id": None,
    "client_context_id": DOTTED_WORDS_REGEX,
    "client_id": DOTTED_WORDS_REGEX,
    "form_context_id": DOTTED_WORDS_REGEX,
    "form_id": None,
    "form_type": SLASHED_WORDS_REGEX,
    "question_id": None,
    "region_name": None,
}
# Column of NMAPS_FEEDBACK_DB_TABLE whose distinct values make_job exports;
# the same string is reused as the output table name under meta_dir.
REJECT_REASON = "reject_reason"


def regex_filter(regex: str):
    """Build a custom nile filter over the "values" field based on `regex`.

    The predicate is falsy for a falsy value (e.g. None or an empty string);
    otherwise it is True exactly when ``re.match(regex, value)`` succeeds.
    """

    def _matches(value):
        # Short-circuit on falsy values so re.match is never handed None.
        return value and re.match(regex, value) is not None

    return nf.custom(_matches, "values")


def blacklist_filter(blacklist: tp.List[str]):
    """Build a filter that negates membership of the "values" field in `blacklist`."""
    is_blacklisted = qf.one_of("values", blacklist)
    return qf.not_(is_blacklisted)


def select_distinct(
    table: nstream.Stream,
    column: str,
    regex: tp.Optional[str],
    blacklist: tp.Optional[tp.List[str]]
) -> nstream.Stream:
    """Return the sorted distinct defined values of `column` from `table`.

    The result stream has a single field named "values".

    Args:
        table: Stream to read `column` from.
        column: Name of the source column to project into "values".
        regex: When truthy, only values accepted by ``regex_filter(regex)``
            are kept.
        blacklist: When truthy, values accepted by
            ``blacklist_filter(blacklist)`` are kept (i.e. listed values
            are dropped).

    Returns:
        The stream of unique values, with the optional filters applied after
        deduplication and sorting.
    """
    # Expose the requested column under the fixed name "values" and drop
    # records where it is not defined.
    projected = table.project(values=column)
    defined_only = projected.filter(qf.defined("values"))

    distinct: nstream.Stream = defined_only.unique("values").sort("values")

    # Both filters are optional; a None or empty argument disables the step.
    if regex:
        distinct = distinct.filter(regex_filter(regex))
    if blacklist:
        distinct = distinct.filter(blacklist_filter(blacklist))

    return distinct


def concat_weekly_tables(job: Job, metrics_dir: str) -> nstream.Stream:
    """Concatenate every table listed under `metrics_dir` into one stream.

    Each name returned by ``yt.list(metrics_dir)`` is joined to `metrics_dir`
    with "/" and opened through ``job.table``; the resulting streams are
    passed together to ``job.concat``.

    NOTE(review): presumably the directory holds one table per week, as the
    function name suggests -- nothing here checks that.
    """
    table_paths = ["/".join((metrics_dir, name)) for name in yt.list(metrics_dir)]
    streams: tp.List[nstream.Stream] = [job.table(path) for path in table_paths]
    return job.concat(*streams)


def make_job(job: Job, metrics_dir: str, meta_dir: str, blacklist: tp.Dict[str, tp.List[str]]) -> None:
    """Add steps to `job` that export distinct column values into `meta_dir`.

    For every column in COLUMN_TO_REGEX, the distinct values found in the
    concatenated tables of `metrics_dir` are written to "<meta_dir>/<column>".
    The distinct REJECT_REASON values of NMAPS_FEEDBACK_DB_TABLE are written
    to "<meta_dir>/<REJECT_REASON>" without regex or blacklist filtering.

    Args:
        job: Job the table reads and writes are attached to.
        metrics_dir: Directory whose tables are concatenated as the source.
        meta_dir: Directory that receives one output table per column.
        blacklist: Per-column lists of values to exclude; columns without an
            entry get no blacklist filtering.
    """

    def _meta_path(name: str) -> str:
        # Output tables live directly under meta_dir, named after the column.
        return "/".join((meta_dir, name))

    metrics = concat_weekly_tables(job, metrics_dir)

    for column_name, pattern in COLUMN_TO_REGEX.items():
        distinct_values = select_distinct(
            table=metrics,
            column=column_name,
            regex=pattern,
            blacklist=blacklist.get(column_name),
        )
        distinct_values.put(_meta_path(column_name))

    feedback_db: nstream.Stream = job.table(NMAPS_FEEDBACK_DB_TABLE)
    reject_reasons = select_distinct(
        table=feedback_db,
        column=REJECT_REASON,
        regex=None,
        blacklist=None,
    )
    reject_reasons.put(_meta_path(REJECT_REASON))
