import datetime
import logging
import os.path
import pwd
import re
import time

from cached_property import cached_property
import jinja2
import library.python.resource as rs
from yt.wrapper import YPath

from crypta.lib.python.bt.tasks import (
    YQLTaskV1 as YQLTask,
)
from crypta.lib.python.bt.workflow import (
    IndependentTask,
    Parameter,
)
from crypta.lib.python.solomon.reporter import (
    create_solomon_reporter,
)
from crypta.lib.python.yql_runner.task import (
    YQLRunnerTask,
)
import crypta.lib.python.bt.conf.conf as conf

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Name of the table attribute that records which tables a result was built from.
PROCESSED_TABLES = "processed_tables"
# Retention window in days; _get_last_ts_to_keep() derives its cutoff from it.
DAYS_BACK = 30
# Not referenced anywhere in the visible part of this file.
PAIRS_DAYS_BACK = 14
# Upper bound on the fresh tables picked up by one Hourly run.
MAX_FRESH_PER_HOUR = 32
# Upper bound on the entries kept in a processed-tables document.
MAX_PROCESSED_TABLES = 1000

# strptime/strftime layouts used for hourly and daily table names.
HOURLY_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
DAILY_DATETIME_FORMAT = "%Y-%m-%d"


def _timestamp():
    """Return the current Unix time, truncated to whole seconds, as a string."""
    whole_seconds = int(time.time())
    return "%d" % whole_seconds


def _username():
    """Return the login name of the user that owns the current process."""
    uid = os.getuid()
    return pwd.getpwuid(uid).pw_name


def _yql_tables_list(tables):
    """Render table paths as backtick-quoted names, one per line, comma-separated."""
    quoted = []
    for table in tables:
        quoted.append("`%s`" % table)
    return ",\n".join(quoted)


def _resource_loader(path, prefix="/crypta/graph/fpc/"):
    """Look up a bundled resource, prepending ``prefix`` when ``path`` lacks it."""
    resource_key = path if path.startswith(prefix) else prefix + path
    return rs.find(resource_key)


def _latest(yt, path, count):
    """Return up to ``count`` children of ``path`` with the newest date-like names.

    Only children whose name ends in ``YYYY-MM-DD`` are considered; they are
    ordered by base name, newest first.
    """
    dated = [
        child
        for child in yt.list(path, absolute=True)
        if re.match(".*[0-9]{4}-[0-9]{2}-[0-9]{2}$", child)
    ]
    dated.sort(key=os.path.basename, reverse=True)
    return dated[:count]


def _hourly(path):
    """Expand a daily table path into its 24 hourly table paths.

    Every ``"1d"`` substring of ``path`` is replaced by ``"1h"`` and an
    ``Thh:00:00`` suffix is appended for each hour of the day.
    """
    hourly_prefix = path.replace("1d", "1h")
    return ["%sT%02d:00:00" % (hourly_prefix, hour) for hour in range(24)]


def _hourly_as_timestamp(path):
    """Convert an hourly table path into a Unix timestamp.

    The base name is parsed with ``HOURLY_DATETIME_FORMAT``; ``time.mktime``
    interprets the result as local time.
    """
    table_name = os.path.basename(path)
    parsed = datetime.datetime.strptime(table_name, HOURLY_DATETIME_FORMAT)
    return time.mktime(parsed.timetuple())


def _daily_as_timestamp(path):
    """Return the last ``/``-separated component of ``path`` as an integer."""
    last_component = path.rpartition("/")[2]
    return int(last_component)


def _daily_date_as_timestamp(path):
    """Convert a daily table path into a Unix timestamp.

    The base name is parsed with ``DAILY_DATETIME_FORMAT``; ``time.mktime``
    interprets the result as local time.
    """
    table_name = os.path.basename(path)
    parsed = datetime.datetime.strptime(table_name, DAILY_DATETIME_FORMAT)
    return time.mktime(parsed.timetuple())


def _get_last_ts_to_keep():
    """Return the Unix timestamp that lies ``DAYS_BACK`` days before now."""
    seconds_per_day = 86400
    retention_seconds = seconds_per_day * DAYS_BACK
    return int(time.time()) - retention_seconds


def _get_timestamp_from_table(path):
    """Derive a Unix timestamp from the base name of a table path.

    Three name layouts are recognised:

    * ``2019-07-17T09:00:00_GetFpcFromWatchLog`` - hourly name plus a suffix;
      everything up to the first underscore is parsed as an hourly name.
    * ``2019-07-17T09:00:00`` - plain hourly name.
    * ``1500000000`` - the name already is an integer timestamp.

    Raises ``ValueError`` when the name fits none of these layouts.
    """
    name = os.path.basename(path)

    if "_" in name:
        # partition() keeps working when the suffix itself contains
        # underscores, where a two-value unpack of split("_") would raise.
        head = name.partition("_")[0]
        return _hourly_as_timestamp(head)
    if "T" in name:
        return _hourly_as_timestamp(name)
    return int(name)


def _update_processed_tables(yt_client, document_path, input_tables):
    """Append ``input_tables`` to the processed-tables document and prune it.

    Entries older than the retention cutoff (minus one hour of slack) are
    dropped, and at most ``MAX_PROCESSED_TABLES`` of the greatest names are
    kept. The document is never written empty: readers in this module take
    ``max()``/``min()`` of its contents, which raises on an empty sequence,
    so when pruning would remove everything the greatest name is retained.

    :param yt_client: client exposing ``get(path)`` and ``set(path, value)``.
    :param document_path: path of the document holding the list of tables.
    :param input_tables: tables consumed by the run that just finished.
    """
    last_ts_to_keep = _get_last_ts_to_keep()
    logger.info("Last timestamp to keep is %d", last_ts_to_keep)

    cutoff = last_ts_to_keep - 60 * 60
    known_tables = list(yt_client.get(document_path))
    known_tables.extend(input_tables)

    processed_tables = sorted(
        table for table in known_tables
        if _get_timestamp_from_table(table) > cutoff
    )
    if not processed_tables and known_tables:
        # Everything fell out of the retention window; keep the watermark.
        processed_tables = [max(known_tables)]

    processed_tables = processed_tables[-MAX_PROCESSED_TABLES:]
    yt_client.set(document_path, processed_tables)


def _get_solomon_reporter():
    """Build a Solomon reporter from the ``SolomonConfig`` section of the config."""
    cfg = conf.proto.SolomonConfig
    reporter_kwargs = dict(
        project=cfg.Project,
        cluster=cfg.Cluster,
        service=cfg.Service,
        url=cfg.Url,
    )
    return create_solomon_reporter(**reporter_kwargs)


def _export(yt, src_path, dst_paths):
    """Copy ``src_path`` to every path in ``dst_paths``, logging each copy."""
    for destination in dst_paths:
        logger.info("Exporting %s to %s", src_path, destination)
        yt.copy(src_path, destination)


class Paths(object):
    """Catalogue of the YT locations used by the FPC tasks.

    Log locations are fixed; state locations hang off ``root``, which depends
    on the ``ENVIRONMENT`` variable.
    """

    def __init__(self, yt):
        # Client used for listing log directories.
        self.yt = yt

    def watch_log_tables(self, n_latest):
        """Return the ``n_latest`` newest daily bs-watch-log tables."""
        daily_dir = "//logs/bs-watch-log/1d"
        return _latest(self.yt, daily_dir, n_latest)

    @property
    def _environment(self):
        """Lower-cased ``ENVIRONMENT`` variable; ``"develop"`` when unset."""
        raw = os.environ.get("ENVIRONMENT", "develop")
        return raw.lower()

    @property
    def watch_log_hourly(self):
        """Directory of hourly bs-watch-log tables."""
        return YPath("//logs/bs-watch-log/1h")

    @property
    def root(self):
        """Environment-specific state directory.

        Raises ``Exception`` for an environment other than develop, testing
        or production.
        """
        env = self._environment
        if env == "develop":
            team_home = YPath("//home/crypta/team")
            return team_home.join(_username()).join("fpc")
        if env == "testing":
            return YPath("//home/crypta/testing/state/graph/fpc")
        if env == "production":
            return YPath("//home/crypta/production/state/graph/fpc")

        raise Exception("Wrong environment")

    @property
    def index(self):
        """Location of the ``IndexDuid`` table."""
        return self.root.join("IndexDuid")

    def export_hourly(self, name):
        """Location of the hourly export table called ``name``."""
        return self.export_hourly_path.join(name)

    @property
    def export_hourly_path(self):
        """Directory holding the hourly export tables."""
        exports = self.root.join("export_duids")
        return exports.join("hourly")

    def get_export_paths(self, name, postfix=None):
        """Return the upload destinations for an export named ``name``.

        ``postfix``, when given, is appended to ``name``. Only the production
        environment has destinations; any other one raises ``Exception``.
        """
        if postfix:
            name += postfix
        if self._environment != "production":
            raise Exception("Wrong environment")

        upload_dirs = (
            "//home/crypta/production/cookie_matching/rt/fpc_upload/to_upload",
            "//home/crypta/testing/cookie_matching/rt/fpc_upload/to_upload",
        )
        return tuple(YPath(directory).join(name) for directory in upload_dirs)

    @property
    def processed(self):
        """The ``processed`` directory under the root."""
        return self.root.join("processed")

    @property
    def processed_duids(self):
        """The ``processed_duids`` directory under the root."""
        return self.root.join("processed_duids")

    def processed_hourly(self, name):
        """Location of the processed table called ``name``."""
        return self.root.join("processed_duids").join(name)

    @property
    def fresh_fpc(self):
        """The ``fresh`` directory under the root."""
        return self.root.join("fresh")

    @property
    def chevent_log_hourly(self):
        """Directory of hourly bs-chevent-log tables."""
        return YPath("//logs/bs-chevent-log/1h")

    @property
    def hit_log_hourly(self):
        """Directory of hourly bs-hit-log tables."""
        return YPath("//logs/bs-hit-log/1h")

    @property
    def redir_log(self):
        """Directory of 30-minute redir-log tables."""
        return YPath("//logs/redir-log/30min")

    @property
    def zen_log(self):
        """Directory of 30-minute zen-events-log tables."""
        return YPath("//logs/zen-events-log/30min")

    @property
    def market_log(self):
        """Directory of 30-minute market-clicks-log tables."""
        return YPath("//home/market/production/mstat/logs/market-clicks-log/30min")

    @property
    def metrika_stream_logs(self):
        """Directory of 5-minute appmetrica-yandex-events stream tables."""
        return YPath("//logs/appmetrica-yandex-events/stream/5min")

    @property
    def extfp_stream_logs(self):
        """Directory of 5-minute crypta-prod-ext-fp-match-log stream tables."""
        return YPath("//logs/crypta-prod-ext-fp-match-log/stream/5min")

    @property
    def adstat_nginx_logs(self):
        """Directory of 30-minute adstat-nginx-log tables."""
        return YPath("//logs/adstat-nginx-log/30min")

    @property
    def page_dict(self):
        """Location of the ``//home/yabs/dict/Page`` table."""
        return YPath("//home/yabs/dict/Page")

    @property
    def fingerprints(self):
        """The ``fingerprints`` directory under the root."""
        return self.root.join("fingerprints")

    @property
    def banned_yuids(self):
        """The ``banned_yuids`` location under the root."""
        return self.root.join("banned_yuids")

    @property
    def keyboard_uuids(self):
        """The ``KeyboardUuids`` location under the root."""
        return self.root.join("KeyboardUuids")

    @property
    def extfp_ab_extra_table(self):
        """The ``AllExtfpData`` location under the root."""
        return self.root.join("AllExtfpData")


class WithPaths(object):
    """Mixin exposing a ``paths`` catalogue built from the host's ``yt`` client."""

    @property
    def paths(self):
        """A new ``Paths`` instance bound to ``self.yt``."""
        yt_client = self.yt
        return Paths(yt_client)


class WithJinja(object):
    """Mixin that renders Jinja templates loaded through ``_resource_loader``."""

    @cached_property
    def _jinja(self):
        """Jinja environment, created on first use and then cached."""
        return jinja2.Environment(loader=jinja2.FunctionLoader(_resource_loader))

    def render(self, template, **kwargs):
        """Render ``template`` with ``kwargs`` as context and return the text."""
        logger.info("Rendering %s with args: %s", template, kwargs)
        compiled = self._jinja.get_template(template)
        return compiled.render(**kwargs)


class TableFinder(object):
    """Mixin that selects not-yet-processed input tables for a task.

    The host class supplies ``yt``, ``paths``, ``tables_path`` and
    ``join_tables_path``. Progress is kept in a YT document holding the list
    of processed tables.
    """
    MAX_TABLES_PER_RUN = 3
    MIN_TABLES_PER_RUN = 1

    @property
    def query(self):
        """Placeholder; always ``None``."""
        return None

    @property
    def output_table_postfix(self):
        """Suffix for output table names: an underscore plus the class name."""
        return "_%s" % type(self).__name__

    @property
    def processed_tables_document_path(self):
        """Location of the document that lists processed tables."""
        document_name = "processed_tables" + self.output_table_postfix
        return self.paths.root.join(document_name)

    def get_tables_to_join(self, table, join_path, prev_offset):
        """List the companion tables in ``join_path`` that cover ``table``.

        Returns ``None`` when ``join_path`` is falsy. Without ``prev_offset``
        the only companion is the table of the same name. With it, the result
        holds the table one offset earlier, the same-named table and every
        further table, one offset apart, that starts within the hour.
        """
        if not join_path:
            return None

        name = os.path.basename(table)
        # Parsed up front, so a malformed name fails even without an offset.
        moment = datetime.datetime.strptime(name, HOURLY_DATETIME_FORMAT)

        if not prev_offset:
            return [str(join_path.join(name))]

        def joined(when):
            return str(join_path.join(when.strftime(HOURLY_DATETIME_FORMAT)))

        tables = [joined(moment - prev_offset), str(join_path.join(name))]

        hour_end = moment + datetime.timedelta(hours=1)
        step = moment + prev_offset
        while step < hour_end:
            tables.append(joined(step))
            step += prev_offset

        return tables

    def get_processed_tables(self):
        """Return the processed-tables list, creating the document if absent.

        A fresh document is seeded with a ``1970-01-01T00:00:00`` entry under
        ``tables_path``.
        """
        document = self.processed_tables_document_path
        if not self.yt.exists(document):
            self.yt.create("document", document)
            epoch_table = self.tables_path.join("1970-01-01T00:00:00")
            self.yt.set(document, [str(epoch_table)])

        return self.yt.get(document)

    def get_tables(self, prev_offset=None):
        """Pick the next input tables together with their join tables.

        Candidates are the tables in ``tables_path`` whose path sorts after
        the greatest processed one. Returns ``(input_tables, join_tables)``,
        both sorted; both are empty when fewer than ``MIN_TABLES_PER_RUN``
        candidates exist.
        """
        newest_done = max(self.get_processed_tables())
        listed = self.yt.list(self.tables_path, absolute=True)
        candidates = sorted(path for path in listed if path > newest_done)

        logger.info("Choosing over %s", candidates)

        chosen = []
        join_tables = set()

        if len(candidates) < self.MIN_TABLES_PER_RUN:
            return [], []

        # NOTE(review): a candidate whose join tables are missing is skipped
        # while later candidates may still be taken; since the watermark is
        # the greatest processed name, the skipped one presumably is never
        # revisited - confirm this is intended.
        for candidate in candidates:
            companions = self.get_tables_to_join(candidate, self.join_tables_path, prev_offset)
            if not companions:
                chosen.append(candidate)
            else:
                logger.info("Join candidates %s", companions)
                if all(map(self.yt.exists, companions)):
                    chosen.append(candidate)
                    join_tables.update(companions)

            if len(chosen) >= self.MAX_TABLES_PER_RUN:
                break

        logger.info("Input tables: %s, join tables: %s", chosen, join_tables)
        return sorted(chosen), sorted(join_tables)

    def get_output_table(self, input_tables):
        """Return the fresh-directory table named after the greatest input."""
        newest_name = os.path.basename(max(input_tables))
        output_table = self.paths.fresh_fpc.join("{}{}".format(newest_name, self.output_table_postfix))
        logger.info("Will store in %s", output_table)
        return output_table

    def update_processed(self, output_table, input_tables, join_tables):
        """Record ``input_tables`` as processed and tag ``output_table``.

        The output table receives a ``PROCESSED_TABLES`` attribute holding the
        input tables followed by the join tables.
        """
        _update_processed_tables(self.yt, self.processed_tables_document_path, input_tables)
        consumed = input_tables + join_tables
        self.yt.set_attribute(output_table, PROCESSED_TABLES, consumed)


class BuildFingerprints(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Build a fingerprints table from one new hourly bs-watch-log table per run."""
    MAX_TABLES_PER_RUN = 1

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_BuildFingerprints"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def run(self, **kwargs):
        """Render and run ``build_fingerprints.sql``, then record progress."""
        input_tables, _join_tables = self.get_tables()

        if not input_tables:
            return

        newest_name = os.path.basename(max(input_tables))
        output_table = self.paths.fingerprints.join(newest_name)

        query = self.render(
            "/crypta/graph/fpc/yql/build_fingerprints.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])


class UpdateKeyboardUuids(YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Fold new appmetrica stream tables into the ``KeyboardUuids`` table.

    Progress is a single watermark: the path of the greatest stream table
    processed so far, stored in a YT document.
    """
    MAX_TABLES_PER_RUN = 12 * 12

    @property
    def processed_tables_document_path(self):
        """Location of the document holding the watermark."""
        return self.paths.root.join("processed_tables_UpdateKeyboardUuids")

    def get_max_processed_table(self):
        """Return the watermark, creating the document on first use."""
        document = self.processed_tables_document_path
        if not self.yt.exists(document):
            self.yt.create("document", document)
            initial = self.paths.metrika_stream_logs.join("1970-01-01T00:00:00")
            # Stored as a plain string, like the other bootstrap documents in
            # this module, so later string comparisons see the same type.
            self.yt.set(document, str(initial))

        return self.yt.get(document)

    def set_max_processed_table(self, table_name):
        """Overwrite the watermark with ``table_name``."""
        self.yt.set(self.processed_tables_document_path, table_name)

    def get_tables(self):
        """Return up to ``MAX_TABLES_PER_RUN`` stream tables past the watermark."""
        available_tables = self.yt.list(self.paths.metrika_stream_logs, absolute=True)
        max_processed_table = self.get_max_processed_table()
        candidates = [x for x in available_tables if x > max_processed_table]

        return sorted(candidates)[:self.MAX_TABLES_PER_RUN]

    def run(self, **kwargs):
        """Render and run ``update_keyboard_uuids.sql``, then move the watermark."""
        new_tables = self.get_tables()
        if not new_tables:
            return

        logger.info("Input tables: %s, main table: %s", new_tables, self.paths.keyboard_uuids)

        query = self.render(
            "/crypta/graph/fpc/yql/update_keyboard_uuids.sql",
            input_tables=_yql_tables_list(new_tables),
            output_table=self.paths.keyboard_uuids,
            now=time.time(),
            **self.get_context_data()
        )

        if self.is_embedded:
            self._run_embedded(query)
        else:
            self._run_client(query)

        # Moved only after the query finished, so a failed run is retried.
        self.set_max_processed_table(max(new_tables))


class GetFpcViaExtfp(YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from ext-fp match stream tables.

    Progress is a single watermark: the path of the greatest stream table
    processed so far, stored in a YT document. A run is skipped until at
    least ``MINIMUM_TABLES_TO_JOIN`` new tables are available.
    """
    MAX_TABLES_PER_RUN = 12 * 12
    MINIMUM_TABLES_TO_JOIN = 12

    @property
    def processed_tables_document_path(self):
        """Location of the document holding the watermark."""
        return self.paths.root.join("processed_tables_GetFpcViaExtfp")

    def get_max_processed_table(self):
        """Return the watermark, creating the document on first use."""
        document = self.processed_tables_document_path
        if not self.yt.exists(document):
            self.yt.create("document", document)
            initial = self.paths.extfp_stream_logs.join("1970-01-01T00:00:00")
            # Stored as a plain string, like the other bootstrap documents in
            # this module, so later string comparisons see the same type.
            self.yt.set(document, str(initial))

        return self.yt.get(document)

    def set_max_processed_table(self, table_name):
        """Overwrite the watermark with ``table_name``."""
        self.yt.set(self.processed_tables_document_path, table_name)

    def get_tables(self):
        """Return up to ``MAX_TABLES_PER_RUN`` stream tables past the watermark."""
        available_tables = self.yt.list(self.paths.extfp_stream_logs, absolute=True)
        max_processed_table = self.get_max_processed_table()
        candidates = [x for x in available_tables if x > max_processed_table]

        return sorted(candidates)[:self.MAX_TABLES_PER_RUN]

    def get_output_table(self, input_tables):
        """Return the fresh-directory table named after the greatest input."""
        output_table = self.paths.fresh_fpc.join("{}{}".format(os.path.basename(max(input_tables)), "_GetFpcViaExtfp"))
        logger.info("Will store in %s", output_table)
        return output_table

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_extfp.sql``, then move the watermark."""
        new_tables = self.get_tables()
        if len(new_tables) < self.MINIMUM_TABLES_TO_JOIN:
            logger.info("Number of avalable tables %s is less then requred %d", len(new_tables), self.MINIMUM_TABLES_TO_JOIN)
            return

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_extfp.sql",
            input_tables=_yql_tables_list(new_tables),
            output_table=self.get_output_table(new_tables),
            extfp_extra_ab_table=self.paths.extfp_ab_extra_table,
            now=time.time(),
            **self.get_context_data()
        )

        if self.is_embedded:
            self._run_embedded(query)
        else:
            self._run_client(query)

        # Moved only after the query finished, so a failed run is retried.
        self.set_max_processed_table(max(new_tables))


class GetFpcFromAdstat(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from 30-minute adstat-nginx-log tables."""
    MIN_TABLES_PER_RUN = 24
    MAX_TABLES_PER_RUN = 48

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcFromAdstat"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.adstat_nginx_logs

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_adstat.sql``, record progress, report rows."""
        half_hour = datetime.timedelta(minutes=30)
        input_tables, _join_tables = self.get_tables(half_hour)
        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_adstat.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_adstat", row_count)


class GetFpcViaFingerprint(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from the fingerprints tables."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaFingerprint"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.fingerprints

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def get_tables(self, prev_offset=None):
        """Return the new fingerprints tables, preceded by their predecessor.

        The table just before the first unprocessed one is included as well,
        so the list may hold up to ``MAX_TABLES_PER_RUN + 1`` entries.
        ``prev_offset`` is accepted for signature compatibility and unused.
        """
        selected = []

        newest_done = max(self.get_processed_tables())
        listed = sorted(self.yt.list(self.tables_path, absolute=True))

        for position, table in enumerate(listed):
            if table <= newest_done:
                continue
            if not selected and position > 0:
                # Lead with the table just before the first new one.
                selected.append(listed[position - 1])

            selected.append(table)
            if len(selected) > self.MAX_TABLES_PER_RUN:
                break

        logger.info("Input tables: %s", selected)
        return selected

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_fingerprint.sql``, record progress, report rows."""
        input_tables = self.get_tables()
        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_fingerprint.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_fingerprint", row_count)


class GetFpcViaMcPort(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from the fingerprints tables via ``get_fpc_via_mc_port.sql``."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaMcPort"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.fingerprints

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def get_tables(self, prev_offset=None):
        """Return the new fingerprints tables, preceded by their predecessor.

        The table just before the first unprocessed one is included as well,
        so the list may hold up to ``MAX_TABLES_PER_RUN + 1`` entries.
        ``prev_offset`` is accepted for signature compatibility and unused.
        """
        selected = []

        newest_done = max(self.get_processed_tables())
        listed = sorted(self.yt.list(self.tables_path, absolute=True))

        for position, table in enumerate(listed):
            if table <= newest_done:
                continue
            if not selected and position > 0:
                # Lead with the table just before the first new one.
                selected.append(listed[position - 1])

            selected.append(table)
            if len(selected) > self.MAX_TABLES_PER_RUN:
                break

        logger.info("Input tables: %s", selected)
        return selected

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_mc_port.sql``, record progress, report rows."""
        input_tables = self.get_tables()
        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_mc_port.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_mcport", row_count)


class GetFpcFromWatchLog(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcFromWatchLog"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_watch_log.sql``, record progress, report rows."""
        input_tables, _join_tables = self.get_tables()

        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_watch_log.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_watch", row_count)


class GetFpcFromMts(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables via ``get_fpc_from_mts.sql``."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document.

        The document name spells "MTS" in upper case, unlike the class name.
        """
        document_name = "processed_tables_GetFpcFromMTS"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_mts.sql``, record progress, report rows."""
        input_tables, _join_tables = self.get_tables()

        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_mts.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_mts", row_count)


class GetFpcFromCheventLog(TableFinder, YQLTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-chevent-log tables joined with bs-hit-log."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcFromCheventLog"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.chevent_log_hourly

    @property
    def join_tables_path(self):
        """Directory holding the companion tables."""
        return self.paths.hit_log_hourly

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_chevent_log.sql``, record progress, report rows."""
        input_tables, join_tables = self.get_tables()

        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        client = self.yql_client
        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_chevent_log.sql",
            input_tables=_yql_tables_list(input_tables),
            join_tables=_yql_tables_list(join_tables),
            output_table=output_table,
            now=time.time()
        )
        client.execute(query, syntax_version=self.syntax_version)

        self.update_processed(output_table, input_tables, join_tables)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_chevent", row_count)


class GetFpcViaSerp(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables joined with redir-log."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaSerp"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """Directory holding the companion tables."""
        return self.paths.redir_log

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_serp.sql``, record progress, report rows."""
        half_hour = datetime.timedelta(minutes=30)
        input_watch_tables, input_redir_tables = self.get_tables(half_hour)

        if not input_watch_tables:
            return

        output_table = self.get_output_table(input_watch_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_serp.sql",
            input_watch_tables=_yql_tables_list(input_watch_tables),
            input_redir_tables=_yql_tables_list(input_redir_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_watch_tables, input_redir_tables)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_serp", row_count)


class GetFpcViaZen(TableFinder, YQLTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables joined with zen-events-log."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaZen"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """Directory holding the companion tables."""
        return self.paths.zen_log

    def run(self, **kwargs):
        """Render and run ``get_fpc_from_zen.sql``, record progress, report rows."""
        half_hour = datetime.timedelta(minutes=30)
        input_watch_tables, input_zen_tables = self.get_tables(half_hour)

        if not input_watch_tables:
            return

        output_table = self.get_output_table(input_watch_tables)

        client = self.yql_client
        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_from_zen.sql",
            input_watch_tables=_yql_tables_list(input_watch_tables),
            input_zen_tables=_yql_tables_list(input_zen_tables),
            output_table=output_table,
            now=time.time()
        )
        client.execute(query, syntax_version=self.syntax_version)

        self.update_processed(output_table, input_watch_tables, input_zen_tables)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_zen", row_count)


class GetFpcViaYclid(TableFinder, YQLTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables joined with bs-chevent-log."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaYclid"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """Directory holding the companion tables."""
        return self.paths.chevent_log_hourly

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_yclid.sql``, record progress, report rows."""
        one_hour = datetime.timedelta(hours=1)
        input_watch_tables, input_chevent_tables = self.get_tables(one_hour)

        if not input_watch_tables:
            return

        output_table = self.get_output_table(input_watch_tables)

        client = self.yql_client
        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_yclid.sql",
            input_watch_tables=_yql_tables_list(input_watch_tables),
            input_chevent_tables=_yql_tables_list(input_chevent_tables),
            page_table=self.paths.page_dict,
            output_table=output_table,
            now=time.time()
        )
        client.execute(query, syntax_version=self.syntax_version)

        self.update_processed(output_table, input_watch_tables, input_chevent_tables)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_yclid", row_count)


class GetFpcViaMarket(TableFinder, YQLTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables joined with market-clicks-log."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaMarket"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """Directory holding the companion tables."""
        return self.paths.market_log

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_market.sql``, record progress, report rows."""
        half_hour = datetime.timedelta(minutes=30)
        input_watch_tables, input_market_tables = self.get_tables(half_hour)

        if not input_watch_tables:
            return

        output_table = self.get_output_table(input_watch_tables)

        client = self.yql_client
        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_market.sql",
            input_watch_tables=_yql_tables_list(input_watch_tables),
            input_market_tables=_yql_tables_list(input_market_tables),
            output_table=output_table,
            now=time.time()
        )
        client.execute(query, syntax_version=self.syntax_version)

        self.update_processed(output_table, input_watch_tables, input_market_tables)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_market", row_count)


class GetFpcViaTls(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables and their preceding hour.

    The watch-log directory serves both as the input and as the join source,
    so the query receives each new table together with the one before it.
    """

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaTls"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """Same directory as the input tables."""
        return self.paths.watch_log_hourly

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_tls.sql``, record progress, report rows."""
        one_hour = datetime.timedelta(minutes=60)
        input_tables, input_tables_with_prev = self.get_tables(one_hour)

        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        # The query reads the extended list; input_tables only names the output.
        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_tls.sql",
            input_tables=_yql_tables_list(input_tables_with_prev),
            fresh_output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, input_tables_with_prev)
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_tls", row_count)


class GetFpcViaCookieSync(TableFinder, YQLRunnerTask, IndependentTask, WithJinja, WithPaths):
    """Produce a fresh FPC table from hourly bs-watch-log tables via ``get_fpc_via_cookie_sync.sql``."""

    @property
    def processed_tables_document_path(self):
        """Location of this task's processed-tables document."""
        document_name = "processed_tables_GetFpcViaCookieSync"
        return self.paths.root.join(document_name)

    @property
    def tables_path(self):
        """Directory scanned for new input tables."""
        return self.paths.watch_log_hourly

    @property
    def join_tables_path(self):
        """No companion tables are needed."""
        return None

    def run(self, **kwargs):
        """Render and run ``get_fpc_via_cookie_sync.sql``, record progress, report rows."""
        input_tables, _join_tables = self.get_tables()

        if not input_tables:
            return

        output_table = self.get_output_table(input_tables)

        query = self.render(
            "/crypta/graph/fpc/yql/get_fpc_via_cookie_sync.sql",
            input_tables=_yql_tables_list(input_tables),
            output_table=output_table,
            now=time.time(),
            **self.get_context_data()
        )

        execute = self._run_embedded if self.is_embedded else self._run_client
        execute(query)

        self.update_processed(output_table, input_tables, [])
        reporter = _get_solomon_reporter()
        row_count = self.yt.get_attribute(output_table, "row_count")
        reporter.set_value("raw_fpc_from_cookie_sync", row_count)


class DuidsBySource(YQLTask, IndependentTask, WithJinja, WithPaths):
    """Report per-key values and the maximum component size of the index table."""
    keyPrefix = "src-"

    @property
    def query(self):
        """Placeholder; always ``None``."""
        return None

    def run(self, **kwargs):
        """Run ``duids_by_source_simple.sql`` and push its results to Solomon.

        The first result set yields ``(key, value)`` rows reported under
        ``keyPrefix + key``; the last row of the second result set carries
        the value reported as ``fpc_max_component_size``.
        """
        client = self.yql_client
        rendered = self.render(
            "/crypta/graph/fpc/yql/duids_by_source_simple.sql",
            indexTable=self.paths.index,
            now=time.time()
        )
        results = client.execute(rendered, syntax_version=self.syntax_version)

        reporter = _get_solomon_reporter()
        for key, value in results[0].rows:
            reporter.set_value(self.keyPrefix + str(key), value)

        largest_component = int(results[1].rows.pop()[0])
        reporter.set_value("fpc_max_component_size", largest_component)


class Hourly(YQLTask, IndependentTask, WithJinja, WithPaths):
    """Merge fresh FPC tables into the index, export the result and clean up.

    Each run takes up to ``MAX_FRESH_PER_HOUR`` unprocessed tables from the
    fresh directory, prepares a processed table and an export table, merges
    the processed table into the index, optionally copies the export to the
    upload destinations, prunes old tables and reports monitoring values.
    """

    # The export step runs only when this parameter equals the string "True".
    do_export = Parameter()

    @property
    def query(self):
        """Placeholder; always an empty list."""
        return []

    def cleanup(self, path, last_ts):
        """Remove children of ``path`` older than ``last_ts`` minus one hour.

        Does nothing when ``path`` does not exist.
        """
        logger.info("Cleaning up old tables in %s", path)
        if not self.yt.exists(path):
            logger.info("Path %s does not exist", path)
            return

        threshold = last_ts - 60 * 60
        for each in self.yt.list(path, absolute=True):
            if _get_timestamp_from_table(each) < threshold:
                logger.info("Removing old %s", each)
                self.yt.remove(each)

    def get_monitoring_data(self, last_table_processed, exported_table):
        """Return ``(delta, total_fpcs)`` from the monitoring query.

        ``delta`` is the number of seconds between now and the value in the
        first result set; ``total_fpcs`` is the value in the second one.
        """
        r = self.yql_client.execute(
            self.render(
                "/crypta/graph/fpc/yql/get_processed_monitoring_data.sql",
                last_table=last_table_processed,
                exported_table=exported_table
            ), syntax_version=self.syntax_version
        )

        max_ts = int(r[0].rows.pop()[0])
        total_fpcs = int(r[1].rows.pop()[0])
        delta = int(time.time() - max_ts)

        return delta, total_fpcs

    def get_input_tables(self, processedDocument):
        """Return up to ``MAX_FRESH_PER_HOUR`` unprocessed fresh tables, sorted.

        The document is created and seeded with a ``1970-01-01T00:00:00``
        entry when it does not exist yet.
        """
        if not self.yt.exists(processedDocument):
            self.yt.create("document", processedDocument)
            self.yt.set(processedDocument, [str(self.paths.fresh_fpc.join("1970-01-01T00:00:00"))])

        processed_tables = set(self.yt.get(processedDocument))
        # min() raises on an empty set; an empty document means nothing has
        # been processed, so every fresh table qualifies.
        min_processed_table = min(processed_tables) if processed_tables else ""
        available_tables = self.yt.list(self.paths.fresh_fpc, absolute=True)

        candidates = sorted(x for x in available_tables if x > min_processed_table and x not in processed_tables)

        return candidates[:MAX_FRESH_PER_HOUR]

    def run(self, **kwargs):
        """Execute one merge/export/cleanup cycle; a no-op without new input."""
        output_table_name = _timestamp()
        processedTable = self.paths.processed_hourly(output_table_name)
        processedDocument = self.paths.root.join("processed_tables_Hourly_duid")
        indexTable = self.paths.index
        exportTable = self.paths.export_hourly(output_table_name)

        input_tables = self.get_input_tables(processedDocument)
        if not input_tables:
            logger.info("No new data yet")
            return

        # Arguments are passed separately so logging formats them lazily.
        logger.info("Selected tables: %s", input_tables)
        logger.info("Will store in %s", processedTable)

        last_ts = _get_last_ts_to_keep()

        self.yql_client.execute(
            self.render(
                "/crypta/graph/fpc/yql/prepare_and_export.sql",
                input_tables=_yql_tables_list(input_tables),
                output_table=processedTable,
                export_table=exportTable,
                single_table=indexTable,
                banned_table=self.paths.banned_yuids,
                now=time.time(),
            ), syntax_version=self.syntax_version
        )

        self.yql_client.execute(
            self.render(
                "/crypta/graph/fpc/yql/merge_data.sql",
                input_table=processedTable,
                output_table=indexTable,
                single_table=indexTable,
                last_seen_margin=last_ts,
                now=time.time(),
            ), syntax_version=self.syntax_version
        )

        if self.do_export == "True":
            _export(self.yt, exportTable, self.paths.get_export_paths(output_table_name, postfix="-duid"))
        else:
            logger.info("Skipping export as it is disabled")

        # Progress is recorded only after both queries and the export succeeded.
        self.yt.set_attribute(processedTable, PROCESSED_TABLES, input_tables)
        _update_processed_tables(self.yt, processedDocument, input_tables)

        for directory in (
            self.paths.processed_duids,
            self.paths.fingerprints,
            self.paths.export_hourly_path,
            self.paths.fresh_fpc,
        ):
            self.cleanup(directory, last_ts)

        latency, total_fpcs = self.get_monitoring_data(processedTable, exportTable)
        logger.info("Uploading monitoring data")
        reporter = _get_solomon_reporter()
        reporter.set_value("fpc_latency", latency)
        reporter.set_value("fpc_count", total_fpcs)
