#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import os
import time

from library.python import resource
import luigi

from crypta.lib.python.juggler.juggler_helpers import report_event_to_juggler
from crypta.profile.lib import date_helpers
from crypta.profile.utils.config import config
from crypta.profile.utils.loggers import send_to_graphite
from crypta.profile.utils.luigi_utils import (
    BaseYtTask,
    ExternalInput,
    OldNodesByNameCleaner,
    YtTarget,
)


# YQL template for the geocube parsing query. `GeocubeParserDaily.run` appends it
# to the "/query/input.yql" resource and renders the result with `str.format`.
#
# - `{output_table}` is the only format placeholder in this part; the doubled
#   braces (`{{`, `}}`) are literal braces escaped for `str.format`.
# - `$input` is not defined in this template -- presumably the prepended
#   resource defines it (TODO confirm against /query/input.yql).
# - Rows with a NULL `yandexuid` are dropped. Values starting with 'y' get
#   id_type 'yandexuid' and are passed through SUBSTRING(yandexuid, 1); all
#   other values are kept unchanged with id_type 'uuid'.
# - The result is written with TRUNCATE and ordered by id, id_type, timestamp.
geocube_query = """
PRAGMA yt.DefaultMemoryLimit = '2048M';
PRAGMA yson.DisableStrict;

$is_yandexuid = ($value) -> {{
    RETURN String::StartsWith($value, 'y');
}};

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    IF($is_yandexuid(yandexuid), SUBSTRING(yandexuid, 1), yandexuid) AS id,
    IF($is_yandexuid(yandexuid), 'yandexuid', 'uuid') AS id_type,
    ip,
    CAST(request_region AS Uint64) AS region_id,
    CAST(`timestamp` AS Uint64) AS `timestamp`,
    normal_query AS query,
    Crypta::GetLemmas(CAST(normal_query AS Utf8), lang) AS lemmas
FROM $input
WHERE yandexuid IS NOT NULL
ORDER BY id, id_type, `timestamp`
"""


class GeocubeParserDaily(BaseYtTask):
    """Parse one day of geocube logs into a single YT table.

    The task reads the `maps`, `mobile_maps`, `navi` and `serp` tables found
    under `config.GEOCUBE_LOG_DIRECTORY/<date>` and writes the result to
    `config.GEOCUBE_PARSED_DIR/<date>`.
    """

    # Date string used as the node name for both the input and output paths.
    date = luigi.Parameter()
    # Short log name used in the juggler service name and the graphite metric.
    log_name = 'geocube'

    def requires(self):
        """Return one external input per geocube source table for `self.date`."""
        return [
            ExternalInput(os.path.join(config.GEOCUBE_LOG_DIRECTORY, self.date, log))
            for log in ['maps', 'mobile_maps', 'navi', 'serp']
        ]

    def output(self):
        """Return the YT target holding the parsed table for `self.date`."""
        return YtTarget(os.path.join(config.GEOCUBE_PARSED_DIR, self.date))

    def run(self):
        """Create the output table, fill it with the YQL query and mark it closed.

        Everything happens inside one `self.yt.Transaction()` block; the
        transaction object is also passed explicitly to the YQL query.
        """
        with self.yt.Transaction() as transaction:
            output_table = self.output().table

            self.yt.create_empty_table(
                output_table,
                schema={
                    'id': 'string',
                    'id_type': 'string',
                    'timestamp': 'uint64',
                    'query': 'string',
                    'lemmas': 'any',
                    'ip': 'string',
                    'region_id': 'uint64',
                },
            )

            # Each input path is wrapped in single quotes before being joined
            # into the `input_tables` substitution.
            input_tables = ["'{}'".format(target.table) for target in self.input()]

            # The shared resource is prepended to the geocube-specific query;
            # `input_tables` and `sampling` are presumably consumed by that
            # resource (it is not visible in this file).
            query = resource.find("/query/input.yql") + geocube_query

            self.yql.query(
                query_string=query.format(
                    input_tables=', '.join(input_tables),
                    sampling=config.LOG_PARSING_SAMPLING,
                    output_table=output_table,
                ),
                transaction=transaction,
            )

            self.yt.set_attribute(
                output_table,
                'generate_date',
                self.date,
            )

            self.yt.set_attribute(
                output_table,
                'closed',
                True,
            )

    def on_failure(self, exception):
        """Report a WARN event to juggler and return the failure explanation.

        Luigi forwards the return value of this hook to the scheduler as the
        failure explanation. The original override returned nothing, which
        dropped the traceback, so the stack trace is returned here in the same
        form as Luigi's default implementation.

        :param exception: the exception raised by `run`.
        :return: string with the formatted traceback of the failure.
        """
        import traceback

        # Capture the traceback before calling any other code so it describes
        # the failure of `run` itself.
        explanation = 'Runtime error:\n{}'.format(traceback.format_exc())

        report_event_to_juggler(
            status='WARN',
            service='{}_log_parsing'.format(self.log_name),
            host=config.CRYPTA_PROFILE_JUGGLER_HOST,
            description='Some tables has failed to process: {}'.format(self.output().table),
            tags=['log_parsing'],
            logger=self.logger,
        )

        return explanation

    def on_success(self):
        """Send the completion delay to graphite and an OK event to juggler.

        The graphite value is the number of seconds between the timestamp of
        the day following `self.date` and the current time.
        """
        midnight_timestamp = date_helpers.from_date_string_to_timestamp(date_helpers.get_tomorrow(self.date))

        send_to_graphite(
            'task_end.LogParser_{}'.format(self.log_name),
            int(time.time()) - midnight_timestamp,
            timestamp=midnight_timestamp,
        )

        report_event_to_juggler(
            status='OK',
            service='{}_log_parsing'.format(self.log_name),
            host=config.CRYPTA_PROFILE_JUGGLER_HOST,
            description='Last processed table: {}'.format(self.date),
            tags=['log_parsing'],
            logger=self.logger,
        )


class GeocubeParser(luigi.WrapperTask):
    """Wrapper task that requires the daily parsers and a cleaner for old results."""

    def requires(self):
        """Yield the daily parsing tasks followed by the old-node cleaner.

        Dates come from `date_helpers.generate_back_dates`, starting at
        yesterday and covering `config.NUMBER_OF_DAYS_TO_KEEP_PARSED_LOGS`
        days; the same number is passed to the cleaner as `lifetime`.
        """
        days_to_keep = config.NUMBER_OF_DAYS_TO_KEEP_PARSED_LOGS
        last_full_day = date_helpers.get_yesterday(
            date_helpers.to_date_string(datetime.date.today())
        )

        for parsed_day in date_helpers.generate_back_dates(last_full_day, days_to_keep):
            yield GeocubeParserDaily(parsed_day)

        cleaner_kwargs = dict(
            date=last_full_day,
            folder=config.GEOCUBE_PARSED_DIR,
            lifetime=days_to_keep,
            date_format=date_helpers.DATE_FORMAT,
        )
        yield OldNodesByNameCleaner(**cleaner_kwargs)


def main():
    """Run `GeocubeParser` through Luigi and exit with status 1 on failure.

    The scheduler URL is taken from `config.LUIGI_SCHEDULER_URL` and two
    workers are requested. A falsy result from `luigi.run` terminates the
    process with exit status 1.
    """
    # `sys.exit` is used instead of the bare `exit` builtin: `exit` is injected
    # by the `site` module for interactive use and is not guaranteed to exist
    # when the interpreter runs without `site` (e.g. frozen or embedded builds).
    import sys

    result = luigi.run(
        [
            '--scheduler-url', config.LUIGI_SCHEDULER_URL,
            '--workers', '2',
        ],
        main_task_cls=GeocubeParser,
    )

    if not result:
        sys.exit(1)
