# -*- coding: utf-8 -*-

import datetime
import json
import hashlib
import logging
import os
import time

from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process

from sandbox.projects import resource_types
from sandbox.projects.common.mediasearch import ban as mediaban
from sandbox.projects.common.nanny import auto_deploy
from sandbox.projects.common.wizard import parameters as wp
from sandbox.projects.common.ReleasedResourceConsumer import ReleasedResourceConsumer
from sandbox.projects.common import apihelpers
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils
from sandbox.projects.GetWizardResponses import ResponseProvider

from sandbox.sandboxsdk.svn import Arcadia


class FreshQueryMarksLastDaysParameter(parameters.SandboxIntegerParameter):
    """
        Integer task parameter: size (in days) of the window of fresh querymarks to use.
    """

    # Freon records older than this many days are skipped by the task.
    default_value = 3
    required = True
    name = "fresh_querymarks_last_days"
    description = "Count of last days to used fresh querymarks"


class FreonIntentsParameter(parameters.SandboxStringParameter):
    """
        String task parameter: comma-separated list of Freon intent names to process.
    """

    # The task splits this value on ',' to get the individual intents.
    default_value = "freshintent"
    required = True
    name = "freon_intents"
    description = "Freon intents (CSV)"


class FreonYtPathParameter(parameters.SandboxStringParameter):
    """
        String task parameter: YT directory that contains the Freon tables.
    """

    # A table name is appended to this path with a '/' separator.
    default_value = "//home/videoquality/freon"
    required = True
    name = "freon_yt_path"
    description = "Freon directory on YT"


class FreonYtTablesParameter(parameters.SandboxStringParameter):
    """
        String task parameter: comma-separated names of the Freon tables to read from YT.
    """

    # An earlier default also listed the "queryfresh" table: "queryfresh,news-and-sport".
    default_value = "news-and-sport"
    required = True
    name = "freon_tables_yt"
    description = "Freon tables YT (CSV)"


class YtPythonParameter(parameters.ResourceSelector):
    """
        Resource selector for the YT Python executable used to run YT commands.
    """

    resource_type = resource_types.VIDEO_YT_PYTHON_EXECUTABLE
    name = 'yt_python_resource_id'
    description = 'YT Python resource'


class FreonYtServerParameter(parameters.SandboxStringParameter):
    """
        String task parameter: YT server passed to the YT tool as the ``--proxy`` value.
    """

    default_value = "hahn"
    required = True
    name = "freon_yt_server"
    description = "Freon YT server"


class VideoFreshReleaseQueryMarksTrie(auto_deploy.AutoNannyDeployTask, mediaban.VideoBaseReleaseBanTask, ReleasedResourceConsumer):
    """
        Builds fresh_querymarks.trie for Yandex.Video service

        Query marks are read from Freon tables on YT and from a hand-made list
        exported from Arcadia, the queries are normalized through a wizard
        subtask, and the result is written as plain data, a trie and a version
        file.
    """

    type = "VIDEO_FRESH_RELEASE_QUERYMARKS_TRIE"

    # Task-specific parameters first, then the parameters declared by both parent tasks.
    input_parameters = (
        FreshQueryMarksLastDaysParameter,
        FreonIntentsParameter,
        YtPythonParameter,
        FreonYtServerParameter,
        FreonYtPathParameter,
        FreonYtTablesParameter,
    ) + mediaban.VideoBaseReleaseBanTask.input_parameters + ReleasedResourceConsumer.input_parameters

    # "{timestamp}" is a placeholder left for whoever formats the subject (not filled in here).
    release_subject = "video/middle/fresh-querymarks-data-{timestamp}"
    release_comment = "video fast fresh_querymarks.trie"
    # Initial set of released resource types; _update_resource() may append more per instance.
    release_resources = (
        resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_TRIE,
    )

    def get_stable_services(self):
        return [self._SERVICE_ID]

    def get_nanny_oauth_token(self):
        """
            Fetch the Nanny OAuth token from the vault.

            :return: whatever ``get_vault_data`` returns for the
                'VIDEO-ROBOT' / 'robot-video-crawl-nanny-oauth' pair.
        """
        vault_owner = 'VIDEO-ROBOT'
        vault_key = 'robot-video-crawl-nanny-oauth'
        return self.get_vault_data(vault_owner, vault_key)

    def _build_ban(self):
        """
            Build the plain querymarks file and, if it changed, the trie and version resources.

            Steps: collect queries and marks, start the wizard subtask on the first
            run (or check subtasks on a re-run), parse the wizard responses, write
            the plain file, and compare its MD5 with the plain file obtained through
            ``self._tool``. When the digests differ, the plain, trie and version
            resources are created.

            :return: ``0`` when the plain data is unchanged, otherwise the size
                in bytes of the generated trie file.
        """
        def file_md5(path):
            # Hash the raw bytes and close the handle deterministically.
            with open(path, 'rb') as stream:
                return hashlib.md5(stream.read()).hexdigest()

        # All intermediate files live next to the trie and share its base name.
        querymarks_trie_path = self.abs_path(resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_TRIE.basename)
        querymarks_plain_path = querymarks_trie_path + '.plain'
        querymarks_version_path = querymarks_trie_path + '.version'
        wizard_queries_path = querymarks_trie_path + '.queries'
        collected_wizardings_path = querymarks_trie_path + '.wizardings'

        # These containers are filled in place by the helper methods below.
        query_dict = dict()
        queries = list()
        new_queries = list()
        old_wizardings = list()
        self._extract_queries(querymarks_trie_path, query_dict, queries)
        self._prepare_queries(queries, new_queries, old_wizardings)

        # No recorded child tasks means this is the first pass: create the wizard subtask.
        if not self.ctx.get('child_tasks_ids'):
            self._generate_wizardings(wizard_queries_path, new_queries)
        else:
            utils.check_subtasks_fails()
        logging.debug("Wizarding task successfully created!")

        vnorm_dopp = dict()
        self._parse_wizardings(collected_wizardings_path, vnorm_dopp, old_wizardings)
        self._generate_plain(querymarks_plain_path, query_dict, vnorm_dopp)

        cur_md5 = file_md5(querymarks_plain_path)
        querymarks_last_path = self._tool(resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_PLAIN)
        prev_md5 = file_md5(querymarks_last_path)
        if cur_md5 == prev_md5:
            logging.debug("Plain querymarks do not changed since last run! Exit")
            return 0

        self.create_resource(self.descr, querymarks_plain_path, resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_PLAIN)
        logging.debug("Plain resource successfully created!")

        self._generate_trie(querymarks_plain_path, querymarks_trie_path)
        self._generate_version(querymarks_version_path)

        # A non-zero result is the size of the freshly built trie file.
        return os.stat(querymarks_trie_path).st_size

    def _update_resource(self, resource_type, skynet_id=None, path=None):
        """
            Delegate to ``VideoBaseReleaseBanTask._update_resource`` and remember updated types.

            When the parent call returns a truthy value, ``resource_type`` is
            appended to this instance's ``release_resources`` tuple.

            :return: the parent call's result, unchanged.
        """
        updated = mediaban.VideoBaseReleaseBanTask._update_resource(
            self, resource_type, skynet_id=skynet_id, path=path
        )
        if not updated:
            return updated

        self.release_resources = self.release_resources + (resource_type,)
        return updated

    def _yt(self, cmd, *args, **kwargs):
        """
            Start the YT tool for ``cmd`` without waiting for it to finish.

            :param cmd: YT sub-command (for example ``"read"``); also used in the log prefix.
            :param args: command-line arguments appended after ``cmd`` in exactly the given order.
            :param kwargs: keywords starting with ``_`` are forwarded to
                ``process.run_process`` with the underscore stripped. Values of
                any other keyword are appended after ``args`` (legacy form);
                their relative order follows dict iteration order, so use
                ``args`` whenever the order matters.
            :return: the result of ``process.run_process`` called with ``wait=False``.
        """
        legacy_args = tuple(v for k, v in kwargs.items() if not k.startswith('_'))
        run_process_args = {k[1:]: v for k, v in kwargs.items() if k.startswith('_')}
        yt_tool = self.last_released_resource(resource_types.VIDEO_YT_PYTHON_EXECUTABLE, YtPythonParameter.name)
        yt_args = (
            yt_tool,
            "--proxy", self.ctx[FreonYtServerParameter.name],
        )

        return process.run_process(
            yt_args + (cmd,) + tuple(args) + legacy_args,
            environment={"YT_TOKEN": self.get_vault_data('VIDEODEV', 'yt_token')},
            outputs_to_one_file=False,
            log_prefix="yt.{}".format(cmd),
            timeout=600,
            wait=False,
            **run_process_args
        )

    def _read_yt_table(self, table_path, dst_path):
        """
            Dump the YT table ``table_path`` as JSON lines into ``dst_path``.

            :raises errors.SandboxTaskFailureError: when the YT process exits with a non-zero code.
        """
        # Positional arguments keep "--format <value> <table>" in a guaranteed order.
        yt_read = self._yt("read", "--format", "<encode_utf8=%false>json", table_path)
        if yt_read.wait():
            raise errors.SandboxTaskFailureError("Failed to read %s" % (table_path,))
        # The captured stdout of the process is the table dump itself.
        os.rename(yt_read.stdout_path, dst_path)

    def _transform_intent(self, intent, value):
        if intent == 'freshintent':
            if value >= 0.8:
                return 1.2
            if value <= 0.4:
                return 0.0
        return None

    def _try_to_add_mark(self, query_dict, query, intent, j):
        marks = query_dict.get(query, {}).get('marks', {})
        ts = query_dict.get(query, {}).get('ts', {})

        if ts.get(intent) is None or ts[intent] < j['ts']:
            marks[intent + '_mark'] = j['value'][intent]
            if self._transform_intent(intent, j['value'][intent]) is not None:
                marks[intent + '_exp'] = self._transform_intent(intent, j['value'][intent])
            ts[intent] = j['ts']
        query_dict[query] = {'marks': marks, 'ts': ts}

    def _add_hand_mark(self, query_dict, query, intent, value):
        marks = query_dict.get(query, {}).get('marks', {})
        if self._transform_intent(intent, value) is not None:
            marks[intent] = self._transform_intent(intent, value)
            marks[intent + '_exp'] = self._transform_intent(intent, value)
        query_dict[query] = {'marks': marks}

    def _transform_country(self, country):
        if country in ['tr']:
            return 'com.' + country
        return country

    def _extract_queries(self, querymarks_trie_path, query_dict, queries):
        """
            Collect queries and their marks from the Freon YT tables and the hand-made list.

            :param querymarks_trie_path: base path; dumps are stored next to it
                as ``<path>.<table>`` and ``<path>.hand``.
            :param query_dict: filled in place, keyed by ``(query, domain)``.
            :param queries: list extended in place with every accepted query
                (duplicates are possible).
            :raises Exception: re-raises the export error when the hand-made
                list cannot be exported after three attempts.

            Records that fail to parse are logged and skipped.
        """
        now_ts = int(time.time())
        # Loop invariants: parse the task parameters once instead of once per record.
        max_age_seconds = self.ctx[FreshQueryMarksLastDaysParameter.name] * 86400
        intents = self.ctx[FreonIntentsParameter.name].split(',')
        unicode_type = type(u'')

        for table in self.ctx[FreonYtTablesParameter.name].split(','):
            querymarks_yt_path = querymarks_trie_path + '.' + table
            self._read_yt_table(self.ctx[FreonYtPathParameter.name] + '/' + table, querymarks_yt_path)

            logging.debug('Intents: {0}'.format(intents))
            with open(querymarks_yt_path, 'r') as yt_dump:
                for line in yt_dump:
                    try:
                        j = json.loads(line)
                        record_ts = int(j['ts'])
                        # Anything above 10**10 cannot be a timestamp in seconds.
                        if record_ts > 10**10:
                            raise Exception("Strange timestamp!")
                        if record_ts + max_age_seconds < now_ts:
                            continue
                        # The value may arrive as a JSON document serialized into a string.
                        if isinstance(j['value'], unicode_type):
                            j['value'] = json.loads(j['value'])
                        query = j['value']['query']
                        queries.append(query)

                        for intent in intents:
                            self._try_to_add_mark(query_dict, (query, self._transform_country(j['country'])), intent, j)
                            # Marks for 'ru' are also stored under the wildcard domain.
                            if j['country'] == 'ru':
                                self._try_to_add_mark(query_dict, (query, '*'), intent, j)

                    except Exception as e:
                        logging.debug("Failed to handle record with error '{0}'!".format(e))
        logging.debug("Count of queries from YT is {0}.".format(len(queries)))

        svn_hand_querymarks_arcadia_url = "arcadia:/arc/trunk/data/extsearch/video/quality/querymarks/hand_querymarks.lst"
        hand_querymarks_path = querymarks_trie_path + '.hand'
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                Arcadia.export(svn_hand_querymarks_arcadia_url, hand_querymarks_path)
            except Exception as e:
                logging.debug("Failed to export hand marks from arcadia with error '{0}'!".format(e))
                if attempt == max_attempts:
                    # Bare raise keeps the original traceback.
                    raise
                time.sleep(120)
            else:
                break
        logging.debug("Hand marks successfully exported!")

        # Record layout used below: query, country, last valid date (YYYY-MM-DD), JSON dict of intents.
        today = datetime.datetime.now().strftime("%Y-%m-%d")
        for record in fu.read_line_by_line(hand_querymarks_path):
            try:
                parts = record.split('\t')
                # ISO dates compare correctly as strings; skip expired records.
                if today > parts[2]:
                    continue

                query = parts[0].decode('utf8')
                queries.append(query)
                country = self._transform_country(parts[1])
                hand_intents = json.loads(parts[3])

                for intent in hand_intents:
                    self._add_hand_mark(query_dict, (query, country), intent, hand_intents[intent])
                    if country == 'ru':
                        self._add_hand_mark(query_dict, (query, '*'), intent, hand_intents[intent])

            except Exception as e:
                logging.debug("Failed to handle hand made record with error '{0}'!".format(e))
        logging.debug("Count of queries from YT and hand made is {0}.".format(len(queries)))

    def _generate_wizardings(self, wizard_queries_path, queries):
        """
            Write the queries to a resource and start a GET_WIZARD_RESPONSES subtask for them.

            Each query is written UTF-8 encoded with a tab and '225' appended
            (presumably a region id - confirm). The subtask id is stored in
            ``self.ctx['child_tasks_ids']`` and the task then waits for it.
        """
        wizard_queries = self.create_resource(
            "Queries for wizarding",
            wizard_queries_path,
            resource_types.PLAIN_TEXT_QUERIES,
        )
        query_lines = [q.encode('utf8') + '\t225' for q in queries]
        fu.write_lines(wizard_queries.path, query_lines)
        self.mark_resource_ready(wizard_queries)
        logging.debug("Wizard queries successfully created!")

        subtask_parameters = {
            ResponseProvider.name: "remote",
            wp.Queries.name: wizard_queries.id,
            wp.ExtraQueryParams.name: "&wizclient=videofresh&markup=layers=Video&format=json"
        }
        wizarding_task = self.create_subtask(
            task_type="GET_WIZARD_RESPONSES",
            description="Wizardings for video querymarks",
            priority=self.priority,
            inherit_notifications=True,
            input_parameters=subtask_parameters
        )

        # Remember the child so a later run can tell that the subtask already exists.
        self.ctx['child_tasks_ids'] = [wizarding_task.id]
        self.wait_all_tasks_completed(self.ctx['child_tasks_ids'])

    def _prepare_queries(self, queries, new_queries, old_wizardings):
        try:
            old_wizardings.extend(fu.read_lines(self._tool(resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_WIZARDINGS)))
        except Exception as e:
            logging.debug("Work without old wizardings because of error {0}.".format(e))

        queries_set = set(queries)
        for wizarding in old_wizardings:
            try:
                j = json.loads(wizarding)
                query = j['session_info']['UR']
                if query in queries_set:
                    queries_set.remove(query)
            except Exception as e:
                logging.debug("Failed to parse old wizardings with error '{0}'!".format(e))
        new_queries.extend(list(queries_set))
        logging.debug("Count of wizarding queries is {0}.".format(len(new_queries)))

    def _parse_wizardings(self, collected_wizardings_path, vnorm_dopp, wizardings):
        """
            Merge the subtask's wizard responses into ``wizardings`` and extract normalized queries.

            :param vnorm_dopp: filled in place; maps ``session_info.UR`` to
                ``rules.Video.vnorm_dopp`` of each parsable response.
            :param wizardings: list extended in place with the new responses;
                the whole list is then written to a new wizardings resource.
        """
        subtask_id = self.ctx['child_tasks_ids'][0]
        found_resources = apihelpers.list_task_resources(subtask_id, resource_types.WIZARD_RESPONSES_RESULT)
        wizardings_resource_id = found_resources[0]
        channel.task.sync_resource(wizardings_resource_id)
        wizardings_resource = channel.sandbox.get_resource(wizardings_resource_id)
        wizardings.extend(fu.read_lines(wizardings_resource.path))

        for raw_wizarding in wizardings:
            try:
                parsed = json.loads(raw_wizarding)
                source_query = parsed['session_info']['UR']
                normalized_query = parsed['rules']['Video']['vnorm_dopp']
                vnorm_dopp[source_query] = normalized_query
            except Exception as e:
                logging.debug("Failed to parse wizardings with error '{0}'!".format(e))

        collected_wizardings = self.create_resource(
            "Collected wizardings",
            collected_wizardings_path,
            resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_WIZARDINGS,
        )
        fu.write_lines(collected_wizardings.path, wizardings)
        logging.debug("Collected wizardings successfully created!")

    def _generate_plain(self, querymarks_plain_path, query_dict, vnorm_dopp):
        domains = set()
        with open(querymarks_plain_path, 'w') as outfile:
            for record in query_dict:
                try:
                    marks = query_dict[record]['marks']
                    (query, tld) = record
                    domains.add(tld)
                    if vnorm_dopp.get(query) is None:
                        raise Exception("vnorm_dopp lost!")
                    outfile.write('\t'.join(['#query', tld.encode('utf8'), vnorm_dopp[query].encode('utf8'), json.dumps(marks)]) + '\n')

                except Exception as e:
                    try:
                        logging.debug("Lost vnorm_dopp for query '{0}' with error '{1}'!".format(query.encode('utf8'), e))
                    except Exception as e:
                        logging.debug("Lost vnorm_dopp with error '{0}'!".format(e))

        if len(query_dict) < 2:
            raise Exception("So little count of querymark records in plain data!")
        if len(query_dict) > 100000:
            raise Exception("Too many count of querymark records in plain data!")
        if len(domains) < 3:
            raise Exception("So little count of different domains in plain data!")

    def _generate_trie(self, querymarks_plain_path, querymarks_trie_path):
        """
            Build the trie from the plain file, run the viewer on it and register the trie resource.

            The indexer tool converts ``querymarks_plain_path`` into
            ``querymarks_trie_path``; the viewer tool is then run on the result
            (presumably as a readability check - confirm).
        """
        indexer_cmd = [
            self._tool(resource_types.VIDEO_QUERYDATAINDEXER_EXECUTABLE),
            "-S", "fresh_querymarks",
            "-N", "tld,exacturl",
            "-i", querymarks_plain_path,
            "-o", querymarks_trie_path,
            "-j",
        ]
        process.run_process(indexer_cmd, outputs_to_one_file=False, log_prefix="indexer")

        viewer_cmd = [
            self._tool(resource_types.VIDEO_QUERYDATAVIEWER_EXECUTABLE),
            "-H",
            "-i", querymarks_trie_path,
        ]
        process.run_process(viewer_cmd, outputs_to_one_file=False, log_prefix="viewer")

        trie_attributes = {'video_testenv_autoupdate_fresh_querymarks_trie': 'yes'}
        self.create_resource(
            self.descr,
            querymarks_trie_path,
            resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_TRIE,
            attributes=trie_attributes
        )
        logging.debug("Trie resource successfully created!")

    def _generate_version(self, querymarks_version_path):
        """
            Write the current local date and time to the version file and register it as a resource.

            The timestamp is truncated to whole seconds before formatting.
        """
        timestamp = int(time.time())
        # Close (and flush) the file explicitly before the resource is created from it.
        with open(querymarks_version_path, 'w') as version_file:
            version_file.write(str(datetime.datetime.fromtimestamp(timestamp)))
        self.create_resource(self.descr, querymarks_version_path, resource_types.VIDEO_MIDDLESEARCH_FRESH_QUERYMARKS_VERSION)
        logging.debug("Version resource successfully created!")


# Module-level alias of the task class (presumably the name the Sandbox loader looks up - confirm).
__Task__ = VideoFreshReleaseQueryMarksTrie
