# -*- coding: utf-8 -*-

import os
import sys
import logging
import uuid

from sandbox import common
import sandbox.common.types.resource as ctr

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.projects.common.utils import check_if_tasks_are_ok
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.channel import channel
from sandbox.projects.common.arcadia import sdk as arcadia_sdk
from sandbox.projects.common.utils import set_resource_attributes
from sandbox.projects import resource_types as rst
import sandbox.projects.common.constants as consts
from sandbox.projects.common import apihelpers
import task_params as tp
import sandbox.sandboxsdk.svn as sdk_svn

import sandbox.projects.logs.common as us_ci
from sandbox.projects.logs import resources
from sandbox.projects.geobase.Geodata6BinXurmaStable import resource as geo_resource


def GetResourceIdByAttrs(attrs, res_type):
    """Return the id of the last resource of ``res_type`` found for ``attrs``.

    The lookup is delegated to ``apihelpers.get_last_resource_with_attrs``,
    called with ``all_attrs=True`` and ``params={'limit': 1}``.

    :param attrs: dict of resource attributes passed to the lookup.
    :param res_type: resource type to search for.
    :return: ``id`` of the found resource, or ``None`` when the lookup
        returns a falsy value.
    """
    found = apihelpers.get_last_resource_with_attrs(
        res_type,
        attrs,
        all_attrs=True,
        params={'limit': 1},
    )
    if not found:
        return None
    return found.id


class PriemkaCreateSessionsTask(SandboxTask):
    """Sandbox task that builds sessions with two sets of binaries and diffs them.

    ``on_execute`` obtains binaries for two revisions (``svn_path1`` and
    ``svn_path2``), starts one ``CREATE_SESSIONS_TESTS`` subtask per revision
    and finally a ``HASHED_SESSIONS_DIFF`` subtask over the produced tables.
    """

    type = "PRIEMKA_CREATE_SESSIONS"

    # Task parameters are declared in the sibling ``task_params`` module.
    input_parameters = tp.CommonParams + \
                        tp.LeftRevisionParams + \
                        tp.RightRevisionParams

    # Short binary key -> resource type used when searching for prebuilt
    # resources (see GetBinariesBySvnPath).
    binaries_key_to_res_type = {
        'sessions_diff': resources.HASHED_SESSIONS_DIFF_EXECUTABLE,
        'create_sessions': resources.CREATE_SESSIONS_EXECUTABLE,
        'user_sessions_errors': resources.USER_SESSIONS_ERRORS_EXECUTABLE,
        'mr_ls': rst.MR_LS,
        'mr_cat': rst.MR_CAT,
        'mapreduce_yt': rst.MAPREDUCE_YT_EXECUTABLE,
        'yt': rst.CREATE_SESSIONS_YT_PYTHON_EXECUTABLE,
    }

    # Binary keys looked up for both revisions in 'from_auto_packages' mode.
    revision_auto_pick_binaries_keys_common = [
        'create_sessions',
        'user_sessions_errors',
        'mr_ls',
        'mr_cat',
        'mapreduce_yt',
        'yt',
    ]
    # Extra binary keys looked up per revision; the diff tool is only taken
    # from the second revision.
    revision_auto_pick_binaries_keys_custom = {
        'svn_path1': [],
        'svn_path2': ['sessions_diff']
    }

    # Per revision: binary key -> name of the task parameter that carries the
    # manually chosen resource (presumably declared in ``task_params`` - TODO
    # confirm).
    # NOTE(review): neither revision has a 'yt' entry here, while
    # CalculateDiffHashed reads RIDs_DICT2['yt'] - confirm that manual mode is
    # expected to work with the diff step.
    revision_manual_packages_binaries_params = {
        'svn_path1': {
            'create_sessions': 'branch1_create_sessions_binary',
            'user_sessions_errors': 'branch1_user_sessions_errors_binary',
            'mr_ls': 'branch1_mr_ls_binary',
            'mr_cat': 'branch1_mr_cat_binary',
            'mapreduce_yt': 'branch1_mapreduce_yt_binary',
        },

        'svn_path2': {
            'sessions_diff': 'branch2_sessions_diff_binary',
            'create_sessions': 'branch2_create_sessions_binary',
            'user_sessions_errors': 'branch2_user_sessions_errors_binary',
            'mr_ls': 'branch2_mr_ls_binary',
            'mr_cat': 'branch2_mr_cat_binary',
            'mapreduce_yt': 'branch2_mapreduce_yt_binary',
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base task and prepare the subprocess environment.

        ``self.env`` is a copy of ``os.environ`` extended with ``MR_NET_TABLE``
        and ``MR_USER``; ``RunProcess`` passes it to ``run_process``.
        """
        SandboxTask.__init__(self, *args, **kwargs)

        process_env = dict(os.environ)
        process_env.update({
            'MR_NET_TABLE': 'ipv6',
            'MR_USER': 'userdata',
        })
        self.env = process_env

        logging.info('construction complete')

    def GetBinariesBySvnPath(self, svn_path_key):
        """Find already built binaries for the revision stored in the context.

        For every key from ``revision_auto_pick_binaries_keys_common`` plus the
        revision specific keys, the last resource of the matching type whose
        ``build_from_svn_url`` attribute equals ``self.ctx[svn_path_key]`` is
        looked up.

        :param svn_path_key: context key holding the svn path
            (``'svn_path1'`` or ``'svn_path2'``).
        :return: dict mapping binary key -> resource id.
        :raises Exception: when a resource is missing for at least one key;
            the message contains one line per missing key.
        """
        svn_path = self.ctx[svn_path_key]
        lookup_attrs = {'build_from_svn_url': svn_path}
        wanted_keys = (
            self.revision_auto_pick_binaries_keys_common +
            self.revision_auto_pick_binaries_keys_custom[svn_path_key]
        )

        resource_ids = {}
        for key in wanted_keys:
            resource_ids[key] = GetResourceIdByAttrs(lookup_attrs, self.binaries_key_to_res_type[key])

        # Collect every failure first so that one exception reports them all.
        missing = [
            "Packages for {0} with svn_path={1} not found".format(key, svn_path)
            for key, res_id in resource_ids.items()
            if res_id is None
        ]
        if missing:
            raise Exception("\n".join(missing))

        return resource_ids

    def CreateBuildFromRevisionTask(self, svn_path):
        """Start a ``BUILD_SEARCH`` subtask building all binaries from ``svn_path``.

        :param svn_path: value passed to the subtask under
            ``consts.ARCADIA_URL_KEY``.
        :return: the object returned by ``SandboxTask.create_subtask``.
        """
        logging.info('CreateBuildFromRevisionTask begin')

        # Every target the subtask is asked to build.
        build_targets = (
            'build_create_sessions',
            'build_mr_cat',
            'build_mr_ls',
            'build_user_sessions_errors',
            'build_mapreduce-yt',
            'build_yt',
            'build_report_releases_tester',
            'build_baobab_counter_validator',
            'build_hashed_sessions_diff',
        )
        params = dict.fromkeys(build_targets, True)
        params.update({
            consts.ARCADIA_URL_KEY: svn_path,
            'build_system': 'semi_distbuild',
            'build_type': 'relwithdebinfo',
            'use_aapi_fuse': True,
            'aapi_fallback': True,
        })

        subtask = SandboxTask.create_subtask(
            self,
            task_type='BUILD_SEARCH',
            input_parameters=params,
            description='Bulding binaries for createsessions test'
        )

        logging.info('CreateBuildFromRevisionTask end')

        return subtask

    def MakeBinariesResourseIdsDictionary(self, task_id, svn_url):
        if task_id is None:
            return

        task = channel.sandbox.get_task(task_id)

        result = {
            'sessions_diff': task.ctx['hashed_sessions_diff_resource_id'],
            'create_sessions': task.ctx['create_sessions_resource_id'],
            'user_sessions_errors': task.ctx['user_sessions_errors_resource_id'],
            'mr_ls': task.ctx['mr_ls_resource_id'],
            'mr_cat': task.ctx['mr_cat_resource_id'],
            'mapreduce_yt': task.ctx['mapreduce-yt_resource_id'],
            'yt': task.ctx['yt_resource_id'],
            'report_releases_tester': task.ctx['report_releases_tester_resource_id'],
            'baobab_counter_validator': task.ctx['baobab_counter_validator_resource_id'],
        }

        for key, res_id in result.iteritems():
            set_resource_attributes(res_id, {"build_from_svn_url": svn_url, "ttl": "30"})

        return result

    def GetBinariesForRevision(self, RIDs_key, svn_path_key, revision_build_type_key):
        if self.ctx[revision_build_type_key] == 'from_auto_packages':
            self.ctx[RIDs_key] = self.GetBinariesBySvnPath(svn_path_key)
        elif self.ctx[revision_build_type_key] == 'from_svn':
            task1 = self.CreateBuildFromRevisionTask(self.ctx[svn_path_key])
            return task1.id
        else:
            self.ctx[RIDs_key] = {
                (key, self.ctx[key])
                for key in self.revision_manual_packages_binaries_params[svn_path_key]
            }

        return None

    def BuildBinaries(self):
        """Make sure resource ids of both revisions are present in the context.

        When ``build_subtasks_ids`` is not in the context yet, binaries are
        resolved for both revisions, the ids of the started build subtasks are
        stored and the task waits for them. Otherwise the stored subtasks are
        checked with ``check_if_tasks_are_ok``. Afterwards the missing
        ``RIDs_DICT1`` / ``RIDs_DICT2`` entries are filled from the build
        subtasks and the ``mapreduce_yt`` resource of the second revision is
        synced; its path is kept in ``self.MAPREDUCE_YT_PATH``.
        """
        if 'build_subtasks_ids' in self.ctx:
            check_if_tasks_are_ok(self.ctx['build_subtasks_ids'])
        else:
            self.ctx['build_subtasks_ids_dict'] = {
                'svn_path1': self.GetBinariesForRevision('RIDs_DICT1', 'svn_path1', 'revision1_build_type'),
                'svn_path2': self.GetBinariesForRevision('RIDs_DICT2', 'svn_path2', 'revision2_build_type')
            }

            # Only revisions built from svn produce a subtask id.
            started_ids = []
            for subtask_id in self.ctx['build_subtasks_ids_dict'].values():
                if subtask_id is not None:
                    started_ids.append(subtask_id)
            self.ctx['build_subtasks_ids'] = started_ids

            SandboxTask.wait_all_tasks_completed(self, self.ctx['build_subtasks_ids'])

        for rids_key, svn_path_key in (('RIDs_DICT1', 'svn_path1'), ('RIDs_DICT2', 'svn_path2')):
            if rids_key in self.ctx:
                continue
            self.ctx[rids_key] = self.MakeBinariesResourseIdsDictionary(
                self.ctx['build_subtasks_ids_dict'][svn_path_key],
                self.ctx[svn_path_key]
            )

        self.MAPREDUCE_YT_PATH = SandboxTask.sync_resource(self, self.ctx['RIDs_DICT2']['mapreduce_yt'])

    def RunProcess(self, cmd):
        """Run ``cmd`` through the shell and return what it wrote to stdout.

        :param cmd: list of strings; they are joined with single spaces into
            one shell command line.
        :return: captured stdout of the process.
        :raises Exception: carrying the captured stderr when the return code
            is not zero.
        """
        command_line = ' '.join(cmd)
        proc = run_process(
            command_line,
            outs_to_pipe=True,
            check=False,
            shell=True,
            wait=False,
            environment=self.env
        )
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode == 0:
            return stdout_data

        raise Exception(stderr_data)

    def ReadMeatPathOnYtCommand(self, data_folder):
        """Read ``<data_folder>/freshmeat`` with the synced mapreduce-yt binary.

        :param data_folder: folder path without a trailing slash.
        :return: stdout of the command as returned by ``RunProcess``.
        """
        return self.RunProcess([
            self.MAPREDUCE_YT_PATH,
            '-server',
            self.DEF_MR_SERVER,
            '--read',
            data_folder + '/freshmeat',
        ])

    def GetMeatPath(self):
        if 'meatpath' not in self.ctx:
            data_folder = self.ctx['data_folder']
            if len(data_folder) > 0:
                if data_folder[-1] == '/':
                    data_folder = data_folder[:-1]

            process_result = self.ReadMeatPathOnYtCommand(data_folder)
            meatpath = process_result.strip()
            if len(meatpath) == 0:
                self.ctx['meatpath'] = data_folder
                meatpath = data_folder
            else:
                self.ctx['meatpath'] = meatpath

        else:
            meatpath = self.ctx['meatpath']

        logging.info('meatpath: ' + meatpath)

        return meatpath

    def GetLogsList(self):
        if 'logs_list' not in self.ctx or len(self.ctx['logs_list']) == 0:
            if 'logs_list_manual' not in self.ctx or len(self.ctx['logs_list_manual']) == 0:
                resource_type = rst.YT_SESSIONS_FRESHDATA_INFO
                resource_object = common.rest.Client().resource.read(
                    type=str(resource_type),
                    state=ctr.State.READY,
                    attrs={'debug': 'False'},
                    limit=1
                )

                logging.info('items count: ' + str(len(resource_object['items'])))
                resource_path = self.sync_resource(resource_object['items'][0]['id'])
                with open(resource_path, 'r') as resource_content_file:
                    resource_content = resource_content_file.read().split('\n')

                if len(resource_content) > 1:
                    self.ctx['logs_list'] = resource_content[1]
                else:
                    return ''
            else:
                self.ctx['logs_list'] = self.ctx['logs_list_manual']

        return self.ctx['logs_list']

    def PrepareEnvironment(self):
        """Return ids of the last data resources needed by the sessions subtasks.

        :return: dict with the keys ``geodata``, ``blockstat``, ``beta_list``,
            ``direct_pageids``, ``direct_resourceno_dict`` and
            ``direct_ads_descriptions``, each mapped to the ``id`` of the
            resource returned by ``apihelpers.get_last_resource``.
        """
        resource_type_by_name = (
            ('geodata', geo_resource.GEODATA6BIN_XURMA_STABLE),
            ('blockstat', resources.SESSIONS_BLOCKSTAT),
            ('beta_list', resources.SESSIONS_BETA_LIST),
            ('direct_pageids', resources.SESSIONS_DIRECT_PAGEIDS),
            ('direct_resourceno_dict', resources.SESSIONS_DIRECT_RESOURCENO_DICT),
            ('direct_ads_descriptions', resources.SESSIONS_DIRECT_ADS_DESCRIPTIONS),
        )

        data_ids = {}
        for name, resource_type in resource_type_by_name:
            data_ids[name] = apihelpers.get_last_resource(resource_type).id

        return data_ids

    def CreateSessionsByRevisions(self, task_infos, meatpath):
        """Start (or pick up) one ``CREATE_SESSIONS_TESTS`` subtask per entry.

        A subtask is created only when ``info['task_variable_name']`` is not
        in the context yet; its id is then stored under that key. If at least
        one subtask was created the task waits for all of them, otherwise the
        known subtasks are checked with ``check_if_tasks_are_ok``.

        :param task_infos: list of dicts with the keys ``task_variable_name``,
            ``binary_ids``, ``data_ids`` and ``destination_suffix``.
        :param meatpath: value passed to the subtasks as ``meatpath``.
        :return: list with the ``processed_logs_list`` context value of every
            subtask, in the order of ``task_infos``.
        """
        subtask_ids = []
        created_any = False
        for info in task_infos:
            ctx_key = info['task_variable_name']
            if ctx_key in self.ctx:
                subtask_ids.append(self.ctx[ctx_key])
                continue

            binaries = info['binary_ids']
            logging.info('Task for creating sessions with binary ' + str(binaries['create_sessions']))
            created_any = True
            data = info['data_ids']
            subtask = SandboxTask.create_subtask(
                self,
                task_type='CREATE_SESSIONS_TESTS',
                input_parameters={
                    'kill_timeout': '40000',
                    'create_sessions_id': binaries['create_sessions'],
                    'user_sessions_errors_id': binaries['user_sessions_errors'],
                    'mr_cat_id': binaries['mr_cat'],
                    'mr_ls_id': binaries['mr_ls'],
                    'geodata_id': data['geodata'],
                    'blockstat_id': data['blockstat'],
                    'beta_list_id': data['beta_list'],
                    'direct_pageids_id': data['direct_pageids'],
                    'direct_resourceno_dict_id': data['direct_resourceno_dict'],
                    'direct_ads_descriptions_id': data['direct_ads_descriptions'],
                    'mr_cluster': self.DEF_MR_SERVER,
                    'meatpath': meatpath,
                    'logs_list': self.ctx['logs_list'],
                    'dest_suffix': info['destination_suffix'],
                    'yt_token': self.ctx['yt_token'],
                    'yt_token_owner': self.ctx['yt_token_owner'],
                },
                description='Create sessions for testing'
            )

            self.ctx[ctx_key] = subtask.id
            subtask_ids.append(subtask.id)

        logging.info('waiting tasks')
        if created_any:
            SandboxTask.wait_all_tasks_completed(self, tasks=subtask_ids)
        else:
            check_if_tasks_are_ok(subtask_ids)
        logging.info('waiting done')

        finished_tasks = [channel.sandbox.get_task(subtask_id) for subtask_id in subtask_ids]

        return [finished.ctx['processed_logs_list'] for finished in finished_tasks]

    def CreateSessions(self, meatpath):
        """Create sessions with the binaries of both revisions.

        :param meatpath: meat path forwarded to the sessions subtasks.
        :return: list of the distinct entries of both subtasks'
            ``processed_logs_list`` values (order is not defined).
        """
        data_ids = self.PrepareEnvironment()

        # One description per revision; only the numeric suffix differs.
        task_infos = [
            {
                'task_variable_name': 'create_sessions_task_' + number,
                'binary_ids': self.ctx['RIDs_DICT' + number],
                'data_ids': data_ids,
                'destination_suffix': self.ctx['sess_suffix' + number]
            }
            for number in ('1', '2')
        ]

        per_revision_logs = self.CreateSessionsByRevisions(task_infos, meatpath)
        merged_logs = per_revision_logs[0] + per_revision_logs[1]

        return list(set(merged_logs))

    def CalculateDiffHashed(self, work_directory, logs_list, dest_path):
        """Start the ``HASHED_SESSIONS_DIFF`` subtask, or check the started one.

        When ``diff_task_id`` is already in the context, only the state of
        that subtask is checked. Otherwise one tuple per log is prepared
        (session table of each revision, destination path, log table of the
        first revision, log name, validation result path of each revision),
        the list is passed to the subtask as ``str(list)`` in ``input_data``
        and the task waits for the subtask.

        :param work_directory: base path given to both ``YtPathsManager``
            instances.
        :param logs_list: iterable of log names.
        :param dest_path: path joined with every log name to form the
            per-log destination.
        """
        if 'diff_task_id' in self.ctx:
            check_if_tasks_are_ok([self.ctx['diff_task_id']])
            return

        with arcadia_sdk.mount_arc_path("arcadia:/arc/trunk/arcadia/quality/user_sessions/reactor/", use_arc_instead_of_aapi=True) as arc_dir:
            sys.path.append(arc_dir)

            first_paths = us_ci.YtPathsManager(work_directory, self.ctx['sess_suffix1'], need_checkout=False)
            second_paths = us_ci.YtPathsManager(work_directory, self.ctx['sess_suffix2'], need_checkout=False)

            diff_inputs = []
            for log in logs_list:
                first_sessions = first_paths.SessionPath(log)
                second_sessions = second_paths.SessionPath(log)

                first_validation = first_paths.ValidationResultPath(log)
                second_validation = second_paths.ValidationResultPath(log)

                diff_inputs.append((
                    first_sessions,
                    second_sessions,
                    us_ci.MRPathJoin(dest_path, log),
                    first_paths.LogTablePath(log),
                    log,
                    first_validation,
                    second_validation,
                ))

            diff_binaries = self.ctx['RIDs_DICT2']
            diff_task = SandboxTask.create_subtask(
                self,
                task_type='HASHED_SESSIONS_DIFF',
                input_parameters={
                    'diff_tool_id': diff_binaries['sessions_diff'],
                    'mr_ls_id': diff_binaries['mr_ls'],
                    'yt_tool_id': diff_binaries['yt'],
                    'input_data': str(diff_inputs),
                    'kill_timeout': '40000',
                    'mr_cluster': self.DEF_MR_SERVER,
                    'yt_token': self.ctx['yt_token'],
                    'yt_token_owner': self.ctx['yt_token_owner'],
                },
                description='Calculating sessions diff'
            )

            self.ctx['diff_task_id'] = diff_task.id
            SandboxTask.wait_all_tasks_completed(self, [diff_task])

    def CreatePathFromReleaseParam(self, path_template, release_param):
        if not len(release_param):
            raise Exception("You are using auto generated param values, so release param shouldn't be empty")

        return path_template.format(release_param)

    def MakeDestinationFolder(self, meatpath):
        if self.ctx['destination_folder_selector'] == 'auto':
            dest_folder = self.CreatePathFromReleaseParam("us_release/{0}", self.ctx['release_param'])
            dest_folder = meatpath + dest_folder
        else:
            dest_folder = self.ctx['destination_folder']

        if dest_folder == '':
            dest_folder = meatpath + 'diff/' + str(uuid.uuid1()) + '/'

        logging.info('? nonslashed dest folder:' + dest_folder)
        if dest_folder[-1:] != '/':
            dest_folder += '/'
        logging.info('slashed dest folder:' + dest_folder)

        return dest_folder

    def on_execute(self):
        """Task entry point: build binaries, create sessions, diff them.

        Steps:

        1. fill the auto generated session suffixes and make sure the two
           suffixes differ;
        2. set up the YT proxy and token in the subprocess environment;
        3. obtain the binaries of both revisions;
        4. resolve the meat path and the logs list;
        5. create sessions with both sets of binaries;
        6. compute the destination folder and start the diff.

        The helper methods keep their progress in ``self.ctx`` and skip work
        whose result is already stored there (presumably because this method
        runs again after every wait for subtasks - TODO confirm).

        :raises Exception: when both suffixes are equal or the logs list is
            empty.
        """
        for number, subfolder in (('1', 'first'), ('2', 'second')):
            if self.ctx['sess_suffix' + number + '_folder_selector'] == 'auto':
                self.ctx['sess_suffix' + number] = self.CreatePathFromReleaseParam(
                    "us_release/{0}/" + subfolder,
                    self.ctx['release_param']
                )

        if self.ctx['sess_suffix1'].strip() == self.ctx['sess_suffix2'].strip():
            raise Exception('suffixes for revisions must be different')

        logging.info("sess_suffix1:" + self.ctx["sess_suffix1"])
        logging.info("sess_suffix2:" + self.ctx["sess_suffix2"])

        self.DEF_MR_SERVER = self.ctx['mr_cluster']
        self.env['YT_PROXY'] = self.DEF_MR_SERVER

        # Fall back to the task owner when no token owner is configured.
        token_owner = self.ctx.get(tp.YTTokenOwner.name, '') or self.owner
        self.env['YT_TOKEN'] = self.get_vault_data(token_owner, self.ctx[tp.YTToken.name])

        logging.info('Building binaries')
        self.BuildBinaries()

        logging.info('Calculating meatpath')
        meatpath = self.GetMeatPath()
        if meatpath[-1] != '/':
            meatpath += '/'

        logging.info('Getting logs list')
        logs_list = self.GetLogsList()
        logging.info('logs_list: ' + logs_list)
        if len(logs_list) == 0:
            raise Exception('empty logs list')

        logging.info('Creating sessions')
        processed_logs_list = self.CreateSessions(meatpath)

        logging.info('Creating diff')
        destination_folder = self.MakeDestinationFolder(meatpath)
        self.ctx['destination_folder'] = destination_folder
        logging.info("destination_folder:" + self.ctx["destination_folder"])

        self.CalculateDiffHashed(meatpath, processed_logs_list, self.ctx['destination_folder'])


# Module-level alias of the task class (presumably the name the Sandbox task
# loader looks for in a task module - TODO confirm).
__Task__ = PriemkaCreateSessionsTask
