# -*- coding: utf-8 -*-
import datetime as dt
import json
import logging
import os
import re
import subprocess
from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.svn import Arcadia


# Matches either an empty string or a yyyy-mm-dd date: the whole date group is optional.
DATE_REGEXP = '^([0-9]{4}-[0-9]{2}-[0-9]{2})?$'
# strptime/strftime format used for every date handled in this module.
DATE_FORMAT = '%Y-%m-%d'
# NOTE: computed once, when this module is imported, not when the task executes.
YESTERDAY = (dt.date.today() - dt.timedelta(days=1)).strftime(DATE_FORMAT)
# Default values for the `yt_prefix` and `yt_token_vault_name` task parameters.
DEFAULT_YT_PREFIX = '//home/haas/gencfg_hosts'
DEFAULT_YT_TOKEN_VAULT_NAME = 'YT_TOKEN'


class HaasCmisExportGencfgGroupsToYt(sdk2.Task):
    """ Run me to export gencfg hosts-groups-metaprojects of any revision to YT. """

    # Local paths (passed to self.path()): the two checkout targets and the
    # groups directory inside the db checkout.
    _gencfg_svn_path = 'gencfg'
    _db_svn_path = 'gencfg/db'
    _groups_svn_path = 'gencfg/db/groups'

    # Repository paths appended to "<ARCADIA_BASE_URL>/trunk/" to build checkout URLs.
    # Do not confuse with the similarly named local paths above.
    _svn_gencfg_path = 'arcadia/gencfg'
    _svn_db_path = 'data/gencfg_db'

    # Intermediate files (passed to self.path()): the raw `svn log` output and
    # the JSON-lines file with the selected date/revision.
    _svn_log_file_path = 'revisions.log'
    _rev_dates_file_path = 'rev_dates.json'

    # Name of the per-group card file read by _read_metaprj.
    _card_file_name = 'card.yaml'

    class Requirements(sdk2.Task.Requirements):
        # Pip packages requested for the task; presumably they provide the
        # `yt.wrapper` module that is imported inside a method body rather than
        # at module top -- TODO confirm.
        # The right-hand side refers to the imported `environments` module; the
        # class attribute of the same name is only bound after the list is built.
        environments = [
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings')
        ]

    class Parameters(sdk2.Parameters):
        # Date (yyyy-mm-dd) whose gencfg revision is exported. DATE_REGEXP also
        # accepts an empty value; on_execute then substitutes yesterday's date.
        # The default is the module-level YESTERDAY constant.
        date_of_rev = sdk2.parameters.StrictString('Date to find revision of gencfg database', regexp=DATE_REGEXP,
                                                   default=YESTERDAY)
        # When set, on_execute returns without exporting unless the date is the 1st of a month.
        only_first_day = sdk2.parameters.Bool('Skip all days except first day of month', default=False)
        # YT directory; the output table path is "<yt_prefix>/<date_of_rev>".
        yt_prefix = sdk2.parameters.String('YT directory to save yyyy-mm-dd table', required=True,
                                           default=DEFAULT_YT_PREFIX)
        # Name of the Vault entry (looked up for the task owner) holding the YT token.
        yt_token_vault_name = sdk2.parameters.String('YT Token secret name', required=True,
                                                     default=DEFAULT_YT_TOKEN_VAULT_NAME)

    @property
    def gencfg_path(self):
        """Return ``self.path()`` of the gencfg checkout directory, as a string."""
        checkout_dir = self.path(self._gencfg_svn_path)
        return str(checkout_dir)

    @property
    def db_path(self):
        """Return ``self.path()`` of the gencfg database checkout directory, as a string."""
        checkout_dir = self.path(self._db_svn_path)
        return str(checkout_dir)

    @property
    def groups_path(self):
        """Return ``self.path()`` of the ``groups`` directory inside the db checkout, as a string."""
        groups_dir = self.path(self._groups_svn_path)
        return str(groups_dir)

    @property
    def svn_gencfg_url(self):
        """Return the checkout URL of the gencfg sources under the Arcadia trunk."""
        url_template = '{}/trunk/{}'
        return url_template.format(Arcadia.ARCADIA_BASE_URL, self._svn_gencfg_path)

    @property
    def svn_db_url(self):
        """Return the checkout URL of the gencfg database under the Arcadia trunk."""
        url_template = '{}/trunk/{}'
        return url_template.format(Arcadia.ARCADIA_BASE_URL, self._svn_db_path)

    @property
    def svn_log_file_path(self):
        """Return ``self.path()`` of the file that receives the ``svn log`` output, as a string."""
        log_file = self.path(self._svn_log_file_path)
        return str(log_file)

    @property
    def rev_dates_file_path(self):
        """Return ``self.path()`` of the JSON-lines file with the selected revision, as a string."""
        rev_dates_file = self.path(self._rev_dates_file_path)
        return str(rev_dates_file)

    def on_execute(self):
        """Export the gencfg group/host/metaproject mapping for one date to YT.

        Steps: resolve the requested date, check out gencfg and its database,
        pick the revision for that date from ``svn log``, collect one row per
        (group, host) and write the rows to the ``<yt_prefix>/<date>`` table.

        Returns early, without exporting, when ``only_first_day`` is set and
        the date is not the first day of a month.

        Raises:
            SandboxTaskFailureError: if no rows were collected.
        """
        logging.info('Hello, Sandbox!')
        logging.info('self.Parameters.date_of_rev = {}'.format(self.Parameters.date_of_rev))
        # `or ''` guards against an unset (None) parameter value.
        date_of_rev = (self.Parameters.date_of_rev or '').encode('utf8').strip()
        if not date_of_rev:
            # Compute "yesterday" at execution time: the module-level YESTERDAY
            # constant is frozen at import time and may be stale.
            date_of_rev = (dt.date.today() - dt.timedelta(days=1)).strftime(DATE_FORMAT)
        logging.info('date_of_rev = {}'.format(date_of_rev))
        dt_of_rev = dt.datetime.strptime(date_of_rev, DATE_FORMAT)
        logging.info('dt_of_rev = {}'.format(dt_of_rev))
        logging.info('self.Parameters.only_first_day = {}'.format(self.Parameters.only_first_day))
        logging.info('dt_of_rev.day = {}'.format(dt_of_rev.day))
        if self.Parameters.only_first_day and dt_of_rev.day != 1:
            logging.info('Skipping export: only_first_day is set and the date is not the 1st')
            return

        Arcadia.checkout(self.svn_gencfg_url, self.gencfg_path)
        Arcadia.checkout(self.svn_db_url, self.db_path)

        self._svn_log_to_file(svn_path=self.groups_path, log_file_path=self.svn_log_file_path)
        self._get_revisions_from_log(date_of_rev=date_of_rev, log_file_path=self.svn_log_file_path,
                                     revisions_file_path=self.rev_dates_file_path)
        rows = self._get_hosts_from_revisions(date_of_rev=date_of_rev,
                                              revisions_file_path=self.rev_dates_file_path)
        if not rows:
            raise SandboxTaskFailureError('No rows to export')

        self._write_rows_to_yt(rows, date_of_rev)

    def _write_rows_to_yt(self, rows, date_of_rev):
        """Replace the ``<yt_prefix>/<date_of_rev>`` YT table with ``rows``.

        The table is removed, re-created with a fixed schema and filled inside
        a single ``yt_client.Transaction()`` block.

        Args:
            rows: list of dicts with the keys listed in the schema below.
            date_of_rev: yyyy-mm-dd string used as the table name.
        """
        # Imported here, not at module top: the package is requested through
        # Requirements.environments.
        from yt.wrapper import YtClient, JsonFormat
        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_token_vault_name)
        # Drop trailing slashes so a prefix like "//home/x/" does not produce "//home/x//<date>".
        yt_table = '{}/{}'.format(self.Parameters.yt_prefix.rstrip('/'), date_of_rev)
        yt_client = YtClient(proxy='hahn', token=yt_token, config={
            'create_table_attributes': {'compression_codec': 'brotli_8'}
        })
        yt_format = JsonFormat(attributes={'encode_utf8': False})
        yt_schema = [
            {"name": "group", "type": "string"},
            {"name": "host", "type": "string"},
            {"name": "metaprj", "type": "string"},
            {"name": "rev", "type": "uint64"},
            {"name": "rev_date", "type": "string"},
        ]
        with yt_client.Transaction():
            logging.info('Removing table {}'.format(yt_table))
            yt_client.remove(yt_table, force=True)
            logging.info('Creating table {} with schema {}'.format(yt_table, yt_schema))
            yt_client.create('table', yt_table, ignore_existing=True,
                             attributes={'schema': yt_schema, 'dynamic': False})
            logging.info('Writing {} rows to {}'.format(len(rows), yt_table))
            yt_client.write_table(yt_table, rows, format=yt_format)

    @classmethod
    def _svn_log_to_file(cls, svn_path, log_file_path):
        """Run ``svn log`` in ``svn_path`` and store its output in ``log_file_path``.

        The command is started without a shell, with ``svn_path`` as working
        directory, so the paths are never interpreted by a shell. Failure is
        detected by the exit code; stderr is captured for diagnostics.

        Args:
            svn_path: directory of the working copy to run ``svn log`` in.
            log_file_path: file that receives the command's stdout.

        Raises:
            SandboxTaskFailureError: if ``svn log`` exits with a non-zero code.
        """
        cmd = ['svn', 'log']
        logging.info('cmd = %s (cwd = %s, stdout -> %s)', ' '.join(cmd), svn_path, log_file_path)
        with open(log_file_path, 'wb') as log_file:
            p = subprocess.Popen(cmd, cwd=svn_path, stdout=log_file, stderr=subprocess.PIPE)
            # stdout goes straight to the file; only stderr is returned here.
            _, err = p.communicate()
        err_text = err.decode('utf-8', 'replace').strip() if err else ''
        if p.returncode != 0:
            logging.error('svn log exited with code %s: %s', p.returncode, err_text)
            raise SandboxTaskFailureError('Cmd error')
        if err_text:
            # Output on stderr with a zero exit code is treated as a warning only.
            logging.warning('svn log stderr: %s', err_text)

    @classmethod
    def _get_revisions_from_log(cls, date_of_rev, log_file_path, revisions_file_path):
        """Pick the revision to export for ``date_of_rev`` from an ``svn log`` dump.

        Scans the revision header lines (``r<N> | <author> | <yyyy-mm-dd ...``)
        and selects:

        * the highest revision committed on ``date_of_rev``, or
        * if there is none, the first revision encountered with an earlier date.

        The early ``break`` relies on the log being ordered newest-first (the
        default order of ``svn log``).

        The result is written to ``revisions_file_path`` as JSON lines of the
        form ``{"date": "yyyy-mm-dd", "rev": "<N>"}`` (``rev`` is a string). The
        file is left empty when no suitable revision is found.

        Args:
            date_of_rev: requested date as a yyyy-mm-dd string.
            log_file_path: file with the ``svn log`` output.
            revisions_file_path: output file, overwritten.

        Raises:
            ValueError: if ``date_of_rev`` does not match DATE_FORMAT.
        """
        revisions = {}
        date_of_rev_dt = dt.datetime.strptime(date_of_rev, DATE_FORMAT)
        # Group 1: revision number, group 2: commit date.
        rev_pattern = re.compile(r'^r(\d+) \| .+? \| (\d{4}-\d{2}-\d{2})(?:\s|$)')

        with open(log_file_path, 'r') as log_file:
            # Iterate lazily: the loop usually stops long before the end of the log.
            for line in log_file:
                header = rev_pattern.match(line)
                if not header:
                    continue
                logging.info('-' * 40)
                logging.info('line = {}'.format(line))

                rev = header.group(1)
                try:
                    rev_dt = dt.datetime.strptime(header.group(2), DATE_FORMAT)
                except ValueError:
                    # E.g. month 13: looks like a date but is not one.
                    logging.warning('Skipping line with unparsable date: %r', line)
                    continue
                logging.info('rev = {}, rev_dt = {}'.format(rev, rev_dt))

                if rev_dt > date_of_rev_dt:
                    continue
                if rev_dt < date_of_rev_dt:
                    # First revision older than the requested date: use it only
                    # when nothing was committed on the requested date itself.
                    if date_of_rev_dt not in revisions:
                        revisions[rev_dt] = rev
                    break

                # Revision committed on the requested date: keep the highest
                # one. Compare numerically -- as strings '999' > '1000'.
                known_rev = revisions.get(rev_dt)
                if known_rev is None or int(rev) > int(known_rev):
                    revisions[rev_dt] = rev

        with open(revisions_file_path, 'w') as revisions_file:
            for day, day_rev in revisions.items():
                output_dict = {'date': day.strftime(DATE_FORMAT), 'rev': day_rev}
                logging.info('output_dict = {}'.format(output_dict))
                revisions_file.write(json.dumps(output_dict) + '\n')

    def _get_hosts_from_revisions(self, date_of_rev, revisions_file_path):
        """Build the export rows for the revision listed first in ``revisions_file_path``.

        Only the first line of the file is used. It is parsed as JSON, the
        groups directory is updated to its ``rev`` and one row is produced for
        every host of every group found there.

        Args:
            date_of_rev: value stored in the ``rev_date`` field of each row.
            revisions_file_path: JSON-lines file with ``rev`` entries.

        Returns:
            List of dicts with keys ``host``, ``metaprj``, ``group``, ``rev``
            (int) and ``rev_date``; empty when the file has no lines.
        """
        rows = []
        logging.info('opening revisions_file_path = {}'.format(revisions_file_path))
        with open(revisions_file_path, 'r') as revisions_file:
            first_record = next(iter(revisions_file), None)
            if first_record is not None:
                rev_json = json.loads(first_record)
                rev = rev_json[u'rev'].encode('utf8')

                logging.info('-' * 40)
                logging.info('rev_json = {}'.format(rev_json))

                self._checkout_groups_revision(rev)
                for group in self._get_groups_list():
                    metaprj = self._read_metaprj(group)
                    rows.extend(
                        {'host': host, 'metaprj': metaprj, 'group': group, 'rev': int(rev),
                         'rev_date': date_of_rev}
                        for host in self._read_hosts_list(group)
                    )
        logging.info('len(rows) = {}'.format(len(rows)))

        return rows

    def _checkout_groups_revision(self, rev):
        """Update the groups working copy to revision ``rev`` with ``svn up -r``.

        The command is started without a shell, with ``self.groups_path`` as
        working directory. Its "At revision" / "Updated to revision" output
        lines are logged. Failure is detected by the exit code.

        Args:
            rev: revision number to update to.

        Raises:
            SandboxTaskFailureError: if ``svn up`` exits with a non-zero code.
        """
        logging.info('Updating repo')
        cmd = ['svn', 'up', '-r', str(rev)]
        logging.info('cmd = %s (cwd = %s)', ' '.join(cmd), self.groups_path)
        p = subprocess.Popen(cmd, cwd=self.groups_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, err = p.communicate()

        # Log only the summary lines; the full output lists every changed file.
        summary_lines = [
            out_line
            for out_line in output.decode('utf-8', 'replace').split('\n')
            if 'At revision' in out_line or 'Updated to revision' in out_line
        ]
        if summary_lines:
            logging.info('%s', '\n'.join(summary_lines))

        err_text = err.decode('utf-8', 'replace').strip() if err else ''
        if p.returncode != 0:
            logging.error('svn up exited with code %s: %s', p.returncode, err_text)
            raise SandboxTaskFailureError('Cmd error')
        if err_text:
            # Output on stderr with a zero exit code is treated as a warning only.
            logging.warning('svn up stderr: %s', err_text)

    def _get_groups_list(self):
        """List the group names, i.e. the sub-directories of ``self.groups_path``.

        Returns:
            List of directory names in ``os.listdir`` order, or an empty list
            (after logging an error) when the groups path does not exist.
        """
        groups_dir = self.groups_path

        if os.path.exists(groups_dir):
            groups = []
            for entry in os.listdir(groups_dir):
                # Plain files are not groups.
                if os.path.isdir(os.path.join(groups_dir, entry)):
                    groups.append(entry)
            logging.info('groups = {}'.format(groups))
            return groups

        logging.error('os path {} does not exists'.format(groups_dir))
        return []

    def _read_metaprj(self, group):
        """Read the ``tags.metaprj`` value from the card file of ``group``.

        Best effort: any error while opening or parsing the card is logged and
        turned into ``None``.

        NOTE(review): the card file is named ``card.yaml`` but is parsed with
        ``json.load``; if cards are not JSON-compatible this always ends in the
        ``except`` branch and returns ``None`` -- confirm the file format.

        Args:
            group: group directory name inside ``self.groups_path``.

        Returns:
            The metaproject name encoded as UTF-8, or ``None`` when it is
            missing, empty, or the card could not be read.
        """
        card_file_path = os.path.join(self.groups_path, group, self._card_file_name)
        logging.info('opening card_file_path = {}'.format(card_file_path))
        try:
            with open(card_file_path, 'r') as card_file:
                card_data = json.load(card_file)
            card_tags = card_data.get(u'tags', {})
            logging.info('card_tags = {}'.format(card_tags))

            raw_metaprj = card_tags.get(u'metaprj')
            if raw_metaprj:
                metaprj = raw_metaprj.encode('utf8')
            else:
                metaprj = None
            logging.info('metaprj = {} for {} from {}'.format(metaprj, group, card_file_path))
        except Exception as e:
            logging.error(e)
            return None

        return metaprj

    def _read_hosts_list(self, group):
        """Read the host names of ``group`` from ``<groups_path>/<group>/<group>.hosts``.

        Args:
            group: group directory name inside ``self.groups_path``.

        Returns:
            List of the file's lines with surrounding whitespace stripped;
            blank lines are dropped.
        """
        hosts_file_path = '{}/{}/{}.hosts'.format(self.groups_path, group, group)
        logging.info('opening hosts_file_path = {}'.format(hosts_file_path))
        with open(hosts_file_path, 'r') as hosts_file:
            stripped_lines = (raw_line.strip() for raw_line in hosts_file)
            hosts = [host for host in stripped_lines if host]
        logging.info('len(hosts) = {}'.format(len(hosts)))

        return hosts
