#!/usr/bin/env python

import urllib2
import urllib
import logging
import sys
import json
import time
import argparse


# YT cluster proxy name; passed as ``proxy=`` when the YtClient is created.
YT_PROXY = 'banach'

# Environment identifiers: they key the endpoint maps in Uploader and are
# substituted into DATA_FOLDER to pick the per-environment delivery folder.
ENV_TEST = 'testing'
ENV_PROD = 'production'

# Default value of the --root command-line option.
DEFAULT_ROOT = '//home/search-functionality/qdsaas/'
# Source table path relative to the root; formatted with the namespace name.
SOURCE_TABLE = 'sources/%s'
# Delivery folder relative to the root; formatted with the environment name.
DATA_FOLDER = 'deploy_%s'
# Delivery table path; formatted with (data folder, namespace, unix timestamp).
DATA_TABLE = '%s/%s.%d'


def get_source_table(root, ns_name):
    """Return the source table path for namespace *ns_name* under *root*.

    *root* is used as-is (no separator is inserted), so it is expected to
    already end with '/'.
    """
    relative_path = SOURCE_TABLE % ns_name
    return root + relative_path


def emit_records(f, key_type, with_tld):
    """Parse tab-separated snippet lines and yield one table row per line.

    Every non-empty line of *f* must look like ``key<TAB>snippet`` or, when
    *with_tld* is true, ``key<TAB>tld<TAB>snippet``.  The snippet is a JSON
    object, optionally preceded by a legacy ``Snippet=`` / ``Snippet:b=``
    attribute name.  Blank lines are skipped.

    :param f: iterable of text lines (e.g. ``sys.stdin``).
    :param key_type: name of the row column that receives the key.
    :param with_tld: whether the second column carries a TLD value.
    :return: generator of dicts with ``key_type`` and ``Data_TSKV`` columns,
        plus ``Subkey_SerpTLD`` when a TLD is present.
    :raises ValueError: on a wrong number of columns, an empty or too long
        (more than 3 characters) TLD, invalid JSON, or a JSON value that is
        not an object.
    """
    expected_columns = 3 if with_tld else 2
    for line_no, line in enumerate(f, 1):
        line = line.rstrip('\n\r')
        if not line:
            continue

        # The snippet itself may contain tabs, so split off only the leading
        # columns and keep the remainder intact.
        columns = line.split('\t', expected_columns - 1)
        if len(columns) != expected_columns:
            raise ValueError('line %d: expected %d tab-separated columns, got %d'
                             % (line_no, expected_columns, len(columns)))

        key = columns[0]
        snippet = columns[-1]
        tld = columns[1] if with_tld else None
        if with_tld and (not tld or len(tld) > 3):
            raise ValueError('TLD column contains invalid value for key %s: %s' % (key, tld))

        # Strip legacy attribute name
        if snippet.startswith(('Snippet=', 'Snippet:b=')):
            snippet = snippet.split('=', 1)[1]

        # Validation: the payload must be a JSON object.
        snippet_js = json.loads(snippet)
        if not isinstance(snippet_js, dict):
            raise ValueError('snippet data must be dict, got %s instead' % snippet_js.__class__.__name__)
        # Python 2: input lines are byte strings, decode before re-encoding.
        snippet_unicode = snippet.decode('utf-8')

        # C-escape the value (by json-encoding a string and removing the surrounding quotes).
        # json.dumps always wraps a string in exactly one pair of quotes, so slice them off.
        escaped = json.dumps(snippet_unicode, ensure_ascii=False).encode('utf-8')[1:-1]
        rec = {
            key_type: key,
            'Data_TSKV': 'Snippet:b=' + escaped,
        }
        if tld:
            rec['Subkey_SerpTLD'] = tld
        yield rec


class Uploader(object):
    """Uploads snippet data for one namespace to YT and triggers its delivery.

    The ``yt.wrapper`` module is imported lazily inside the methods, so the
    module can be loaded (and ``--help`` shown) without it.
    """

    # Delivery endpoints, keyed by environment.
    SAAS_API_ENDPOINT = {
        ENV_TEST: 'http://fastsnips-testing.ferryman.n.yandex-team.ru/add-full-tables',
        ENV_PROD: 'http://fastsnips.ferryman.n.yandex-team.ru/add-full-tables',
    }
    # Batch status URLs, keyed by environment; formatted with the batch id.
    STATUS_TRACKING_URL = {
        ENV_TEST: 'http://fastsnips-testing.ferryman.n.yandex-team.ru/get-batch-status?batch=%s',
        ENV_PROD: 'http://fastsnips.ferryman.n.yandex-team.ru/get-batch-status?batch=%s',
    }

    # Names of the attributes set on a delivery table after the delivery call.
    ATTR_DELIVERED = 'is_delivered'
    ATTR_TS = 'gen_ts'
    ATTR_BATCH_ID = 'batch_id'

    def __init__(self, ns_name, root_folder, yt_token=None):
        """Create a YT client for namespace *ns_name* under *root_folder*.

        :param ns_name: namespace name; also used as the table name.
        :param root_folder: root YT folder; concatenated with relative paths
            as-is, so it is expected to end with '/'.
        :param yt_token: YT token; when empty, it is obtained from
            ``yt.wrapper``.
        """
        import yt.wrapper as yt
        if not yt_token:
            yt_token = yt._get_token()
        self.ns_name = ns_name
        self.root_folder = root_folder
        self.source_name = get_source_table(self.root_folder, self.ns_name)
        self.yt_client = yt.YtClient(proxy=YT_PROXY, token=yt_token)

    def upload_table(self, f, with_tld):
        """Overwrite the source table with records parsed from *f*.

        :param f: iterable of input lines (see ``emit_records``).
        :param with_tld: whether the input has a TLD in the second column.
        """
        import yt.wrapper as yt
        logger = logging.getLogger('upload')

        logger.info('updating %s' % self.source_name)
        key_type = 'Subkey_Url' if self.ns_name.endswith('docid_setprops') else 'Subkey_Owner'

        # Create and fill the table inside a single transaction block.
        with self.yt_client.Transaction():
            yt.create_table(self.source_name, recursive=True, ignore_existing=True, client=self.yt_client)
            yt.write_table(self.source_name, emit_records(f, key_type, with_tld), client=self.yt_client)

    def deliver_table(self, env):
        """Copy the source table to a timestamped delivery table and deliver it.

        The delivery table is created first; for non-test environments the
        method then waits 10 seconds so the operator can cancel before the
        delivery endpoint is called.  After the call the delivery table is
        tagged with the delivery flag, timestamp and returned batch id.

        :param env: ``ENV_TEST`` or ``ENV_PROD``.
        """
        import yt.wrapper as yt
        logger = logging.getLogger('deliver')

        data_ts = int(time.time())
        data_folder = self.root_folder + (DATA_FOLDER % env)
        table_name = DATA_TABLE % (data_folder, self.ns_name, data_ts)
        yt.copy(self.source_name, table_name, recursive=True, client=self.yt_client)
        logger.info('Will deploy to %s: %s' % (env, self.source_name))
        if env != ENV_TEST:
            logger.info('You have 10 seconds to cancel (press Ctrl-C)')
            time.sleep(10)

        # Seconds scaled by 10**6, i.e. the timestamp in microseconds.
        micro_ts = data_ts * 1000000
        delivery_command = {
            'Path': table_name,
            'Namespace': self.ns_name,
            'Timestamp': micro_ts
        }
        delivery_urlparams = dict(tables=json.dumps(delivery_command))
        delivery_url = self.SAAS_API_ENDPOINT[env].strip('?') + '?' + urllib.urlencode(delivery_urlparams)
        logger.info('Calling ' + delivery_url)
        res = urllib2.urlopen(delivery_url, timeout=300)
        try:
            response_str = res.read()
            retcode = res.getcode()
        finally:
            # Close the connection even when reading the response fails.
            res.close()
        response_json = json.loads(response_str)
        batch_id = response_json['batch']

        logger.info('Result is %d, batch_id=%s' % (retcode, batch_id))
        logger.info(
            'Status tracking: watch curl \'' + self.STATUS_TRACKING_URL[env] % batch_id + '\'')

        yt.set_attribute(table_name, self.ATTR_DELIVERED, True, client=self.yt_client)
        yt.set_attribute(table_name, self.ATTR_TS, data_ts, client=self.yt_client)
        yt.set_attribute(table_name, self.ATTR_BATCH_ID, batch_id, client=self.yt_client)

    def clean_tables(self,
                     env,
                     leave=5,  # type: int
                     ):
        # type: (...) -> List[str]
        """
        Remove the oldest delivery tables of this namespace.

        Only tables named ``<ns_name>.<digits>`` are considered, so tables of
        other namespaces sharing the same prefix are never touched.

        :param env: ``ENV_TEST`` or ``ENV_PROD``; selects the delivery folder.
        :param leave: number of newest tables to keep; 0 removes all of them.
        :return: list of the removed tables.
        :raises ValueError: if *leave* is negative.
        """
        import yt.wrapper as yt
        log = logging.getLogger('clean')

        if leave < 0:
            raise ValueError('leave must be non-negative, got %s' % leave)

        data_folder = self.root_folder + (DATA_FOLDER % env)
        log.info(data_folder)

        table_prefix = self.ns_name + '.'
        tables = []
        for table in yt.list(data_folder, client=self.yt_client):
            if not table.startswith(table_prefix):
                continue
            # Everything after "<ns_name>." must be the numeric timestamp;
            # this also works when the namespace itself contains dots.
            ts = table[len(table_prefix):]
            if not ts.isdigit():
                continue
            tables.append((data_folder + '/' + table, int(ts)))

        removed_tables = []
        if len(tables) > leave:
            tables.sort(key=lambda item: item[1])
            # Explicit end index: "[:-leave]" would be empty for leave == 0.
            remove_tables = [path for path, _ in tables[:len(tables) - leave]]
            log.info(
                'Start removing delivery tables (leaving only %s of %s)' % (
                    leave,
                    len(tables),
                )
            )
            for i, remove_table in enumerate(remove_tables):
                log.info(
                    'Removing delivery table %s (%s/%s)' % (
                        remove_table,
                        i+1,
                        len(remove_tables),
                    )
                )
                yt.remove(remove_table, force=True, client=self.yt_client)
                removed_tables.append(remove_table)
        return removed_tables


if __name__ == '__main__':
    # %(message)s is the fully formatted log text; %(msg)s would print the
    # raw format string without its arguments for calls that pass lazy args.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s\t%(levelname)s\t%(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        stream=sys.stderr,
    )
    logger = logging.getLogger('main')

    # Global options, shared by every subcommand.
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--root', help='root folder on %s' % YT_PROXY, type=str, default=DEFAULT_ROOT, required=False)
    parser.add_argument('-n', '--ns', help='namespace (also table name)', type=str, default=None, required=True)

    subparsers = parser.add_subparsers(dest='command', help='available operations')

    upload = subparsers.add_parser('upload', help='Upload key\t{"plugin": {...}} data from stdin to the source table for the specified namespace')
    upload.add_argument('--tld', help='data contains Serp_TLD in the second column', default=False, required=False, action='store_true')

    deploy = subparsers.add_parser('deploy', help='Deploy data from the source table to the specified namespace')
    deploy.add_argument('--production', help='deploy to PRODUCTION', default=False, required=False, action='store_true')
    deploy.add_argument('--keep-tables',
                        help='delete old delivery tables; specify a number of fresh tables to keep',
                        default=30, required=False, type=int)

    clean = subparsers.add_parser('clean', help='Remove old delivery tables from the specified namespace')
    clean.add_argument('--production', help='delete old production tables', default=False, required=False, action='store_true')
    clean.add_argument('--keep-tables', help='number of fresh tables to keep', default=5, required=False, type=int)

    args = parser.parse_args()

    # Normalize the root so that it always ends with exactly one '/'.
    root_folder = args.root.rstrip('/') + '/'
    ns_name = args.ns
    fields = ns_name.split(':', 1)
    if len(fields) != 2 or fields[-1] not in ('docid_setprops', 'categflag'):
        logger.error('Expected namespace format: some_name:docid_setprops or some_name:categflag')
        sys.exit(1)

    def env():
        # Evaluated lazily: only the deploy and clean subcommands define
        # --production, so args.production does not exist for upload.
        return ENV_PROD if args.production else ENV_TEST

    uploader = Uploader(ns_name=ns_name, root_folder=root_folder)
    if args.command == 'deploy':
        uploader.deliver_table(env=env())
        # A non-positive --keep-tables disables the cleanup after deploy.
        if args.keep_tables > 0:
            uploader.clean_tables(leave=args.keep_tables, env=env())
    elif args.command == 'clean':
        uploader.clean_tables(leave=args.keep_tables, env=env())
    elif args.command == 'upload':
        uploader.upload_table(f=sys.stdin, with_tld=args.tld)
