import os
import sys
import logging
from sandbox import common
import time
import urlparse
import random
from sandbox import sdk2
from sandbox.sandboxsdk import environments, process

# YQL query template. The %XXX% markers are placeholders in the same format that
# SupLoadRegion.on_execute substitutes (via str.replace) in the task's query:
#   %DST%        - output table, rewritten with TRUNCATE
#   %SUP%        - table providing the `deviceId` column that rows are joined against
#   %PTH%, %SRC% - the two arguments of RANGE() selecting the source log tables
#   %TIM%        - upper bound for (ReceiveTimestamp - EventTimestamp)
#   %PRC%        - upper bound for LocationPrecision
#   %API%        - additional boolean condition (APIKey filter or `True`)
# For every device it takes the first UUID/RegionID in the window ordered by
# ReceiveTimestamp DESC, EventNumber DESC, expands the region into its parents
# (Geo::GetParents) and emits one `geo_<type>` tag row per parent whose type is
# in (3,4,5,6,7,15); types 7 and 15 are written as type 6.
# NOTE(review): nothing in the visible part of this file references this constant
# (the `yql_query` parameter declares no default), and the template does not use
# the %END%, %PRV% or %INC% placeholders that on_execute also fills in - confirm
# whether it is still in use and up to date.
DEFAULT_YQL_QUERY = '''
USE hahn;
PRAGMA yt.InferSchema;

$regId = CAST(t.RegionID AS Int32);
$geoIdType = Geo::RegionById(id).type;
$geoType = Geo::RegionById(parent).type;

INSERT INTO [%DST%] WITH TRUNCATE
    SELECT
        did,
        install_id,
        'geo_' || COALESCE(CAST(IF($geoType IN (7,15), 6, $geoType) as String), '') as tag_name,
        CAST(parent as String) as tag_value
    FROM (
        SELECT b.did as did, b.install_id as install_id, b.id as id, b.parents_list as parents_list
        FROM (SELECT DISTINCT deviceId FROM [%SUP%]) AS a
        INNER JOIN (
            SELECT
                String::ToLower(String::ReplaceAll(String::ReplaceAll(t.DeviceID, '_', ''), '-', '')) as did,
                t.UUID as install_id,
                $regId as id,
                Geo::GetParents($regId) as parents_list
            FROM (
                SELECT
                    DeviceID,
                    FIRST_VALUE(UUID) OVER w AS UUID,
                    FIRST_VALUE(RegionID) OVER w AS RegionID
                FROM RANGE([%PTH%],[%SRC%])
                WHERE (CAST(ReceiveTimestamp AS Int32) - CAST(EventTimestamp AS Int32)) < %TIM%
                    AND CAST(LocationPrecision AS Int32) < %PRC%
                    AND DeviceID IS NOT NULL AND RegionID IS NOT NULL
                    AND %API%
                WINDOW w AS (
                        PARTITION BY DeviceID
                        ORDER BY ReceiveTimestamp DESC, EventNumber DESC
                    )
            ) as t
            GROUP BY t.DeviceID,t.UUID,t.RegionID
        ) AS b
        ON a.deviceId == b.did
    )
    FLATTEN BY parents_list AS parent
    WHERE
        /* only valid regions */
        parents_list IS NOT NULL AND
        /* only regions of required types https://doc.yandex-team.ru/lib/libgeobase5/concepts/region-types.xml */
        $geoType IN (3,4,5,6,7,15) AND
        /* for regions which has a child with the same type select child object (Zelenograd) */
        ($geoIdType != $geoType OR ($geoIdType == $geoType AND id == parent))
;
'''


class SupLoadRegion(sdk2.Task):
    """Load regions from AppMetrikaLog: store them in YT and ask SUP to upload them."""

    class Requirements(sdk2.Requirements):
        # pip packages for the task; yt.wrapper and requests are imported lazily
        # inside the methods below.
        environments = [environments.PipEnvironment('pip', '9.0.1'),
                        environments.PipEnvironment('yandex-yt'),
                        environments.PipEnvironment('yql'),
                        environments.PipEnvironment('requests'), ]

    class Parameters(sdk2.Task.Parameters):
        # common parameters
        kill_timeout = 7200

        # custom parameters
        sup_url = sdk2.parameters.String('Sup url', default='http://push-beta.n.yandex-team.ru')
        yql_query = sdk2.parameters.String('YQL Query', required=True, multiline=True)
        precision = sdk2.parameters.Integer('Max precision', required=True, default=500)
        range = sdk2.parameters.Integer('Max time range', required=True, default=600)
        api_keys = sdk2.parameters.String('List of APIKeys (separate by comma)', default=None)
        source_path = sdk2.parameters.String('Source path', required=True, default='//logs/metrika-mobile-log/stream/5min')
        destination = sdk2.parameters.String('Destination path', required=True, default='//home/search-functionality/sup/tags/regions')
        sup_users_path = sdk2.parameters.String('Sup users path', required=True, default='//home/search-functionality/sup/backup/sup-prod.n.yandex-team.ru')
        max_tables_join = sdk2.parameters.Integer('Max tables join in source path', required=True, default=6)
        chunk_size = sdk2.parameters.Integer('chunk_size', required=True, default=100000)
        limit = sdk2.parameters.Integer('limit', default=None)
        wait = sdk2.parameters.Bool('Wait end of task', default=True)
        update_only = sdk2.parameters.Bool('Update only existing records', default=True)
        update_by_device = sdk2.parameters.Bool('Update by device', default=True)
        postprocess = sdk2.parameters.Bool('Postprocess', default=True)

    def on_execute(self):
        """Run the YQL query over new source tables and upload the result to SUP.

        Steps:
          1. Read the YQL/YT tokens from Vault and configure the YT client.
          2. Pick the last ``max_tables_join`` source tables whose names are
             greater than the newest table already present in ``destination``.
          3. Find the most recent ``sup_users`` table under ``sup_users_path``.
          4. Fill the %DST%, %PTH%, %SRC%, %END%, %PRC%, %TIM%, %API%, %SUP%,
             %PRV% and %INC% placeholders of ``yql_query`` and run it through
             ``yql_runner.py``.
          5. Unless ``sup_url`` is empty, ask SUP to upload the incremental
             table, repeating the upload once if ``load_tags`` returns False.

        Raises:
            common.errors.TaskError: on missing tokens, empty source path,
                missing sup users table, or a failed upload.
        """
        yql_token = sdk2.Vault.data(self.owner, 'robot_sup_yql_token')
        yt_token = sdk2.Vault.data(self.owner, 'robot_sup_yt_token')
        yt_proxy = 'hahn.yt.yandex.net'

        if not yt_token or not yql_token:
            raise common.errors.TaskError('Empty token param')

        import yt.wrapper as yt
        yt.config['proxy']['url'] = yt_proxy
        yt.config['token'] = yt_token

        def list_tables(yt_path):
            # Keep only nodes of type "table" (e.g. skips nested directories).
            return [str(t) for t in yt.list(yt_path, attributes=['type']) if t.attributes['type'] == 'table']

        table_source = list_tables(self.Parameters.source_path)
        if not table_source:
            raise common.errors.TaskError('Empty source path')

        destination_tables = list_tables(self.Parameters.destination)
        last_destination = destination_tables[-1] if destination_tables else None

        table_source = table_source[-self.Parameters.max_tables_join:]
        if last_destination is not None:
            # Table names are compared as strings; only tables newer than the
            # last processed one are taken. With no previous result every
            # candidate table is new (no reliance on str-vs-None ordering).
            table_source = [t for t in table_source if t > last_destination]
        if not table_source:
            self.set_info('Not found new tables in source path')
            logging.info('Not found new tables in source path')
            return

        if last_destination:
            last_destination = '[' + os.path.join(self.Parameters.destination, last_destination) + ']'
        else:
            # No previous table: substitute a one-row stub sub-select instead.
            last_destination = "(SELECT '' as did)"

        # Build the APIKey filter. The parameter may be None or empty, in which
        # case no filtering is applied; blank entries are ignored.
        raw_api_keys = self.Parameters.api_keys or ''
        key_list = [key.strip() for key in raw_api_keys.split(',') if key.strip()]
        if key_list:
            api_keys = 'APIKey IN (' + ','.join("'%s'" % key for key in key_list) + ')'
        else:
            api_keys = 'True'

        # Walk the sup users directory from the end and take the first entry
        # that actually contains a `sup_users` node.
        path_to_users = None
        for path in reversed(yt.list(self.Parameters.sup_users_path)):
            temp_path = os.path.join(self.Parameters.sup_users_path, path, 'sup_users')
            if yt.exists(temp_path):
                path_to_users = temp_path
                break
        if not path_to_users:
            raise common.errors.TaskError('Empty user tables in sup users path')

        output = os.path.join(self.Parameters.destination, table_source[-1])
        incremental = os.path.join(self.Parameters.destination, 'inc', table_source[-1])

        substitutions = (
            ('%DST%', output),
            ('%PTH%', self.Parameters.source_path),
            ('%SRC%', table_source[0]),
            ('%END%', table_source[-1]),
            ('%PRC%', str(self.Parameters.precision)),
            ('%TIM%', str(self.Parameters.range)),
            ('%API%', api_keys),
            ('%SUP%', path_to_users),
            ('%PRV%', last_destination),
            ('%INC%', incremental),
        )
        query = self.Parameters.yql_query
        for placeholder, value in substitutions:
            query = query.replace(placeholder, value)

        # NOTE(review): the YQL token is passed as a command-line argument, so it
        # may be visible in the process list / command logs - consider passing it
        # through the environment if yql_runner.py supports that.
        cmd = [sys.executable, os.path.join(os.path.dirname(os.path.dirname(__file__)), 'yql_runner.py'), '--query', query, '--token', yql_token, ]
        proc = process.run_process(cmd, wait=True, log_prefix='yql_runner')
        proc.communicate()

        if not self.Parameters.sup_url:
            self.set_info('Sup url is not specified. Tags are not uploaded')
            logging.info('Sup url is not specified')
            return

        # load_tags returns False when the job status can no longer be found
        # (HTTP 404); in that case the upload is started once more. Any other
        # result (True or a job id, including 0) counts as success.
        for _ in range(2):
            result = self.load_tags(self.Parameters.sup_url, incremental, yql_token)
            if result is not False:
                return
        raise common.errors.TaskError('Uploading failed')

    def load_tags(self, url, output, yql_token):
        """Start a SUP batch upload of table ``output`` and optionally wait for it.

        Args:
            url: base SUP url; ``tags/batch/mongo`` is appended to it.
            output: YT path of the table to upload (sent as ``path``).
            yql_token: OAuth token sent in the ``Authorization`` header.

        Returns:
            The job execution id if the ``wait`` parameter is off; otherwise
            True when the job reports ``COMPLETED`` or False when the status
            endpoint answers 404.

        Raises:
            common.errors.TaskError: on a non-200 answer, a missing job id,
                a missing ``X-Yandex-Front`` header or a ``FAILED`` job.
        """
        import requests

        # Seconds to wait for a single status response before retrying.
        poll_timeout = 60

        def pause():
            # Randomised delay between status requests (also used before
            # retrying after a transient error, to avoid a busy loop).
            time.sleep(random.uniform(20.0, 30.0))

        response = requests.post(url.rstrip('/') + '/tags/batch/mongo',
                                 headers={'Authorization': 'OAuth %s' % yql_token, 'Content-Type': 'application/json'},
                                 json={
                                     'path': output,
                                     'limit': self.Parameters.limit,
                                     'chunk_size': self.Parameters.chunk_size,
                                     'by_device': int(self.Parameters.update_by_device),
                                     'update_only': int(self.Parameters.update_only),
                                     'postprocess': int(self.Parameters.postprocess)})

        if response.status_code != 200:
            raise common.errors.TaskError('SUP API Error (%d):\n%s' % (response.status_code, response.text))

        job_id = response.json().get('jobExecutionId')
        if job_id is None:
            raise common.errors.TaskError('SUP API Error (%d):\n%s' % (response.status_code, response.text))
        logging.info('Job execution id (%s):\n%s', job_id, response.text)

        if not self.Parameters.wait:
            return job_id

        # Status is requested from the host named in the response headers
        # rather than from the original url.
        host = response.headers.get('X-Yandex-Front')
        if not host:
            raise common.errors.TaskError('Could not construct API url: unknown real')
        port = response.headers.get('X-Yandex-Front-Port')
        if port:
            host += ':' + port
        status_url = urlparse.urlunsplit(('http', host, 'tags/batch/%s' % job_id, None, None))
        logging.info('URL to get status: ' + status_url)

        while True:
            try:
                response = requests.get(status_url, headers={'Content-Type': 'application/json'},
                                        timeout=poll_timeout)
            except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
                logging.warning('Status request to %s failed, retrying', status_url)
                pause()
                continue

            if response.status_code == 504:
                logging.warning('Status request to %s returned 504, retrying', status_url)
                pause()
                continue

            if response.status_code == 404:
                return False

            if response.status_code != 200:
                raise common.errors.TaskError('SUP API Error (%d):\n%s' % (response.status_code, response.text))

            job_info = response.json()
            status = job_info.get('status')
            if status == 'COMPLETED':
                logging.info('Finished uploading table %s', job_id)
                return True
            if status == 'FAILED':
                raise common.errors.TaskError('Uploading failed (%s):\n%s' % (status_url, job_info.get('exitDescription')))
            # Any other status: the job is still in progress, keep polling.
            pause()
