import datetime as dt
import json
import logging
import os
import subprocess

from sandbox import sdk2
from sandbox.common.types.resource import State
from sandbox.projects.common import utils
from sandbox.projects.websearch.begemot import resources as br
from sandbox.projects.websearch.begemot.tasks.BegemotYT.common import CommonYtParameters
from sandbox.projects.websearch.upper import resources as upper_resources
from sandbox.sandboxsdk.environments import PipEnvironment

# YT directory under which the per-service request-log tables are looked up
# (see get_eventlog_request_table).
EVENTLOGDATA_PATH = "//home/begemot/eventlogdata"
# Value passed to request_sampler via "-f"; the per-source result files are
# then read back from "<RS_PATH>_<source>" (see merge_queries).
RS_PATH = "rs_output"

def get_input_table_path():
    """Return the path of the hourly apphost event-log table from 30 hours ago.

    The current local time is shifted back by 30 hours and truncated to the
    start of the hour; the ISO-8601 form of that timestamp is used as the
    table name under ``//logs/apphost-event-log/1h``.
    """
    shifted = dt.datetime.now() - dt.timedelta(hours=30)
    hour_start = shifted.replace(minute=0, second=0, microsecond=0)
    return "//logs/apphost-event-log/1h/" + hour_start.isoformat()

def get_eventlog_request_table(service, cgi=True):
    """Build the path of yesterday's request-log table for *service*.

    :param service: service name used as the table-name prefix.
    :param cgi: when true the table name contains ``cgi-`` before
        ``requests-log``; otherwise that part is omitted.
    :return: ``<EVENTLOGDATA_PATH>/<service>-[cgi-]requests-log/<YYYYMMDD>``
        where the date is the current local time minus one day.
    """
    yesterday = dt.datetime.now() - dt.timedelta(days=1)
    kind_prefix = "cgi-" if cgi else ""
    log_dir = "{}-{}requests-log".format(service, kind_prefix)
    return "/".join([EVENTLOGDATA_PATH, log_dir, yesterday.strftime("%Y%m%d")])

def adapt_for_stdin(input_file, output_file):
    """Convert sampled apphost requests into the per-line form written to *output_file*.

    Every non-blank line of *input_file* must be a JSON object with an
    ``answers`` list. In each request the first answer named
    ``BEGEMOT_CONFIG`` gets ``results[0]['binary']`` set to ``False``.
    The ``answers`` lists are then dumped with sorted keys, one JSON document
    per line, separated by ``\\n`` (no trailing newline is written).

    Blank lines in the input are skipped instead of failing JSON parsing.

    :param input_file: path of the file with one JSON request per line.
    :param output_file: path of the file to (over)write with the result.
    :raises ValueError: if a non-blank line is not valid JSON.
    :raises KeyError: if a request has no ``answers`` key, an answer has no
        ``name`` key, or a ``BEGEMOT_CONFIG`` answer has no ``results`` key.
    :raises IndexError: if a ``BEGEMOT_CONFIG`` answer has an empty
        ``results`` list.
    """
    requests = []
    with open(input_file, "r") as src:
        # Iterate lazily: the input may be large and only 'answers' is kept.
        for line in src:
            if not line.strip():
                # An empty (e.g. trailing) line is not a request.
                continue
            requests.append(json.loads(line)['answers'])

    for answers in requests:
        for answer in answers:
            if answer['name'] == 'BEGEMOT_CONFIG':
                answer['results'][0]['binary'] = False
                # Only the first BEGEMOT_CONFIG answer of a request is patched.
                break

    with open(output_file, "w") as dst:
        dst.write("\n".join(json.dumps(answers, sort_keys=True) for answers in requests))

class UploadBegemotRequests(sdk2.Task):

    class Parameters(sdk2.Task.Parameters):
        # Input parameters of the task.

        # When set, every produced resource gets a 'shard' attribute
        # (see upload_via_request_sampler / upload_via_begemot_mapper).
        update_testenv = sdk2.parameters.Bool(
            "Update ws-begemot-trunk resources",
            default=False,
            description="Set attrs from field 'attr' to each resource. Testenv will try to update corresponding resources to task output"
        )
        # Iterated item by item in on_execute. Each item is read by key:
        # 'type', 'number', 'attr' and either 'sources' (for type
        # 'apphost_request_sampler') or 'service' (for any other type).
        config = sdk2.parameters.JSON(
            "JSON config for task",
            required=True,
            description="Set type of requests (apphost/cgi/apphost_request_sampler), source (nanny service or apphost source), number of requests and shard name"
        )
        # Optional; when empty, upload_via_request_sampler picks the first
        # READY REQUEST_SAMPLER_EXECUTABLE it finds.
        request_sampler = sdk2.parameters.Resource(
            "request_sampler executable",
            required=False,
            resource_type=upper_resources.REQUEST_SAMPLER_EXECUTABLE,
            description="For apphost requests downloading. If not defined, latest REQUEST_SAMPLER_EXECUTABLE will be used"
        )
        # Optional; when empty, process_log_table picks the first READY
        # BEGEMOT_YT_EVENTLOG_MAPPER with attribute released=stable.
        begemot_mapper = sdk2.parameters.Resource(
            "begemot eventlog mapper resource",
            required=False,
            resource_type=br.BEGEMOT_YT_EVENTLOG_MAPPER,
            description="For cgi queries. If not defined, latest BEGEMOT_YT_EVENTLOG_MAPPER will be used"
        )
        # Name of the Vault item holding the YT token (read via sdk2.Vault.data).
        yt_secret = CommonYtParameters.yt_token_vault_name(default='yt_token_for_request_sampler')
        # Exported to the mapper process as YT_PROXY and used as the yt client proxy url.
        yt_proxy = CommonYtParameters.yt_proxy(default='hahn')
        # Exported to the mapper process as YT_POOL.
        yt_pool = CommonYtParameters.yt_pool(default='begemot')
        # When set, check_resource marks the resource ready regardless of
        # how many lines it contains.
        do_not_validate = sdk2.parameters.Bool(
            "Do not validate output size",
            default=False,
        )

    class Requirements(sdk2.Requirements):
        # Resource requirements of the task.
        # NOTE(review): the values look like MiB (25 GiB disk, 4 GiB RAM) — confirm against the Sandbox SDK.
        disk_space = 25 * 1024
        ram = 4 * 1024
        # Pinned yandex-yt package; presumably the provider of the `yt.wrapper`
        # module imported in upload_via_begemot_mapper.
        environments = [PipEnvironment('yandex-yt', version='0.10.8')]


    def merge_queries(self, sources):
        """Concatenate the per-source request files into a single file.

        The per-source files are expected at ``<RS_PATH>_<source>`` in the
        current working directory.

        :param sources: non-empty list of source names.
        :return: path of the file holding the requests of all sources. With
            exactly one source its own file is returned and nothing is copied;
            otherwise ``merged_queries_<first source>`` is (over)written.
        :raises IndexError: if *sources* is empty.
        """
        per_source_files = ["{}_{}".format(RS_PATH, source) for source in sources]
        if len(per_source_files) == 1:
            return per_source_files[0]

        merged_path = "merged_queries_{}".format(sources[0])
        with open(merged_path, 'w') as out:
            for file_name in per_source_files:
                last_line = None
                with open(file_name, 'r') as inp:
                    # Stream line by line instead of loading the whole file.
                    for last_line in inp:
                        out.write(last_line)
                # A file without a final newline would otherwise have its last
                # record glued to the first record of the next file.
                if last_line and not last_line.endswith('\n'):
                    out.write('\n')
        return merged_path

    def check_resource(self, res, expeceted_number, shard):
        """Finalize resource *res* depending on how many lines its file has.

        * line count equals *expeceted_number*, or the ``do_not_validate``
          parameter is set: the resource is marked ready;
        * line count is strictly between half and double the expected number:
          a warning is added to the task info and the resource is marked ready;
        * otherwise: an error is added to the task info and the resource is
          marked broken.

        :param res: resource whose data file is inspected.
        :param expeceted_number: expected number of lines (requests).
        :param shard: shard name, used only in messages.
        """
        data = sdk2.ResourceData(res)
        logging.debug("Checking resource {} for shard {}".format(res.id, shard))
        with open(str(data.path), 'r') as handle:
            actual = sum(1 for _ in handle)

            if actual == expeceted_number or self.Parameters.do_not_validate:
                logging.debug("OK for resource {}".format(res.id))
                data.ready()
                return

            if expeceted_number / 2 < actual < 2 * expeceted_number:
                self.set_info("WARNING: task have downloaded {} requests for shard {}, expected {}".format(actual, shard, expeceted_number))
                data.ready()
                return

            self.set_info("ERROR: task have downloaded {} requests for shard {}, expected {}. Resource is marked as broken".format(actual, shard, expeceted_number))
            data.broken()

    def upload_via_request_sampler(self, sources, number, attr):
        """Sample apphost requests with request_sampler and publish them as a resource.

        request_sampler is run once per source against the table returned by
        get_input_table_path(); each run is asked for ``number // len(sources)``
        requests. The per-source results are merged, passed through
        adapt_for_stdin into a new BEGEMOT_APPHOST_QUERIES resource and the
        resource is validated with check_resource.

        :param sources: non-empty list of apphost source names.
        :param number: total number of requests wanted for the shard.
        :param attr: shard name; used in the resource description and file
            name and, when ``update_testenv`` is set, as the ``shard`` attribute.
        :raises Exception: if *sources* is empty, no request_sampler resource
            is available, or request_sampler exits with a non-zero code.
        """
        if not sources:
            raise Exception('No sources given for shard {}'.format(attr))

        rs = self.Parameters.request_sampler
        if not rs:
            rs = sdk2.Resource["REQUEST_SAMPLER_EXECUTABLE"].find(state='READY').first()
        if rs is None:
            raise Exception('No READY REQUEST_SAMPLER_EXECUTABLE resource found')
        token = sdk2.Vault.data(self.Parameters.yt_secret)

        # The file name keeps the total requested number, not the per-source one.
        filename_prefix = "{}_requests".format(number)
        output = br.BEGEMOT_APPHOST_QUERIES(
            self,
            "Apphost requests for shard {} collected by UPLOAD_BEGEMOT_REQUESTS task from apphost eventlogs".format(attr),
            "{}_{}".format(filename_prefix, attr)
        )

        # Explicit floor division: the quota must stay an integer.
        per_source_number = number // len(sources)
        # Loop invariants: resolve the binary and the input table once so that
        # every source is sampled from the same table.
        rs_path = str(sdk2.ResourceData(rs).path)
        input_table = get_input_table_path()

        for source in sources:
            request_sampler_stderr = self.log_path() / 'request_sampler_stderr_{}.txt'.format(source)

            cmd_head = [
                rs_path,
                "-i", input_table,
                "-s", source.encode('ascii', 'ignore'),
                "-d{}".format(per_source_number),
                "-f", RS_PATH,
            ]
            cmd_tail = ["--output-format", "app-host-json"]
            # Argument list without a shell: nothing in the config can be
            # interpreted as shell syntax.
            cmd = cmd_head + ["-t", token] + cmd_tail
            # Never write the token to the task logs.
            printable_cmd = ' '.join(cmd_head + ["-t", "<hidden>"] + cmd_tail)

            with request_sampler_stderr.open("w") as err:
                logging.debug("Running request sampler with cmd: {}".format(printable_cmd))
                returncode = subprocess.call(cmd, stderr=err)
            if returncode:
                raise Exception('Request sampler exited with code {code}'.format(code=returncode))

        path = self.merge_queries(sources)
        adapt_for_stdin(path, str(output.path))
        if self.Parameters.update_testenv:
            utils.set_resource_attributes(output.id, {'shard': attr})
        self.check_resource(output, per_source_number * len(sources), attr)

    def process_log_table(self, input_table, output_table, cgi, env):
        """Run the eventlog mapper binary over *input_table*.

        The mapper comes from the ``begemot_mapper`` parameter or, when that
        is empty, from the first READY BEGEMOT_YT_EVENTLOG_MAPPER resource
        with attribute ``released=stable``.

        :param input_table: value passed to the mapper as ``--input``.
        :param output_table: value passed to the mapper as ``--output``.
        :param cgi: when true, ``--cgi`` is appended to the command line.
        :param env: environment for the mapper process.
        :raises subprocess.CalledProcessError: if the mapper exits with a
            non-zero code.
        """
        mapper = self.Parameters.begemot_mapper
        if not mapper:
            mapper = sdk2.Resource["BEGEMOT_YT_EVENTLOG_MAPPER"].find(state='READY', attrs={'released': 'stable'}).first()

        mapper_binary = str(sdk2.ResourceData(mapper).path)
        cgi_flag = ['--cgi'] if cgi else []
        command = [mapper_binary, '--input', input_table, '--output', output_table] + cgi_flag

        with sdk2.helpers.ProcessLog(self, logger='eventlog_mapper') as process_log:
            process_log.logger.propagate = 1
            # stderr is folded into the same process log as stdout.
            subprocess.check_call(
                command,
                stdout=process_log.stdout,
                stderr=subprocess.STDOUT,
                env=env,
            )

    @staticmethod
    def _extract_cgi_request(prepared_request):
        """Return the decoded request of a cgi eventlog row, or None if it has none.

        *prepared_request* is split into ``key=value`` items on ``&``. Only
        rows with ``event_type == 'TRequestReceived'`` and an ``event_data``
        that is present, non-empty and not ``'%0A'`` yield a request.
        """
        items = {}
        for item in prepared_request.split('&'):
            # partition() tolerates an item without '=' (value becomes '').
            key, _, value = item.partition('=')
            items[key] = value

        event_data = items.get('event_data')
        if items.get('event_type') != 'TRequestReceived' or not event_data or event_data == '%0A':
            return None

        # Everything from the first '%' on is dropped and the base64 padding
        # is rebuilt up to the next multiple of four characters.
        request_b64 = event_data.split('%')[0]
        request_b64 += '=' * (-len(request_b64) % 4)
        return request_b64.decode('base64')

    def upload_via_begemot_mapper(self, req_type, service, number, attr):
        """Collect requests from the begemot eventlog table and publish them as a resource.

        Yesterday's request-log table of *service* is processed by the
        eventlog mapper into ``<table>_processed`` unless that table already
        exists. Up to *number* requests are then read from the processed
        table into a new BEGEMOT_CGI_QUERIES (``req_type == 'cgi'``) or
        BEGEMOT_APPHOST_QUERIES resource, which is validated with
        check_resource.

        :param req_type: request type; ``'cgi'`` selects cgi handling, any
            other value is handled as apphost.
        :param service: service name used to locate the log table.
        :param number: maximum number of requests to write.
        :param attr: shard name; used in the resource description and, when
            ``update_testenv`` is set, as the ``shard`` attribute (with a
            ``-cgi`` suffix for cgi requests).
        """
        import yt.wrapper as yt
        env = os.environ.copy()
        env['YT_PROXY'] = self.Parameters.yt_proxy
        env['YT_POOL'] = self.Parameters.yt_pool
        env['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_secret)
        # The in-process yt client uses the same credentials as the mapper.
        yt.config['token'] = env.get('YT_TOKEN')
        yt.config["proxy"]["url"] = env.get('YT_PROXY')

        cgi = req_type == 'cgi'
        table_path = get_eventlog_request_table(service, cgi)
        processed_table_path = table_path + '_processed'
        logging.debug("TABLE PATH: {}".format(table_path))
        if not yt.exists(processed_table_path):
            self.process_log_table(table_path, processed_table_path, cgi, env)

        output_file = '{}_{}_{}.txt'.format(service, req_type, number)
        res_type = br.BEGEMOT_CGI_QUERIES if cgi else br.BEGEMOT_APPHOST_QUERIES
        output = res_type(
            self,
            "{} requests for shard {} collected by UPLOAD_BEGEMOT_REQUESTS task from begemot eventlogs".format('Cgi' if cgi else 'Apphost', attr),
            output_file
        )

        with open(output_file, 'w') as out:
            count = 0
            for row in yt.read_table(processed_table_path, format='json'):
                if cgi:
                    request = self._extract_cgi_request(row['prepared_request'])
                else:
                    request = row['prepared_request']
                if request is not None:
                    out.write(request + '\n')
                    count += 1
                if count >= number:
                    break

        if self.Parameters.update_testenv:
            utils.set_resource_attributes(output.id, {'shard': "{}{}".format(attr, "-cgi" if cgi else "")})
        self.check_resource(output, number, attr)

    def on_execute(self):
        """Process every item of the ``config`` parameter in order.

        Items of type ``apphost_request_sampler`` are handled by
        upload_via_request_sampler (keys ``sources``, ``number``, ``attr``);
        all other items go to upload_via_begemot_mapper (keys ``type``,
        ``service``, ``number``, ``attr``).
        """
        for entry in self.Parameters.config:
            if entry['type'] != 'apphost_request_sampler':
                self.upload_via_begemot_mapper(entry['type'], entry['service'], entry['number'], entry['attr'])
                continue
            self.upload_via_request_sampler(entry['sources'], entry['number'], entry['attr'])
