# -*- coding: utf-8 -*-

import os
import logging
import uuid
import time


from sandbox import sdk2
from sandbox.projects.release_machine.tasks.ScrapeRequests2.scraper_over_yt import (
    ScraperOverYtWrapper,
    BatchStatus,
)
from sandbox.projects.release_machine.tasks.ScrapeRequests2 import parameters as sr_params
from sandbox.projects import resource_types
from sandbox.projects.common import resource_selectors
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import ra2
from sandbox.projects.common import environments
from sandbox.projects.common.sdk_compat.task_helper import convert_parameter_to_sdk2
from sandbox.projects.release_machine.helpers.soy_helper import SoyApi

import sandbox.projects.release_machine.core.task_env as task_env


class ScrapeRequests2(sdk2.Task):
    class Requirements(task_env.TinyRequirements):
        # Pip packages requested for the task: `yandex-yt` and
        # `yandex-yt-yson-bindings-skynet`, both installed from wheels.
        # The right-hand side is evaluated before the class attribute is bound,
        # so `environments.PipEnvironment` still refers to the imported
        # `sandbox.projects.common.environments` module.
        environments = [
            environments.PipEnvironment('yandex-yt', use_wheel=True),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=True)
        ]

    class Parameters(sdk2.Task.Parameters):
        # Task input parameters. Most of them are converted from the shared
        # definitions in `sr_params` / `ra2`; only the dict and bool
        # parameters are declared here directly.

        # First beta: hosts keyed by domain plus per-beta request settings.
        # `first_beta_collection` is passed to the scraper as `platform`.
        with sdk2.parameters.Group(sr_params.FIRST_BETA_INFO_GROUP) as first_beta_group:
            first_beta_hosts = sdk2.parameters.Dict(
                "First beta hosts",
                required=True,
                description="{domain: host} dict format"
            )
            first_beta_user_agent = convert_parameter_to_sdk2(sr_params.FirstBetaUserAgentParameter)
            first_beta_collection = convert_parameter_to_sdk2(sr_params.FirstBetaCollectionParameter)
            first_beta_cgi = convert_parameter_to_sdk2(sr_params.FirstBetaCgiParameter)
            first_beta_dump_all_json = convert_parameter_to_sdk2(sr_params.FirstBetaDumpAllJson)
            first_beta_json_to_standard_format = convert_parameter_to_sdk2(sr_params.FirstBetaJsonToStandardFormat)

        # Second beta: must use the same domain keys as the first beta
        # (on_execute verifies this before building requests).
        with sdk2.parameters.Group(sr_params.SECOND_BETA_INFO_GROUP) as second_beta_group:
            second_beta_hosts = sdk2.parameters.Dict(
                "Second beta hosts",
                required=True,
                description="{domain: host} dict format"
            )
            second_beta_user_agent = convert_parameter_to_sdk2(sr_params.SecondBetaUserAgentParameter)
            second_beta_collection = convert_parameter_to_sdk2(sr_params.SecondBetaCollectionParameter)
            second_beta_cgi = convert_parameter_to_sdk2(sr_params.SecondBetaCgiParameter)
            second_beta_dump_all_json = convert_parameter_to_sdk2(sr_params.SecondBetaDumpAllJson)
            second_beta_json_to_standard_format = convert_parameter_to_sdk2(sr_params.SecondBetaJsonToStandardFormat)

        # Queries source: either a single resource shared by all domains, or
        # (when `use_dict_resources` is set) one resource id per domain.
        with sdk2.parameters.Group(sr_params.QUERIES_INFO_GROUP) as queries_group:
            requests_resource = convert_parameter_to_sdk2(sr_params.RequestsResource)
            use_dict_resources = sdk2.parameters.Bool("Use requests from dictionary", default=False, required=False)
            requests_resources = sdk2.parameters.Dict(
                "Queries for different domains",
                sdk2.parameters.Integer,
                required=False,
                # Values are integer resource ids (resolved via sdk2.Resource[id]
                # in on_execute), not hosts.
                description="{domain: resource id} dict format"
            )

        with sdk2.parameters.Group(sr_params.YT_INFO_GROUP) as yt_info_group:
            output_table = convert_parameter_to_sdk2(sr_params.OutputTableParameter)
            yt_server = convert_parameter_to_sdk2(sr_params.YtServerParameter)
            yt_user = convert_parameter_to_sdk2(sr_params.YtUserParameter)
            parser_binary = convert_parameter_to_sdk2(sr_params.ParserBinaryParameter)

        # NOTE(review): within this file `filter_cgi` is only read by
        # `_filter_cgi`, which `on_execute` does not call - confirm whether the
        # flag is expected to affect the generated requests.
        filter_cgi = convert_parameter_to_sdk2(sr_params.FilterCgi)
        with sdk2.parameters.Group(sr_params.SCRAPER_OVER_YT_GROUP) as scraper_over_yt_group:
            scraper_over_yt_pool = convert_parameter_to_sdk2(sr_params.ScraperOverYtPoolParameter)
            use_soy_api = sdk2.parameters.Bool("Use soy api instead of YT operations", default=True)

        with sdk2.parameters.Group(sr_params.PERSONAL_UIDS_GROUP) as personal_uids_group:
            use_personal_uids = convert_parameter_to_sdk2(ra2.UsePersonalUidsParameter)
            # The uids resource is declared under the `use_personal_uids == True` branch.
            with use_personal_uids.value[True]:
                personal_uids_resource = convert_parameter_to_sdk2(ra2.PersonalUidsResourceParameter)

        # Waiting policy: `active_waiting_duration` bounds the in-process
        # polling loop; afterwards the task sleeps `passive_waiting_duration`
        # between at most `passive_waiting_tries` status checks.
        with sdk2.parameters.Group(sr_params.SCRAPER_WAITING_GROUP) as scraper_waiting_group:
            active_waiting_duration = convert_parameter_to_sdk2(sr_params.ScraperActiveWaitingTime)
            passive_waiting_tries = convert_parameter_to_sdk2(sr_params.ScraperPassiveWaitingCount)
            passive_waiting_duration = convert_parameter_to_sdk2(sr_params.ScraperPassiveWaitingTime)

    class Context(sdk2.Task.Context):
        # Guid of the SOY batch launched by this task. Stays None until
        # on_execute generates one right before launching the batch; it is
        # later used to poll the batch status and to abort the batch.
        soy_job_id = None

    @staticmethod
    def _sync_resource(resource):
        """Wrap ``resource`` into ``sdk2.ResourceData`` and return its ``path``."""
        return sdk2.ResourceData(resource).path

    def _load_queries(self, resource):
        """
        Read queries from the given resource.

        Every line of the resource file is decoded as UTF-8, stripped and split
        by tabs, so the result is a list of lists of query fields. The task is
        failed via ``eh.verify`` when the synced path is not a regular file.
        """
        path = self._sync_resource(resource)
        eh.verify(
            path.is_file(),
            "Cannot get queries from dependent task: {}".format(resource.id)
        )
        return [
            raw_line.decode('utf-8').strip().split("\t")
            for raw_line in fu.read_line_by_line(str(path))
        ]

    def _should_personalize(self):
        """Return the value of the ``use_personal_uids`` task parameter."""
        personalize = self.Parameters.use_personal_uids
        return personalize

    def _get_mr_params(self):
        """
        Prepare the YT/SOY environment and return the YT-related settings.

        Syncs the last released ``MAPREDUCE_YT_EXECUTABLE`` resource and exports
        ``YT_TOKEN`` and ``SOY_TOKEN`` (read from the ``SEARCH-RELEASERS`` vault)
        into ``os.environ``.

        Returns a ``(server, user, table, mr_yt_binary_path)`` tuple, where the
        last item is the synced binary path converted to ``str``.
        """
        params = self.Parameters
        released_resource_id, _ = resource_selectors.by_last_released_task(
            resource_types.MAPREDUCE_YT_EXECUTABLE
        )
        binary_path = self._sync_resource(sdk2.Resource[released_resource_id])
        # Tokens are exported in the same order as before: YT first, then SOY.
        for env_name, vault_key in (("YT_TOKEN", "yt_token"), ("SOY_TOKEN", "ra2_soy_token")):
            os.environ[env_name] = sdk2.Vault.data("SEARCH-RELEASERS", vault_key)
        return params.yt_server, params.yt_user, params.output_table, str(binary_path)

    def _filter_cgi(self, cgi):
        """
        Drop CGI parameters whose name is listed in ``sr_params.FORBIDDEN_CGI``.

        The string is returned untouched when the ``filter_cgi`` task parameter
        is not set; otherwise the filtered string is logged and returned.
        """
        if not self.Parameters.filter_cgi:
            return cgi

        allowed = []
        for chunk in cgi.split("&"):
            # Parameter name is everything before the first '='.
            name = chunk.partition('=')[0]
            if name not in sr_params.FORBIDDEN_CGI:
                allowed.append(chunk)
        filtered = "&".join(allowed)
        logging.info("Cgi was filtered to %s", filtered)
        return filtered

    def _get_uids(self):
        """
        Read personal uids, one per line, from ``personal_uids_resource``.

        Fails the task via ``eh.verify`` when the resource is not set, when the
        synced path is not a regular file, or when the file has no lines.
        """
        resource = self.Parameters.personal_uids_resource
        eh.verify(resource, "Cant get uids resource")

        path = self._sync_resource(resource)
        eh.verify(
            path.is_file(),
            "Cannot get uids from resource: {}".format(resource.id)
        )

        uid_lines = path.read_text().splitlines()
        eh.verify(
            len(uid_lines),
            "Fetched empty uids resource: {}".format(resource.id)
        )
        return uid_lines

    def _get_job_id(self):
        """Return the SOY batch guid stored in the task context (``None`` if not set)."""
        context = self.Context
        return context.soy_job_id

    def _get_batch_status_and_table(self, server):
        """
        Query the current status of the SOY batch stored in the task context.

        Returns a ``(batch_status, output_table)`` pair. With ``use_soy_api``
        the table is taken from the API response (``output_path``) and is
        ``None`` until the batch is completed. Without it the table path is
        always built from the operation id returned by the status call.
        """
        job_id = self.Context.soy_job_id

        if not self.Parameters.use_soy_api:
            batch_status, op_id = ScraperOverYtWrapper.get_batch_status(
                server,
                job_id,
            )
            archive_table_template = (
                "//home/search-runtime/robot-scraperoveryt/"
                "scraper_over_yt/scheduler/operations_archive/{}/output_table"
            )
            return batch_status, archive_table_template.format(op_id)

        batch_status, api_response = ScraperOverYtWrapper.get_batch_status_via_api(
            server,
            job_id,
        )
        if batch_status == BatchStatus.COMPLETED:
            return batch_status, api_response["output_path"]
        return batch_status, None

    def _abort_batch(self):
        """
        Best-effort abort of the SOY batch launched by this task.

        The method is called from the task termination hooks right before the
        base-class handler. Any error coming from the SOY API (or an unexpected
        response shape) is logged and swallowed, so a failed abort can no longer
        prevent the base-class handler from running.

        Does nothing except logging when no batch id is stored in the context.
        """
        job_id = self._get_job_id()
        if not job_id:
            logging.info("Can't find soy job id")
            return

        logging.info("Task broken, aborting soy batch with id %s", job_id)
        try:
            # SOY_TOKEN is exported by _get_mr_params(); it may be absent from
            # the environment if the hook runs before on_execute got that far.
            soy_api = SoyApi(token=os.environ.get('SOY_TOKEN'), server=self.Parameters.yt_server)
            res = soy_api.abort(job_id)
            logging.info("Soy batch aborted with status %s", res.get("status"))
        except Exception:
            # Cleanup must never mask the original reason the task is stopping.
            logging.exception("Failed to abort soy batch with id %s", job_id)

    def _load_queries_for_domains(self):
        """Return a ``{domain: queries_list}`` dict according to the queries parameters."""
        if self.Parameters.use_dict_resources:
            return {
                domain: self._load_queries(sdk2.Resource[res_id])
                for domain, res_id in self.Parameters.requests_resources.items()
            }
        # Single resource: the same queries list is shared by every first-beta domain.
        queries_list = self._load_queries(self.Parameters.requests_resource)
        return {domain: queries_list for domain in self.Parameters.first_beta_hosts}

    def _build_requests(self, queries_for_domain, uids):
        """Build the request list for all regions, interleaving first/second beta requests."""
        params = self.Parameters
        reqinfo_cgi = "&reqinfo=ra2-scrape_requests-{}".format(self.id)
        # NOTE(review): self._filter_cgi() is not applied to these CGI strings
        # (same as before this refactoring) - confirm whether the `filter_cgi`
        # parameter is expected to affect them.
        first_cgi = (params.first_beta_cgi or "") + reqinfo_cgi
        second_cgi = (params.second_beta_cgi or "") + reqinfo_cgi
        if params.first_beta_dump_all_json:
            first_cgi += "&json_dump=1"
        if params.second_beta_dump_all_json:
            second_cgi += "&json_dump=1"

        second_hosts = params.second_beta_hosts
        all_requests = []
        for region_prefix, first_host in params.first_beta_hosts.items():
            eh.verify(region_prefix in second_hosts, "Region keys must be same for both betas")
            # Explicit failure instead of a bare KeyError when the per-domain
            # queries dict does not cover a beta region.
            eh.verify(
                region_prefix in queries_for_domain,
                "No queries provided for region {}".format(region_prefix),
            )
            queries = queries_for_domain[region_prefix]
            first_requests = ScraperOverYtWrapper.get_queries(
                queries_list=queries,
                host=first_host,
                platform=params.first_beta_collection,
                cgi=first_cgi,
                id_prefix="first",
                region_prefix=region_prefix,
                uids=uids,
                user_agent=params.first_beta_user_agent,
            )
            second_requests = ScraperOverYtWrapper.get_queries(
                queries_list=queries,
                host=second_hosts[region_prefix],
                platform=params.second_beta_collection,
                cgi=second_cgi,
                id_prefix="second",
                region_prefix=region_prefix,
                uids=uids,
                user_agent=params.second_beta_user_agent,
            )
            # Interleave: even positions hold first-beta requests, odd ones
            # second-beta requests. Slice assignment raises ValueError if a
            # beta produced a different number of requests than queries.
            region_requests = [None] * (2 * len(queries))
            region_requests[::2] = first_requests
            region_requests[1::2] = second_requests
            all_requests.extend(region_requests)
        return all_requests

    def _collect_if_completed(self, server, table, status, scraper_table):
        """Fail the task on a FAILED batch; collect responses and return True on COMPLETED."""
        eh.verify(status != BatchStatus.FAILED, "Soy batch failed to download")
        if status != BatchStatus.COMPLETED:
            return False
        ScraperOverYtWrapper.collect_responses(
            server=server,
            result_table=table,
            scraper_table=scraper_table,
        )
        return True

    def on_execute(self):
        """
        Launch a SOY batch with requests for both betas and collect its responses.

        The method is re-entered after every ``sdk2.WaitTime`` wake-up, so the
        work is split into memoized stages:

        1. ``creating_soy_batch`` - load queries (and uids when requested), build
           the requests, store the generated batch guid in the context and
           launch the scraper. Queries are loaded inside this stage only, so
           wake-ups do not re-read the resources.
        2. ``active_waiting`` - poll the batch status every 60 seconds for up to
           ``active_waiting_duration`` seconds.
        3. ``waiting_try`` - up to ``passive_waiting_tries`` status checks
           separated by ``passive_waiting_duration`` sleeps.

        The task is failed via ``eh`` when the batch fails or is not completed
        within the configured waiting policy.
        """
        eh.verify(
            len(self.Parameters.first_beta_hosts) == len(self.Parameters.second_beta_hosts),
            "Hosts amount must be same for both betas",
        )

        # Also exports YT_TOKEN / SOY_TOKEN, hence it runs on every entry.
        server, _, table, _ = self._get_mr_params()

        with self.memoize_stage.creating_soy_batch:
            queries_for_domain = self._load_queries_for_domains()
            uids = self._get_uids() if self._should_personalize() else None
            all_requests = self._build_requests(queries_for_domain, uids)
            self.Context.total_requests_count = sum(
                len(queries) for queries in queries_for_domain.values()
            )
            logging.info('Responses are created, now launching soy batch...')
            guid = str(uuid.uuid4())
            logging.info('Generated guid for soy job %s', guid)
            # Persist the guid before launching so the batch can be aborted
            # from the termination hooks even if the launch is interrupted.
            self.Context.soy_job_id = guid
            self.Context.save()
            ScraperOverYtWrapper.launch_scraper(
                all_requests,
                server=server,
                input_table=table,
                guid=guid,
                pool=self.Parameters.scraper_over_yt_pool,
                use_api=self.Parameters.use_soy_api,
                launch_description="SCRAPE_REQUESTS_2 #{task_id}. {task_description}".format(
                    task_id=self.id,
                    task_description=self.Parameters.description,
                ),
            )

        with self.memoize_stage.active_waiting:
            waiting_started = time.time()
            waiting_time = 0
            while waiting_time <= self.Parameters.active_waiting_duration:
                status, scraper_table = self._get_batch_status_and_table(server)
                logging.info(
                    "%s seconds of active waiting, get status %s",
                    waiting_time,
                    BatchStatus.STATUS_NAME[status],
                )
                if self._collect_if_completed(server, table, status, scraper_table):
                    return
                time.sleep(60)
                waiting_time = time.time() - waiting_started
            if not self.Parameters.passive_waiting_tries:
                eh.fail("Soy batch timeout")
            else:
                self.set_info("Task went in passive waiting")
                raise sdk2.WaitTime(self.Parameters.passive_waiting_duration)

        with self.memoize_stage.waiting_try(self.Parameters.passive_waiting_tries) as st:
            status, scraper_table = self._get_batch_status_and_table(server)
            logging.info(
                "%s try of passive waiting, get status %s",
                st.runs,
                BatchStatus.STATUS_NAME[status]
            )
            self.set_info("{} try of passive waiting, get status {}".format(
                st.runs,
                BatchStatus.STATUS_NAME[status],
            ))
            if self._collect_if_completed(server, table, status, scraper_table):
                return
            raise sdk2.WaitTime(self.Parameters.passive_waiting_duration)

        # Reached only when all passive tries are exhausted.
        eh.fail("SOY batch timeout")

    def on_break(self, *args, **kwargs):
        """Abort the SOY batch, then delegate to the base-class break handler.

        Uses cooperative ``super()`` like the other termination hooks of this
        class, so the method resolution order is honoured for subclasses.
        """
        self._abort_batch()
        super(ScrapeRequests2, self).on_break(*args, **kwargs)

    def on_failure(self, prev_status):
        """Abort the SOY batch, then delegate to the base-class failure handler."""
        self._abort_batch()
        parent = super(ScrapeRequests2, self)
        parent.on_failure(prev_status)

    def on_timeout(self, prev_status):
        """Abort the SOY batch, then delegate to the base-class timeout handler."""
        self._abort_batch()
        parent = super(ScrapeRequests2, self)
        parent.on_timeout(prev_status)

    def on_terminate(self):
        """Abort the SOY batch, then delegate to the base-class terminate handler."""
        self._abort_batch()
        parent = super(ScrapeRequests2, self)
        parent.on_terminate()

    @sdk2.footer()
    def batch_status(self):
        """
        Task footer with the current SOY batch status.

        Returns the ``key: value`` pairs of the SOY status response joined with
        ``<br>``, or a placeholder text when no batch id is stored in the context.
        """
        job_id = self._get_job_id()
        if not job_id:
            return "No SOY batch found"

        status_info = SoyApi(server=self.Parameters.yt_server).status(job_id)
        # Values are stringified in place, exactly as the response was treated before.
        for field in list(status_info):
            status_info[field] = str(status_info[field])
        return "<br>".join(
            "{}: {}".format(field, text) for field, text in status_info.items()
        )
