# -*- coding: utf-8 -*-

import os
import tarfile

from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.parameters import SandboxBoolParameter
from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.projects import resource_types
from sandbox.projects.common.utils import check_if_tasks_are_ok

from sandbox.projects.common import apihelpers
from sandbox.projects.common import utils

from sandbox.projects.common.pumpkin import parameters as pumpkin_params


# Values of the `domain` task parameter (used in DomainParameter.choices).
RU_DOMAIN = "ru"
TR_DOMAIN = "tr"

# Keys under which the task keeps intermediate state in its context (`ctx`):
# the id of the collector subtask, the id of the test task and the id of the
# urls archive resource.
_COLLECTOR_TASK_ID = "collector_task_id"
_TEST_TASK_ID = "test_task_id"
_URLS_RESOURCE_ID = 'urls_resource_id'


class HostUrlParameter(SandboxStringParameter):
    """Required string parameter holding the host url (context key ``host_url``)."""

    # Identification and UI label.
    name = 'host_url'
    description = 'Host url'

    # Value constraints.
    required = True
    default_value = 'https://yandex.ru'


class DomainParameter(SandboxStringParameter):
    """Required string parameter selecting the domain (context key ``domain``)."""

    # Identification and UI label.
    name = 'domain'
    description = 'Domain'

    # Value constraints; the default is the RU domain.
    required = True
    default_value = RU_DOMAIN

    # Each entry pairs a host name with a domain constant
    # (presumably (label, value) -- confirm against the parameter SDK).
    choices = [('yandex.ru', RU_DOMAIN), ('yandex.com.tr', TR_DOMAIN)]


class RewriteHostUrlParameter(SandboxBoolParameter):
    """Boolean switch (context key ``rewrite_host_addr``), off by default.

    The "true" entry of ``sub_fields`` names the host url parameter.
    """

    # Identification and UI label.
    name = 'rewrite_host_addr'
    description = 'Need rewrite host addr'

    # Parameters listed per switch value
    # (presumably the fields shown for that value -- confirm against the SDK).
    sub_fields = {"true": [HostUrlParameter.name]}

    default_value = False


class SeriesNameParameter(SandboxStringParameter):
    """Required string parameter with the series name (context key ``series_name``)."""

    # Identification and UI label.
    name = 'series_name'
    description = 'Name of the series'

    # Value constraints.
    required = True
    default_value = 'generalized'


class UrlsVolumesParameter(SandboxIntegerParameter):
    """Required integer parameter: number of output url volumes (``urls_volumes``)."""

    # Identification and UI label.
    name = 'urls_volumes'
    description = 'Number of output url resource volumes'

    # Value constraints.
    required = True
    default_value = 5


class TopQueriesParameter(SandboxIntegerParameter):
    """Optional integer parameter: number of queries to use (``top_queries``)."""

    # Not mandatory and no explicit default is declared here.
    required = False

    # Identification and UI label.
    name = 'top_queries'
    description = 'Number of queries to collect SERPs for'


class DevelopmentModeParameter(SandboxBoolParameter):
    """Boolean development-mode flag (context key ``development_mode``), on by default."""

    default_value = True

    # Identification and UI label.
    name = 'development_mode'
    description = 'Development mode'


class RunTestParameter(SandboxBoolParameter):
    """Boolean flag requesting a test task run (context key ``run_test``), off by default."""

    default_value = False

    # Identification and UI label.
    name = 'run_test'
    description = 'Run test task'


class TestModeParameter(SandboxBoolParameter):
    """Boolean test-mode build flag (context key ``build_test_mode``), off by default."""

    default_value = False

    # Identification and UI label.
    name = 'build_test_mode'
    description = 'Build in test mode'


class ConnectionsLimitParameter(SandboxIntegerParameter):
    """Optional integer parameter limiting simultaneous downloads (``connections_limit``)."""

    # Not mandatory and no explicit default is declared here.
    required = False

    # Identification and UI label.
    name = 'connections_limit'
    description = 'Limit of simultaneous downloads'


class BaseBuildPumpkinSerpTask(SandboxTask):
    """Base task that collects SERPs for the queries of the last production index.

    The task finds the latest index resource carrying the production
    attribute, turns its ``topqueries.txt`` into an archive of url volumes,
    starts a SerpCollector subtask on that archive and, when requested, runs
    a test task on the collected archive.

    Subclasses must implement the ``_make_*`` hooks that raise
    ``NotImplementedError`` here.
    """

    input_parameters = (
        DomainParameter,
        RewriteHostUrlParameter,
        HostUrlParameter,
        SeriesNameParameter,
        UrlsVolumesParameter,
        TopQueriesParameter,
        DevelopmentModeParameter,
        RunTestParameter,
        pumpkin_params.ZipOnly,
        pumpkin_params.AddCGIParams,
        TestModeParameter,
        ConnectionsLimitParameter,
        pumpkin_params.CheckThumbs,
        pumpkin_params.SerpHeaderDelimeterParameter,
    )

    # Resource types of the index and of its archive; class attributes so that
    # they can be replaced in subclasses.
    INDEX_RESOURCE_TYPE = resource_types.PUMPKIN_INDEX
    ARCHIVE_RESOURCE_TYPE = resource_types.PUMPKIN_INDEX_ARCHIVE

    def do_before_execute_on_test_server(self):
        """Force a small workload: 50 top queries split into 5 url volumes."""
        self.ctx[TopQueriesParameter.name] = 50
        self.ctx[UrlsVolumesParameter.name] = 5

    def on_execute(self):
        """Run the collector subtask and, if requested, the test task.

        Ids of the created subtasks are stored in ``self.ctx`` and checked
        before creation, so the steps that already ran are not repeated when
        this method is entered again with the same context.

        Raises:
            SandboxTaskFailureError: when no production index resource is
                found or the collector produced no SERP archive.
        """
        # `.get` keeps contexts that lack the optional flag working.
        if self.ctx.get(TestModeParameter.name):
            self.do_before_execute_on_test_server()

        if _COLLECTOR_TASK_ID not in self.ctx:
            domain = self.ctx[DomainParameter.name]
            production_attributes = self._make_production_attributes(domain)
            index_resource = apihelpers.get_last_resource_with_attribute(
                self.INDEX_RESOURCE_TYPE, production_attributes[0], production_attributes[1])
            if not index_resource:
                raise SandboxTaskFailureError("Unable to find last production index resource")

            index_path = self.sync_resource(index_resource.id)

            urls_archive_name = "archive_urls_{0}.tar".format(self.ctx[SeriesNameParameter.name])
            urls_archive_resource = self.create_resource(
                self.descr, urls_archive_name, resource_types.SERP_COLLECTOR_URLS)
            self.ctx[_URLS_RESOURCE_ID] = urls_archive_resource.id

            self._make_archive(urls_archive_resource, os.path.join(index_path, "topqueries.txt"))
            self.ctx[_COLLECTOR_TASK_ID] = self._serp_collector(urls_archive_resource, index_resource)

        collector_task_id = self.ctx[_COLLECTOR_TASK_ID]
        collector_task = channel.sandbox.get_task(collector_task_id)
        if not collector_task.is_done():
            self.wait_task_completed(collector_task_id)

        check_if_tasks_are_ok([collector_task_id])

        if not self.ctx.get(RunTestParameter.name):
            return

        serps_archive_resources = apihelpers.list_task_resources(
            collector_task_id, resource_type=resource_types.SERP_COLLECTOR_LOOT)
        if not serps_archive_resources:
            raise SandboxTaskFailureError("No serps archive was created")
        # Only the first collected archive is tested.
        serp_archive_to_test_id = serps_archive_resources[0].id

        if _TEST_TASK_ID not in self.ctx:
            self.ctx[_TEST_TASK_ID] = self._make_test_task(
                self.ctx[_URLS_RESOURCE_ID], serp_archive_to_test_id)

        test_task_id = self.ctx[_TEST_TASK_ID]
        test_task = channel.sandbox.get_task(test_task_id)
        if not utils.is_task_stop_executing(test_task):
            self.wait_all_tasks_stop_executing([test_task])

        # Record the test verdict on the tested archive; any other status
        # leaves the "status" attribute untouched.
        if test_task.new_status == self.Status.FAILURE:
            channel.sandbox.set_resource_attribute(serp_archive_to_test_id, "status", "TEST_FAILED")
        elif test_task.new_status == self.Status.SUCCESS:
            channel.sandbox.set_resource_attribute(serp_archive_to_test_id, "status", "TEST_OK")

        check_if_tasks_are_ok([test_task_id])

    def _make_archive(self, urls_archive, queries_path):
        """Write SERP urls for the top queries into a tar of url volumes.

        Every line of ``queries_path`` is stripped and split on tabs; the
        second field is taken as the query. Line number ``i`` goes to volume
        ``i % volume_count`` and contributes two urls: the regular one and the
        "without header" one.

        Args:
            urls_archive: resource whose ``path`` receives the tar archive;
                it is marked ready once the archive is written.
            queries_path: path to the tab-separated queries file.

        Raises:
            SandboxTaskFailureError: when the number of volumes is not positive.
        """
        volume_count = self.ctx[UrlsVolumesParameter.name]
        # Optional parameter: an absent or zero value means "no limit".
        max_queries = self.ctx.get(TopQueriesParameter.name)
        domain = self.ctx[DomainParameter.name]

        # Without this check a zero count ends in ZeroDivisionError and a
        # negative one in IndexError, neither of which names the real cause.
        if not volume_count or volume_count < 1:
            raise SandboxTaskFailureError(
                "Number of url volumes must be positive, got {0!r}".format(volume_count))

        make_folder("urls_archive", delete_content=True)
        out_files = []
        try:
            # Opened one by one inside the try block so that every handle that
            # was opened is closed even if a later step raises.
            for volume_number in range(volume_count):
                out_files.append(open("urls_archive/{0}".format(volume_number), "w"))

            with open(queries_path) as input_file:
                for number, line in enumerate(input_file):
                    if max_queries and number >= max_queries:
                        break
                    query = line.strip().split('\t')[1]
                    volume = out_files[number % volume_count]
                    volume.write(self._make_serp_url(domain, query) + "\n")
                    volume.write(self._make_serp_url_without_header(domain, query) + "\n")
        finally:
            for out_file in out_files:
                out_file.close()

        with tarfile.open(urls_archive.path, "w") as tar:
            # Stored under "." so the volumes sit at the archive root.
            tar.add("urls_archive", ".")

        self.mark_resource_ready(urls_archive)

    def _make_production_attributes(self, domain):
        """Return the production attribute for ``domain``; subclasses must override.

        The result is indexed by its callers: element 0 is the attribute name
        and element 1 the attribute value.
        """
        raise NotImplementedError()

    def _make_serp_url(self, domain, text):
        """Return the SERP url for query ``text`` on ``domain``; subclasses must override."""
        raise NotImplementedError()

    def _make_serp_url_without_header(self, domain, text):
        """Return the "without header" SERP url; defaults to the regular SERP url."""
        return self._make_serp_url(domain, text)

    def _make_main_page_url(self, domain):
        """Return the main page url for ``domain``; subclasses must override."""
        raise NotImplementedError()

    def _make_notfound_page_url(self, domain):
        """Return the not-found page url for ``domain``; subclasses must override."""
        raise NotImplementedError()

    def _make_notfound_page_url_without_header(self, domain):
        """Return the "without header" not-found url; defaults to the regular one."""
        return self._make_notfound_page_url(domain)

    def _make_test_task(self, urls_archive, serps_archive):
        """Create the test task and return its id; subclasses must override.

        ``on_execute`` passes the ids of the urls archive resource and of the
        collected SERP archive resource.
        """
        raise NotImplementedError()

    def _make_url(self, domain):
        """Return the configured host url when rewriting is enabled, else None.

        ``domain`` is accepted but not used by this implementation.
        """
        if self.ctx[RewriteHostUrlParameter.name]:
            return self.ctx[HostUrlParameter.name]
        return None

    def _serp_collector(self, urls_archive_resource, index_resource):
        """Create the SerpCollector subtask and return its id.

        Args:
            urls_archive_resource: resource with the url volumes archive.
            index_resource: index resource the urls were built from; its id
                and the id of the archive resource produced by the same task
                are passed on as resource attributes.
        """
        # Imported at call time rather than at module level.
        from projects.SerpCollector import (
            SerpCollector,
            UrlsResource,
            SeriesName,
            ResourceAttrsParam,
            MainPageUrl,
            NotFoundPageUrl,
            NotFoundPageUrlWithoutHeader,
            ConnsLimit,
        )

        domain = self.ctx[DomainParameter.name]
        # Read once and reused for both the attributes and DisregardTime.
        development_mode = self.ctx[DevelopmentModeParameter.name]

        index_archive_id = apihelpers.get_task_resource_id(index_resource.task_id, self.ARCHIVE_RESOURCE_TYPE)
        attributes = {
            'index_resource_id': index_resource.id,
            'index_archive_id': index_archive_id
        }
        if not development_mode:
            production_attributes = self._make_production_attributes(domain)
            attributes[production_attributes[0]] = production_attributes[1]
            attributes['production_mode'] = '1'

        # Sorted so that the attribute string does not depend on dict order;
        # `items()` works on both Python 2 and Python 3.
        resource_attrs = ','.join(
            "{0}={1}".format(key, value) for key, value in sorted(attributes.items()))

        sub_ctx = {
            UrlsResource.name: urls_archive_resource.id,
            MainPageUrl.name: self._make_main_page_url(domain),
            NotFoundPageUrl.name: self._make_notfound_page_url(domain),
            NotFoundPageUrlWithoutHeader.name: self._make_notfound_page_url_without_header(domain),
            SeriesName.name: self.ctx[SeriesNameParameter.name],
            ResourceAttrsParam.name: resource_attrs,
            pumpkin_params.DisregardTime.name: development_mode,
            pumpkin_params.ZipOnly.name: self.ctx[pumpkin_params.ZipOnly.name],
            pumpkin_params.AddCGIParams.name: self.ctx[pumpkin_params.AddCGIParams.name],
            pumpkin_params.CheckThumbs.name: utils.get_or_default(self.ctx, pumpkin_params.CheckThumbs),
            pumpkin_params.SerpHeaderDelimeterParameter.name: utils.get_or_default(
                self.ctx, pumpkin_params.SerpHeaderDelimeterParameter),
        }

        # Optional parameter: forwarded only when set to a non-zero value.
        connections_limit = self.ctx.get(ConnectionsLimitParameter.name)
        if connections_limit:
            sub_ctx[ConnsLimit.name] = connections_limit

        sub_task = self.create_subtask(
            task_type=SerpCollector.type,
            input_parameters=sub_ctx,
            description=self.descr,
            execution_space=self.execution_space
        )
        return sub_task.id
