# -*- coding: utf-8 -*-

import logging
import os

from sandbox import sdk2

from sandbox.projects.common.nanny import nanny

from sandbox.projects.ydo import (
    ydo_releasers,
    execute_cmd,
    YdoDocDocGeo2WorkersCountDump,
    YdoDocDocRubricsDump,
    YdoDocDocSitemap,
    YdoGeo2WorkersCountDump,
    YdoMainSitemap,
    YdoRubricsDump,
    YdoSeoNamesJson,
    YdoSpravGeo2WorkersCountDump,
    YdoSpravRubricsDump,
    YdoSpravSitemap,
    YdoUnpublishedSitemap,
    YdoWorkersGroups
)

from sandbox.projects.common.geosearch.utils import unpack_file
from sandbox.projects.geobase.Geodata6BinStable.resource import GEODATA6BIN_STABLE
from sandbox.projects.resource_types import GEODATATZDATA_STABLE
from sandbox.projects.ydo.resource_types import YdoDssmModel

from sandbox.sandboxsdk import environments


# Resource types
class YdoAggregatorExecutable(sdk2.Resource):
    """Executable binary accepted by the ``indexer`` parameter of YdoAggregator."""

    # The list of users allowed to release comes from sandbox.projects.ydo.
    releasers = ydo_releasers
    releasable = True
    # The resource is a runnable file, not plain data.
    executable = True


class YdoAggregator(nanny.ReleaseToNannyTask2, sdk2.Task):
    """Run the indexer executable against a YT backup and publish its dumps.

    Depending on the selected mode and options the task declares up to four
    output resources (rubrics dump, geoid-to-workers-count dump, workers
    groups and sitemap), runs the indexer once and then marks the declared
    resources as ready.  The indexer is presumably the one writing the
    declared files into the working directory -- confirm against the binary.
    """

    class Parameters(sdk2.Parameters):
        # NOTE: declaration order is kept as is; it may define the order in
        # which the parameters are shown to the user.
        indexer = sdk2.parameters.Resource(
            'indexer executable',
            resource_type=YdoAggregatorExecutable,
            required=True
        )

        yt_backup_dir = sdk2.parameters.String('Path to database backup on YT', required=True)

        # When empty, no sitemap options are passed and no sitemap resource is created.
        sitemap_bucket = sdk2.parameters.String('S3 bucket for sitemap')

        reactions_data = sdk2.parameters.String('Path to table with reactions data', required=False)

        sprav_dir = sdk2.parameters.String('Path to sprav directory', required=False)

        sprav_to_workers_path = sdk2.parameters.String('Path to table with sprav_to_workers table', required=False)

        price_stats_path = sdk2.parameters.String('Path to table with average price stats', required=False)

        feedback_stats_path = sdk2.parameters.String('Path to table with average rating and review stats', required=False)

        geobase_snapshot = sdk2.parameters.Resource(
            'Geobase 6 snapshot',
            resource_type=GEODATA6BIN_STABLE,
            required=True
        )

        # Archive that is unpacked into the working directory before the run.
        geodata_tz_snapshot = sdk2.parameters.Resource(
            'Geodata tz',
            resource_type=GEODATATZDATA_STABLE,
            required=True
        )

        dssm_model = sdk2.parameters.Resource(
            'DSSM model',
            resource_type=YdoDssmModel,
            required=False
        )

        seo_names_file = sdk2.parameters.Resource(
            'SEO names',
            resource_type=YdoSeoNamesJson,
            required=False
        )

        # Mode switches.  When several are set, the precedence is
        # docdoc_only > sprav_only > removed_workers_only (see _pick_by_mode).
        sprav_only = sdk2.parameters.Bool('Sprav only mode', default=False)

        docdoc_only = sdk2.parameters.Bool('DocDoc only mode', default=False)

        removed_workers_only = sdk2.parameters.Bool('Removed workers only mode', default=False)

        build_workers_groups = sdk2.parameters.Bool('With workers groups', default=False)

        # When enabled, the two path parameters below are passed to the indexer as is.
        update_addresses_with_sprav = sdk2.parameters.Bool('Change worker\'s address to org or chain address', default=False)

        workers_to_sprav_companies_result_path = sdk2.parameters.String('Path to result of workers_to_sprav_companies querry', required=False)

        altay_company_to_chain_path = sdk2.parameters.String('Path to sprav company_to_chain table', required=False)

        # Split on whitespace and appended to the command line verbatim.
        additional_cmd_options = sdk2.parameters.String('Additional command line options')

        # Falls back to 'INFO' when empty.
        yt_log_level = sdk2.parameters.String('Yt log level', required=False)

        filter_far_areas_sprav_linked = sdk2.parameters.Bool('Filter areas far from sprav linked addresses', default=False)

        far_areas_max_distance = sdk2.parameters.Integer('Max distance from addresses for areas of sprav linked workers', default=200000)

        cut_areas_sprav_not_linked = sdk2.parameters.Bool('Cut areas of orgs not linked with sprav', default=False)

        areas_sprav_not_linked_max_distance = sdk2.parameters.Integer('Max distance between areas of not sprav linked orgs', default=200000)

        clear_linked_sprav_online_orgs_addresses = sdk2.parameters.Bool('Remove addresses, areas and main_address from workers linked with sprav online orgs', default=False)

        new_price_stats_table = sdk2.parameters.String('New price stats table', required=False)

        partner_aggregated_prices_table = sdk2.parameters.String('Aggregated prices from partners table', required=False)

    class Requirements(sdk2.Requirements):
        environments = [
            environments.PipEnvironment('yandex-yt'),
        ]
        cores = 1
        disk_space = 10 * 1024

        # NOTE(review): the empty Caches class presumably declares that the
        # task needs no shared caches -- confirm against the sdk2 documentation.
        class Caches(sdk2.Requirements.Caches):
            pass

    def _pick_by_mode(self, docdoc, sprav, removed_workers, default):
        """Return the value matching the active mode switch.

        Precedence is docdoc_only, then sprav_only, then removed_workers_only;
        ``default`` is returned when no mode switch is set.
        """
        params = self.Parameters
        if params.docdoc_only:
            return docdoc
        if params.sprav_only:
            return sprav
        if params.removed_workers_only:
            return removed_workers
        return default

    def _new_resource_data(self, resource_type, description, file_name):
        """Declare an output resource of ``resource_type`` and return its ResourceData.

        Returns None when ``resource_type`` is None, i.e. the output is not
        produced in the current mode.
        """
        if resource_type is None:
            return None
        return sdk2.ResourceData(resource_type(self, description, file_name))

    def _build_indexer_command(self, sitemap_cmd_options):
        """Assemble the indexer command line from the task parameters.

        :param sitemap_cmd_options: already prepared S3/sitemap options
            (possibly empty); inserted right after the mandatory arguments.
        :return: list of command line arguments, executable path first.
        """
        params = self.Parameters

        cmd = [
            str(sdk2.ResourceData(params.indexer).path),
            '--dump_from_yt', params.yt_backup_dir,
            '--dump_rubrics_json',
            '--geobase_file', str(sdk2.ResourceData(params.geobase_snapshot).path),
            # 'zones_bin' is expected in the working directory after the
            # geodata tz archive has been unpacked there.
            '--geodata_tz_location', str(sdk2.Path.cwd().joinpath('zones_bin')),
            '--dump_geoid2workers_count',
        ]
        cmd += sitemap_cmd_options

        if params.additional_cmd_options:
            # Plain whitespace split: quoted arguments containing spaces are
            # not supported here.
            cmd += params.additional_cmd_options.split()
        if params.reactions_data:
            cmd += ['--workers_reactions_data', params.reactions_data]
        if params.sprav_dir:
            cmd += ['--sprav_dir', params.sprav_dir, '--sprav_on_yt']
        if params.sprav_to_workers_path:
            cmd += ['--sprav_to_workers_path', params.sprav_to_workers_path]
        if params.price_stats_path:
            cmd += ['--price_stats_table', params.price_stats_path]
        if params.feedback_stats_path:
            cmd += ['--feedback_stats_table', params.feedback_stats_path]

        if params.sprav_only:
            cmd.append('--sprav_only')
        if params.docdoc_only:
            cmd.append('--docdoc_mode')
        if params.removed_workers_only:
            cmd.append('--removed_workers_only')
        if params.build_workers_groups:
            cmd.append('--dump_workers_groups')

        if params.dssm_model:
            cmd += ['--dssm_model', str(sdk2.ResourceData(params.dssm_model).path)]
        if params.seo_names_file:
            cmd += ['--seo_names_file', str(sdk2.ResourceData(params.seo_names_file).path)]

        if params.update_addresses_with_sprav:
            # Both paths are forwarded without validation, exactly as configured.
            cmd += [
                '--data_for_update_addresses_with_sprav_on_yt',
                '--workers_to_sprav_companies_result_path', params.workers_to_sprav_companies_result_path,
                '--altay_company_to_chain_path', params.altay_company_to_chain_path,
            ]
        if params.filter_far_areas_sprav_linked:
            cmd += [
                '--filter_far_areas_sprav_linked',
                '--far_areas_max_distance', str(params.far_areas_max_distance),
            ]
        if params.cut_areas_sprav_not_linked:
            cmd += [
                '--cut_areas_sprav_not_linked',
                '--areas_sprav_not_linked_max_distance',
                str(params.areas_sprav_not_linked_max_distance),
            ]
        if params.clear_linked_sprav_online_orgs_addresses:
            cmd.append('--clear_linked_sprav_online_orgs_addresses')
        if params.new_price_stats_table:
            cmd += ['--new_price_stats_table', params.new_price_stats_table]
        if params.partner_aggregated_prices_table:
            cmd += ['--partner_aggregated_prices_table', params.partner_aggregated_prices_table]

        return cmd

    def create_aggregates(self):
        """Declare the output resources, run the indexer and mark outputs ready.

        Requires ``self.yt_host`` to be set beforehand (on_execute does it);
        it is used to build the YT_PROXY value.  The YT token and, when a
        sitemap bucket is configured, the S3 credentials are read from the
        Vault of the task owner.
        """
        params = self.Parameters

        # Environment for the indexer process: the current one plus YT settings.
        env = os.environ.copy()
        env['YT_TOKEN'] = sdk2.Vault.data(self.owner, 'yt-token')
        env['YT_PROXY'] = '{}.yt.yandex.net'.format(self.yt_host)
        env['YT_LOG_LEVEL'] = params.yt_log_level if params.yt_log_level else 'INFO'

        unpack_file(str(sdk2.ResourceData(params.geodata_tz_snapshot).path), str(sdk2.Path.cwd()))

        # Rubrics and geoid2workers dumps are not published in the
        # removed-workers-only mode, hence None in that slot.
        rubrics_resource = self._new_resource_data(
            self._pick_by_mode(YdoDocDocRubricsDump, YdoSpravRubricsDump, None, YdoRubricsDump),
            'Ydo rubrics dump',
            'rubrics.json',
        )

        workers_groups_resource = self._new_resource_data(
            YdoWorkersGroups if params.build_workers_groups else None,
            'Ydo workers groups',
            'workers_groups.ysonl',
        )

        geoid2workers_count_resource = self._new_resource_data(
            self._pick_by_mode(
                YdoDocDocGeo2WorkersCountDump,
                YdoSpravGeo2WorkersCountDump,
                None,
                YdoGeo2WorkersCountDump,
            ),
            'Ydo geoid to workers count dump',
            'geoid2workers_count.json',
        )

        logging.info('Working...')

        sitemap_cmd_options = []
        sitemap_resource = None
        if params.sitemap_bucket:
            sitemap_cmd_options = [
                '--s3_bucket_name', params.sitemap_bucket,
                '--s3_access_key_id', sdk2.Vault.data(self.owner, 's3_ydo_key_id'),
                '--s3_secret_access_key', sdk2.Vault.data(self.owner, 's3_ydo_secret_key'),
            ]
            sitemap_resource = self._new_resource_data(
                self._pick_by_mode(YdoDocDocSitemap, YdoSpravSitemap, YdoUnpublishedSitemap, YdoMainSitemap),
                'Ydo sitemap',
                'sitemap_body',
            )

        execute_cmd(
            self._build_indexer_command(sitemap_cmd_options),
            'ydo_aggregator',
            'Failed to create aggregates',
            env=env
        )

        # Mark only the outputs that were declared for this run.
        declared_outputs = (
            rubrics_resource,
            geoid2workers_count_resource,
            workers_groups_resource,
            sitemap_resource,
        )
        for output in declared_outputs:
            if output is not None:
                output.ready()

    def on_execute(self):
        """Task entry point: build the aggregates inside a memoized stage."""
        # YT cluster name; create_aggregates turns it into the YT_PROXY host.
        self.yt_host = 'hahn'

        # NOTE(review): memoize_stage presumably prevents the stage from being
        # executed again when the task is re-entered -- confirm against sdk2.
        with self.memoize_stage.firststage:
            self.create_aggregates()

        logging.info('Done')

    def on_release(self, additional_parameters):
        """Run the release hooks of both base classes, Nanny first."""
        nanny.ReleaseToNannyTask2.on_release(self, additional_parameters)
        sdk2.Task.on_release(self, additional_parameters)

    def mark_released_resources(self, status, ttl=None):
        """Mark released resources, applying a status-dependent default TTL.

        A ``ttl`` of 'inf' is treated the same as an unspecified one.  When
        no TTL is given, 180 is used for 'stable' releases and 14 for any
        other status (units are presumably days -- confirm against sdk2).
        """
        if ttl == 'inf':
            ttl = None

        default_ttl = 180 if status == 'stable' else 14
        ttl = ttl or default_ttl

        return super(YdoAggregator, self).mark_released_resources(status, ttl)
