# -*- coding: utf-8 -*

import logging
import datetime
import subprocess
from sandbox import sdk2

import sandbox.common.types.task as ctt
from sandbox.projects import resource_types as rtypes
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.projects.geosearch import resource_types as geo_types
from sandbox.projects.geosearch.ParseAddrsUrlsFromYt import ParseAddrsUrlsFromYt


def get_date():
    '''
        Return the current local time as a 'YYYY-MM-DD HH:MM' string.
    '''
    now = datetime.datetime.now()
    return now.strftime('%Y-%m-%d %H:%M')


class BuildShardedAddrsWebIndexann(sdk2.Task):
    '''
        Build GEOSEARCH_WEB_INDEXANN_SHARDED

        Workflow, as implemented in on_execute:
          1. run a ParseAddrsUrlsFromYt subtask and take its
             MAPS_WEB_URLS_BUSINESS resource;
          2. run the data preparer's ``web_annotations`` command on the raw
             indexann data, writing under ``./result``;
          3. register one MAPS_WEB_INDEXANN resource per shard.
    '''

    class Parameters(sdk2.task.Parameters):
        # Executable that provides the ``web_annotations`` command.
        data_preparer = sdk2.parameters.Resource('Geosearch YT data preparer',
                                                 resource_type=geo_types.GEOSEARCH_YT_DATA_PREPARER,
                                                 required=True)
        # Passed to the URL-parsing subtask as ``company_table``.
        company_table_path = sdk2.parameters.String('Company table path',
                                                    required=True)
        # Passed to the subtask and used as the number of resources to register.
        shards_count = sdk2.parameters.Integer('Number of shards',
                                               default_value=18,
                                               required=True)
        # Input of the ``web_annotations`` command (``-i``).
        raw_indexann_data = sdk2.parameters.Resource('Web indexann raw data',
                                                     resource_type=geo_types.MAPS_WEB_DOUBLEFRC,
                                                     required=True)

    class Requirements(sdk2.Task.Requirements):
        cores = 4

        class Caches(sdk2.Requirements.Caches):
            pass

    def make_urls(self):
        '''
            Start the URL-parsing subtask, or check it on a repeated call.

            When no subtask id is stored in ``Context.build_business_urls_task``,
            a ParseAddrsUrlsFromYt subtask is created and enqueued, its id is
            stored in the context and ``sdk2.WaitTask`` is raised to wait for
            the FINISH or BREAK status groups.

            When the id is already stored, the subtask is looked up and
            SandboxTaskFailureError is raised if its status is in the BREAK
            group.
        '''
        if self.Context.build_business_urls_task:
            task = sdk2.Task[self.Context.build_business_urls_task]
            if task.status in ctt.Status.Group.BREAK:
                raise SandboxTaskFailureError('Parsing URLs failed')
            return

        parse_urls_task_class = sdk2.Task[ParseAddrsUrlsFromYt.type]
        parse_urls_task = parse_urls_task_class(self,
                                                description='Build MAPS_WEB_URLS_BUSINESS from YT',
                                                company_table=self.Parameters.company_table_path,
                                                data_preparer_executable=self.Parameters.data_preparer,
                                                shards_count=self.Parameters.shards_count,
                                                owner=self.owner,
                                                create_sub_task=True)
        parse_urls_task.enqueue()
        self.Context.build_business_urls_task = parse_urls_task.id
        raise sdk2.WaitTask([self.Context.build_business_urls_task],
                            ctt.Status.Group.FINISH | ctt.Status.Group.BREAK,
                            wait_all=True)

    def _run_data_preparer(self, cmd):
        '''
            Run ``cmd`` (an argument list), sending its output to a task log file.

            Raises SandboxTaskFailureError if the process cannot be started or
            exits with a non-zero code.
        '''
        cmd_line = ' '.join(cmd)
        logging.info('Running: %s', cmd_line)
        log_file_path = get_logs_folder() + '/build_sharded_indexann.out.txt'
        with open(log_file_path, 'w') as log_file:
            try:
                # check_call raises CalledProcessError on a non-zero exit code;
                # a bare Popen(...).wait() would silently ignore the failure.
                subprocess.check_call(cmd,
                                      stdout=log_file,
                                      stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError) as err:
                logging.error('%s command failed', cmd_line)
                logging.error('Details: %s', err)
                raise SandboxTaskFailureError('Indexann build failed')

    def on_execute(self):
        '''
            Build the sharded web index annotations and publish them.

            Raises SandboxTaskFailureError when the URL-parsing subtask is in a
            BREAK status, when it produced no MAPS_WEB_URLS_BUSINESS resource,
            or when the data preparer command fails.
        '''
        with self.memoize_stage.parse_urls:
            self.make_urls()
        parse_urls_task = sdk2.Task[self.Context.build_business_urls_task]
        # NOTE(review): if memoize_stage marks the stage as done on entrance,
        # make_urls is not re-run after the wait, so the status is re-checked
        # here as well - confirm against the sdk2 memoize_stage semantics.
        if parse_urls_task.status in ctt.Status.Group.BREAK:
            raise SandboxTaskFailureError('Parsing URLs failed')

        business_urls_resource = sdk2.Resource[rtypes.MAPS_WEB_URLS_BUSINESS].find(task=parse_urls_task).first()
        logging.info('business_urls_resource => %s', business_urls_resource)
        if business_urls_resource is None:
            raise SandboxTaskFailureError('Parsing URLs failed: MAPS_WEB_URLS_BUSINESS resource not found')

        data_preparer_executable = sdk2.ResourceData(self.Parameters.data_preparer).path
        urls = sdk2.ResourceData(business_urls_resource).path
        raw_data = sdk2.ResourceData(self.Parameters.raw_indexann_data).path
        output_path = './result'
        # An argument list without a shell keeps paths containing spaces or
        # shell metacharacters intact.
        # NOTE(review): thread count is 8 while Requirements.cores is 4 -
        # confirm this is intended.
        cmd = [
            str(data_preparer_executable), 'web_annotations',
            '-i', str(raw_data),
            '--urls-file', str(urls),
            '--thread-count', '8',
            '-o', output_path + '/indexann',
        ]
        self._run_data_preparer(cmd)

        shards_count = self.Parameters.shards_count
        web_indexann_resource = sdk2.Resource[rtypes.MAPS_WEB_INDEXANN]
        for shard_id in range(shards_count):
            # With a single shard the resource is the output directory itself;
            # otherwise each shard uses the subdirectory named after its id.
            resource_path = output_path
            if shards_count != 1:
                resource_path += ('/' + str(shard_id))
            current_web_indexann_resource = web_indexann_resource(self,
                                                                  'Sharded web index annotations',
                                                                  resource_path)
            current_web_indexann_resource.shards_count = shards_count
            current_web_indexann_resource.shard_id = shard_id
            web_indexann_data = sdk2.ResourceData(current_web_indexann_resource)
            web_indexann_data.ready()
