# -*- coding: utf-8 -*-
import gzip
import logging
from sandbox import sandboxsdk
import shutil

from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.process import run_process

from sandbox.sdk2.resource import AbstractResource

from sandbox.projects import resource_types
import sandbox.projects.resource_types.releasers as resource_releasers


class MAPS_RELEVANT_URLS_EXPORT_PARSER_EXECUTABLE(AbstractResource):
    """
        Extractor of URLs from the directory export
    """
    releasable = True
    any_arch = False
    executable = True
    auto_backup = True
    # Users/groups allowed to release this resource (shared geosearch list).
    releasers = resource_releasers.geosearch_releasers
    # Build path of the parse_urls_from_export binary.
    arcadia_build_path = 'extsearch/geo/tools/parse_urls_from_export/parse_urls_from_export'


class BackaExport(sandboxsdk.parameters.ResourceSelector):
    """
        Required task parameter selecting the Backa export resource
        (type MAPS_DATABASE_BUSINESS_SOURCE_ARCHIVE).
    """
    name = 'backa_export_resource_id'
    description = 'Backa export'
    required = True
    resource_type = resource_types.MAPS_DATABASE_BUSINESS_SOURCE_ARCHIVE


class GeminiWorkers(sandboxsdk.parameters.SandboxStringParameter):
    """
        Required string parameter with the number of geminicl workers.
    """
    name = 'gemini_workers'
    description = 'Number of geminicl workers'
    required = True
    # Kept as a string: the parameter type is SandboxStringParameter.
    default_value = '8'


class ParserExecutable(sandboxsdk.parameters.ResourceSelector):
    """
        Required task parameter selecting the export parser executable
        (a MAPS_RELEVANT_URLS_EXPORT_PARSER_EXECUTABLE resource).
    """
    name = 'parser_resource_id'
    description = 'Export parser executable'
    required = True
    resource_type = MAPS_RELEVANT_URLS_EXPORT_PARSER_EXECUTABLE


class BuildAddrsBusinessUrls(sandboxsdk.task.SandboxTask, object):
    """
        Build a gzipped URL list from a Backa export archive.

        ``on_execute`` performs these steps:
          1. sync the Backa export archive resource;
          2. extract ``companies2.xml.gz`` from it with ``tar`` and
             decompress it with ``gunzip``;
          3. run the export parser executable over the resulting XML;
          4. gzip the parser output and register it as a
             ``MAPS_WEB_URLS_BUSINESS`` resource.
    """

    type = 'BUILD_ADDRS_BUSINESS_URLS'

    # NOTE(review): 50 << 10 == 51200; presumably megabytes -- confirm the
    # unit against the Sandbox SDK.
    execution_space = 50 << 10

    input_parameters = [BackaExport, GeminiWorkers, ParserExecutable]

    def _exception_logging(self, cmd, err):
        """
            Log a failed command with its error details and fail the task.

            :param cmd: the command that failed (used in the message)
            :param err: error details to log
            :raises SandboxTaskFailureError: always
        """
        msg = 'Command %s failed.' % cmd
        logging.info(msg)
        logging.info('Details: %s', err)
        raise SandboxTaskFailureError(msg)

    def _untar_companies(self, archive_path, companies_path):
        """
            Extract the member ``companies_path`` from the tar archive
            ``archive_path`` by running ``tar -xvf``.
        """
        logging.info('Extracting %s from %s', companies_path, archive_path)
        run_process(['tar', '-xvf', archive_path, companies_path], log_prefix='untar')

    def _unzip_companies(self, gzip_path, result_path):
        """
            Decompress ``gzip_path`` into ``result_path`` by piping it
            through ``gunzip``.
        """
        logging.info('Unzipping %s to %s', gzip_path, result_path)
        # Context managers guarantee both handles are closed (and the output
        # flushed) even if gunzip fails; previously they were leaked.
        with open(gzip_path, 'rb') as gz_in, open(result_path, 'wb') as xml_out:
            run_process(['gunzip', ], stdin=gz_in, stdout=xml_out, log_prefix='unzip')

    def prepare_companies(self, archive_path, gzipped_companies_path,
                          companies_path):
        """
            Extract the gzipped companies file from the archive and
            decompress it.

            :param archive_path: path to the export tar archive
            :param gzipped_companies_path: archive member to extract; also
                the path of the extracted gzip file
            :param companies_path: where to write the decompressed file
        """
        self._untar_companies(archive_path, gzipped_companies_path)
        self._unzip_companies(gzipped_companies_path, companies_path)

    def parse_companies(self, xml_path, output_path):
        """
            Run the export parser executable on ``xml_path``.

            The executable is taken from the ``ParserExecutable`` task
            parameter. The ``GeminiWorkers`` parameter is passed as ``-w``,
            and ``-t`` is fixed at ``'8'``. The parser's stderr is written
            to ``export_parser.err`` in the working directory.

            :param xml_path: input file passed as ``-i``
            :param output_path: output file passed as ``-o``
        """
        executable_id = self.ctx.get(ParserExecutable.name)
        executable = self.sync_resource(executable_id)
        number_of_workers = self.ctx.get(GeminiWorkers.name)
        # Close the stderr capture file once the parser has finished
        # (previously the handle was leaked).
        with open('export_parser.err', 'w') as parser_stderr:
            run_process(
                [executable, '-i', xml_path, '-o', output_path, '-t', '8', '-w', number_of_workers],
                log_prefix='export_parser', stderr=parser_stderr
            )

    def on_execute(self):
        """
            Task entry point: prepare the companies XML, parse it, gzip
            the result and register it as a resource.
        """
        canonized_urls_path = 'canonized_url.txt'
        backa_export_resource_id = self.ctx.get(BackaExport.name)
        backa_export = self.sync_resource(backa_export_resource_id)
        gzipped_companies_path = 'companies2.xml.gz'
        companies_path = './companies2.xml'
        self.prepare_companies(backa_export,
                               gzipped_companies_path,
                               companies_path)
        self.parse_companies(companies_path, canonized_urls_path)
        gzipped_urls_path = './urls.gz'
        # Stream the parser output into a gzip file without loading it
        # into memory.
        with open(canonized_urls_path, 'rb') as f_in, gzip.open(gzipped_urls_path, 'wb') as gz:
            shutil.copyfileobj(f_in, gz)
        self.create_resource(description='Made from stable MAPS_DATABASE_BUSINESS_SOURCE_ARCHIVE',
                             resource_path=gzipped_urls_path,
                             resource_type=resource_types.MAPS_WEB_URLS_BUSINESS)


# Module-level alias for the task class.
# NOTE(review): presumably the name Sandbox looks up to discover the task -- confirm.
__Task__ = BuildAddrsBusinessUrls
