# coding: utf-8
import logging
import os.path
from sandbox.projects import resource_types
from sandbox.projects.common.arcadia import sdk
import sandbox.projects.common.constants as consts
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.task import SandboxTask


class Org1SerpDownloader(SandboxTask):
    """Build and run ``find_duplicates_for_oids`` over a slice of a companies file.

    The task takes a text resource with company data and a range of lines
    from it that should be processed. According to the original description,
    the tool forms a query for every line, sends it to web search, collects
    the top-30 URLs and saves them to a file. The tool's stdout is published
    as the ``oid2urls`` resource.
    """

    # --- Input parameters -------------------------------------------------

    # Id of the resource holding the companies data file.
    class CompaniesResourceId(parameters.SandboxIntegerParameter):
        name = 'companies_resource_id'
        description = 'resource with companies data'
        required = True

    # First line of the companies file to process (passed as --range_start).
    class RangeStart(parameters.SandboxIntegerParameter):
        name = 'range_start'
        description = 'starting line number of the companies file'
        required = True

    # Last line of the companies file to process (passed as --range_end).
    class RangeEnd(parameters.SandboxIntegerParameter):
        name = 'range_end'
        description = 'ending line number of the companies file'
        required = True

    # Directory that contains the ``find_duplicates_for_oids`` build target.
    class ToolsPath(parameters.SandboxStringParameter):
        name = 'tools_path'
        description = 'path to python modules dir in geo/tools'
        required = True

    # When set, the tool is started with --use-shortname.
    class UseShortCompanyName(parameters.SandboxBoolParameter):
        name = 'use_shortname'
        description = 'Use short company name for web request'
        default_value = False

    # When set, the tool is started with --use-short-addr.
    class UseShortAddress(parameters.SandboxBoolParameter):
        name = 'use_short_addr'
        description = 'Use short company address for web request'
        default_value = False

    # --- Task settings ----------------------------------------------------

    type = "ORG1_SERP_DOWNLOADER"
    max_restarts = 1
    input_parameters = [
        CompaniesResourceId,
        RangeStart,
        RangeEnd,
        ToolsPath,
        UseShortCompanyName,
        UseShortAddress,
    ]

    # Build results directory handed to sdk.do_build (relative path).
    BINARY_DIR = './bin'

    def initCtx(self):
        """Set the task kill timeout in the context."""
        self.ctx['kill_timeout'] = 20 * 60 * 60  # 20 hours

    def on_execute(self):
        """Build the tool, run it on the requested range and publish its output."""
        ctx = self.ctx
        tool_name = 'find_duplicates_for_oids'

        # Fetch trunk sources and build the tool into BINARY_DIR.
        self.src_dir = Arcadia.get_arcadia_src_dir(Arcadia.trunk_url())
        target = os.path.join(ctx[self.ToolsPath.name], tool_name)
        sdk.do_build(
            consts.YMAKE_BUILD_SYSTEM,
            self.src_dir,
            [target],
            consts.RELEASE_BUILD_TYPE,
            results_dir=self.BINARY_DIR,
            clear_build=False,
        )

        companies_path = self.sync_resource(ctx[self.CompaniesResourceId.name])
        result_path = self.path('oid2urls.tsv')

        # The tool's stdout becomes the result file; its stderr is appended
        # to stderr.log in the task logs folder.
        with open(result_path, 'w') as fout:
            with open(os.path.join(get_logs_folder(), 'stderr.log'), 'a') as ferr:
                logging.info("Requesting hamster")
                command = [
                    os.path.join(self.BINARY_DIR, target, tool_name),
                    '--companies_file', companies_path,
                    '--range_start', str(ctx[self.RangeStart.name]),
                    '--range_end', str(ctx[self.RangeEnd.name]),
                ]
                # Boolean parameters map one-to-one onto optional CLI switches.
                optional_flags = (
                    (self.UseShortCompanyName, '--use-shortname'),
                    (self.UseShortAddress, '--use-short-addr'),
                )
                command.extend(
                    flag for param, flag in optional_flags if ctx.get(param.name, False)
                )
                run_process(command, stdout=fout, stderr=ferr)

        resource = self.create_resource("oid2urls", result_path, resource_types.PLAIN_TEXT)
        self.mark_resource_ready(resource)


# Module-level alias for the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up to
# discover the task in this module -- confirm against the SDK.
__Task__ = Org1SerpDownloader
