from sandbox import sdk2

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import LastReleasedResource, SandboxStringParameter, ResourceSelector
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk import environments

from sandbox.projects import resource_types
import sandbox.projects.resource_types.releasers as resource_releasers
from sandbox.projects.common.utils import get_or_default
from sandbox.sandboxsdk.paths import get_logs_folder

import os


class RatingCalcer(LastReleasedResource):
    # Task input selecting the rating calcer executable resource; the task
    # reads the chosen value from its context under the 'rating_calcer' key.
    resource_type = resource_types.CALC_BUSINESS_RATING_EXECUTABLE
    name = 'rating_calcer'
    description = 'Rating calcer: '


class BusinessIndex(LastReleasedResource):
    # Task input selecting the business database resource; the task reads the
    # chosen value from its context under the 'business_index' key.
    resource_type = resource_types.MAPS_DATABASE_BUSINESS
    name = 'business_index'
    description = 'Business index: '


class RatingModel(ResourceSelector):
    # Task input selecting the rating model resource; no resource is
    # preselected (default_value is None). The task reads the chosen value
    # from its context under the 'rating_model' key.
    resource_type = resource_types.ORGS_RATING_MODEL
    default_value = None
    name = 'rating_model'
    description = 'Rating model: '


class BUSINESS_RATING_FACTORS(sdk2.Resource):
    # Resource type used for the factors dump file that
    # CalcBusinessRating.on_execute registers after the calcer run.
    releasers = resource_releasers.geosearch_releasers
    releasable = True
    auto_backup = True


class OutputTable(SandboxStringParameter):
    # Path of the YT table that receives the showed rating; when the value is
    # empty the task skips the upload.
    name = 'out_table'
    description = 'Table to dump rating to'
    default_value = '//home/geosearch/companies_rating'


class YtProxy(SandboxStringParameter):
    # YT proxy the task passes to YtClient when uploading the rating table.
    name = 'proxy'
    description = 'YT proxy'
    default_value = 'hahn.yt.yandex.net'


class LegacyTable(SandboxStringParameter):
    # Optional value forwarded to the calcer as `--legacy=<value>`; an empty
    # value means the flag is not passed at all.
    description = 'Legacy table'
    name = 'legacy_table'
    # Every other parameter in this file declares its default through
    # `default_value`; the previous `default = ''` attribute did not follow
    # that convention, so the intended empty-string default was not declared
    # the way the sibling parameters declare theirs.
    default_value = ''


def get_yt_row(rating_line):
    """Turn one tab-separated rating line into a row dict for the YT table.

    Leading/trailing whitespace is stripped first; the first column becomes
    the string ``oid`` and the second column is parsed as the float
    ``rating``. Any further columns are ignored.

    Raises IndexError when the line has fewer than two columns and
    ValueError when the second column is not a valid float.
    """
    columns = rating_line.strip().split('\t')
    oid, rating_text = columns[0], columns[1]
    return dict(oid=oid, rating=float(rating_text))


class CalcBusinessRating(SandboxTask):
    """
        Calculates machine rating for organizations
    """
    # Workflow (see on_execute): run the calcer once for the raw rating, run
    # it again with extra flags for the showed rating plus factor/review
    # dumps, optionally upload the showed rating to YT, then register all
    # four output files as resources.
    type = 'CALC_BUSINESS_RATING'

    input_parameters = (
        RatingCalcer,
        BusinessIndex,
        RatingModel,
        OutputTable,
        YtProxy,
        LegacyTable
    )
    environment = (environments.PipEnvironment('yandex-yt'), environments.PipEnvironment('yandex-yt-yson-bindings-skynet'))

    def dump_to_yt(self, token, file_name, table_name):
        """Recreate ``table_name`` on YT and fill it with ratings from a file.

        The table is removed (``force=True``, so a missing table is not an
        error), created again with an ``oid``/``rating`` schema, and then
        written with one row per line of ``file_name`` (parsed by
        ``get_yt_row``).

        :param token: YT token handed to ``YtClient``.
        :param file_name: path to the tab-separated rating file.
        :param table_name: YT path of the destination table.
        """
        schema = [
            {'name': 'oid', 'type': 'string', 'required': True},
            {'name': 'rating', 'type': 'double'},
        ]
        # Imported lazily: the YT client comes from the task's pip
        # environment rather than from the module's static imports.
        from yt.wrapper import YtClient
        proxy = get_or_default(self.ctx, YtProxy)
        yt = YtClient(proxy, token)
        yt.remove(table_name, force=True)
        yt.create('table', table_name, attributes={'schema': schema})
        # Keep the file open only for the duration of the upload so the
        # handle is released even if write_table raises.
        with open(file_name) as rating_file:
            yt.write_table(table_name, (get_yt_row(x) for x in rating_file), raw=False)

    def on_execute(self):
        """Run the calcer twice, upload the showed rating, publish resources.

        Output files are created in the current working directory; the
        stderr of each calcer run goes to a separate file in the task logs
        folder.
        """
        token = self.get_vault_data('GEOMETA-SEARCH', 'yt-token')
        proc_env = os.environ.copy()
        proc_env['YT_TOKEN'] = token

        calcer = self.sync_resource(self.ctx.get('rating_calcer'))
        model = self.sync_resource(self.ctx.get('rating_model'))
        index = self.sync_resource(self.ctx.get('business_index'))
        legacy_table = get_or_default(self.ctx, LegacyTable)

        showed_rating = './showed_rating.txt'
        raw_rating = './raw_rating.txt'
        factors_dump = './factors.txt'
        reviews_dump = './reviews.txt'

        # First pass: bare calcer invocation, stdout is the raw rating.
        # Note that this run does not receive the YT_TOKEN environment.
        raw_params = [calcer, index + '/', model]
        proc_err = os.path.join(get_logs_folder(), 'calc_raw_rating.err')
        with open(raw_rating, 'w') as out, open(proc_err, 'w') as err:
            run_process(raw_params, stdout=out, stderr=err)

        # Second pass: same base command plus the flags that produce the
        # showed rating on stdout and the factor/review-count dumps.
        showed_params = raw_params + [
            '-fm',
            '--postfilter',
            '--factordump=%s' % factors_dump,
            '--reviewcountdump=%s' % reviews_dump,
        ]
        if legacy_table:
            showed_params.append('--legacy=%s' % legacy_table)
        proc_err = os.path.join(get_logs_folder(), 'calc_showed_rating.err')
        with open(showed_rating, 'w') as out, open(proc_err, 'w') as err:
            run_process(showed_params, stdout=out, stderr=err, environment=proc_env)

        # An empty output table name disables the YT upload.
        out_table = get_or_default(self.ctx, OutputTable)
        if out_table:
            self.dump_to_yt(token, showed_rating, out_table)

        self.create_resource('raw rating', raw_rating, resource_types.BUSINESS_COMPUTED_RAW_RATINGS)
        self.create_resource('showed rating', showed_rating, resource_types.BUSINESS_COMPUTED_RATINGS)
        self.create_resource('factors', factors_dump, BUSINESS_RATING_FACTORS)
        self.create_resource('reviews', reviews_dump, resource_types.BUSINESS_REVIEW_COUNTS)


# Module-level alias for the task class defined above.
# NOTE(review): presumably this is the name the Sandbox loader looks up to
# discover the task in this module — confirm before renaming or removing.
__Task__ = CalcBusinessRating
