import os
import logging
import tarfile
import shutil

from datetime import datetime, timedelta

from sandbox import sdk2
from sandbox.sdk2.helpers import subprocess
# from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.paths import get_logs_folder
from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.projects.logs.resources import SESSIONS_BLOCKSTAT
from sandbox.projects.geobase.Geodata5BinStable import resource as geobase_res
from sandbox.projects.resource_types import MAPS_USERFEATURES_EXECUTABLE, MAPS_GEO_USER_FACTORS, OTHER_RESOURCE


def merge(sources, dest):
    """Join several tab-separated factor files into one wide file keyed by id.

    Every source row has the form ``<id>\\t<factor>\\t<factor>...``. The
    number of factor columns a source contributes is taken from its first
    non-blank row. In the output each id gets one row holding the factor
    columns of all sources, in the order of ``sources``; columns for which a
    source has no value for that id are filled with ``-99.0``.

    :param sources: sequence of input file paths.
    :param dest: path of the merged output file (overwritten).
    :raises ValueError: if a row has more factor columns than the first
        non-blank row of the same file (it would otherwise spill into the
        columns that belong to the next source).
    """
    missing = "-99.0"

    # First pass: one width per source, always. A source without any rows
    # contributes zero columns; skipping it here would shift the column
    # offsets of every source that follows.
    widths = []
    for path in sources:
        width = 0
        with open(path) as stream:
            for line in stream:
                line = line.strip()
                if line:
                    width = len(line.split('\t')) - 1
                    break
        widths.append(width)
    total_width = sum(widths)

    # Second pass: place every source's factors at that source's offset.
    result = {}
    offset = 0
    for path, width in zip(sources, widths):
        with open(path) as stream:
            for line_no, line in enumerate(stream, 1):
                line = line.strip()
                if not line:
                    # Blank lines carry no id; ignore them.
                    continue
                # Split on tabs, exactly like the width detection above.
                fields = line.split('\t')
                oid = fields[0]
                factors = fields[1:]
                if len(factors) > width:
                    raise ValueError(
                        '%s:%d: row has %d factor columns, expected at most %d'
                        % (path, line_no, len(factors), width))
                row = result.setdefault(oid, [missing] * total_width)
                row[offset:offset + len(factors)] = factors
        offset += width

    with open(dest, 'w') as destfile:
        # Plain dict iteration works on both Python 2 and Python 3.
        for oid in result:
            destfile.write('\t'.join([oid] + result[oid]))
            destfile.write('\n')


def cut_file(source, dest):
    """Copy ``source`` to ``dest`` keeping only the first four tab-separated columns.

    Each output row is terminated by exactly one newline. The line terminator
    is removed before splitting so that rows with four or fewer columns do not
    carry their own newline into the last kept field (which would produce an
    extra blank line in ``dest``).

    :param source: path of the tab-separated input file.
    :param dest: path of the output file (overwritten).
    """
    logging.info('processing file: %s', source)
    with open(source, 'r') as src, open(dest, 'w') as dst:
        for line in src:
            columns = line.rstrip('\r\n').split('\t')[:4]
            dst.write('\t'.join(columns))
            dst.write('\n')


def postprocess(path):
    """Normalise the builder output directory ``path`` in place.

    Steps, in order:
      1. Files whose names start with one of the known dotted prefixes are
         renamed to fixed names; files starting with
         ``permalink_pair_stat.`` are deleted.
      2. The three raw static click files are each cut down with
         ``cut_file`` into temporary files.
      3. The temporary files are combined with ``merge`` into
         ``biz_clicks_static`` and then removed.
    """
    def in_dir(name):
        return os.path.join(path, name)

    # (file name prefix, fixed name the file is moved to)
    renames = (
        ('biz_clicks_dynamic.', 'biz_clicks_dynamic'),
        ('biz_clicks_static.', 'biz_clicks_static_raw'),
        ('biz_clicks_org1_static.', 'biz_clicks_static_org1_raw'),
        ('biz_clicks_rub_static.', 'biz_clicks_static_rub_raw'),
    )

    for entry in os.listdir(path):
        for prefix, new_name in renames:
            if entry.startswith(prefix):
                shutil.move(in_dir(entry), in_dir(new_name))
        if entry.startswith('permalink_pair_stat.'):
            os.remove(in_dir(entry))

    # (raw input name, temporary cut-down output name)
    cut_jobs = (
        ('biz_clicks_static_raw', 'biz_clicks_all_static'),
        ('biz_clicks_static_org1_raw', 'biz_clicks_org1_static'),
        ('biz_clicks_static_rub_raw', 'biz_clicks_rub_static'),
    )

    trimmed = []
    for raw_name, trimmed_name in cut_jobs:
        target = in_dir(trimmed_name)
        cut_file(in_dir(raw_name), target)
        trimmed.append(target)

    merge(trimmed, in_dir('biz_clicks_static'))

    # The cut-down files are only intermediate inputs for the merge.
    for leftover in trimmed:
        os.remove(leftover)


class BuildMapsGeoUserFactors(sdk2.Task):
    """
        Build maps geo user factors

        Runs the builder executable against YT, post-processes its output
        directory, packs the directory into a tar.gz archive and publishes
        the archive as a MAPS_GEO_USER_FACTORS resource.
    """
    class Parameters(sdk2.Task.Parameters):
        # Exported as YT_POOL for the builder when non-empty.
        yt_pool = sdk2.parameters.String(
            'YT pool name',
            required=False)
        # Executable that is launched in on_execute.
        builder = sdk2.parameters.Resource(
            'Builder executable',
            resource_type=MAPS_USERFEATURES_EXECUTABLE,
            required=True
        )
        # Passed to the builder with "-g".
        geobase = sdk2.parameters.Resource(
            'Geobase 5',
            resource_type=geobase_res.GEODATA5BIN_STABLE,
            required=True
        )
        # Passed to the builder with "-b".
        blockstat = sdk2.parameters.Resource(
            'Blockstat',
            resource_type=SESSIONS_BLOCKSTAT,
            required=True
        )
        # Passed to the builder with "-r".
        relev_regions = sdk2.parameters.Resource(
            'Relevant region list',
            resource_type=OTHER_RESOURCE,
            required=True
        )
        # When set, "-u" is added to the builder command line.
        is_prod = sdk2.parameters.Bool(
            'Is production run',
            required=False,
            default_value=False
        )
        # NOTE(review): this default is evaluated once, when the class body is
        # executed (module import), not at task start. on_execute recomputes
        # "yesterday" only when the parameter is empty.
        end_date = sdk2.parameters.String(
            'Calculation end date (YYYYMMDD format)',
            required=False,
            default_value=((datetime.now() - timedelta(1)).strftime("%Y%m%d")))
        days_num = sdk2.parameters.Integer(
            'Calculation days count',
            required=False,
            default_value=30)

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        ram = 8192

        # NOTE(review): empty Caches declaration; presumably tells Sandbox the
        # task needs no shared caches -- confirm against the Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    def on_execute(self):
        """Run the builder, post-process its output and publish the archive.

        Raises SandboxTaskFailureError when the builder exits with a
        non-zero status.
        """
        # The builder is configured through the environment.
        os.environ['YT_TOKEN'] = sdk2.Vault.data('GEOMETA-SEARCH', 'yt-token')
        os.environ['YT_PREFIX'] = '//home/geosearch/'
        os.environ['MR_RUNTIME'] = 'YT'
        if self.Parameters.yt_pool:
            os.environ['YT_POOL'] = self.Parameters.yt_pool

        # Fall back to yesterday when no end date was supplied.
        if self.Parameters.end_date:
            end_date = str(self.Parameters.end_date)
        else:
            end_date = (datetime.now() - timedelta(1)).strftime("%Y%m%d")

        executable_path = str(sdk2.ResourceData(self.Parameters.builder).path)
        geodata_path = str(sdk2.ResourceData(self.Parameters.geobase).path)
        blockstat_path = str(sdk2.ResourceData(self.Parameters.blockstat).path)
        relev_regions_path = str(sdk2.ResourceData(self.Parameters.relev_regions).path)

        output_path = './result'
        output_file = './userdata_geo.tar.gz'

        os.makedirs(output_path)

        cmd = [
            executable_path,
            "-g", geodata_path,
            "-b", blockstat_path,
            "-r", relev_regions_path,
            "-o", output_path,
            "-s", 'hahn.yt.yandex.net',
        ]
        if self.Parameters.is_prod:
            cmd.append('-u')
        # Positional arguments: end date, then number of days.
        cmd.append(end_date)
        cmd.append(str(self.Parameters.days_num))

        # Builder stdout and stderr both go to one file in the task logs folder.
        log_file_path = os.path.join(get_logs_folder(), 'build_geo_user_factors.out.txt')
        with open(log_file_path, 'w') as log_file:
            try:
                subprocess.check_call(cmd,
                                      stdout=log_file,
                                      stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError:
                logging.exception('%s command failed', cmd)
                raise SandboxTaskFailureError('Geo user factors build failed')

        postprocess(output_path)

        logging.info('creating tar.gz output file')
        # The context manager closes the archive even if adding a member fails.
        with tarfile.open(output_file, "w:gz") as tar:
            for name in os.listdir(output_path):
                # Store members at the archive root, without the "./result" prefix.
                tar.add(os.path.join(output_path, name), arcname=name)

        factors_resource = MAPS_GEO_USER_FACTORS(self, "Maps geo user factors file", output_file)
        factors_data = sdk2.ResourceData(factors_resource)
        factors_data.ready()
