# -*- coding: utf-8 -*-


from sandbox.projects.common.news.YtScriptTaskV2 import YtScriptV2
from sandbox.projects.resource_types import OTHER_RESOURCE
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sandboxsdk import process
from sandbox.sdk2.helpers import subprocess as sp

from sandbox import sdk2
import logging
import os
import time
import shutil
import json


class GetArchiveStoryUrlsForTesting(YtScriptV2):
    """Sandbox task: dump shards of a newsd YT table and run a state parser on them.

    The task runs the script provided by the ``YtScriptV2`` base class against
    ``newsd_table`` for the ``[lower_bound, upper_bound)`` range, writing its
    output into a local ``shard_data`` directory.  Every sub-directory found
    there is then fed to the ``state_parser_binary`` resource, whose stdout is
    stored in ``result_dir`` (one file per shard directory) and published as
    an ``OTHER_RESOURCE``.  Optionally all result files are merged into a
    single JSON document and uploaded to S3.
    """

    class Requirements(YtScriptV2.Requirements):
        disk_space = 40 * 1024
        ram = 8192
        # boto3 is imported lazily inside upload_to_s3().
        environments = [
            PipEnvironment('boto3', '1.9.202')
        ]

    class Parameters(YtScriptV2.Parameters):

        newsd_table = sdk2.parameters.String("Path to newsd table on YT", required=True, default_value='//home/news-prod/archive/newsd/newsd')
        state_parser_binary = sdk2.parameters.Resource("Resource with state parser binary", required=True)
        lower_bound = sdk2.parameters.Integer("Lower bound  of shard (includsive), format: YYYYMMDD", required=True, default=None)
        # When left empty, on_save() fills this with the current date.
        upper_bound = sdk2.parameters.Integer("Upper bound  of shard (exclusive), format: YYYYMMDD", required=False, default=None)
        upload_to_s3 = sdk2.parameters.Bool("Upload to S3", required=True, default=False)
        s3_endpoint = sdk2.parameters.String("S3 endpoint", required=False, default="http://s3.mdst.yandex.net")
        s3_secret_key_selector = sdk2.parameters.YavSecret("S3 Secret key pair", required=False, default=None)
        s3_bucket = sdk2.parameters.String("S3 bucket", required=False, default="news")

    @staticmethod
    def _make_clean_dir(dir_path):
        """Remove ``dir_path`` if it exists, create it empty and return it."""
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        os.makedirs(dir_path)
        return dir_path

    def on_save(self):
        """Default ``upper_bound`` to today's date (YYYYMMDD) when it is not set."""
        if not self.Parameters.upper_bound:
            # The parameter is declared as Integer, so store an int rather
            # than the string returned by strftime.
            self.Parameters.upper_bound = int(time.strftime("%Y%m%d"))

    def upload_to_s3(self, filepath):
        """Upload ``filepath`` to the configured bucket as ``test_archive/stories.json``.

        The access key pair is read from the ``s3_secret_key_selector`` secret
        (keys ``id`` and ``secret``).  ``head_bucket`` is called before the
        upload, so a missing or inaccessible bucket fails before any data is
        transferred.
        """
        import boto3
        from boto3.s3.transfer import S3Transfer

        # Fetch the secret once and reuse it for both values.
        secret = self.Parameters.s3_secret_key_selector.data()
        s3conn = boto3.client('s3',
                              endpoint_url=self.Parameters.s3_endpoint,
                              aws_access_key_id=secret["id"],
                              aws_secret_access_key=secret["secret"])
        s3conn.head_bucket(Bucket=self.Parameters.s3_bucket)
        s3transfer = S3Transfer(s3conn)
        s3transfer.upload_file(
            filepath,
            self.Parameters.s3_bucket,
            "test_archive/stories.json",
        )

    def join_outputs(self, dir_path, output_path):
        """Merge every JSON file directly inside ``dir_path`` into one JSON object.

        The resulting object maps each file name to that file's parsed
        content and is written to ``output_path``.  Sub-directories are
        skipped.  A file that is not valid JSON makes ``json.load`` raise.
        """
        result = {}
        for filename in os.listdir(dir_path):
            file_path = os.path.join(dir_path, filename)
            if not os.path.isfile(file_path):
                continue
            with open(file_path) as f:
                result[filename] = json.load(f)

        # The directory listing above is finished before the output is opened,
        # so output_path may safely live inside dir_path.
        with open(output_path, "w") as out:
            json.dump(result, out)

    def on_execute(self):
        """Run the YT script, parse each produced shard and publish the results."""
        shard_dir = self._make_clean_dir(str(self.path('shard_data')))

        resource_id = self.Parameters.state_parser_binary
        state_parser = str(sdk2.ResourceData(sdk2.Resource[resource_id]).path)
        # 0o775 is the octal spelling accepted by both Python 2.6+ and Python 3.
        os.chmod(state_parser, 0o775)

        scriptdir = self.get_scripts()
        python_binary = self.get_python()

        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.join(scriptdir, 'lib/python')
        env['YT_TOKEN'] = self.get_token()

        # NOTE(review): the command is a shell string built from task
        # parameters; kept as-is because get_cmdline() may itself contain
        # several shell words - confirm before switching to an argv list.
        cmdline = "{python} {cmd} --yt-proxy {proxy} --table {table} --since {lower_bound} --to {upper_bound} --out-dir {out}".format(
            python=python_binary,
            cmd=self.get_cmdline(),
            proxy=self.get_yt_proxy(),
            table=self.Parameters.newsd_table,
            lower_bound=self.Parameters.lower_bound,
            upper_bound=self.Parameters.upper_bound,
            out=shard_dir,
        )

        process.run_process(cmdline, shell=True, work_dir=scriptdir, environment=env, log_prefix='script_run')

        result_dir = self._make_clean_dir(str(self.path('result_dir')))

        logging.info("Check dir %s", shard_dir)
        for dir_name in os.listdir(shard_dir):
            path = os.path.join(shard_dir, dir_name)
            logging.info("Check path %s", path)
            if not os.path.isdir(path):
                continue

            logging.info("path %s is dir, lets run dumper", path)
            output_filename = os.path.join(result_dir, os.path.basename(path))
            with open(output_filename, 'w') as output_file:
                dumper = sp.Popen([state_parser], stdin=sp.PIPE, stdout=output_file)
                # communicate() writes the input, closes stdin and waits for
                # the process, so no explicit close()/wait() is needed.
                dumper.communicate('stored\t{}/'.format(path))
            if dumper.returncode != 0:
                # Previously the exit code was ignored; surface it in the log
                # without changing the task outcome.
                logging.warning("state parser exited with code %s for %s", dumper.returncode, path)

        output_resource = OTHER_RESOURCE(self, "", result_dir)
        sdk2.ResourceData(output_resource).ready()

        if self.Parameters.upload_to_s3:
            # Build the path from result_dir instead of relying on the
            # process working directory.
            # NOTE(review): this writes into the resource directory after
            # ready() was called - confirm that is intended.
            joined_path = os.path.join(result_dir, 'joined.json')
            self.join_outputs(result_dir, joined_path)
            self.upload_to_s3(joined_path)
