#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict
import json
import argparse
import sys
import os


class JsonFormatter(object):
    """Expand a flat JSON object with dot-joined keys into nested JSON.

    Every top-level key of the input is a "multikey": path segments joined
    with ``.``.  Segments are interpreted as follows:

    * a segment made only of digits is a list index;
    * ``*`` fans the value out over all of its own indices (when the value
      is a list) or keys (when it is a dict);
    * any other segment is a dictionary key.

    The expanded structure is stored under ``FirstKey``.
    """

    # Top-level key under which the expanded structure is placed.
    FirstKey = "tmpl_data"

    @staticmethod
    def DeepDict():
        """Return a defaultdict whose missing keys are created as DeepDicts."""
        return defaultdict(JsonFormatter.DeepDict)

    @staticmethod
    def ResizeArray(jsonRef, keyToArray, targetSize):
        """Make ``jsonRef[keyToArray]`` a list of at least ``targetSize`` items.

        An existing list is padded in place with fresh DeepDicts (it is never
        shrunk); any non-list value is replaced by a new list of
        ``targetSize`` DeepDicts.
        """
        current = jsonRef[keyToArray]
        # ``range`` (not ``xrange``) keeps this working on Python 2 and 3;
        # a non-positive count simply yields nothing.
        if isinstance(current, list):
            current.extend(
                JsonFormatter.DeepDict() for _ in range(targetSize - len(current))
            )
        else:
            jsonRef[keyToArray] = [JsonFormatter.DeepDict() for _ in range(targetSize)]

    @staticmethod
    def JsonToStandardFormatRecursively(jsonRef, currentKey, joinedMultikey, value):
        """Store ``value`` at the path ``currentKey`` + ``joinedMultikey``.

        :param jsonRef: container (dict or list) that owns ``currentKey``.
        :param currentKey: key or index inside ``jsonRef`` reached so far.
        :param joinedMultikey: remaining dot-joined path; empty when
            ``currentKey`` is the final destination.
        :param value: value to store at the end of the path.
        """
        if not joinedMultikey:
            jsonRef[currentKey] = value
            return

        # Split off the next segment; ``remainder`` is "" when no dot is left.
        newKey, _, remainder = joinedMultikey.partition('.')

        if newKey.isdigit():
            # Numeric segment: the current node must be a list long enough
            # to hold that index.
            newKey = int(newKey)
            JsonFormatter.ResizeArray(jsonRef, currentKey, newKey + 1)
            JsonFormatter.JsonToStandardFormatRecursively(jsonRef[currentKey], newKey, remainder, value)
        elif newKey == "*":
            # Wildcard: distribute the elements of ``value`` over the
            # matching indices (list) or keys (dict) of the current node.
            if isinstance(value, list):
                JsonFormatter.ResizeArray(jsonRef, currentKey, len(value))
                realKeys = range(len(value))
            else:
                realKeys = value.keys()
            for realKey in realKeys:
                JsonFormatter.JsonToStandardFormatRecursively(jsonRef[currentKey], realKey, remainder, value[realKey])
        else:
            JsonFormatter.JsonToStandardFormatRecursively(jsonRef[currentKey], newKey, remainder, value)

    @staticmethod
    def JsonToStandardFormat(responseJsonDump):
        """Convert a JSON string with flat multikeys into nested JSON.

        If the decoded object already contains ``FirstKey``, only that entry
        is kept and re-serialised.  Otherwise every top-level key is expanded
        into a structure stored under ``FirstKey``.

        :param responseJsonDump: JSON text whose top level is an object.
        :returns: JSON text of the form ``{"tmpl_data": ...}``.
        :raises AttributeError: if the decoded top level has no ``keys()``
            (for example ``null``).
        """
        currentFormat = json.loads(responseJsonDump)
        if currentFormat is None:
            # Log the offending payload; the ``keys()`` call below then
            # fails, so the stderr output shows which input caused it.
            sys.stderr.write(responseJsonDump + "\n")
        if JsonFormatter.FirstKey in currentFormat.keys():
            return json.dumps({JsonFormatter.FirstKey: currentFormat[JsonFormatter.FirstKey]})

        standardFormat = {JsonFormatter.FirstKey: JsonFormatter.DeepDict()}
        for joinedMultikey, value in currentFormat.items():
            JsonFormatter.JsonToStandardFormatRecursively(
                standardFormat,
                JsonFormatter.FirstKey, joinedMultikey, value
            )
        return json.dumps(standardFormat)


def delete_unique_and_format_json(key, rows):
    """Reducer: emit a group's rows only when exactly two of them carry a value.

    Rows whose ``value`` is missing, equal to the string ``"null"`` or
    otherwise falsy are dropped.  Each remaining row is reduced to its
    ``key``/``domain``/``subkey`` columns plus the value rewritten by
    ``JsonFormatter.JsonToStandardFormat``.  Groups that end up with any
    number of rows other than two produce no output.

    :param key: reduce key of the group (not used by the body).
    :param rows: iterable of row dicts belonging to the group.
    """
    formatted = [
        {
            "key": record["key"],
            "domain": record["domain"],
            "subkey": record["subkey"],
            "value": JsonFormatter.JsonToStandardFormat(record["value"]),
        }
        for record in rows
        # A missing value defaults to "null", so it is rejected by the first test.
        if record.get("value", "null") != "null" and record["value"]
    ]
    if len(formatted) == 2:
        for item in formatted:
            yield item


# Table paths that no code visible in this file refers to; the command-line
# entry point takes its tables from positional arguments instead.
# NOTE(review): presumably defaults left over from manual testing - confirm
# before relying on or removing them.
INPUT_TABLE = "//tmp/GetReportSimultaneousResponses/test_GRSR_scraper"
OUTPUT_TABLE = "//tmp/GetReportSimultaneousResponses/test_GRSR_scraper_after_map"
# Names passed as the "owners" entry of the spec for both the sort and the
# reduce operation started by run_yt_operation.
YT_OWNERS = ["ulyanin", "mvel", "dima-zakharov", "kulikov", "venikman1", "ilyaturuntaev", "grievous"]


def run_yt_operation(server, token, input_table, output_table):
    """Sort ``input_table`` and reduce it into ``output_table`` on YT.

    A sort by (domain, key, subkey) is run on ``input_table`` (no separate
    destination is passed), followed by a reduce by (domain, key) that
    applies ``delete_unique_and_format_json`` and writes to ``output_table``.
    Progress messages go to stderr.

    :param server: YT cluster the client connects to.
    :param token: YT token handed to the client.
    :param input_table: path of the table to sort and read.
    :param output_table: path of the table the reduce writes to.
    """
    from yt.wrapper import YtClient, TablePath

    def writer_io():
        # Build a fresh dict on every call so no spec section shares state.
        return {"table_writer": {"max_row_weight": 128 << 20}}

    sort_columns = ("domain", "key", "subkey")

    pickling_config = {'pickling': {'python_binary': '/skynet/python/bin/python'}}
    client = YtClient(server, token, config=pickling_config)
    sys.stderr.write("Create YtClient on server {}\n".format(server))

    sys.stderr.write("Run sort on {}\n".format(input_table))
    sort_spec = {
        "owners": YT_OWNERS,
        "job_io": writer_io(),
        "sort_job_io": writer_io(),
    }
    client.run_sort(
        input_table,
        sort_by=list(sort_columns),
        spec=sort_spec,
    )

    sys.stderr.write("Run reduce from {} to {}\n".format(input_table, output_table))
    reduce_spec = {
        "owners": YT_OWNERS,
        "job_io": writer_io(),
        "partition_job_io": writer_io(),
        "merge_job_io": writer_io(),
        "reduce_job_io": writer_io(),
    }
    destination = TablePath(output_table, sorted_by=list(sort_columns))
    client.run_reduce(
        delete_unique_and_format_json,
        input_table,
        destination,
        reduce_by=["domain", "key"],
        sort_by=list(sort_columns),
        format="json",
        spec=reduce_spec,
    )


if __name__ == "__main__":
    # Command-line entry point: parse the cluster, token and table paths,
    # then run the sort + reduce pipeline.
    parser = argparse.ArgumentParser()

    parser.add_argument("--server", help="YT cluster", required=True)
    parser.add_argument("input_table", help="Table to take records from")
    parser.add_argument("output_table", help="Table to put records to")
    # Use .get() for the default: indexing os.environ would raise KeyError
    # while the parser is being built whenever YT_TOKEN is unset, even if
    # --token is given on the command line.
    parser.add_argument("--token", help="YT token", default=os.environ.get("YT_TOKEN"))
    args = parser.parse_args()
    if args.token is None:
        # Neither source supplied a token: fail with a usage error instead
        # of passing None on to the client.
        parser.error("YT token is required: pass --token or set the YT_TOKEN environment variable")
    run_yt_operation(args.server, args.token, args.input_table, args.output_table)
