#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import sys
import re
import urllib2
import os


# Extract the raw (still percent-encoded) values of the `text` and `lr`
# query parameters from a URL.
#
# The parameter name must start the string or follow `?` / `&`, so that
# e.g. `context=` is not mistaken for `text=`.  The value runs up to the next
# `&`, the fragment marker `#` or the end of the string, so a parameter that
# happens to be the last one in the URL is matched as well.
TEXT_REGEXP = re.compile(r"(?:^|[?&])text=([^&#]*)")
LR_REGEXP = re.compile(r"(?:^|[?&])lr=([^&#]*)")

# Passed as the `owners` field of the map operation spec.
YT_OWNERS = [
    "ulyanin",
    "mvel",
    "dima-zakharov",
    "kulikov",
    "venikman1",
    "ilyaturuntaev",
    "grievous",
]


def do_log(msg, *args):
    """
    Trivial message logger: writes one line to stderr.

    ```
    do_log("Hey %s", "Jude")
    do_log("Don't make it bad")
    do_log("Take a %s song and make it %s", 'sad', 'better')
    do_log("100% done")
    ```

    `msg` is %-formatted only when `args` are given, so a message passed
    without arguments is written verbatim and may contain literal `%`
    characters.

    :param msg: message string, can be a %-format string
    :param args: whatever is needed to format `msg`
    """
    if args:
        msg = msg % args
    sys.stderr.write(msg + "\n")


def run_mapper(server, token, soy_table, output_table):
    """
    Run a YT map operation that converts rows of `soy_table` to GRSR format
    and writes them to `output_table`.

    Every input row must have the fields `id`, `Url` and `FetchedResult`.
    `id` is split on "-" into either `beta, id` or `domain, beta, id`.
    Each input row produces exactly one output row with the fields
    `key`, `domain`, `subkey` and `value`.

    :param server: YT cluster, passed to `YtClient` as the first argument
    :param token: YT token, passed to `YtClient` as the second argument
    :param soy_table: table the records are read from
    :param output_table: table the converted records are written to
    """
    import yt
    # Diagnostic dump of the `yt` package contents.  Passed as a format
    # argument so that the text is never interpreted as a format string.
    do_log("%s", dir(yt))
    from yt.wrapper import YtClient
    from yt.wrapper import JsonFormat

    client = YtClient(server, token, config={
        'pickling': {
            'python_binary': '/skynet/python/bin/python'
        }
    })

    def mapper(rec):
        """Convert one input row into one GRSR row."""
        id_parts = rec["id"].split("-")
        if len(id_parts) == 2:
            # No domain part in the id.
            reg = None
            beta, rec_id = id_parts
        elif len(id_parts) == 3:
            reg, beta, rec_id = id_parts
        else:
            # Same exception type the bare tuple unpacking used to raise,
            # but with a message that names the offending id.
            raise ValueError(
                "Unexpected record id %r: expected 2 or 3 '-'-separated parts"
                % (rec["id"],)
            )

        text_match = TEXT_REGEXP.search(rec["Url"])
        lr_match = LR_REGEXP.search(rec["Url"])
        # Only `text` is percent-decoded; `lr` is passed through as found.
        # A parameter missing from the URL becomes an empty string.
        text = urllib2.unquote(text_match.group(1)) if text_match else ""
        lr = lr_match.group(1) if lr_match else ""

        yield {
            "key": rec_id,
            "domain": reg,
            "subkey": "\t".join([text, lr, "search", beta]),
            "value": rec["FetchedResult"],
        }

    do_log("Converting to GRSR format...")
    client.run_map(
        mapper,
        soy_table,
        output_table,
        format=JsonFormat(attributes={"encode_utf8": False, "encoding": None}),
        spec={
            "owners": YT_OWNERS,
            "job_io": {
                "table_writer": {
                    # 64 MiB
                    "max_row_weight": 64 << 20,
                },
            },
        },
    )


def main(argv=None):
    """
    Parse command-line arguments and run the conversion.

    :param argv: list of argument strings; `None` makes argparse read `sys.argv[1:]`
    """
    parser = argparse.ArgumentParser()

    # `soy_table` is the input of the map operation and `output_table` is its
    # destination, in the order they are handed to `run_mapper`.
    parser.add_argument("soy_table", help="Table to take records from")
    parser.add_argument("output_table", help="Table to put records to")
    parser.add_argument("--server", help="YT cluster", required=True)
    # Defaults to the YT_TOKEN environment variable (None when it is unset).
    parser.add_argument("--token", help="YT token", default=os.environ.get("YT_TOKEN"))
    args = parser.parse_args(argv)
    run_mapper(args.server, args.token, args.soy_table, args.output_table)


if __name__ == "__main__":
    main()
