from common import (
    CONTENT_GROUP,
    CONTENT_RESOURCE,
    deep_update,
    nfi_common,
)

from nile.api.v1 import (
    clusters,
    extractors as ne,
    filters as nf,
    Record,
)

import argparse
import re
import json


class extract_uuids(object):
    """Map operation that extracts the stream id from each push payload.

    For every input record the JSON string stored in ``push_column`` is
    parsed and its ``data.push_uri`` value is searched for a
    ``stream_id=<id>`` fragment.  Each output record carries exactly two
    fields: the push payload (passed through untouched) and the extracted
    id under ``uuid_column`` (``None`` when no id could be found).
    """

    def __init__(self, push_column, uuid_column):
        """
        :param push_column: name of the field holding the push JSON string.
        :param uuid_column: name of the output field for the extracted id.
        """
        self.push_column = push_column
        self.uuid_column = uuid_column
        # Compiled once per mapper instance; matches a lower-case
        # alphanumeric id that follows "stream_id=".
        self.stream_id_regexp = re.compile(r"stream_id=([a-z0-9]+)")

    def _find_uuid(self, push):
        """Return the first stream id found in the push JSON, or ``None``.

        Malformed JSON still raises, so broken payloads are not silently
        hidden; only structurally "empty" payloads are tolerated.
        """
        # A null/empty column value is treated like a missing payload; the
        # ``rec.get`` default alone only covers an absent key.
        payload = json.loads(push or "{}")
        # Guard each level: the payload or its "data" entry may be null or
        # not an object at all.
        data = payload.get("data") if isinstance(payload, dict) else None
        push_uri = data.get("push_uri") if isinstance(data, dict) else None
        if not push_uri:
            return None

        match = self.stream_id_regexp.search(push_uri)
        return match.group(1) if match else None

    def __call__(self, recs):
        """Yield one ``Record`` (push payload + extracted id) per input record."""
        for rec in recs:
            push = rec.get(self.push_column, "{}")
            yield Record(**{
                self.push_column: push,
                self.uuid_column: self._find_uuid(push),
            })


class insert_thumbnails(object):
    """Map operation that embeds a thumbnail URL into the push payload.

    When a record has a non-empty ``thumbnail_column`` value, the JSON in
    ``push_column`` is parsed, the thumbnail is set as ``image`` under both
    ``android_features`` and ``browser_features``, and the payload is
    serialized back to a string.  Records without a thumbnail pass their
    payload through unchanged.  Output records contain only ``push_column``.
    """

    def __init__(self, push_column, thumbnail_column):
        """
        :param push_column: name of the field holding the push JSON string.
        :param thumbnail_column: name of the field holding the thumbnail URL.
        """
        self.push_column = push_column
        self.thumbnail_column = thumbnail_column

    def __call__(self, recs):
        """Yield one ``Record`` with the (possibly updated) push payload per input."""
        for rec in recs:
            thumbnail = rec.get(self.thumbnail_column)
            push = rec.get(self.push_column, "{}")
            if thumbnail:
                # A null/empty payload is parsed as an empty object; the
                # ``rec.get`` default alone only covers an absent key.
                payload = json.loads(push or "{}")
                # deep_update's result is not used, so it is presumably an
                # in-place nested merge -- confirm against ``common``.
                deep_update(payload, {
                    "android_features": {"image": thumbnail},
                    "browser_features": {"image": thumbnail},
                })
                push = json.dumps(payload)

            yield Record(**{self.push_column: push})


def parse_arguments(argv=None):
    """Parse the command-line options of the script.

    :param argv: optional list of argument strings to parse instead of the
        process arguments; ``None`` (the default) makes argparse read
        ``sys.argv[1:]``, which is the original behaviour.
    :return: ``argparse.Namespace`` with ``cluster``, ``push_table``,
        ``result_table``, ``push_column`` and ``image_size`` attributes.
    :raises SystemExit: raised by argparse when a required option is
        missing or ``--cluster`` is not one of the allowed values.
    """
    parser = argparse.ArgumentParser(
        description="Insert thumbnail images into push payloads.",
    )
    parser.add_argument(
        "--cluster", required=True, choices=["hahn", "arnold"],
        help="cluster to run the job on",
    )
    parser.add_argument(
        "--push_table", required=True,
        help="path of the input table with push payloads",
    )
    parser.add_argument(
        "--result_table", required=True,
        help="path of the table the updated payloads are written to",
    )
    parser.add_argument(
        "--push_column", required=True,
        help="name of the column holding the push JSON string",
    )
    parser.add_argument(
        "--image_size", required=True,
        help="text substituted for 'orig' in the thumbnail URL",
    )
    return parser.parse_args(argv)


def main():
    """Build and run the job that adds thumbnails to push payloads.

    Reads the push table, extracts a stream id from every payload, joins it
    with the content-group and content-resource tables to find a thumbnail,
    writes the thumbnail URL into the payload and stores the result in the
    table given by ``--result_table``.
    """
    args = parse_arguments()

    # Anything other than "hahn" falls through to Arnold.
    if args.cluster.lower() == "hahn":
        make_cluster = clusters.yt.Hahn
    else:
        make_cluster = clusters.yt.Arnold
    job = make_cluster().job()

    push_column = args.push_column
    uuid_column = "uuid"
    thumbnail_column = "thumbnail"

    def keep_resource(resource_name):
        # Keep thumbnail resources and rows with an empty resource name
        # (presumably the rows the left join could not match -- confirm).
        return not resource_name or resource_name == "thumbnail"

    def thumbnail_url(value):
        # Empty values are passed through as-is; otherwise add the scheme
        # and substitute the requested size for "orig".
        if not value:
            return value
        return "https:" + value.replace("orig", args.image_size)

    pushes = job.table(args.push_table).map(
        extract_uuids(push_column, uuid_column)
    )
    with_groups = pushes.join(
        job.table(CONTENT_GROUP),
        by_left=uuid_column,
        by_right="UUID",
        type="left",
    )
    # NOTE(review): ``type="left"`` is passed to project() here, not to a
    # join -- looks like a leftover; confirm what project() does with it.
    group_ids = with_groups.project(push_column, "ContentGroupID", type="left")
    with_resources = group_ids.join(
        job.table(CONTENT_RESOURCE),
        by="ContentGroupID",
        type="left",
    )
    thumbnail_rows = with_resources.filter(
        nf.custom(keep_resource, "ResourceName")
    )
    # NOTE(review): uuid_column is requested here although the projection
    # above did not keep it -- confirm this is intended.
    thumbnail_extractors = {thumbnail_column: ne.custom(thumbnail_url, "Value")}
    with_thumbnails = thumbnail_rows.project(
        push_column, uuid_column, **thumbnail_extractors
    )
    updated = with_thumbnails.map(
        insert_thumbnails(push_column, thumbnail_column),
        files=nfi_common,
    )
    updated.put(args.result_table)

    job.run()


# Run the job only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
