import nile
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

def object_id_to_thumb_url_mapper(recs):
    """Flatten recommendation rows into one record per predicted object.

    Each incoming row carries a JSON string under ``"results"``. The decoded
    value is iterated as a sequence of category entries, each exposing a
    ``"predicted_objects"`` sequence. For every object found there a
    ``Record`` with ``object_id``, ``thumb_url`` and ``title`` is emitted.

    :param recs: iterable of rows supporting ``row["results"]``.
    :return: generator of ``Record`` objects.
    """
    # Imported locally, as in the original mapper.
    import json

    for row in recs:
        for category in json.loads(row["results"]):
            for item in category["predicted_objects"]:
                yield Record(
                    object_id=item["object_id"],
                    thumb_url=item["thumb_url"],
                    title=item['title']
                )

class prepare_to_visualisation(object):
    """Mapper that converts per-user rows into rows ready for display.

    Only rows whose ``"type"`` equals ``"CATEG_MIXED"`` and that have at
    least one watched object produce output. For such a row the emitted
    ``Record`` carries the ``uid``, a ``watched_objects`` list, and one list
    per predicted category (keyed by the category's ``"type"``). Every list
    element is a ``{"url": ..., "title": ...}`` dict looked up by object id;
    ids missing from a lookup table map to an empty string.

    :param object_id_to_thumb_url: mapping object id -> thumbnail URL.
    :param object_id_to_title: mapping object id -> title.
    """

    def __init__(self, object_id_to_thumb_url, object_id_to_title):
        self.object_id_to_thumb_url = object_id_to_thumb_url
        self.object_id_to_title = object_id_to_title

    def _describe(self, object_infos):
        """Return a ``{"url", "title"}`` dict for every entry of *object_infos*."""
        thumb_urls = self.object_id_to_thumb_url
        titles = self.object_id_to_title
        return [
            {"url": thumb_urls.get(info["object_id"], ""),
             "title": titles.get(info["object_id"], "")}
            for info in object_infos
        ]

    def __call__(self, recs):
        """Yield one ``Record`` per qualifying row of *recs*.

        :param recs: iterable of rows supporting item access by field name.
        :return: generator of ``Record`` objects.
        """
        for rec in recs:
            # Filter first: rows of any other type are never emitted, so
            # there is no point in resolving their objects.
            if rec["type"] != "CATEG_MIXED":
                continue

            watched_objects = self._describe(rec["watched_object_by_last_4_weeks"])
            # Rows without any watch history are dropped as well; skip them
            # before the predicted categories are processed.
            if not watched_objects:
                continue

            result = {"watched_objects": watched_objects}
            for category_info in rec["predicted_objects"]:
                category_name = category_info["type"]
                result[category_name] = self._describe(category_info["predicted_objects"])

            yield Record(uid=rec["uid"], **result)

def main():
    """Build the object lookup table and the visualisation-ready user table.

    Stage 1 runs a job that maps the recommendations table through
    ``object_id_to_thumb_url_mapper``, groups by ``object_id`` and stores one
    ``thumb_url``/``title`` per object. Stage 2 reads that table back into two
    in-memory dicts. Stage 3 lists the working directory, takes the first
    name (in reverse-sorted order) containing both ``"user_info"`` and
    ``"normal"``, and maps it through ``prepare_to_visualisation`` into the
    output table sorted by ``uid``. If no name matches, stage 3 is skipped.
    """
    yt = clusters.Banach()

    # Stage 1: object_id -> (thumb_url, title) table.
    mapping_job = yt.job()
    (
        mapping_job
        .table("//home/videolog/msvvitaly/watched_objects_by_user/reformatted_recommendationsfilter_none")
        .map(object_id_to_thumb_url_mapper)
        .groupby('object_id')
        .aggregate(thumb_url=na.any("thumb_url"), title=na.any("title"))
        .put("//home/videolog/msvvitaly/watched_objects_by_user/object_id_to_url_and_title")
    )
    mapping_job.run()

    # Stage 2: pull the lookup table into memory in a single pass.
    thumb_by_id = {}
    title_by_id = {}
    for row in yt.driver.read("//home/videolog/msvvitaly/watched_objects_by_user/object_id_to_url_and_title"):
        oid = row["object_id"]
        # Drops the last 8 characters of the stored URL and appends a fixed
        # suffix -- presumably swapping the thumbnail size; TODO confirm.
        thumb_by_id[oid] = row["thumb_url"][:-8] + "S134x201_2x"
        title_by_id[oid] = row["title"]

    # Stage 3: pick the first matching table name in reverse-sorted order.
    # NOTE(review): presumably this selects the newest snapshot -- confirm
    # against the table naming scheme.
    names = sorted(yt.driver.list("//home/videolog/msvvitaly/watched_objects_by_user"), reverse=True)
    chosen = next(
        (name for name in names if "user_info" in name and "normal" in name),
        None
    )
    if chosen is None:
        return

    render_job = yt.job()
    (
        render_job
        .table("//home/videolog/msvvitaly/watched_objects_by_user/" + chosen)
        .map(prepare_to_visualisation(thumb_by_id, title_by_id))
        .sort('uid')
        .put('//home/videolog/msvvitaly/watched_objects_by_user/user_info_prepared_to_visualization')
    )
    render_job.run()

# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
