import json
import sys
from collections import defaultdict, Sequence, Mapping
from urlparse import urlparse

import luigi
from lib.luigi import base_luigi_task
import yt.wrapper as yt

from lib import graphite_sender
from utils import mr_utils as mr
from utils import utils
from lib.luigi import yt_luigi
from rtcconf import config


def to_str(w):
    """Return ``w`` as-is when it is already a string, otherwise ``str(w)``."""
    return w if isinstance(w, basestring) else str(w)


def map_ecomerce(rec):
    """Mapper: emit one row per purchased product found in a record.

    Reads ``uniqid``, ``params`` and ``url`` from ``rec``. Records whose
    ``uniqid`` is empty, ``'-'`` or ``'0'``, or whose ``params`` does not
    mention ``'ecommerce'``, produce nothing. For the rest, every product
    of a ``purchase`` action in the ``params`` JSON is yielded as a dict
    with the keys ``yuid``, ``action``, ``name``, ``category``,
    ``revenue``, ``url`` and ``id_value`` (the URL's network location),
    all converted with ``to_str``.
    """
    yuid = rec.get('uniqid')
    # `or ''` also covers a column that is present but null; without it the
    # substring test below would raise TypeError on None.
    json_str = rec.get('params') or ''
    if not yuid or yuid in ('-', '0') or 'ecommerce' not in json_str:
        return

    # Parse the URL only for records that passed the filter.
    url = rec.get('url', '')
    try:
        domain = urlparse(url).netloc
    except Exception:
        # Best effort: an unparsable URL still yields rows, with an empty domain.
        domain = ''

    # These values are the same for every product of the record.
    yuid_str = to_str(yuid)
    url_str = to_str(url)
    domain_str = to_str(domain)

    for action, name, category, revenue in parse_ecommerce_json(json_str, {'purchase'}):
        yield {'yuid': yuid_str, 'action': to_str(action), 'name': to_str(name),
               'category': to_str(category), 'revenue': to_str(revenue),
               'url': url_str, 'id_value': domain_str}


def parse_ecommerce_json(json_str, good_actions):
    """Yield ``(action, name, category, revenue)`` for products in ecommerce JSON.

    ``json_str`` is expected to decode to an object whose
    ``["__ym"]["ecommerce"]`` entry is a list of ``{action: payload}``
    mappings. Only actions contained in ``good_actions`` are considered.
    For each such action:

    * ``revenue`` is ``payload["actionField"]["revenue"]`` when available,
      otherwise ``0``;
    * every mapping in ``payload["products"]`` contributes one tuple, with
      ``name`` taken from the product's ``name`` or, failing that, its
      ``id``, and ``category`` defaulting to ``""``.

    Products with neither a name nor an id are skipped. Malformed JSON or
    any unexpected structure is silently ignored (nothing is yielded for
    the offending part).
    """
    try:
        ecommerce = json.loads(json_str)["__ym"]["ecommerce"]
    except Exception:
        # Best effort: undecodable input or a missing path means "no purchases".
        return

    if not isinstance(ecommerce, Sequence):
        return

    for ecommerce_item in ecommerce:
        if not isinstance(ecommerce_item, Mapping):
            continue
        # .items() instead of the Python-2-only .iteritems(): same pairs,
        # and it keeps working if the module is run on Python 3.
        for action, action_hash in ecommerce_item.items():
            if action not in good_actions or not isinstance(action_hash, Mapping):
                continue

            revenue = 0
            action_field = action_hash.get('actionField')
            if isinstance(action_field, Mapping):
                revenue = action_field.get('revenue', 0)

            products = action_hash.get("products", [])
            if not isinstance(products, Sequence):
                continue
            for product in products:
                if not isinstance(product, Mapping):
                    continue
                # A product whose name and id are both missing/empty is unusable.
                name = product.get("name") or product.get("id")
                if name:
                    yield action, name, product.get("category", ""), revenue


def reduce_month(yuid_key, recs):
    """Reducer: fold a yuid's daily purchase rows into one history row.

    At most 1000 records are taken via ``mr.take_till_oom``. If that helper
    reports a non-empty remainder, a single overflow row is written to
    output table 1 and no history is produced. Otherwise one row is
    emitted with ``purchase_history``: date -> domain -> number of
    purchases.
    """
    limit = 1000
    head, overflow = mr.take_till_oom(recs, limit)
    if overflow:
        # presumably `overflow` is the number of records beyond the limit,
        # making 'oom' the total count - verify against mr.take_till_oom
        yield {'yuid': yuid_key['yuid'], 'oom': limit + overflow, '@table_index': 1}
        return

    purchases_by_date = defaultdict(lambda: defaultdict(int))
    for row in head:
        per_domain = purchases_by_date[row['id_date']]
        per_domain[row['id_value']] += row['id_count']

    yield {'yuid': yuid_key['yuid'], 'purchase_history': purchases_by_date}


def split_desk_mob_ua(yuid_key, recs):
    """Reducer: route a yuid's purchase rows to the mobile or desktop table.

    The group is split with ``mr.split_left_right`` into purchase rows and
    ``yuid_with_all`` rows. Nothing is emitted unless the first
    ``yuid_with_all`` row is flagged ``good`` and has a ``ua_profile``.
    Each purchase row gets that ``ua_profile`` attached and is written to
    output table 0 when the profile starts with ``'m'``, else to table 1.
    """
    purchase_rows, dict_rows = mr.split_left_right(recs, oom_check=False)
    if not dict_rows:
        return

    dict_row = dict_rows[0]
    if not utils.is_true(dict_row['good']):
        return

    ua_profile = dict_row.get('ua_profile')
    if not ua_profile:
        return

    for row in purchase_rows:
        target_table = 0 if ua_profile.startswith('m') else 1
        row['ua_profile'] = ua_profile
        row['@table_index'] = target_table
        yield row


def map_yuid_vertices(rec):
    """Mapper: keep only yuid vertices, renaming ``key`` to ``yuid``.

    Records whose ``id_type`` does not start with ``'yuid'`` are dropped.
    For the others the record is modified in place: ``key`` is moved to
    ``yuid`` and ``id`` is removed.
    """
    if not rec['id_type'].startswith('yuid'):
        return

    rec['yuid'] = rec.pop('key')
    rec.pop('id')
    yield rec


def join_yuid_ua(yuid_key, recs):
    """Reducer: attach ``ua_profile`` from ``yuid_with_all`` to vertex rows.

    The group is split with ``mr.split_left_right`` into vertex rows and
    ``yuid_with_all`` rows. When the first ``yuid_with_all`` row exists and
    is flagged ``good``, each vertex row gets its ``ua_profile`` and goes
    to the default output. Otherwise the vertex rows are passed through
    unchanged to output table 1.
    """
    vertex_rows, dict_rows = mr.split_left_right(recs, oom_check=False)
    has_good_dict_row = bool(dict_rows) and utils.is_true(dict_rows[0]['good'])

    if not has_good_dict_row:
        for row in vertex_rows:
            row['@table_index'] = 1
            yield row
        return

    ua_profile = dict_rows[0].get('ua_profile')
    for row in vertex_rows:
        row['ua_profile'] = ua_profile
        yield row


def _yuids_by_device(recs):
    """Return ``(mobile, desktop)`` lists of distinct yuids from ``recs``.

    Rows without a truthy ``ua_profile`` are ignored. A profile starting
    with ``'m'`` counts as mobile, any other profile as desktop.
    """
    mobile, desktop = set(), set()
    for rec in recs:
        profile = rec.get('ua_profile')
        if not profile:
            continue
        bucket = mobile if profile.startswith('m') else desktop
        bucket.add(rec['yuid'])
    return list(mobile), list(desktop)


def cross_device_stat(crypta_id_key, recs):
    """Reducer: per-crypta-id cross-device purchase statistics.

    The group is split with ``mr.split_left_right`` into rows of yuids
    with purchases and rows of all yuid vertices. Output tables:

    * 0 - one row per crypta id that has at least one purchase row;
    * 1 - crypta ids with purchases from both mobile and desktop yuids;
    * 2 - crypta ids that purchased from one device type only but contain
      both mobile and desktop yuids;
    * 3 - one row per mobile purchasing yuid whose crypta id has desktop yuids;
    * 4 - one row per desktop purchasing yuid whose crypta id has mobile yuids.
    """
    purchase_recs, vertices_recs = mr.split_left_right(recs, oom_check=False)
    mob_purchase, desk_purchase = _yuids_by_device(purchase_recs)
    crypta_id_mobs, crypta_id_desks = _yuids_by_device(vertices_recs)

    # Key columns shared by every emitted row; copied into each row.
    key_part = {'crypta_id': crypta_id_key['crypta_id'],
                'crypta_id_size': crypta_id_key['crypta_id_size']}

    if purchase_recs:
        yield dict(key_part,
                   mob_purchase=mob_purchase, desk_purchase=desk_purchase,
                   crypta_id_mobs=crypta_id_mobs, crypta_id_desks=crypta_id_desks)

        if mob_purchase and desk_purchase:
            cross_table = 1
        elif crypta_id_mobs and crypta_id_desks:
            cross_table = 2
        else:
            cross_table = None

        if cross_table is not None:
            # NOTE(review): rows for tables 1 and 2 carry only mob_purchase and
            # crypta_id_desks - confirm that omitting desk_purchase and
            # crypta_id_mobs is intentional.
            row = dict(key_part, mob_purchase=mob_purchase, crypta_id_desks=crypta_id_desks)
            row['@table_index'] = cross_table
            yield row

    if mob_purchase and crypta_id_desks:
        for yuid in mob_purchase:
            row = dict(key_part, yuid=yuid, crypta_id_desks=crypta_id_desks)
            row['@table_index'] = 3
            yield row

    if desk_purchase and crypta_id_mobs:
        for yuid in desk_purchase:
            row = dict(key_part, yuid=yuid, crypta_id_mobs=crypta_id_mobs)
            row['@table_index'] = 4
            yield row


class MergeYuidPurchaseMonth(yt_luigi.BaseYtTask):
    """Merge daily purchase tables into a per-yuid history, split by device type.

    Produces ``yuid_with_purchase_month`` plus its mobile and desktop
    subsets under ``<YT_OUTPUT_FOLDER><date>/ecommerce/``.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Map logical input names to YT folders."""
        folders = {}
        folders['dict'] = config.GRAPH_YT_DICTS_FOLDER
        folders['graph'] = config.YT_OUTPUT_FOLDER
        return folders

    def output_folders(self):
        """Map logical output names to YT folders for this task's date."""
        return {'ecommerce': config.YT_OUTPUT_FOLDER + self.date + '/ecommerce/'}

    def requires(self):
        """Depend on the yuid dictionaries and on the watch-log import of each stored day."""
        # Imported lazily, as in the rest of this module's task wiring.
        import graph_watch_log
        from matching.yuid_matching import graph_dict

        day_tasks = []
        for day in utils.get_dates_before(self.date, int(config.STORE_DAYS)):
            day_tasks.append(graph_watch_log.ImportWatchLogDayTask(date=day, run_date=self.date))

        return [graph_dict.YuidAllIdDictsTask(self.date)] + day_tasks

    def run(self):
        """Build the monthly purchase history and split it into mobile/desktop tables."""
        yuid_with_all_t = yt.TablePath(self.in_f('dict') + 'yuid_with_all',
                                       columns=['yuid', 'ua_profile', 'good'])
        day_tables = mr.get_existing_date_tables(self.in_f('graph'),
                                                 'ecommerce/yuid_with_purchase_day',
                                                 int(config.STORE_DAYS),
                                                 before_date=self.date)
        out_dir = self.out_f('ecommerce')
        mr.mkdir(out_dir)

        month_t = out_dir + 'yuid_with_purchase_month'
        month_oom_t = out_dir + 'yuid_with_purchase_month_oom'
        mob_t = out_dir + 'yuid_mob_with_purchase_month'
        desk_t = out_dir + 'yuid_desk_with_purchase_month'

        # Step 1: one history row per yuid; oversized groups go to the oom table.
        yt.run_map_reduce(None, reduce_month,
                          day_tables,
                          [month_t, month_oom_t],
                          reduce_by='yuid')
        yt.run_sort(month_t, sort_by=config.ID_TYPE_YUID)

        # Step 2: join with yuid_with_all and split by ua_profile.
        yt.run_reduce(split_desk_mob_ua,
                      [month_t, yuid_with_all_t],
                      [mob_t, desk_t],
                      reduce_by='yuid')

        mr.sort_all([mob_t, desk_t], sort_by='yuid')

    def output(self):
        """Targets for the merged table and its mobile and desktop subsets."""
        out_dir = self.out_f('ecommerce')
        table_names = ('yuid_with_purchase_month',
                       'yuid_mob_with_purchase_month',
                       'yuid_desk_with_purchase_month')
        return [yt_luigi.YtTarget(out_dir + name) for name in table_names]


class VerticesEcommerceStats(base_luigi_task.BaseTask):
    """Compute cross-device purchase statistics for a vertices table.

    Joins the vertices with user-agent profiles and with the monthly
    purchase table, reduces them per crypta id with ``cross_device_stat``
    and sends the resulting row counts and ratios to graphite.
    """
    vertices_config = luigi.Parameter()

    @staticmethod
    def _ratio(part, whole):
        """Return ``part / whole`` as a float, or ``0.0`` when ``whole`` is zero.

        Guards the percentage metrics against empty purchase tables, which
        would otherwise fail the task with ZeroDivisionError after all the
        tables have been computed.
        """
        if not whole:
            return 0.0
        return part / float(whole)

    def requires(self):
        """Depend on the task producing the vertices and on the monthly purchase merge."""
        return [self.vertices_config.producing_task] + [MergeYuidPurchaseMonth(self.vertices_config.date)]

    def run(self):
        """Build the statistics tables, report metrics, then drop intermediate tables."""
        ecommerce_in_f = config.YT_OUTPUT_FOLDER + self.vertices_config.date + '/ecommerce/'
        yuid_with_all_t = yt.TablePath(config.GRAPH_YT_DICTS_FOLDER + 'yuid_with_all',
                                       columns=['yuid', 'ua_profile', 'good'])

        vertices_type = self.vertices_config.vertices_type
        vertices_pretty = self.vertices_config.get_vertices_table()
        vertices_stat_f = self.vertices_config.get_vertices_folder() + 'stat/ecommerce/'
        mr.mkdir(vertices_stat_f)

        # keep only yuid vertices, keyed by 'yuid'
        yt.run_map(map_yuid_vertices,
                   vertices_pretty,
                   vertices_stat_f + 'yuid_vertices')

        # join ua to vertices to identify cross-device purchase
        # TODO: can join ua in graph_pretty
        yt.run_sort(vertices_stat_f + 'yuid_vertices', sort_by='yuid')

        yt.run_reduce(join_yuid_ua,
                      [vertices_stat_f + 'yuid_vertices', yuid_with_all_t],
                      [vertices_stat_f + 'yuid_vertices_ua', vertices_stat_f + 'yuid_vertices_debug'],
                      reduce_by='yuid')

        mr.sort_all([
            vertices_stat_f + 'yuid_vertices_ua',
            vertices_stat_f + 'yuid_vertices_debug'
        ], sort_by='yuid')

        # vertices whose yuid appears in the monthly purchase table
        yt.run_reduce(mr.filter_left_by_right,
                      [vertices_stat_f + 'yuid_vertices_ua', ecommerce_in_f + 'yuid_with_purchase_month'],
                      vertices_stat_f + 'yuid_vertices_purchase',
                      reduce_by='yuid')

        # calculate cross-device purchase stats
        mr.sort_all([
            vertices_stat_f + 'yuid_vertices_ua',
            vertices_stat_f + 'yuid_vertices_purchase'
        ], sort_by=['crypta_id', 'crypta_id_size'])

        # output order must match the '@table_index' values used by cross_device_stat
        yt.run_reduce(cross_device_stat,
                      [vertices_stat_f + 'yuid_vertices_purchase', vertices_stat_f + 'yuid_vertices_ua'],
                      [vertices_stat_f + 'crypta_id_purchase',
                       vertices_stat_f + 'crypta_id_with_cross_device_purchase',
                       vertices_stat_f + 'crypta_id_cross_device_with_purchase',
                       vertices_stat_f + 'yuid_mob_with_desk_purchase',
                       vertices_stat_f + 'yuid_desk_with_mob_purchase'],
                      reduce_by=['crypta_id', 'crypta_id_size'])

        # Send to graphite

        # all purchases
        yuids_with_purchase_count = yt.row_count(ecommerce_in_f + 'yuid_with_purchase_month')
        # all mob purchases
        yuids_mob_with_purchase_count = yt.row_count(ecommerce_in_f + 'yuid_mob_with_purchase_month')
        # all desk purchases
        yuids_desk_with_purchase_count = yt.row_count(ecommerce_in_f + 'yuid_desk_with_purchase_month')

        # has crypta ids
        vertices_with_purchase_count = yt.row_count(vertices_stat_f + 'yuid_vertices_purchase')
        # mob purchase yuids that have a desktop in their crypta id
        yuids_mob_with_desk_purchase_count = yt.row_count(vertices_stat_f + 'yuid_mob_with_desk_purchase')
        # desk purchase yuids that have a mobile in their crypta id
        yuids_desk_with_mob_purchase_count = yt.row_count(vertices_stat_f + 'yuid_desk_with_mob_purchase')

        # all crypta ids that made a purchase
        crypta_ids_with_purchase_count = yt.row_count(vertices_stat_f + 'crypta_id_purchase')
        # all crypta ids that made a cross-device purchase
        crypta_ids_with_cross_device_purchase = yt.row_count(vertices_stat_f + 'crypta_id_with_cross_device_purchase')
        # all crypta ids that could've made a cross-device purchase but didn't
        crypta_id_cross_device_with_purchase = yt.row_count(vertices_stat_f + 'crypta_id_cross_device_with_purchase')

        graphite_sender.to_graphite_sender([
            ('vertices_purchase_all_' + vertices_type, 'count', vertices_with_purchase_count),
            ('vertices_purchase_all_' + vertices_type, 'percent',
             self._ratio(vertices_with_purchase_count, yuids_with_purchase_count)),

            ('vertices_purchase_mob_desk_' + vertices_type, 'count', yuids_mob_with_desk_purchase_count),
            ('vertices_purchase_mob_desk_' + vertices_type, 'percent',
             self._ratio(yuids_mob_with_desk_purchase_count, yuids_mob_with_purchase_count)),

            ('vertices_purchase_desk_mob_' + vertices_type, 'count', yuids_desk_with_mob_purchase_count),
            ('vertices_purchase_desk_mob_' + vertices_type, 'percent',
             self._ratio(yuids_desk_with_mob_purchase_count, yuids_desk_with_purchase_count)),

            ('crypta_id_purchase_' + vertices_type, 'count', crypta_ids_with_purchase_count),
            ('crypta_id_with_cross_device_purchase_' + vertices_type, 'count', crypta_ids_with_cross_device_purchase),
            ('crypta_id_cross_device_with_purchase_' + vertices_type, 'count', crypta_id_cross_device_with_purchase),
        ], self.vertices_config.date)

        # intermediate tables are not part of output()
        mr.drop(vertices_stat_f + 'yuid_vertices')
        mr.drop(vertices_stat_f + 'yuid_vertices_ua')

    def output(self):
        """Targets for the purchase-vertex table and the five statistics tables."""
        vertices_stat_f = self.vertices_config.get_vertices_folder() + 'stat/ecommerce/'
        return [yt_luigi.YtTarget(vertices_stat_f + 'yuid_vertices_purchase'),
                yt_luigi.YtTarget(vertices_stat_f + 'crypta_id_purchase'),
                yt_luigi.YtTarget(vertices_stat_f + 'crypta_id_with_cross_device_purchase'),
                yt_luigi.YtTarget(vertices_stat_f + 'crypta_id_cross_device_with_purchase'),
                yt_luigi.YtTarget(vertices_stat_f + 'yuid_mob_with_desk_purchase'),
                yt_luigi.YtTarget(vertices_stat_f + 'yuid_desk_with_mob_purchase')]


if __name__ == '__main__':
    # Manual entry point: the first command-line argument is the date to process.
    yt.config.set_proxy(config.MR_SERVER)

    dt = sys.argv[1]
    import graph_vertices

    # NOTE(review): VerticesEcommerceStats declares only `vertices_config` in
    # this file - confirm that its base task accepts the `date` parameter.
    stats_task = VerticesEcommerceStats(date=dt,
                                        vertices_config=graph_vertices.get_vertices_config(dt))
    luigi.build([stats_task], scheduler_port=8083, workers=10)
