import luigi
import yt.wrapper as yt

from lib import graphite_sender
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils


def map_beh_hub(rec):
    """Mapper: parse one BehHub row into a flat record.

    ``rec['value']`` is split on tabs; fields 1-4 are read as hub id,
    uniq id, source bits (integer) and unix timestamp. Field 0 is not used.

    Yields a single dict with keys ``hub_id``, ``key`` (the uniq id),
    ``source_bits`` (64-character zero-padded binary string),
    ``matching_type``, ``ts`` and ``last_update`` (the timestamp converted
    by ``utils.ts_to_date_str``).
    """
    fields = rec['value'].split('\t')
    hub_id, uniq_id = fields[1], fields[2]

    source_bits = long(fields[3])
    source_bits_str = '{0:064b}'.format(source_bits)

    # trying to find out from source bits whether it is our matching or bb matching
    # see bits description in CRYPTAIS-818
    # The first set bit in this order wins.
    bit_to_matching_type = (
        (31, 'passport'),
        (32, 'xuniq'),
        (52, 'crypta_hub_id'),
    )
    for bit, name in bit_to_matching_type:
        if (source_bits >> bit) & 1:
            matching_type = name
            break
    else:
        matching_type = 'unknown'

    unixtime = long(fields[4])
    update_date = utils.ts_to_date_str(unixtime)

    yield {
        'hub_id': hub_id,
        'key': uniq_id,
        'source_bits': source_bits_str,
        'matching_type': matching_type,
        'ts': unixtime,
        'last_update': update_date,
    }


def get_experiment_id(crypta_id):
    """Extract the experiment id digit from a crypta id string.

    The id must be longer than 10 characters and have ``'0'`` as its 10th
    character from the end; the 9th character from the end is then
    returned as an int. In every other case the result is ``None``.

    Raises ``ValueError`` if the 9th character from the end is not a digit.
    """
    if len(crypta_id) <= 10:
        return None
    if crypta_id[-10] != '0':
        return None
    return int(crypta_id[-9])


def join_hubs_to_vertices(key, recs):
    """Reducer: join vertices records with BehHub records by crypta id.

    ``recs`` is split by ``mr.split_left_right`` into vertices records
    (indexed here by ``crypta_id``) and BehHub records (indexed by
    ``hub_id``). For every id seen on either side one record is yielded
    with ``@table_index`` and ``experiment_id`` set.

    ``@table_index`` values:
      0 / 1 - id on both sides (non-device / ``id_type == 'deviceid'``);
      2 / 3 - id only in vertices (device / non-device);
      4     - only in BehHub, ``matching_type`` is passport or xuniq;
      5     - only in BehHub, truthy experiment id;
      6     - only in BehHub, ``hub_id`` equals ``key``;
      7     - only in BehHub, none of the above.

    Records are modified in place; when an id is on both sides the BehHub
    fields are merged into the vertices record.
    """
    vertices_recs, bb_hubs_recs = mr.split_left_right(recs, oom_check=False)
    vertex_by_id = dict((v['crypta_id'], v) for v in vertices_recs)
    hub_by_id = dict((h['hub_id'], h) for h in bb_hubs_recs)

    known_ids = set(vertex_by_id.keys()).union(set(hub_by_id.keys()))

    for crypta_id in known_ids:
        experiment_id = get_experiment_id(crypta_id)
        in_vertices = crypta_id in vertex_by_id
        in_bb = crypta_id in hub_by_id

        if in_vertices:
            out_rec = vertex_by_id[crypta_id]
            if in_bb:
                # Merge first: id_type is read from the merged record.
                out_rec.update(hub_by_id[crypta_id])
            is_device = out_rec['id_type'] == 'deviceid'
            if in_bb:
                table_index = 1 if is_device else 0
            else:
                table_index = 2 if is_device else 3
        elif in_bb:
            out_rec = hub_by_id[crypta_id]
            if out_rec['matching_type'] in ['passport', 'xuniq']:
                table_index = 4
            elif experiment_id:
                table_index = 5
            elif out_rec['hub_id'] == out_rec['key']:
                table_index = 6
            else:
                table_index = 7
        else:
            # Cannot happen: every id comes from one of the two dicts.
            raise Exception('WAT!')

        out_rec['@table_index'] = table_index
        out_rec['experiment_id'] = experiment_id

        yield out_rec


class ImportBehHub(yt_luigi.BaseYtTask):
    """Import BehHub tables into the per-date ``upload_bb/beh_hub/`` folder.

    Produces three tables in the output folder: ``all`` (every parsed row,
    sorted by ``key``), ``uniques`` (distinct by ``key``) and ``hubs``
    (distinct by ``hub_id``).
    """

    date = luigi.Parameter()

    def input_folders(self):
        """Return the folder that holds the source BehHub dumps."""
        return {'beh_hub_input': '//home/bs/dbrestore/'}

    def output_folders(self):
        """Return the dated folder the imported tables are written to."""
        beh_hub_dir = config.YT_OUTPUT_FOLDER + self.date + '/upload_bb/beh_hub/'
        return {'beh_hub_output': beh_hub_dir}

    def run(self):
        """Map all existing ``bsyeti*/BehHub`` tables and derive the outputs."""
        target_dir = self.out_f('beh_hub_output')
        mr.mkdir(target_dir)

        source_dir = self.in_f('beh_hub_input')
        # Only sub-folders named bsyeti* are expected to contain a BehHub table.
        candidates = []
        for name in mr.ls(source_dir, absolute_path=False):
            if name.startswith('bsyeti'):
                candidates.append(source_dir + name + '/BehHub')
        beh_hub_tables = [table for table in candidates if yt.exists(table)]

        all_table = target_dir + 'all'
        yt.run_map(map_beh_hub, beh_hub_tables, all_table)

        # The three follow-up operations are started asynchronously and awaited together.
        pending = [
            mr.distinct_by(['key'], all_table, target_dir + 'uniques', sync=False),
            mr.distinct_by(['hub_id'], all_table, target_dir + 'hubs', sync=False),
            yt.run_sort(all_table, sort_by='key', sync=False),
        ]
        utils.wait_all(pending)

        # upload_date is the first 10 characters of the first source table's
        # modification time (presumably the YYYY-MM-DD part - verify format).
        mod_time = yt.get_attribute(beh_hub_tables[0], 'modification_time', None)
        if mod_time:
            yt.set_attribute(all_table, 'upload_date', mod_time[:10])

        yt.set_attribute(all_table, 'beh_hub_tables_count', len(beh_hub_tables))

    def output(self):
        """The task is complete once the ``all`` table exists."""
        return [yt_luigi.YtTarget(self.out_f('beh_hub_output') + 'all')]


def map_device_to_crypta_id(rec):
    """Mapper: re-key device records by their crypta id.

    For records whose ``id_type`` is ``'deviceid'`` the ``key`` field is
    overwritten with ``crypta_id``; all other records pass through
    untouched. The (possibly modified) input record itself is yielded.
    """
    is_device = rec['id_type'] == 'deviceid'
    if is_device:
        rec['key'] = rec['crypta_id']

    yield rec


class CheckVerticesInBb(yt_luigi.BaseYtTask):
    """Compare the previous vertices table with the imported BehHub data.

    Splits the joined records into eight tables under
    ``upload_bb/vertices_in_bb/`` and reports the in-bb / not-in-bb row
    counts to graphite.
    """

    vertices_config = luigi.Parameter()

    def requires(self):
        return [ImportBehHub(self.vertices_config.date)]  # TODO: add previous day vertices?

    def input_folders(self):
        """Return the BehHub import folder for the date and the graph root."""
        date_dir = config.YT_OUTPUT_FOLDER + self.vertices_config.date
        return {
            'beh_hub': date_dir + '/upload_bb/beh_hub/',
            'graph': config.YT_OUTPUT_FOLDER,
        }

    def output_folders(self):
        """Return the dated folder the comparison tables are written to."""
        date_dir = config.YT_OUTPUT_FOLDER + self.vertices_config.date
        return {'vertices_stat': date_dir + '/upload_bb/vertices_in_bb/'}

    def run(self):
        """Join vertices with BehHub, or write error markers if there are no vertices."""
        stat_dir = self.out_f('vertices_stat')
        mr.mkdir(stat_dir)

        prev_config = self.vertices_config.get_previous_vertices_config()
        if not (prev_config and yt.exists(prev_config.get_vertices_table())):
            # No previous vertices: still create both target tables so that
            # output() is satisfied, each holding a single error row.
            def error_rec():
                yield {'error': 'yesterday vertices table for %s doesn\'t exist' % self.vertices_config.vertices_type}

            for table_name in ('in_bb_yuids', 'in_bb_device_ids'):
                yt.write_table(stat_dir + table_name, error_rec(), raw=False)
            return

        # to check devices stored as crypta id
        vertices_copy = stat_dir + 'vertices'
        yt.run_map(map_device_to_crypta_id,
                   prev_config.get_vertices_table(),
                   vertices_copy)
        yt.run_sort(vertices_copy, sort_by='key')

        # Position in this list is the @table_index set by join_hubs_to_vertices.
        split_table_names = [
            'in_bb_yuids',
            'in_bb_device_ids',
            'not_in_bb_yuids',
            'not_in_bb_device_ids',
            'not_in_vertices_bb_matching',
            'not_in_vertices_experiment',
            'not_in_vertices_device_id',
            'not_in_vertices',
        ]
        yt.run_reduce(join_hubs_to_vertices,
                      [vertices_copy, self.in_f('beh_hub') + 'all'],
                      [stat_dir + table_name for table_name in split_table_names],
                      reduce_by='key')

        yuids_found = yt.row_count(stat_dir + 'in_bb_yuids')
        devids_found = yt.row_count(stat_dir + 'in_bb_device_ids')
        yuids_missing = yt.row_count(stat_dir + 'not_in_bb_yuids')
        devids_missing = yt.row_count(stat_dir + 'not_in_bb_device_ids')

        metrics = [
            ('vertices_in_bb', 'yuids', yuids_found),
            ('vertices_in_bb', 'devids', devids_found),
            ('vertices_in_bb', 'vertices', yuids_found + devids_found),
            ('vertices_in_bb', 'no_yuids', yuids_missing),
            ('vertices_in_bb', 'no_devids', devids_missing),
            ('vertices_in_bb', 'no_vertices', yuids_missing + devids_missing),
        ]
        graphite_sender.to_graphite_sender(metrics, self.vertices_config.date)

        # The re-keyed copy of vertices is only an intermediate table.
        mr.drop(vertices_copy)

    def output(self):
        """The task is complete once both in-bb tables exist."""
        stat_dir = self.out_f('vertices_stat')
        return [yt_luigi.YtTarget(stat_dir + 'in_bb_yuids'),
                yt_luigi.YtTarget(stat_dir + 'in_bb_device_ids')]


if __name__ == '__main__':
    yt.config.set_proxy(config.MR_SERVER)



    # beh_hub = '//home/crypta/production/state/graph/2016-11-08/upload_bb/beh_hub/all'
    # for dt in ['2016-11-02', '2016-11-03', '2016-11-04', '2016-11-05', '2016-11-06', '2016-11-07', '2016-11-08']:
    #     out_f = '//home/crypta/team/artembelov/devices_in_bb/%s/' % dt
    #     mr.mkdir(out_f)
    #     vertices_table = '//home/crypta/production/state/graph/%s/exact/cluster/vertices' % dt
    #
    #     yt.run_map(map_device_to_crypta_id,
    #                vertices_table,
    #                out_f + 'vertices')
    #     yt.run_sort(out_f + 'vertices', sort_by='key')
    #
    #     yt.run_reduce(join_hubs_to_vertices,
    #                   [out_f + 'vertices', beh_hub],
    #                   [out_f + 'in_bb_yuids', out_f + 'in_bb_device_ids',
    #                    out_f + 'not_in_bb_yuids', out_f + 'not_in_bb_device_ids',
    #                    out_f + 'not_in_vertices_bb_matching',
    #                    out_f + 'not_in_vertices_experiment',
    #                    out_f + 'not_in_vertices_device_id',
    #                    out_f + 'not_in_vertices'],
    #                   reduce_by='key')

    for dt in ['2016-11-02', '2016-11-03', '2016-11-04', '2016-11-05', '2016-11-06', '2016-11-07', '2016-11-08']:
        out_f = '//home/crypta/team/artembelov/devices_in_bb/%s/' % dt
        x1 = yt.row_count(out_f + 'in_bb_yuids')
        x2 = yt.row_count(out_f + 'in_bb_device_ids')
        x3 = yt.row_count(out_f + 'vertices')
        x = x3 - x1 - x2
        print dt, x3, x1 + x2, x

    recs = yt.read_table('//home/crypta/production/state/graph/2016-11-13/exact/cluster/stat/crypta_ids_size_hist')

    limit = 10

    s1 = 0
    s2 = 0
    s3 = 0
    for r in recs:
        if r['crypta_id_size'] > limit:
            s1 += (r['crypta_id_size'] * r['count'])
            s2 += ((r['crypta_id_size'] - limit) * r['count'])
            s3 += r['count']

    print s1, s2, s3
