# coding=utf-8
import datetime
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

import simplejson


def get_rubric_ids_altay2backa(cluster):
    """Return a dict mapping each rubric row's ``id`` to its ``permalink``.

    Rows are read from the ``home/sprav/altay/prod/snapshot/rubric`` table
    via ``cluster.read``. If several rows share the same ``id``, the last
    one read wins.
    """
    altay2backa = {}
    for rubric_row in cluster.read('home/sprav/altay/prod/snapshot/rubric'):
        altay_id, backa_id = rubric_row.id, rubric_row.permalink
        altay2backa[altay_id] = backa_id
    return altay2backa


def compute_company_info_history(
        cluster, start_date, end_date,
        companies_table='$task_root/companies/periods/2018-02-01:2018-09-02'):
    """Build per-date rubric and company lookups from the companies table.

    Every row of ``companies_table`` whose ``date`` lies in
    ``[start_date, end_date]`` (inclusive on both ends) contributes to both
    results.

    :param cluster: object exposing ``read(table_path)`` that yields rows.
    :param start_date: lower bound, compared directly against ``row['date']``.
    :param end_date: upper bound, compared directly against ``row['date']``.
    :param companies_table: table to read company rows from; defaults to the
        path that was previously hard-coded here.
    :return: tuple ``(rubric2permalinks, permalink2info)``, both
        ``defaultdict(dict)`` keyed by date:

        * ``rubric2permalinks[date][backa_rubric]`` is a list of the
          permalinks of companies having that rubric on that date;
        * ``permalink2info[date][permalink]`` is a dict with the keys
          ``cluster_permalink``, ``backa_rubrics``, ``lat`` and ``lon``.
    :raises KeyError: if a company references a rubric id that is absent from
        the mapping returned by ``get_rubric_ids_altay2backa``.
    """
    rubric2permalinks = defaultdict(dict)
    permalink2info = defaultdict(dict)

    rubric_ids_altay2backa = get_rubric_ids_altay2backa(cluster)
    for row_number, company in enumerate(cluster.read(companies_table), 1):
        # Progress indicator: one line per 100000 rows read (filtered or not).
        if row_number % 100000 == 0:
            print(row_number)

        date = company['date']
        if not start_date <= date <= end_date:
            continue

        permalink = company['permalink']
        backa_rubrics = [
            rubric_ids_altay2backa[rubric['rubric_id']]
            for rubric in company['rubrics']
        ]

        permalink2info[date][permalink] = dict(
            # The stored value comes from the 'future_cluster_permalink'
            # field, not from 'cluster_permalink'.
            cluster_permalink=company.get('future_cluster_permalink'),
            backa_rubrics=backa_rubrics,
            # NOTE(review): lat/lon are read as attributes while the other
            # fields use item access; presumably the row type supports both.
            lat=company.lat,
            lon=company.lon
        )

        # Indexing the defaultdict registers the date even when the company
        # has no rubrics at all.
        daily_rubric2permalinks = rubric2permalinks[date]
        for rubric in backa_rubrics:
            # A set de-duplicates permalinks seen more than once per rubric.
            daily_rubric2permalinks.setdefault(rubric, set()).add(permalink)

    # Replace the sets by lists: the caller in this file dumps the result as
    # JSON, which cannot encode sets. Only existing keys are reassigned, so
    # the dicts do not change size while being walked.
    for daily_rubric2permalinks in rubric2permalinks.values():
        for rubric in daily_rubric2permalinks:
            daily_rubric2permalinks[rubric] = list(daily_rubric2permalinks[rubric])
    return rubric2permalinks, permalink2info


def get_company_info_history(cluster, start_date, end_date, from_backup):
    """Return ``(rubric2permalinks, permalink2info)`` for a date range.

    With ``from_backup`` true, both mappings are loaded from
    ``rubric2permalinks.json`` and ``permalink2info.json`` (paths relative to
    the current working directory), dates outside ``[start_date, end_date]``
    are dropped, and the per-date keys of ``permalink2info`` are converted
    from strings back to ints.

    Otherwise both mappings are computed with
    ``compute_company_info_history`` and written to those same two files.

    In both cases the sizes of the mappings are printed before returning.

    :param cluster: passed through to ``compute_company_info_history``;
        unused when loading from backup.
    :param start_date: inclusive lower bound for the date keys.
    :param end_date: inclusive upper bound for the date keys.
    :param from_backup: load from the JSON files instead of recomputing.
    :return: tuple ``(rubric2permalinks, permalink2info)``, each keyed by date.
        The backup path yields plain dicts; the compute path yields whatever
        ``compute_company_info_history`` returns.
    """
    if from_backup:
        print('loading rubric2permalinks from backup ')
        # Context managers make sure the backup files are closed again.
        with open('rubric2permalinks.json') as f:
            rubric2permalinks = simplejson.load(f)
        # Iterate over a snapshot of the keys: entries are removed in the loop.
        for date in list(rubric2permalinks):
            if not (start_date <= date <= end_date):
                del rubric2permalinks[date]
        # NOTE(review): JSON object keys are always strings. Only the
        # permalink keys of permalink2info are converted back below; the
        # rubric keys of rubric2permalinks stay strings. If rubric ids are
        # ints in the computed result, the two paths return differently
        # typed keys -- confirm against the callers.

        print('loading permalink2info from backup ')
        with open('permalink2info.json') as f:
            permalink2info = simplejson.load(f)
        for date in list(permalink2info):
            if not (start_date <= date <= end_date):
                del permalink2info[date]
                continue
            daily_permalinks_info = permalink2info[date]
            # Restore integer permalink keys lost in the JSON round trip.
            for str_permalink in list(daily_permalinks_info):
                daily_permalinks_info[int(str_permalink)] = daily_permalinks_info.pop(str_permalink)
    else:
        print('computing rubric2permalinks,permalink2info')
        rubric2permalinks, permalink2info = compute_company_info_history(cluster, start_date, end_date)
        with open('rubric2permalinks.json', 'w') as f:
            simplejson.dump(rubric2permalinks, f)

        with open('permalink2info.json', 'w') as f:
            simplejson.dump(permalink2info, f)

    # Diagnostics. The "first value" lines are skipped when no date survived
    # the filtering, instead of failing on an empty mapping.
    print('len(rubric2permalinks) %s' % len(rubric2permalinks))
    if rubric2permalinks:
        first_daily_rubrics = next(iter(rubric2permalinks.values()))
        print('len(rubric2permalinks.values()[0]) %s' % len(first_daily_rubrics))
    print('len(permalink2info) %s' % len(permalink2info))
    if permalink2info:
        first_daily_info = next(iter(permalink2info.values()))
        print('len(permalink2info.values()[0]) %s' % len(first_daily_info))

    return rubric2permalinks, permalink2info