#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import defaultdict

from crypta.profile.utils.config import config


def get_phones_segments_lemmas(yt):
    """Collect phone-model lemma sets grouped by segment id.

    Reads the table at ``config.PHONE_MODELS_LEMMAS_BY_PRICE_SEGMENT`` through
    ``yt.read_table``. Each row is expected to provide a ``'price_segment'``
    label and a ``'model_lemmas'`` iterable of strings; every non-empty string
    is split on whitespace and turned into a set of tokens.

    Args:
        yt: client object exposing ``read_table(path)`` that yields rows
            supporting ``row['price_segment']`` and ``row['model_lemmas']``.

    Returns:
        A list of dicts, one per segment id found in the table, of the form
        ``{'segment_id': <str>, 'data': [<set of tokens>, ...]}``. The order
        of the list follows dict iteration order and should not be relied on.

    Raises:
        Exception: if the number of segment ids found in the table differs
            from the number of distinct ids in the price-segment mapping.
    """
    # Price-segment label -> segment id. The labels '50_100' and '100+' share
    # the same id, so six labels yield five distinct segments.
    segment_ids = {
        '0_15': 'segment-c9dab41b',
        '15_25': 'segment-890a3c73',
        '25_35': 'segment-a337e706',
        '35_50': 'segment-4061a117',
        '50_100': 'segment-cb71dc34',
        '100+': 'segment-cb71dc34',
    }
    # Derived from the mapping instead of a hard-coded 5 so the sanity check
    # below stays in sync if the mapping is ever edited.
    expected_segment_count = len(set(segment_ids.values()))

    segment_tokens_by_segment_id = defaultdict(list)

    for row in yt.read_table(config.PHONE_MODELS_LEMMAS_BY_PRICE_SEGMENT):
        segment_id = segment_ids.get(row['price_segment'])
        if segment_id is None:
            # Unknown price segment: nothing to collect for this row.
            continue

        lemma_sets = (
            set(phone_model_string.split())
            for phone_model_string in row['model_lemmas']
            if phone_model_string
        )
        # Indexing the defaultdict registers the segment even when the row
        # contributes no tokens; whitespace-only strings produce empty sets
        # and are dropped.
        segment_tokens_by_segment_id[segment_id].extend(
            lemmas for lemmas in lemma_sets if lemmas
        )

    # .items() instead of the Python-2-only .iteritems(), so this also runs
    # on Python 3.
    phones_lemmas = [
        {
            'segment_id': segment_id,
            'data': segment_tokens,
        }
        for segment_id, segment_tokens in segment_tokens_by_segment_id.items()
    ]

    if len(phones_lemmas) != expected_segment_count:
        raise Exception('Broken phone model info {}'.format(phones_lemmas))

    return phones_lemmas
