#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    Record
)
from collections import defaultdict
import getpass
import datetime


# YT path of the strm_video directory.
# NOTE(review): not referenced anywhere in the visible code; main() spells out
# the same prefix inline when building the sessions table path.
job_root = '//home/videolog/strm_video'


def check_errors_by_content_id(records):
    """Yield, per session and live channel, the errors seen before a stall.

    For every record that has an ``error_content_ids_ts`` field:

    * live entries of ``view_session`` give a mapping from
      ``video_content_id`` to ``view_channel``;
    * for every content id that has ``Stalled_Other`` timestamps and belongs
      to a live channel, the names of the other error types whose earliest
      timestamp is strictly less than the latest ``Stalled_Other`` timestamp
      are collected per channel.

    One ``Record`` is yielded for each channel with at least one such error,
    with the fields ``errors`` (comma-joined error names in sorted order),
    ``vsid``, ``channel`` and ``os`` (taken from ``os_family``).

    Error names are sorted before joining so that the same combination of
    errors always produces the same string; set iteration order is not
    stable, and the string is used as a grouping key downstream.

    Content ids whose ``Stalled_Other`` list is empty, and error types whose
    timestamp list is empty, are skipped instead of raising.

    :param records: iterable of session records (mapping-like objects).
    :return: generator of ``Record`` objects.
    """
    for rec in records:
        if 'error_content_ids_ts' not in rec:
            continue

        # Only live views take part in the analysis; a later entry for the
        # same content id overrides an earlier one.
        channel_dct = {
            vs['video_content_id']: vs['view_channel']
            for vs in rec['view_session']
            if vs['view_type'] == 'live'
        }

        by_channel = defaultdict(set)
        for vcid, edct in rec['error_content_ids_ts'].items():
            stalled_ts = edct.get('Stalled_Other')
            if not stalled_ts or vcid not in channel_dct:
                continue
            # NOTE(review): the reference point is the LATEST stall timestamp
            # (the original code called it ``first_ts`` but took the last
            # element of the sorted list) -- confirm this is the intent.
            last_stall_ts = max(stalled_ts)
            by_channel[channel_dct[vcid]].update(
                name for name, ts_list in edct.items()
                if name != 'Stalled_Other'
                and ts_list
                and min(ts_list) < last_stall_ts
            )

        for channel, errors in by_channel.items():
            if not errors:
                continue
            yield Record(
                errors=','.join(sorted(errors)),
                vsid=rec['vsid'],
                channel=channel,
                os=rec['os_family']
            )


def main():
    """Build and run the stalled-causes job for yesterday's sessions.

    The job reads yesterday's sessions table, maps it through
    ``check_errors_by_content_id``, counts records per
    (os, channel, errors) and stores that in a per-date table; the same
    stream is then re-aggregated per ``errors`` (summing the counts) and
    stored in the ``detailed`` table.

    The YT token is read from the ``YT_TOKEN`` environment variable.
    """
    cluster = clusters.yt.Hahn(token=os.environ['YT_TOKEN'])

    yesterday = datetime.date.today() - datetime.timedelta(days=1)

    job = cluster.job()

    # Stage 1: error combinations counted per os / channel for the date.
    sessions = job.table(
        '//home/videolog/strm_video/{}/sessions'.format(yesterday)
    )
    stalled = sessions.map(check_errors_by_content_id)
    per_channel = stalled.groupby(
        'os', 'channel', 'errors'
    ).aggregate(
        count=na.count()
    )
    per_channel_stored = per_channel.sort(
        'os', 'channel', 'count'
    ).put(
        '//home/videolog/stalled_causes/by_date/{}'.format(yesterday)
    )

    # Stage 2: totals per error combination across all os / channels.
    per_errors = per_channel_stored.groupby(
        'errors'
    ).aggregate(
        count=na.sum('count')
    )
    per_errors.sort(
        'count'
    ).put(
        '//home/videolog/stalled_causes/detailed'
    )

    job.run()


# Run the job only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
