import os
import sys
import yt.wrapper as yt

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
    files
)

class PostMarkupProcess(object):
    """Merge assessment tables for one state and compute per-owner 'wm' share.

    All tables live under ``<root_dir>/<state>`` on the Hahn cluster; the
    class-level ``*_filename`` attributes are the table names inside that
    directory.
    """

    # Input tables.
    assessments_filename = '01_assessments'
    toloka_markup_filename = '10_toloka_markup'
    # Output tables.
    merged_assessments_filename = '12_merged_assessments'
    wm_share_filename = '13_wm_share'

    def __init__(self, root_dir, state, token):
        """Remember the job location and create the cluster handle.

        root_dir -- root directory of the job; also passed to the cluster
                    environment as the ``job_root`` template.
        state    -- name of the state sub-directory under ``root_dir``.
        token    -- token handed to both the Nile cluster and the yt client.
        """
        self.token = token
        self.root_dir = root_dir
        self.state = state
        self.state_path = os.path.join(root_dir, state)

        hahn = clusters.Hahn(token=token)
        self.cluster = hahn.env(templates={'job_root': root_dir})

    def _state_table(self, filename):
        """Return the path of table ``filename`` inside the state directory."""
        return os.path.join(self.state_path, filename)

    def merge_assessments_and_calc_wm_share(self):
        """Run the merge + wm-share job and return the wm-share table path.

        The job concatenates the assessments and toloka markup tables,
        applies ``unique('url')``, stores the result as the merged
        assessments table, then groups by ``owner`` and reduces each group
        with ``reduce_wm_share`` into the wm-share table.
        """
        job = self.cluster.job()
        manual_marks = job.table(self._state_table(self.assessments_filename))
        toloka_marks = job.table(self._state_table(self.toloka_markup_filename))

        merged = manual_marks.concat(toloka_marks).unique('url')
        # Keep working with the stream returned by put(), exactly as the
        # chained form would.
        stored = merged.put(self._state_table(self.merged_assessments_filename))
        shares = stored.groupby('owner').reduce(reduce_wm_share)
        shares.put(self._state_table(self.wm_share_filename))

        job.run()

        return os.path.join(self.root_dir, self.state, self.wm_share_filename)

    def update_state_label(self):
        """Set the ``current_state`` attribute of ``root_dir`` to this state."""
        yt_settings = {
            "proxy": {"url": "hahn.yt.yandex.net"},
            "token": self.token,
        }
        yt.update_config(yt_settings)
        yt.set_attribute(self.root_dir, 'current_state', self.state)


def reduce_wm_share(groups):
    """Reducer: emit one summary record per group with its 'wm' share.

    groups -- iterable of ``(key, records)`` pairs; ``key`` must expose
              ``owner`` and every record must expose ``mark`` and ``host``.

    For each group yields a ``Record`` with:
      owner       -- ``key.owner``
      host        -- ``host`` of the last record seen ('' for an empty group)
      total_marks -- number of records whose mark is 'wm' or 'no_wm'
      wm_marks    -- number of records whose mark is 'wm'
      wm_share    -- wm_marks / total_marks, or 0.0 when total_marks is 0
    """
    for key, records in groups:
        last_host = ''
        wm_count = 0
        judged = 0

        for rec in records:
            mark = rec.mark
            if mark == 'wm':
                wm_count += 1
                judged += 1
            elif mark == 'no_wm':
                judged += 1
            # Any other mark is ignored for counting but still updates host.
            last_host = rec.host

        # float() keeps true division regardless of interpreter version.
        share = float(wm_count) / judged if judged else 0.0

        yield Record(
            owner=key.owner,
            host=last_host,
            total_marks=judged,
            wm_marks=wm_count,
            wm_share=share,
        )


def main(*args):
    """Entry point: build the wm-share table and update the state label.

    Expects exactly six positional arguments
    ``(params, in2, in3, token, any_param, html_file)``; only ``params`` and
    ``token`` are used. ``params[0]`` must be a mapping with the keys
    ``'root_dir'`` and ``'new_state'``.

    Progress messages are written to stderr after each step.

    Returns a one-element list ``[{'cluster': 'hahn', 'table': <path>}]``
    where ``<path>`` is the wm-share table path.

    Raises ValueError if the number of positional arguments is not six.
    """
    # Unpacking (rather than indexing) keeps the strict six-argument check.
    params, _in2, _in3, token, _any_param, _html_file = args
    settings = params[0]

    # sys.stderr.write works on both Python 2 and 3; the old
    # ``print >>sys.stderr, ...`` form raises TypeError on Python 3.
    sys.stderr.write("before init\n")
    post_markup = PostMarkupProcess(settings['root_dir'], settings['new_state'], token)
    sys.stderr.write("init success\n")
    wm_share_path = post_markup.merge_assessments_and_calc_wm_share()
    sys.stderr.write("wm share success\n")
    post_markup.update_state_label()
    sys.stderr.write("state label update success\n")

    return [{'cluster': 'hahn', 'table': wm_share_path}]
