import yt.wrapper as yt
from datetime import datetime
import os
import random
from urllib2 import urlparse
import sys
import url_ndp
import math

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
    files
)

class RobotTableUpdater(object):
    """Runs the job that produces an updated robot table under ``root_dir``.

    The class attributes below are table names relative to ``root_dir``:
    the source robot table, two intermediate dumps and the final result.
    """
    src_robot_filename = '01_src'
    owner_appended_filename = '02_owner_appended'
    joined_filename = '03_joined'
    dst_robot_filename = '10_dst'

    def __init__(self, root_dir, wm_table, token):
        """Remember the job parameters and prepare the cluster handle.

        root_dir -- directory holding the source, intermediate and result tables
        wm_table -- path of the table joined to the robot table by ``owner``
        token    -- token passed to the cluster client
        """
        self.root_dir = root_dir
        self.wm_table = wm_table
        self.token = token
        hahn = clusters.Hahn(token=self.token)
        self.cluster = hahn.env(templates=dict(job_root=self.root_dir))

    def update(self):
        """Build and run the update job; return the path of the result table.

        Stages: map ``append_owner`` over the source table, full-join the
        result with ``wm_table`` by ``owner``, then reduce each owner group
        with ``update_robot_table``. Every stage output is also stored under
        ``root_dir``.
        """
        job = self.cluster.job()

        source_path = os.path.join(self.root_dir, self.src_robot_filename)
        robot_src = job.table(source_path)
        wm_src = job.table(self.wm_table)

        # Files shipped alongside the mapper (used by append_owner).
        mapper_files = [
            files.LocalFile('areas.lst'),
            files.LocalFile('liburl_ndp.so'),
            files.LocalFile('url_ndp.py'),
        ]
        owner_appended_path = os.path.join(self.root_dir, self.owner_appended_filename)
        with_owner = robot_src.map(append_owner, files=mapper_files).put(owner_appended_path)

        joined_path = os.path.join(self.root_dir, self.joined_filename)
        joined = with_owner.join(
            wm_src, by='owner', type='full', assume_unique_right=True
        ).put(joined_path)

        result_path = os.path.join(self.root_dir, self.dst_robot_filename)
        joined.groupby('owner').reduce(update_robot_table).put(result_path)

        job.run()

        return os.path.join(self.root_dir, self.dst_robot_filename)


def append_owner(records):
    """Mapper: yield each record extended with an ``owner`` field.

    The owner is obtained by formatting the record's ``host`` with a
    ``url_ndp.Formatter`` built from the '%y' format and the 'areas.lst'
    file. Records whose host makes the formatter raise ``url_ndp.Error``
    are dropped.
    """
    area_files = ['areas.lst']
    formatter = url_ndp.Formatter('%y', ':'.join(area_files))
    for record in records:
        try:
            host_owner = formatter.FormatUrl(str(record.host))
        except url_ndp.Error:
            # Host could not be formatted: skip the record.
            continue
        else:
            yield Record(record, owner=host_owner)


def update_robot_table(groups):
    """Reducer: rebuild robot-table rows for every group of joined records.

    ``groups`` is an iterable of ``(key, records)`` pairs. One output
    ``Record`` is yielded per input record, with the fields ``host``,
    ``TolokaCount``, ``WatermarkTolokaCount``, ``TolokaPecent``, ``CVCount``,
    ``WatermarkCVCount`` and ``CVPecent`` (column names are kept exactly as
    spelled in the table).

    * A record whose ``total_marks`` is absent or negative has no markup
      results: its robot-table values are passed through, with both percent
      fields cast to ``float``.
    * A record with markup gets its Toloka fields from ``total_marks``,
      ``wm_marks`` and ``wm_share``, but only for the first such record of the
      group; the following ones get zeros so that the group's markup totals
      are not repeated on every host. CV fields are preserved when
      ``CVCount`` is present and non-negative, otherwise zero-filled.

    The "first record" state is tracked per group. Previously the counter was
    reset on every record, so the zero-fill branch could never run and the
    markup totals were duplicated across all hosts of the group.
    """
    for key, records in groups:
        # Per-group state: has this group's markup already been written out?
        markup_emitted = False
        for r in records:
            if r.get('total_marks', -1) < 0:
                # No markup results for this robot-table row: return as is.
                yield Record(host=r.host,
                             TolokaCount=r.TolokaCount,
                             WatermarkTolokaCount=r.WatermarkTolokaCount,
                             TolokaPecent=float(r.TolokaPecent),
                             CVCount=r.CVCount,
                             WatermarkCVCount=r.WatermarkCVCount,
                             CVPecent=float(r.CVPecent))
                continue

            if not markup_emitted:
                # First record with markup in the group: copy all Toloka info.
                total_marks = r.total_marks
                wm_marks = r.wm_marks
                wm_share = float(r.wm_share)
                markup_emitted = True
            else:
                # Markup info is already saved for this group: fill with zeros.
                total_marks = 0
                wm_marks = 0
                wm_share = 0.0

            if r.get('CVCount', -1) < 0:
                # No robot-table record on this row: zero-fill the CV values.
                total_cv_marks = 0
                wm_cv_marks = 0
                wm_cv_share = 0.0
            else:
                # Preserve the original CV values.
                total_cv_marks = r.CVCount
                wm_cv_marks = r.WatermarkCVCount
                wm_cv_share = float(r.CVPecent)

            yield Record(host=r.host,
                         TolokaCount=total_marks,
                         WatermarkTolokaCount=wm_marks,
                         TolokaPecent=wm_share,
                         CVCount=total_cv_marks,
                         WatermarkCVCount=wm_cv_marks,
                         CVPecent=wm_cv_share)


def main(*args):
    """Entry point: run the robot table update and describe the result table.

    Expects exactly six positional arguments. Only the first one (a sequence
    whose first element is a dict with the keys 'robot_dump_state_dir' and
    'yt_input_table') and the fourth one (the token) are used.

    Returns a one-element list with a dict naming the cluster and the path
    of the resulting table. Progress messages are written to stderr.
    """
    params_wrapped, _in2, _in3, token, _any_param, _html_file = args

    params = params_wrapped[0]
    state_dir = params['robot_dump_state_dir']
    input_table = params['yt_input_table']

    print >>sys.stderr, "before init"
    updater = RobotTableUpdater(root_dir=state_dir,
                                wm_table=input_table,
                                token=token)
    print >>sys.stderr, "init success"
    result_table = updater.update()
    print >>sys.stderr, "update success"

    output = {"cluster": "hahn", "table": result_table}
    return [output]
