#!/usr/bin/python
# coding=utf-8

import os
import sys
import re
import json
import luigi

# Extend the import search path before the project-local imports below.
# NOTE: when GRAPH_PY_FOLDER is not set, os.environ.get returns None and that
# None is what gets appended to sys.path.
sys.path.append(os.environ.get('GRAPH_PY_FOLDER'))
# Relative entry ("../.."): it is resolved against the current working directory.
sys.path.append(os.path.join("..", ".."))

from lib.luigi import base_luigi_task
import yt.wrapper as yt

from utils import mr_utils as mr
from utils import utils
from lib.luigi import yt_luigi
import requests
from collections import defaultdict
from requests.packages.urllib3 import Retry
from rtcconf import config

from sqlalchemy import Table, Column, String, DateTime, Boolean, Integer, Text
from sqlalchemy.ext.declarative import declarative_base

# Names of the Toloka task types handled by the luigi tasks in this module;
# each name is also used as a sub-folder of config.CRYPTA_TOLOKA_FOLDER.
TOLOKA_TASKS = ('indevice_desktop', 'indevice_mobile', 'cross_device', 'household')
# Number of leading characters kept when a code is shortened (see rate_answer).
SHORT_CODE_LENGTH = 5

# Declarative base class for the SQLAlchemy model(s) declared in this module.
Base = declarative_base()


class TolokaResult(Base):
    """ORM mapping of the "TolokaResult" database table.

    Rows are selected by ``taskid`` (callers in this module pass a Toloka
    assignment id) and supply the code, user agent and yuid stored for it.
    """
    __tablename__ = "TolokaResult"

    id = Column(Integer, primary_key=True)
    # Column that execute_query filters on.
    taskid = Column(String(40))
    # rate_answer only takes rows where this flag is truthy into account.
    iscorrect = Column(Boolean)
    browser_name = Column(String(64))
    browser_family = Column(String(32))
    browser_mobile = Column(Boolean)
    # Code that rate_answer compares with the codes entered by the worker.
    code = Column(String(64))
    # Exported as 'user_agent' by gen_access_log.
    useragent = Column(Text(4096))
    # Exported as 'yuid' by gen_access_log.
    yuid = Column(String(20))
    dt = Column(DateTime)


def create_pool_table():
    """Create the 'pools' table under CRYPTA_TOLOKA_FOLDER unless it exists."""
    # TODO create as dynamic table
    pools_path = os.path.join(config.CRYPTA_TOLOKA_FOLDER, 'pools')
    if yt.exists(pools_path):
        return

    # Schema intended for the dynamic-table variant; it is not applied yet
    # because the table is currently created without attributes.
    schema = [
        {'name': 'task_name', 'type': 'string'},
        {'name': 'pool_id', 'type': 'uint64'},
        {'name': 'pool_status', 'type': 'string'},
    ]
    yt.create_table(pools_path)  # , attributes={'schema': schema, 'dynamic': True})
    # yt.mount_table(pools_path)

    # # Wait for table is mounted
    # while yt.get("{0}/@tablets/0/state".format(pools_path)) != 'mounted':
    #     time.sleep(0.1)


def add_pool(pool_id, pool_status, task_name):
    """Append a single pool record to the 'pools' table.

    The record carries the keys 'pool_id', 'pool_status' and 'task_name'.
    """
    pools_path = os.path.join(config.CRYPTA_TOLOKA_FOLDER, 'pools')
    record = dict(task_name=task_name,
                  pool_status=pool_status,
                  pool_id=pool_id)
    destination = yt.TablePath(pools_path, append=True)
    yt.write_table(destination, [record], raw=False)


def get_pool_by_name_and_status(task_name, pool_status):
    """Yield the pool ids stored for ``task_name`` with status ``pool_status``.

    Records are read from the 'pools' table; when that table does not exist a
    temporary table created for the duration of the scan is read instead.
    """
    pools_path = os.path.join(config.CRYPTA_TOLOKA_FOLDER, 'pools')
    with yt.TempTable(config.CRYPTA_TOLOKA_FOLDER, "tmp_") as fallback_table:
        if yt.exists(pools_path):
            source = pools_path
        else:
            source = fallback_table
        for record in yt.read_table(source, raw=False):
            if record['pool_status'] != pool_status:
                continue
            if record['task_name'] != task_name:
                continue
            yield record['pool_id']


def yield_answers(assignment):
    """Yield one flat row per task of a Toloka assignment.

    Args:
        assignment: dict with the keys 'id', 'task_suite_id', 'pool_id',
            'user_id', 'created', 'submitted', 'tasks' and 'solutions'.
            'tasks' and 'solutions' are paired positionally.

    Yields:
        dict holding the assignment-level fields plus the task's
        'input_values' and the solution's 'output_values'. String output
        values are stripped of surrounding whitespace; any other value
        (bool, number, None, ...) is passed through unchanged.
    """
    row_tpl = {
        'assignment_id': assignment['id'],
        'task_suite_id': assignment['task_suite_id'],
        'pool_id': assignment['pool_id'],
        'user_id': assignment['user_id'],
        'created': assignment['created'],
        'submitted': assignment['submitted'],
    }
    for task, solution in zip(assignment['tasks'], assignment['solutions']):
        row = dict(row_tpl)
        row.update(task['input_values'])
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for key, val in solution['output_values'].items():
            # Only strip values that actually are strings; calling .strip()
            # on a bool/number/None would raise AttributeError.
            row[key] = val.strip() if hasattr(val, 'strip') else val
        yield row


def yield_assignments(pool_id, status):
    """Yield the assignments of ``pool_id`` that are in the given ``status``.

    Pages through the Toloka '/assignments' endpoint 20 items at a time,
    sorted by id, asking for ids greater than the last one yielded, until the
    response reports that there is nothing more.

    Raises:
        Exception: when the API answers with code 'ACCESS_DENIED'.
    """
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=Retry(backoff_factor=1.0))
    session.mount("https://", adapter)

    endpoint = ''.join(('https://',
                        config.TOLOKA_HOST,
                        config.TOLOKA_API_PATH,
                        '/assignments'))
    auth_headers = {'Authorization': 'OAuth {}'.format(config.TOLOKA_ACCESS_TOKEN)}
    query = dict(pool_id=pool_id, status=status, sort="id", limit=20)

    while True:
        # TODO  fix requests.exceptions.SSLError hostname 'foo'
        # doesn't match ... and verify=True
        response = session.get(endpoint,
                               headers=auth_headers,
                               params=query,
                               verify=False)
        page = response.json()

        if page.get('code') == 'ACCESS_DENIED':
            raise Exception('Your Toloka token is dead. '
                            'Get new token and release config with updated TOLOKA_ACCESS_TOKEN')

        more_pages = page['has_more']

        for assignment in page['items']:
            # Remember the cursor so the next request continues after this id.
            query['id_gt'] = assignment['id']
            yield assignment

        if not more_pages:
            break


def yield_submitted_answers(task_name):
    """Yield answer rows of all SUBMITTED assignments in running pools of ``task_name``."""
    running_pools = get_pool_by_name_and_status(task_name, 'running')
    for pool_id in running_pools:
        submitted = yield_assignments(pool_id, 'SUBMITTED')
        for assignment in submitted:
            for row in yield_answers(assignment):
                yield row


def rate_answer(task_id, toloka_answer, task_result, task_name):
    """Decide whether a Toloka assignment is accepted or rejected.

    Args:
        task_id: assignment id; returned unchanged as the first tuple item.
        toloka_answer: list of answer rows (dicts) of the assignment. For
            'household' only the first row is inspected for the group checks.
        task_result: iterable of records exposing ``useragent``, ``code`` and
            ``iscorrect`` attributes. Only records with a truthy ``iscorrect``
            count, and one code is kept per distinct user agent.
        task_name: task type. For 'cross_device' and 'household' the expected
            codes are cut to SHORT_CODE_LENGTH characters before comparison.

    Returns:
        Tuple ``(task_id, status, comment)`` where status is "ACCEPTED" or
        "REJECTED". At least two distinct correct codes are required. A
        household answer is additionally rejected when fewer than two persons
        have correct codes and no shared-device code is correct, or when two
        persons list the same code.
    """
    def shorten_code(code):
        return code[:SHORT_CODE_LENGTH]

    use_short_codes = task_name in ("cross_device", "household")

    household_my_columns = ['my' + str(i) for i in range(1, 4)]
    household_their_column_groups = [['their' + str(j) + str(i) for i in range(1, 3)]
                                     for j in range(1, 5)]
    household_our_columns = ['our' + str(i) for i in range(1, 5)]

    code_column_names = ['code']
    code_column_names += ['code' + str(i) for i in range(10)]
    code_column_names += household_my_columns
    code_column_names += [x for lst in household_their_column_groups for x in lst]
    code_column_names += household_our_columns

    # Every non-empty code the worker entered, in any row and any code column.
    clean_codes = set()
    for ans in toloka_answer:
        for code_col in code_column_names:
            code = ans.get(code_col, '')
            if code:
                clean_codes.add(code.strip())

    ua_codes = {r.useragent: r.code for r in task_result if r.iscorrect}
    if use_short_codes:
        correct_codes = set(shorten_code(code) for code in ua_codes.values())
    else:
        correct_codes = set(ua_codes.values())
    correct_answers_num = len(clean_codes & correct_codes)

    if task_name == 'household' and correct_answers_num >= 2:
        answer = toloka_answer[0]  # only one record for household tasks

        # One set of correct codes per person: the worker first, then each
        # cohabitant.
        personal_groups = []
        for col_grp in [household_my_columns] + household_their_column_groups:
            code_grp = set()
            for col in col_grp:
                code = answer.get(col)
                if code and code in correct_codes:
                    code_grp.add(code)
            personal_groups.append(code_grp)

        shared_codes = [answer.get(col) for col in household_our_columns]
        shared_codes = [code for code in shared_codes
                        if code and code in correct_codes]

        # A group is appended for every person whether or not any code was
        # entered, so the number of groups is constant; what matters is how
        # many of them are non-empty.
        filled_groups = [grp for grp in personal_groups if grp]
        if len(filled_groups) <= 1 and not shared_codes:
            return task_id, "REJECTED", u'Не указаны коды для как минимум одного сожителя или общего устройства'

        # Check that codes aren't repeated for personal devices
        for idx, group in enumerate(filled_groups):
            for other in filled_groups[idx + 1:]:
                if group & other:
                    return task_id, "REJECTED", u'Не может быть совпадающих кодов на персональных устройствах'

        return task_id, "ACCEPTED", "well done"

    if task_name != "household" and correct_answers_num > 1:
        return (task_id,
                "ACCEPTED",
                "well done")
    return (task_id,
            "REJECTED",
            "Terribly sorry, but the codes don't seem correct")


def submit_rates(rates):
    """Send each verdict to Toloka and yield what the API answered.

    Args:
        rates: iterable of ``(assignment_id, status, comment)`` tuples.

    Yields:
        dict with 'assignment_id', 'status' and 'submit_status' (the decoded
        JSON body of the PATCH response).
    """
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=Retry(backoff_factor=1.0))
    session.mount("https://", adapter)

    base_url = ''.join(('https://',
                        config.TOLOKA_HOST,
                        config.TOLOKA_API_PATH,
                        '/assignments/'))
    request_headers = {
        'Authorization': 'OAuth {}'.format(config.TOLOKA_ACCESS_TOKEN),
        'Content-Type': 'application/JSON',
    }

    for assignment_id, verdict, remark in rates:
        target = base_url + assignment_id
        body = json.dumps({"status": verdict, "public_comment": remark})
        response = session.patch(target,
                                 headers=request_headers,
                                 data=body,
                                 verify=False)
        yield dict(assignment_id=assignment_id,
                   status=verdict,
                   submit_status=response.json())


def create_db_connection():
    """Return a new SQLAlchemy session bound to config.TOLOKA_DATABASE_URL."""
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    session_factory = sessionmaker(bind=create_engine(config.TOLOKA_DATABASE_URL))
    return session_factory()


def is_code_column(name):
    """Tell whether ``name`` is a code column name.

    A code column is 'code', 'my', 'our' or 'their' followed by one or more
    digits and nothing else.
    """
    patterns = (r'^code\d+$', r'^my\d+$', r'^our\d+$', r'^their\d+$')
    return any(re.match(pattern, name) for pattern in patterns)


def check_answers(workdir, answers_table, date, task_name):
    """Rate every assignment in ``answers_table`` and submit the verdicts.

    Answer rows are grouped by 'assignment_id', each group is rated with
    rate_answer against the TolokaResult rows stored for that assignment, the
    verdicts are sent through submit_rates, and what submit_rates yields is
    appended to the table ``<workdir>/<date>-check``.

    Args:
        workdir: folder that receives the '-check' table.
        answers_table: table with the collected answer rows.
        date: string used as the prefix of the output table name.
        task_name: task type forwarded to rate_answer.
    """
    tasks = defaultdict(list)
    for answer in yt.read_table(answers_table, raw=False):
        tasks[answer['assignment_id']].append(answer)

    db = create_db_connection()
    try:
        # The list is built eagerly, so every query has been consumed by the
        # time the session is closed.
        rates = [rate_answer(task_id, toloka_answer,
                             execute_query(db, task_id), task_name)
                 for task_id, toloka_answer in tasks.items()]
    finally:
        db.close()

    submitted_answers = submit_rates(rates)
    submitted_answers_table = os.path.join(workdir, date + '-check')
    yt.write_table(yt.TablePath(submitted_answers_table, append=True),
                   submitted_answers, raw=False)


def gen_access_log(access_gen):
    """Flatten per-assignment access results into access-log rows.

    Args:
        access_gen: iterable of ``(user_id, assignment_id, created,
            access_results)`` tuples, where ``access_results`` is an iterable
            of records exposing ``yuid`` and ``useragent`` attributes.

    Yields:
        One dict per access record with the keys 'user_id', 'assignment_id',
        'yuid', 'user_agent' and 'created'.
    """
    for user_id, assignment_id, created, access_results in access_gen:
        shared_fields = {'user_id': user_id,
                         'assignment_id': assignment_id,
                         'created': created}
        for hit in access_results:
            yield dict(shared_fields, yuid=hit.yuid, user_agent=hit.useragent)


def execute_query(db, taskid):
    """Return the query over TolokaResult rows whose ``taskid`` equals ``taskid``."""
    all_results = db.query(TolokaResult)
    return all_results.filter_by(taskid=taskid)


def collect_splices(workdir, answers_table, date):
    """Write the access log of every answered assignment to a '-splices' table.

    For each distinct (user_id, assignment_id, created) found in
    ``answers_table`` the TolokaResult rows of the assignment are fetched and
    appended, one row per record, to ``<workdir>/<date>-splices``.

    Args:
        workdir: folder that receives the '-splices' table.
        answers_table: table with the collected answer rows.
        date: string used as the prefix of the output table name.
    """
    tasks = set(
        (answer['user_id'], answer.get('assignment_id'), answer['created'])
        for answer in yt.read_table(answers_table, raw=False))

    collected_splices_table = os.path.join(workdir, date + '-splices')

    db = create_db_connection()
    try:
        # The queries run lazily while write_table consumes the generator, so
        # the session must stay open until the write has returned.
        tasks_access = ((user_id, task_id, created, execute_query(db, task_id))
                        for user_id, task_id, created in tasks)
        yt.write_table(yt.TablePath(collected_splices_table, append=True),
                       gen_access_log(tasks_access), raw=False)
    finally:
        db.close()


class TolokaAnswersCollectorTask(base_luigi_task.BaseTask):
    """Luigi task that stores the submitted answers of every task type.

    For each name in TOLOKA_TASKS the rows produced by
    yield_submitted_answers are written to
    ``<CRYPTA_TOLOKA_FOLDER>/<task_name>/<date>``.
    """
    date = luigi.Parameter()

    def requires(self):
        # No upstream tasks.
        return []

    def run(self):
        root = config.CRYPTA_TOLOKA_FOLDER
        mr.mkdir(root)
        for task_name in TOLOKA_TASKS:
            task_dir = os.path.join(root, task_name)
            mr.mkdir(task_dir)
            destination = os.path.join(task_dir, self.date)
            yt.write_table(destination,
                           yield_submitted_answers(task_name),
                           raw=False)

    def output(self):
        targets = []
        for task_name in TOLOKA_TASKS:
            path = os.path.join(config.CRYPTA_TOLOKA_FOLDER, task_name, self.date)
            targets.append(yt_luigi.YtTarget(path, allow_empty=True))
        return targets


# Despite the "Desktop" in its name, this task checks answers for every task type in TOLOKA_TASKS.
class TolokaDesktopAnswersCheckerTask(base_luigi_task.BaseTask):
    """Luigi task that rates the collected answers of every task type.

    Runs check_answers for each name in TOLOKA_TASKS; the results land in
    ``<CRYPTA_TOLOKA_FOLDER>/<task_name>/<date>-check``.
    """
    date = luigi.Parameter()

    def requires(self):
        return TolokaAnswersCollectorTask(self.date)

    def run(self):
        root = config.CRYPTA_TOLOKA_FOLDER
        for task_name in TOLOKA_TASKS:
            task_dir = os.path.join(root, task_name)
            collected = os.path.join(root, task_name, self.date)
            check_answers(task_dir, collected, self.date, task_name)

    def output(self):
        targets = []
        for task_name in TOLOKA_TASKS:
            path = os.path.join(config.CRYPTA_TOLOKA_FOLDER,
                                task_name,
                                self.date + '-check')
            targets.append(yt_luigi.YtTarget(path, allow_empty=True))
        return targets


class TolokaDesktopSpliceCollector(base_luigi_task.BaseTask):
    """Luigi task that collects splices for every task type except 'household'.

    Runs collect_splices per task type; the results land in
    ``<CRYPTA_TOLOKA_FOLDER>/<task_name>/<date>-splices``.
    """
    date = luigi.Parameter()

    def requires(self):
        return TolokaDesktopAnswersCheckerTask(self.date)

    def run(self):
        root = config.CRYPTA_TOLOKA_FOLDER
        for task_name in TOLOKA_TASKS:
            if task_name == 'household':
                continue
            task_dir = os.path.join(root, task_name)
            collected = os.path.join(root, task_name, self.date)
            collect_splices(task_dir, collected, self.date)

    def output(self):
        targets = []
        for task_name in TOLOKA_TASKS:
            if task_name == 'household':
                continue
            path = os.path.join(config.CRYPTA_TOLOKA_FOLDER,
                                task_name,
                                self.date + '-splices')
            targets.append(yt_luigi.YtTarget(path, allow_empty=True))
        return targets


class TolokaHouseholdCollector(base_luigi_task.BaseTask):
    """Luigi task that turns accepted household answers into household records.

    Reads ``<CRYPTA_TOLOKA_FOLDER>/household/<date>-check``, and for every
    record with status 'ACCEPTED' maps the codes entered by the worker to the
    yuids stored in TolokaResult. One record per assignment is written to
    ``<CRYPTA_TOLOKA_FOLDER>/household/<date>-households`` with the shared
    cookies and one cookie list per person.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(TolokaHouseholdCollector, self).__init__(*args, **kwargs)
        self.output_table = os.path.join(config.CRYPTA_TOLOKA_FOLDER, 'household', self.date + '-households')

    def requires(self):
        return TolokaDesktopAnswersCheckerTask(self.date)

    def run(self):
        checked_answers = os.path.join(config.CRYPTA_TOLOKA_FOLDER, 'household', self.date + '-check')
        households = []

        my_col_names = ['my' + str(i) for i in range(1, 4)]
        their_col_names = [['their' + str(j) + str(i) for i in range(1, 3)] for j in range(1, 5)]
        our_col_names = ['our' + str(i) for i in range(1, 5)]

        db = create_db_connection()
        try:
            # raw=False as in every other table read/write of this module.
            for rec in yt.read_table(checked_answers, raw=False):
                if rec['status'] != 'ACCEPTED':
                    continue

                task_id = rec['assignment_id']
                c2c = self.codes_to_cookies(task_id, db)

                answers = rec['submit_status']['solutions'][0]['output_values']

                hh = dict()
                # NOTE(review): the key is misspelled ("assignemnt_id"); it is
                # kept as is because renaming it would change the schema of
                # the output table - confirm with the table's consumers.
                hh['assignemnt_id'] = task_id
                hh['shared'] = self.get_cookies(answers, our_col_names, c2c)
                # The worker's own devices always form the first group, even
                # when it is empty; cohabitant groups are added only if filled.
                hh['personal'] = [self.get_cookies(answers, my_col_names, c2c)]

                for grp in their_col_names:
                    cookies = self.get_cookies(answers, grp, c2c)
                    if cookies:
                        hh['personal'].append(cookies)

                households.append(hh)
        finally:
            # All queries were consumed inside the loop above.
            db.close()

        yt.write_table(self.output_table, households, raw=False)

    def get_cookies(self, answers, col_names, c2c):
        """Return the cookies mapped to the codes found in ``col_names``.

        Args:
            answers: dict of output values of one solution.
            col_names: names of the answer columns to look at.
            c2c: code -> cookie mapping built by codes_to_cookies.

        Returns:
            List of truthy cookies, in column order; empty or unknown codes
            are skipped.
        """
        cookies = []
        for col in col_names:
            code = answers.get(col)
            if not code:
                continue
            # These values were not normalised the way yield_answers
            # normalises collected rows, so strip them before the lookup.
            if hasattr(code, 'strip'):
                code = code.strip()
            cookie = c2c.get(code)
            if cookie:
                cookies.append(cookie)
        return cookies

    def codes_to_cookies(self, task_id, db):
        """Build the code -> yuid mapping for the assignment ``task_id``.

        Every TolokaResult row is registered under its full code and also
        under the code cut to SHORT_CODE_LENGTH characters: rate_answer
        accepts household answers by comparing the entered codes with the
        shortened expected codes, so accepted answers carry the short form.
        """
        result = dict()
        for r in execute_query(db, task_id):
            result[r.code] = r.yuid
            if r.code:
                # Do not overwrite an entry registered earlier under this key.
                result.setdefault(r.code[:SHORT_CODE_LENGTH], r.yuid)
        return result

    def output(self):
        return yt_luigi.YtTarget(self.output_table, allow_empty=True)

if __name__ == '__main__':
    from datetime import datetime

    yt.config.set_proxy(config.MR_SERVER)

    # Run for today's date (local time) formatted as YYYY-MM-DD; assign a
    # literal such as '2018-01-23' instead to re-run a specific day.
    dt = datetime.now().strftime("%Y-%m-%d")

    # To register a pool by hand, append a record to the pools table, e.g.
    #   yt.write_table(yt.TablePath('//home/crypta/testing/state/toloka/pools', append=True),
    #                  [dict(pool_id=51944, pool_status='running', task_name='household')])

    # Other entry points that can be built the same way:
    #   TolokaAnswersCollectorTask(dt), TolokaDesktopAnswersCheckerTask(dt),
    #   TolokaDesktopSpliceCollector(dt)
    entry_tasks = [TolokaHouseholdCollector(dt)]
    success = luigi.build(entry_tasks, local_scheduler=True)
