#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import json
import toml
import codecs
import requests
import argparse
import datetime as dt
import smtplib
import pdb
import logging
from collections import Counter, defaultdict
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders
from pecheny.moncommons import push_to_razladki
from datetime import datetime as dtdt

# Basename of this script. main() fills it in and uses it (minus the ".py"
# suffix) to name the logger and the log file.
_file_ = None


def send_mail(send_from, send_to, subject, text, files=None,
              server="localhost"):
    """Send a plain-text e-mail with optional file attachments over SMTP.

    Args:
        send_from: sender address; used for the ``From`` header and as the
            SMTP envelope sender.
        send_to: list of recipient addresses.
        subject: value of the ``Subject`` header.
        text: message body; it is encoded to UTF-8 before being attached.
        files: optional list of file paths, each attached as an
            ``application/octet-stream`` part named after the file's
            basename. ``None`` (the default) means no attachments.
        server: SMTP host to connect to.

    Raises:
        AssertionError: if ``send_to`` or ``files`` is not a list.
    """
    # A None default avoids sharing one mutable list object between calls.
    if files is None:
        files = []
    assert isinstance(send_to, list)
    assert isinstance(files, list)

    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = COMMASPACE.join(send_to)
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject

    msg.attach(MIMEText(text.encode('utf8')))

    for f in files:
        part = MIMEBase('application', "octet-stream")
        # Close the attachment as soon as it has been read.
        with open(f, "rb") as attachment:
            part.set_payload(attachment.read())
        Encoders.encode_base64(part)
        part.add_header('Content-Disposition',
                        'attachment; filename="%s"' % os.path.basename(f))
        msg.attach(part)

    smtp = smtplib.SMTP(server)
    try:
        smtp.sendmail(send_from, send_to, msg.as_string())
    finally:
        # Release the connection even when sending fails.
        smtp.close()


def levenshteinDistance(str1, str2):
    """Compute a weighted edit distance and similarity ratio of two sequences.

    Insertions and deletions cost 1 and a substitution costs 2, so the
    distance equals ``len(str1) + len(str2) - 2 * LCS``.  Any indexable
    sequences with comparable items are accepted (strings, lists, ...).

    Args:
        str1: first sequence.
        str2: second sequence.

    Returns:
        dict with ``'distance'`` (int) and ``'ratio'`` (float in [0, 1],
        ``(len1 + len2 - distance) / (len1 + len2)``).  Two empty sequences
        are identical, so they yield distance 0 and ratio 1.0.
    """
    m = len(str1)
    n = len(str2)
    lensum = float(m + n)
    if lensum == 0:
        # Both inputs are empty: avoid dividing by zero below.
        return {'distance': 0, 'ratio': 1.0}

    # Only the previous DP row is needed to compute the current one.
    prev_row = list(range(n + 1))
    for i in range(1, m + 1):
        cur_row = [i]
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                cur_row.append(prev_row[j - 1])
            else:
                cur_row.append(min(prev_row[j] + 1,        # deletion
                                   cur_row[j - 1] + 1,     # insertion
                                   prev_row[j - 1] + 2))   # substitution
        prev_row = cur_row

    ldist = prev_row[n]
    ratio = (lensum - ldist) / lensum
    return {'distance': ldist, 'ratio': ratio}

# Keys that main() polls, one request per key, against the
# ``atom/candidate_scores`` table.  Commented-out entries are kept for
# reference and are currently not polled.
ALLKEYS = [
    'distr_wizard_ru',
    # 'distr_wizard_products_ru',
    # 'distr_wizard_products_tr',
    'distr_wizard_tr',
    # '_distr_wizard_ru',
    # '_distr_wizard_old_ru',
    # 'news',
    'service_block_ru',
    'smart_banner_ru',
    'smart_banner_test_ru',
    'banana/1127_filtered',
    'banana/1141',
    'banana/promofooter',
    'banana/promofooter_mobile',
    # '_distr_wizard_old_tmp_ru',
    # 'mobile_apps_ru',
    # 'distr_wizard_yabro_ru',
    # 'banana/1127_filtered_noname',
    # 'smart_banner_yabro_ru',
]

# Set of event names.  It is not referenced by the code visible in this file.
EVENTS = {
    'cancel',
    'click',
    'close',
    'close_stripe',
    'download',
    'error',
    'install',
    'show',
    'showlanding',
}


def valparse(value):
    """Parse a whitespace-separated ``name=int`` string into a dict.

    Each token is split at its first ``=``; the text before it is the key
    and everything after it is converted with ``int``.  An empty string
    yields an empty dict.  A token whose right-hand side is not an integer
    (including a token without ``=``) raises ``ValueError``.
    """
    parsed = {}
    for token in value.split():
        name, _, number = token.partition('=')
        parsed[name] = int(number)
    return parsed


def compare_lists(a, b):
    """Return how far each item of *a* is displaced relative to *b*.

    Args:
        a: sequence of hashable items.
        b: sequence that contains every item of *a*.

    Returns:
        dict mapping each item of *a* to ``index in a - index in b``, using
        the first occurrence of the item in each sequence.

    Raises:
        ValueError: if an item of *a* does not occur in *b*.
    """
    # Index b once instead of calling b.index() for every item (O(n) total).
    first_pos_in_b = {}
    for pos, item in enumerate(b):
        first_pos_in_b.setdefault(item, pos)

    result = {}
    for pos, item in enumerate(a):
        if item in result:
            # A repeated item keeps the offset of its first occurrence.
            continue
        if item not in first_pos_in_b:
            raise ValueError('{!r} is not in list'.format(item))
        result[item] = pos - first_pos_in_b[item]
    return result


def getshow(dct, x):
    """Return ``dct[x]['show']``, or 0 if *x* or its 'show' field is absent.

    Membership is tested with ``in`` before indexing, so a ``defaultdict``
    passed as *dct* does not get a new entry for a missing *x*.
    """
    if x not in dct:
        return 0
    record = dct[x]
    return record['show'] if 'show' in record else 0


def getfield(dct, x, field='show'):
    """Return ``dct[x][field]``, or 0 if *x* or *field* is absent.

    Membership is tested with ``in`` before indexing, so a ``defaultdict``
    passed as *dct* does not get a new entry for a missing *x*.
    """
    if x not in dct:
        return 0
    record = dct[x]
    if field not in record:
        return 0
    return record[field]


def sumfield(dct, field='show'):
    """Return the sum of *field* over all entries of *dct*.

    Args:
        dct: mapping of name -> dict of counters.
        field: counter name to add up.

    Returns:
        The total; an entry that lacks *field* contributes 0 (the same
        convention getshow/getfield use), and an empty *dct* yields 0.
    """
    return sum(dct[name].get(field, 0) for name in dct)


def sanitize(s):
    """Return *s* with every '/', ':' and ' ' replaced by an underscore."""
    for unwanted in ('/', ':', ' '):
        s = s.replace(unwanted, '_')
    return s


def _read_stripe_candidates(path):
    """Return ``'distr_stripe/<line>'`` for every line of UTF-8 file *path*."""
    with codecs.open(path, 'r', 'utf8') as f:
        return ['distr_stripe/{}'.format(x) for x in f.read().split('\n')]


def _drop_prefix_entry(candidates):
    """Remove the entry named after the first key's part before '/'.

    Best-effort: nothing happens when *candidates* is empty or has no such
    entry.
    """
    try:
        candidates.pop(next(iter(candidates)).split('/')[0])
    except (StopIteration, KeyError):
        pass


def main():
    """Poll candidate scores, diff them against the last run, push metrics.

    For every key in ALLKEYS the current per-candidate counters are fetched
    and compared with the snapshot stored in ``candidates_drop_data.json``.
    Appeared/disappeared candidates and rank shifts are logged, counter
    deltas are pushed through ``push_to_razladki`` and written to a per-key
    JSON file under ``logs/``.  Finally the new snapshot replaces the stored
    one.

    Command-line flags:
        --debug: also log DEBUG-level messages to the console.
        --stats: log the show count of every candidate.
    """
    start = dtdt.now()
    global _file_
    global __file__                         # to fix stupid
    __file__ = os.path.abspath(__file__)    # __file__ handling
    _file_ = os.path.basename(__file__)     # in python 2
    os.chdir(os.path.dirname(__file__))
    # The parsed config is not used below; loading it still fails early when
    # distribution.toml is missing or malformed, as before.
    with open('distribution.toml') as f:
        params = toml.loads(f.read())

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--stats', action='store_true')
    args = parser.parse_args()

    logger = logging.getLogger(_file_[:-3])
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    if args.debug:
        ch.setLevel(logging.DEBUG)
    else:
        ch.setLevel(logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        os.path.dirname(__file__), _file_[:-3], start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    if os.path.isfile('candidates_drop_data.json'):
        with open('candidates_drop_data.json') as f:
            old_candidates = json.loads(f.read())
        logger.info('candidates_drop_data.json found')
    else:
        old_candidates = defaultdict(dict)

    distribution_url = (
        'http://launcher.razladki.yandex-team.ru/save_new_data/SearchPortalDistribution')
    candidates_url = (
        'http://launcher.razladki.yandex-team.ru/save_new_data/AtomBannersCandidates')
    scores_url = (
        'http://rtmr-sas-000.search.yandex.net:8080/yandsearch?view=plain&table=atom/candidate_scores&key={}&maxrecords=1')

    new_candidates = defaultdict(dict)
    major_changes = []
    minor_changes = []
    for key in ALLKEYS:
        req = None
        # Retry until HTTP 200 or until one hour has passed since start.
        # NOTE(review): retries are back-to-back with no delay and the
        # request has no timeout -- consider adding both.
        while ((req is None or req.status_code != 200)
               and (dtdt.now() - start).total_seconds() < 3600):
            logger.info('checking {}'.format(key))
            req = requests.get(scores_url.format(key))
        try:
            # The JSON payload is the second tab-separated field.
            atomjson = json.loads(req.content.split('\t')[1])
        except Exception:
            # Also covers req being None when the time budget is used up.
            logger.info('cannot parse {}'.format(key))
            continue

        if not atomjson:
            logger.info('{} is null'.format(key))
            continue
        for candidate in atomjson:
            new_candidates[key][candidate] = valparse(
                atomjson[candidate].get('v', ''))

        if key not in old_candidates:
            old_candidates[key] = {}
        _drop_prefix_entry(old_candidates[key])
        _drop_prefix_entry(new_candidates[key])
        old_c_set = set(old_candidates[key])
        new_c_set = set(new_candidates[key])

        if key == "banana/1127_filtered":
            morda = _read_stripe_candidates('morda.txt')
            turkey = _read_stripe_candidates('turkey.txt')
            oldmorda = {x: getshow(old_candidates[key], x) for x in morda}
            newmorda = {x: getshow(new_candidates[key], x) for x in morda}
            logger.info('old morda: {} {}'.format(sum(oldmorda.values()),
                                                  oldmorda))
            logger.info('new morda: {} {}'.format(sum(newmorda.values()),
                                                  newmorda))
            # Growth of the show counters since the previous snapshot.
            morda_counter = sum(
                getshow(new_candidates[key], x)
                - getshow(old_candidates[key], x) for x in morda)
            turkey_counter = sum(
                getshow(new_candidates[key], x)
                - getshow(old_candidates[key], x) for x in turkey)
            logger.info('morda counter: {}'.format(morda_counter))
            logger.info('turkey counter: {}'.format(turkey_counter))
            push_to_razladki({'razladki': distribution_url},
                             'atom_morda_shows',
                             morda_counter)
            push_to_razladki({'razladki': distribution_url},
                             'atom_turkey_shows',
                             turkey_counter)

        disappeared = old_c_set - new_c_set
        for candidate in disappeared:
            major_changes.append('[{}] {} DISAPPEARED'.format(key, candidate))
        new = new_c_set - old_c_set
        for candidate in new:
            major_changes.append(
                '[{}] NEW CANDIDATE: {}'.format(key, candidate))
        common = old_c_set & new_c_set
        old_common = sorted(
            common, key=lambda x: getshow(old_candidates[key], x),
            reverse=True)
        new_common = sorted(
            common, key=lambda x: getshow(new_candidates[key], x),
            reverse=True)
        for x in common:
            # Compare show counts: the per-candidate values are dicts, so
            # comparing them to 0 directly could never report a drop.
            if (getshow(new_candidates[key], x) == 0
                    and getshow(old_candidates[key], x) != 0):
                major_changes.append('[{}] {} dropped to 0'.format(key, x))
        # Rank shift of every common candidate between old and new ordering.
        comparison = compare_lists(old_common, new_common)
        for x in comparison:
            minor_changes.append((key, x, comparison[x]))
        if len(comparison) > 0:
            similarity = 1 - levenshteinDistance(
                list(comparison.values()),
                list(range(len(comparison))))['ratio']
            logger.info('[{}] Similarity ratio: {:.2}'.format(key, similarity))
            push_to_razladki({'razladki': distribution_url},
                             'similarity_{}'.format(key),
                             similarity)

        # Candidates without a 'show' counter count as 0 rather than raising.
        top5 = sorted(new_candidates[key],
                      key=lambda x: getshow(new_candidates[key], x),
                      reverse=True)[:5]
        sum_top5 = sum(getshow(new_candidates[key], x) for x in top5)
        total_shows = sum(getshow(new_candidates[key], x)
                          for x in new_candidates[key])
        if total_shows > 0:
            logger.info('[{}] Top5 ratio: {:.2}'.format(
                key, sum_top5 / total_shows))
        if args.stats:
            for x in sorted(new_candidates[key],
                            key=lambda x: getshow(new_candidates[key], x)):
                logger.info('{}\t{}'.format(
                    x, getshow(new_candidates[key], x)))

        # Per-candidate counter deltas; a brand-new candidate reports its
        # full counters.
        diffdict = defaultdict(dict)
        for candidate in common:
            for field in new_candidates[key][candidate]:
                diffdict[candidate][field] = (
                    getfield(new_candidates[key], candidate, field=field)
                    - getfield(old_candidates[key], candidate, field=field))
        for candidate in new:
            for field in new_candidates[key][candidate]:
                diffdict[candidate][field] = new_candidates[
                    key][candidate][field]
        for candidate in diffdict:
            for event in diffdict[candidate]:
                desc = '{}/{}_{}'.format(key, candidate, event)
                value = diffdict[candidate][event]
                logger.info('Will push to razladki: {} = {}'
                            .format(desc, value))
                push_to_razladki({'razladki': candidates_url},
                                 desc,
                                 value)
        with open('{}/logs/candidates_{}_{}.json'.format(
                os.path.dirname(__file__), start, sanitize(key)), 'w') as f:
            f.write(json.dumps(dict(diffdict), indent=4,
                               ensure_ascii=False).encode('utf8'))

    for x in major_changes:
        logger.info(x)
    for x in sorted(minor_changes, key=lambda x: abs(x[2]), reverse=True):
        if abs(x[2]) > 3:
            logger.info('[{}] {} {}'.format(
                x[0], x[1], '{}{}'.format(
                    '↑' if x[2] > 0 else ('↓' if x[2] < 0 else '='),
                    abs(x[2]))))

    # Persist the new snapshot for the next run; the file is closed (and
    # flushed) before the function returns.
    with open('candidates_drop_data.json', 'w') as f:
        f.write(json.dumps(new_candidates))


# Run the monitoring pass only when executed as a script, not on import.
if __name__ == "__main__":
    main()
