# coding=utf-8
from __future__ import print_function

import logging

import pandas as pd
from library.python import resource
from startrek_client import Startrek
from travel.hotels.lib.python import yqllib
from yql.api.v1.client import YqlClient

from travel.hotels.lib.python.versioned_process import VersionedProcess, ytlib

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class FeedsReport(VersionedProcess):
    """Build feeds warnings statistics and report new warnings to Startrek.

    ``run`` executes the ``1_feeds_warnings.yql`` resource query and then, for
    each warnings report (amenities, rubrics, other), compares the newest
    report with the preceding ones and posts a comment with an Excel
    attachment to the configured ticket when previously unseen warnings
    appear.
    """

    name = 'feeds_report'

    def __init__(self, session, args):
        """Store command line options and create the YQL client.

        ``args.yt_proxy`` is not declared by ``configure_arg_parser`` below;
        presumably it is provided by the base process arguments.
        """
        super(FeedsReport, self).__init__(session, args)
        self.yql_client = YqlClient(token=args.yql_token, db=args.yt_proxy)
        self.partners = args.partners
        self.st_token = args.st_token
        self.rubrics_ticket = args.rubrics_ticket
        self.amenities_ticket = args.amenities_ticket
        self.other_ticket = args.other_ticket
        # Cache of loaded tables: YT table path -> pandas.DataFrame.
        self.tables_contents = {}

    @staticmethod
    def configure_arg_parser(parser, proc_env):
        """Register the command line options of this process on ``parser``.

        ``proc_env`` is accepted for interface compatibility and is not used.
        """
        common_group = parser.add_argument_group(FeedsReport.name + " - common")
        common_group.add_argument("--yql-token", required=True)
        common_group.add_argument("--partners", default=["ostrovok", "booking21", "expedia", "hotelscombined2", "101hotels"], nargs="+")
        common_group.add_argument('--st-token', default=None, help='token for ST')
        common_group.add_argument('--rubrics-ticket', default="HOTELS-3828", help='Ticket for new rubric warnings reports')
        common_group.add_argument('--amenities-ticket', default="HOTELS-3828", help='Ticket for new amenities warnings reports')
        common_group.add_argument('--other-ticket', default="HOTELS-3828", help='Ticket for new other warnings reports')

    def get_yt_table_data(self, path):
        """Return the cached DataFrame for ``path`` without ``_yql_column*`` columns.

        The cached frame itself is modified and returned (no copy is made).
        Raises ``KeyError`` if ``path`` has not been loaded into
        ``self.tables_contents``.
        """
        data = self.tables_contents[path]
        yql_columns = [col for col in data.columns if col.startswith('_yql_column')]
        if yql_columns:
            data.drop(yql_columns, axis=1, inplace=True)
        return data

    def get_warnings_from_table(self, table_path):
        """Return a dict mapping a warning key to its ``count`` value.

        The key is built from whichever of the ``message``, ``rubric``,
        ``country`` and ``type`` columns are present in the table.
        """
        data = self.get_yt_table_data(table_path)
        index_columns = [c for c in data.columns if c in ['message', 'rubric', 'country', 'type']]
        return dict(data.set_index(index_columns)['count'])

    @staticmethod
    def send_to_st(file_path, message, ticket, token):
        """Post ``message`` as a comment on ``ticket`` with ``file_path`` attached."""
        client = Startrek(useragent='python', base_url='https://st-api.yandex-team.ru', token=token)
        issue = client.issues[ticket]
        issue.comments.create(text=message, attachments=[file_path])

    @staticmethod
    def format_to_unicode(s):
        """Return ``s`` as a unicode string.

        Unicode input is returned unchanged. Other input is decoded as UTF-8;
        if decoding fails, every character with a code of 128 or above is
        replaced by ``*``.
        """
        if isinstance(s, unicode):
            return s
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # Only decoding failures are expected here; a bare ``except`` would
            # also swallow KeyboardInterrupt and SystemExit.
            return ''.join(c if ord(c) < 128 else '*' for c in s)

    def _list_report_dates(self, process_dir):
        """Return the sorted children of ``process_dir`` that hold a warnings report.

        A child qualifies when it contains a ``feeds_warnings/amenities/expedia``
        node; the ``latest`` entry is skipped.
        """
        dates = []
        for name in ytlib.yt.list(process_dir):
            if name == 'latest':
                continue
            marker_path = ytlib.join(process_dir, name, 'feeds_warnings', 'amenities', 'expedia')
            if ytlib.yt.exists(marker_path):
                dates.append(name)
        return sorted(dates)

    def process_new_warnings(self, report_path, ticket, recent_period=7):
        """Report warnings of the newest report that are absent from recent ones.

        :param report_path: report location relative to a version directory,
            e.g. ``feeds_warnings/amenities``.
        :param ticket: ticket key to comment on; nothing is sent when empty.
        :param recent_period: size of the comparison window. The newest version
            is compared with up to ``recent_period - 1`` versions preceding it.

        In debug mode the ``latest`` directory is used as the newest report and
        every non-empty table is exported even if it has no new warnings.
        """
        process_dir = self.get_process_dir()
        if self.debug:
            print("Processing {}".format(report_path))

        dates = self._list_report_dates(process_dir)
        recent_dates = dates[-recent_period:-1]
        latest_path = ytlib.join(process_dir, dates[-1] if not self.debug else 'latest', report_path)
        if self.debug:
            print("latest_path: {}".format(latest_path))

        # Resolve once which tables have rows, both in the newest report and in
        # the recent ones: (table name, newest table path, [recent table paths]).
        report_tables = []
        for table in ytlib.yt.list(latest_path):
            latest_table_path = ytlib.join(latest_path, table)
            if not ytlib.yt.get_attribute(latest_table_path, 'row_count'):
                continue
            history_paths = []
            for date in recent_dates:
                table_path = ytlib.join(process_dir, date, report_path, table)
                if ytlib.yt.exists(table_path) and ytlib.yt.get_attribute(table_path, 'row_count'):
                    history_paths.append(table_path)
            report_tables.append((table, latest_table_path, history_paths))

        if not report_tables:
            # No non-empty tables: nothing to compare, do not send an empty query.
            return

        # Load all required tables with a single multi-statement query; results
        # are matched to paths by position.
        tables_to_load = []
        for _, latest_table_path, history_paths in report_tables:
            tables_to_load.append(latest_table_path)
            tables_to_load.extend(history_paths)
        query = "\n".join("SELECT * FROM `{}`;".format(path) for path in tables_to_load)

        # NOTE(review): unlike the call in run(), no ``client`` is passed here;
        # confirm that the yqllib default client is the intended one.
        request = yqllib.run_query(query, syntax_version=1)
        for path, result in zip(tables_to_load, request.get_results()):
            self.tables_contents[path] = pd.DataFrame(result.rows, columns=result.column_names)

        excel_file_path = "./{}_warnings.xlsx".format(report_path.rsplit('/')[-1])
        excel_writer = pd.ExcelWriter(excel_file_path, engine='openpyxl')

        message_parts = []
        for table, latest_table_path, history_paths in report_tables:
            if self.debug:
                print("latest_table_path: {}".format(latest_table_path))
            new_warnings = self.get_warnings_from_table(latest_table_path)

            # Merge the warnings of all recent tables. They must not overwrite
            # ``new_warnings``, otherwise the difference below is always empty.
            known_warnings = {}
            for table_path in history_paths:
                known_warnings.update(self.get_warnings_from_table(table_path))

            new_keys = list(set(new_warnings) - set(known_warnings))
            if not new_keys and not self.debug:
                continue

            data = self.get_yt_table_data(latest_table_path)
            data['message'] = [self.format_to_unicode(s) for s in data['message']]
            # Temporary sort key: the first 11 characters of the message.
            data['sm'] = [s[:11] for s in data['message']]
            data.sort_values(by=['sm', 'count'], ascending=False, inplace=True)
            data.drop('sm', axis=1, inplace=True)
            data.to_excel(excel_writer, sheet_name=table, index=None)
            most_common_warnings = pd.Series(new_warnings)[new_keys].sort_values(ascending=False)[:3]
            message_parts.append("""\nНовые ворнинги в таблице {}, вот топ 3: \n{}""".format(table, str(most_common_warnings)))

        message = "".join(message_parts)
        if message and ticket:
            # NOTE: ExcelWriter.save() was removed in pandas 2.0; switch to
            # close() when upgrading pandas.
            excel_writer.save()
            self.send_to_st(file_path=excel_file_path, message=message, ticket=ticket, token=self.st_token)

    def run(self):
        """Run the feeds warnings query, wait for it and process every report."""
        requests_to_wait = []
        # 1) Feeds warnings
        query = resource.find('1_feeds_warnings.yql')
        parameters = {
            '$feeds_path': "//home/travel/prod/feeds/",
            '$stats_path': self.get_table_path('feeds_warnings'),
            '$partners_list': self.partners,
        }
        request = yqllib.run_query(query=query, parameters=parameters, client=self.yql_client, syntax_version=1, debug=self.debug)
        requests_to_wait.append(request)

        # 2) todo: Duplicate originalIds in feeds
        for request in requests_to_wait:
            yqllib.wait_results(request)

        # Aggregate reports data.
        self.process_new_warnings(report_path='feeds_warnings/amenities', ticket=self.amenities_ticket)
        self.process_new_warnings(report_path='feeds_warnings/rubrics', ticket=self.rubrics_ticket)
        self.process_new_warnings(report_path='feeds_warnings/other', ticket=self.other_ticket)


# Script entry point. main() is not defined in this file; presumably it is
# inherited from VersionedProcess.
if __name__ == '__main__':
    FeedsReport.main()
