# -*- coding: utf-8 -*-
from __future__ import annotations

import logging

from datetime import date
from dateutil.relativedelta import relativedelta

from travel.hotels.content_manager.data_model.storage import StorageMapping
from travel.hotels.content_manager.lib.common import get_dc_yt_schema
from travel.hotels.content_manager.lib.processor import Processor

LOG = logging.getLogger(__name__)


class ProcessorOfferPrioritization(Processor):

    @staticmethod
    def mapping_key_header():
        return ['permalink', 'operator_id', 'orig_hotel_id', 'mapping_key']

    @staticmethod
    def mapping2key(mapping):
        return tuple(mapping[key] for key in ProcessorOfferPrioritization.mapping_key_header())

    def read_ranking(self, ranking_path):
        ranking_iterator = self.persistence_manager.read(ranking_path)
        return {self.mapping2key(row): row["weight"] for row in ranking_iterator}

    def read_known_mappings(self, known_mappings_path):
        known_mappings_iterator = self.persistence_manager.read(known_mappings_path)

        return {self.mapping2key(row) for row in known_mappings_iterator
                if row["permaroom_id"] is not None and not row["is_deleted"]}

    def apply_offer(self, offer, offers_mapping, shows_mapping):
        for show_id in offers_mapping[offer].keys():
            if shows_mapping[show_id]['z'] < self.options.other_percentage * shows_mapping[show_id]['f']:
                continue
            shows_mapping[show_id]['z'] -= offers_mapping[offer][show_id]
        return offers_mapping, shows_mapping

    def test_offer(self, offer, offers_mapping, shows_mapping):
        added_shows = 0

        for show_id in offers_mapping[offer].keys():
            if shows_mapping[show_id]['z'] < self.options.other_percentage * shows_mapping[show_id]['f']:
                continue
            if (shows_mapping[show_id]['z'] - offers_mapping[offer][show_id]) >= \
                    self.options.other_percentage * shows_mapping[show_id]['f']:
                continue
            added_shows += 1
        return added_shows

    # Here we take offers from the ranking table one by one and
    #   - test what happens if the offer no longer falls into "other"
    #   - update offers_mapping and shows_mapping as if this offer had been matched
    def test_ordering(self, offers, offers_mapping, shows_mapping, pos, neg):
        result_data = []
        for offer in offers:
            # This is because actually some unknown offers
            # might appear only in cases when we have "other" percentage less than 0.1
            # Thus, we would not observe it into negative examples
            # So it seems safe to just ignore it
            if offer not in offers_mapping.keys():
                # So we add 1 pri
                result_data.append(1.0)
                continue
            mn = self.test_offer(offer, offers_mapping, shows_mapping)
            offers_mapping, shows_mapping = self.apply_offer(
                offer, offers_mapping, shows_mapping)
            neg -= mn
            pos += mn
            result_data.append(float(pos) / (pos + neg))

        return result_data

    # From here on in this comment, KEY refers to (permalink, operator_id, orig_hotel_id, mapping_key).
    # Here we read offers from the neg table into a data structure that lets us efficiently
    # check what happens if we map an offer to a permaroom.
    # To do so we build 2 dictionaries.
    # The first dictionary, offers_mapping, is keyed by KEY.
    # Through this key we can reach every record of the neg table in which the offer occurs:
    # offers_mapping[KEY] stores a dict {index in neg table : number of entries of the offer}.
    # The second dictionary, shows_mapping, stores {index in neg table : number of zeros, number of full offers}.
    def read_neg_offers(self, offers_path):
        offers_iterator = self.persistence_manager.read(offers_path)
        neg = 0
        pos = 0

        shows_mapping = {}
        offers_mapping = {}

        for row in offers_iterator:
            if float(row["other_count"]) / row["full_offer_count"] < self.options.other_percentage:
                pos += 1
                continue
            zeros_found = 0

            keys = []
            counter = 0
            for mapping_key, orig_hotel_id, operator_id, permaroom_missing, n in row["offers_data"]:
                if isinstance(mapping_key, list):
                    mapping_key = mapping_key[0]
                if isinstance(orig_hotel_id, list):
                    orig_hotel_id = orig_hotel_id[0]
                if isinstance(operator_id, list):
                    operator_id = operator_id[0]

                target_key = (row["permalink"], operator_id, orig_hotel_id, mapping_key)

                if not permaroom_missing:
                    continue

                zeros_found += n
                keys.append((target_key, n))

                counter += 1

            if float(zeros_found) / row["full_offer_count"] < self.options.other_percentage:
                pos += 1
                continue

            for key, n in keys:
                offers_mapping.setdefault(key, {})[neg] = n

            shows_mapping[neg] = {'f': row["full_offer_count"], 'z': zeros_found}

            neg += 1

        return neg, pos, offers_mapping, shows_mapping

    # This function does the following:
    # It takes logs from logs/travel-hotels-offercache-log/1d for the previous month and,
    # for each record in the log, determines whether the set of offers from the record has already been mapped.
    # Now we have table $complete_dataset
    # In this table each record is a one record from OfferCache
    # For this record we know :
    #   - permalink
    #   - number of offers into this record
    #   - number of records where we don't know permaroom
    #   - list <mapping_key, orig_hotel_id, operator_id, bool indicating if permaroom is missing, number of entries
    # of the offer>
    # So in the end we build 3 tables
    #   1. Table where other / full < 0.1 called pos_table, from this table we need only number of rows to calculate
    # probability
    #   2. Table where other / full >= 0.1 called neg_table, we will use this table to check what happens if we
    # map mapping_key to permaroom
    #   3. Table with set of missing offers and ranking on these offers
    # The key idea is that we take offers one by one and check what will be the probability not to show room structure
    # in case when offer has permaroom. Since we take offers one by one, the ordering on these offers is important
    # and if we will initially take offers that are not very frequent we will see that we "mapped" a lot of offers
    # but probability has not changed significantly.
    # To set ordering empirical formula is the following
    # SUM((number of entries) / (other_count - 0.1 * full number of offers))
    # There is a logic behind this, i swear =)
    # So now we have 3 tables
    def get_offers_info(self, pos_table: str, neg_table: str, rank_table: str, date_from: str, date_to: str) -> None:
        """Build and run the YQL query that fills the pos, neg and rank tables.

        The query reads the user-sessions and offercache logs in the range
        ``date_from``..``date_to`` and uses the 'mappings' and 'permalinks'
        tables under ``self.path_info.storage_path`` as references. It then
        inserts:

        * into ``neg_table``: records with ``other_count / full_offer_count``
          at or above ``self.options.other_percentage``;
        * into ``pos_table``: records with that ratio below the threshold;
        * into ``rank_table``: one row per (permalink, mapping_key,
          orig_hotel_id, operator_id) without a known permaroom, with the
          summed ``weight``.

        When ``self.options.ignore_skipped`` is falsy, offers listed under
        "SkippedPrices" are taken into account in addition to "Prices".

        :param pos_table: path of the table that receives positive records
        :param neg_table: path of the table that receives negative records
        :param rank_table: path of the table that receives offer weights
        :param date_from: first table name of the log range (``run`` passes an ISO date string)
        :param date_to: last table name of the log range (``run`` passes an ISO date string)
        """
        known_mappings_table = self.persistence_manager.join(self.path_info.storage_path, 'mappings')
        known_permalinks_table = self.persistence_manager.join(self.path_info.storage_path, 'permalinks')

        # The query is an f-string: doubled braces are literal YQL braces, single-brace
        # fields are substituted from Python (table paths, dates and options).
        # `{not self.options.ignore_skipped}` is rendered as the text True / False.
        query = f'''
            PRAGMA yt.InferSchema;
            PRAGMA yson.DisableStrict;
            PRAGMA SimpleColumns;

            $extract_offers_data = ($x, $keyword) -> {{
                $offers = Yson::ConvertToList(Yson::Lookup($x, $keyword));
                $offers_mapped = ListMap($offers,
                    ($y) -> {{ RETURN AsTuple(
                        Yson::LookupString($y, "CatRoomMappingKey"),
                        Yson::LookupString($y, "OrigHotelId"),
                        Yson::LookupUint64($y, "OperatorId")); }});
                $offers_filtered = ListFilter($offers_mapped,
                    ($y) -> {{ RETURN $y.0 IS NOT NULL AND $y.1 IS NOT NULL
                    AND $y.2 IS NOT NULL; }});
                RETURN $offers_filtered;}};

            $extract = ($x, $y, $permalink) -> {{
                $x_converted = Yson::Lookup($x, CAST($permalink ?? 0 AS String));
                $y_converted =  Yson::Lookup($y, CAST($permalink ?? 0 AS String));
                $result = IF({not self.options.ignore_skipped},
                    ListExtend($extract_offers_data($x_converted, "Prices"),
                        $extract_offers_data($y_converted, "SkippedPrices")),
                    $extract_offers_data($x_converted, "Prices"));
                RETURN $result;
            }};

            $get_permalinks = ($x) -> {{
                $list_data = Yson::ConvertToList($x);
                $permalinks_only = ListMap($list_data, ($y) -> {{ RETURN Yson::LookupString($y, "permalink"); }});
                $filtered_data = ListFilter($permalinks_only, ($y) -> {{ RETURN $y IS NOT NULL; }});
                RETURN ListUniq(ListMap($filtered_data, ($y) -> {{ RETURN CAST($y AS Uint64); }}));
            }};

            $get_travel_data = ($x) -> {{
                RETURN ListFlatMap(Yson::ConvertToList($x),
                    ($y) -> {{RETURN IF(Yson::ConvertToString($y["path"]) LIKE "%travel_map%" OR
                    Yson::ConvertToString($y["path"]) LIKE "%travel_company%", 1, NULL)}});
            }};

            $clicked_permalinks = (
                SELECT
                    $get_permalinks(clicks) as permalinks,
                    reqid
                FROM RANGE(`home/travel/analytics/user_sessions`, "{date_from}", "{date_to}")
                WHERE
                    ListLength($get_travel_data(clicks)) > 0
                    AND ListLength($get_permalinks(clicks)) > 0
                    AND device == "desktop"
            );

            $dataset_raw = (
                SELECT
                    Resp_Hotels,
                    Info_Hotels,
                    Req_ReqId AS reqid
                FROM RANGE(`logs/travel-hotels-offercache-log/1d`, "{date_from}", "{date_to}")
                WHERE
                    Environment == "prod"
                    AND Req_Full
                    AND Resp_SearchProps_MainPermalink != 0
                    AND Info_MainPermalink_FullOfferCount ?? 0 != 0
            );

            $flat_clicked = (
                SELECT
                    permalink,
                    reqid
                FROM $clicked_permalinks
                FLATTEN LIST BY permalinks as permalink
            );

            $dataset = (
                SELECT
                    Resp_Hotels,
                    Info_Hotels,
                    permalink,
                    CAST(RandomUuid(permalink) AS String) AS record_id
                FROM $dataset_raw as data
                INNER JOIN $flat_clicked as clicks
                USING (reqid)
            );

            $filtered_data = (
                SELECT
                    data.*
                FROM $dataset as data
                LEFT SEMI JOIN (
                    SELECT
                        id as permalink
                    FROM `{known_permalinks_table}`
                    WHERE NOT is_deleted) as filter_table
                USING (permalink)
            );

            $flat_data = (
                SELECT
                    permalink,
                    record_id,
                    offer.0 as mapping_key,
                    offer.1 as orig_hotel_id,
                    offer.2 as operator_id
                FROM (
                    SELECT
                        permalink,
                        record_id,
                        $extract(Resp_Hotels, Info_Hotels, permalink) as offers_data
                    FROM $filtered_data)
                FLATTEN LIST BY offers_data as offer
            );

            $operator_id_replaced = (
                SELECT
                    permalink,
                    record_id,
                    mapping_key,
                    orig_hotel_id,
                    text.OperatorId AS operator_id
                FROM $flat_data as data
                INNER JOIN `//home/travel/prod/config/operators` as text
                ON data.operator_id == text.OperatorIdInt
            );

            $mappings = (
                SELECT
                    is_hidden,
                    permalink,
                    mapping_key,
                    operator_id,
                    orig_hotel_id
                FROM `{known_mappings_table}`
                WHERE permaroom_id IS NOT NULL AND NOT is_deleted
            );

            $data_with_known_offers = (
                SELECT
                    *
                FROM (SELECT
                        data.*,
                        mappings.is_hidden IS NULL AS permaroom_missing,
                        is_hidden ?? False AS is_hidden
                    FROM $operator_id_replaced as data
                    LEFT JOIN $mappings as mappings
                    USING (permalink, mapping_key, operator_id, orig_hotel_id))
                WHERE NOT is_hidden
            );

            $groped_data = (
                SELECT
                    record_id,
                    permalink,
                    mapping_key,
                    orig_hotel_id,
                    operator_id,
                    MIN(permaroom_missing) AS permaroom_missing,
                    COUNT(*) as offer_num
                FROM $data_with_known_offers
                GROUP BY (record_id, permalink, mapping_key, orig_hotel_id, operator_id)
            );

            $full_offers_and_other = (
                SELECT
                    record_id,
                    SOME(permalink) as permalink,
                    SUM(offer_num) as full_offer_count,
                    SUM_IF(offer_num, permaroom_missing) as other_count,
                    AGGREGATE_LIST((mapping_key, orig_hotel_id, operator_id, permaroom_missing, offer_num))
                        as offers_data
                FROM $groped_data
                GROUP BY (record_id)
            );

            INSERT INTO `{neg_table}`
            SELECT
                permalink,
                full_offer_count,
                other_count,
                offers_data
            FROM $full_offers_and_other
            WHERE (CAST(other_count AS Double) / full_offer_count) >= {self.options.other_percentage};

            INSERT INTO `{pos_table}`
            SELECT
                permalink,
                full_offer_count,
                other_count,
                offers_data
            FROM $full_offers_and_other
            WHERE (CAST(other_count AS Double) / full_offer_count) < {self.options.other_percentage};

            $data_with_offers_info = (
                SELECT
                    data.permalink ?? fo.permalink as permalink,
                    mapping_key,
                    orig_hotel_id,
                    operator_id,
                    CASE
                        WHEN other_count - {self.options.other_percentage} * full_offer_count < 1
                        THEN 1.0
                        ELSE offer_num / (other_count - {self.options.other_percentage} * full_offer_count)
                    END AS weight
                FROM (SELECT * FROM $groped_data WHERE permaroom_missing) as data
                INNER JOIN $full_offers_and_other as fo
                USING (record_id)
            );

            INSERT INTO `{rank_table}`
            SELECT
                permalink,
                mapping_key,
                orig_hotel_id,
                operator_id,
                SUM(weight) as weight
            FROM $data_with_offers_info
            GROUP BY (permalink, mapping_key, orig_hotel_id, operator_id);'''

        # The return value of run_query is not used; the results live in the three tables.
        self.yql_client.run_query(query)

    def pass_data(self, filter_mappings_table: str, keep_table: str, remove_table: str):
        """Pass all mappings through without filtering.

        The input table is copied to ``keep_table`` as is, and ``remove_table``
        is written with no rows using the ``StorageMapping`` schema.
        """
        LOG.info(f'Pass data from {filter_mappings_table} to {keep_table}')

        storage = self.persistence_manager
        storage.copy(filter_mappings_table, keep_table)

        nothing_to_remove = []
        storage.write(remove_table, nothing_to_remove, get_dc_yt_schema(StorageMapping))

    def create_ordering_table(self,
                              filter_mappings_table: str,
                              pos_table: str,
                              neg_table: str,
                              rank_table: str,
                              keep_table: str,
                              remove_table: str):
        """Split the input mappings into the ones to keep and the ones to remove.

        Offers from ``rank_table`` are ordered by descending weight and
        "mapped" one by one via ``test_ordering``, which yields one probability
        per ranked offer. An input mapping is kept when its key is ranked and
        that probability does not exceed ``self.options.probability``; every
        other mapping is removed.

        :param filter_mappings_table: table with the mappings to split
        :param pos_table: table of positive records; only its row count is used
        :param neg_table: table of negative records, read by ``read_neg_offers``
        :param rank_table: table of offer weights, read by ``read_ranking``
        :param keep_table: output table for mappings to keep
        :param remove_table: output table for mappings to remove
        """
        LOG.info(f'Reading mappings from {filter_mappings_table}')
        input_mappings = list(self.persistence_manager.read(filter_mappings_table))
        LOG.info(f'Reading ranking from {rank_table}')
        ranking = self.read_ranking(rank_table)
        LOG.info(f'Reading negative examples from {neg_table}')
        neg, pos, offers_mapping, shows_mapping = self.read_neg_offers(neg_table)
        pos += self.persistence_manager.row_count(pos_table)

        LOG.info(f'Positive {pos} negative {neg} probability {pos / (max(1, pos + neg))}')
        ranked_items = sorted(ranking.items(), key=lambda item: item[1], reverse=True)
        ranked_keys = [key for key, _ in ranked_items]

        LOG.info(f'Testing ordering of {len(ranked_keys)} mappings')
        ordering_probabilities = self.test_ordering(ranked_keys, offers_mapping, shows_mapping, pos, neg)

        # Index the probabilities by key once: looking each mapping up with
        # `in` / `.index()` on the ranked list is a linear scan per mapping.
        # Keys come from a dict, so they are unique.
        probability_by_key = dict(zip(ranked_keys, ordering_probabilities))
        max_probability = self.options.probability

        keep = []
        remove = []
        for mapping in input_mappings:
            probability = probability_by_key.get(self.mapping2key(mapping))
            if probability is not None and probability <= max_probability:
                keep.append(mapping)
            else:
                remove.append(mapping)

        LOG.info(f'Mappings to keep {len(keep)}, to remove {len(remove)}')

        self.persistence_manager.write(keep_table, keep, get_dc_yt_schema(StorageMapping))
        self.persistence_manager.write(remove_table, remove, get_dc_yt_schema(StorageMapping))

    def run(self):
        """Entry point: produce the 'mappings_to_keep' and 'mappings_to_remove' tables.

        With ``self.options.no_filter`` set, the input mappings are passed
        through unchanged. Otherwise the pos/neg/rank tables are built for the
        last month (ending today), used to split the input mappings, and
        deleted afterwards.
        """
        storage = self.persistence_manager

        # Input data
        input_mappings_table = storage.join(self.input_path, 'mappings')
        # Output data
        keep_table = storage.join(self.output_path, 'mappings_to_keep')
        remove_table = storage.join(self.output_path, 'mappings_to_remove')

        if self.options.no_filter:
            self.pass_data(input_mappings_table, keep_table, remove_table)
            return

        # Temporary data
        pos_table, neg_table, rank_table = [
            storage.join(self.output_path, name) for name in ('pos', 'neg', 'rank')
        ]

        today = date.today()
        month_ago = today - relativedelta(months=1)

        self.get_offers_info(pos_table, neg_table, rank_table, month_ago.isoformat(), today.isoformat())

        self.create_ordering_table(input_mappings_table, pos_table, neg_table, rank_table, keep_table, remove_table)

        # Drop the temporary tables once the result has been written.
        for temporary_table in (pos_table, neg_table, rank_table):
            if storage.exists(temporary_table):
                storage.delete(temporary_table)
