package ru.yandex.crypta.graph2.soup.workflow.ops;

import java.time.LocalDate;
import java.time.Period;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.IteratorF;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.MapF;
import ru.yandex.crypta.graph2.model.soup.edge.Edge;
import ru.yandex.crypta.graph2.model.soup.edge.EdgeType;
import ru.yandex.crypta.graph2.model.soup.edge.EdgeTypeActivityStats;
import ru.yandex.inside.yt.kosher.impl.operations.utils.ReducerWithKey;
import ru.yandex.inside.yt.kosher.operations.Statistics;
import ru.yandex.inside.yt.kosher.operations.Yield;


public class CalculateEdgeTypesStats {

    /**
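     * The following YQL query builds, per (id1Type, id2Type, sourceType, logSource),
     * a histogram of edge dates count and a histogram of edge lifetime: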
     * <p>
     * use hahn;
     * <p>
     * $get_lifetime = ($dates_list) -> {
     * $dates_list = ListMap(ListSort($dates_list), DateTime::FromString);
     * $dates_count = ListLength($dates_list);
     * $start_date = $dates_list{0};
     * $end_date = $dates_list{ListLength($dates_list) - 1};
     * <p>
     * $lifetime = case
     * when $dates_count == 0 then 0
     * when $dates_count == 1 then 1
     * else DateTime::ToDays(YQL::Unwrap($end_date) - YQL::Unwrap($start_date))
     * end;
     * <p>
     * RETURN $lifetime;
     * };
     * <p>
     * $edges = (select
     * id1Type, id2Type, sourceType, logSource,
     * ListLength(Yson::ConvertToList(dates)) as dates_count,
     * $get_lifetime(Yson::ConvertToStringList(dates)) as lifetime
     * from [//home/crypta/team/artembelov/v2_rework_full/prepare_soup/soup_edges]);
     * <p>
     * -- dates count
     * $dates_count_hist_tmp = (
     * select id1Type, id2Type, sourceType, logSource, dates_count, count(*) as edges_count
     * from $edges
     * GROUP BY id1Type, id2Type, sourceType, logSource, dates_count
     * );
     * <p>
     * $dates_count_hist = (select id1Type, id2Type, sourceType, logSource,
     * ToDict(LIST(AsTuple(dates_count, edges_count))) as dates_count_hist_dict
     * from $dates_count_hist_tmp
     * group by id1Type, id2Type, sourceType, logSource);
     * <p>
     * -- lifetime
     * $lifetime_hist_tmp = (
     * select id1Type, id2Type, sourceType, logSource, lifetime, count(*) as edges_count
     * from $edges
     * GROUP BY id1Type, id2Type, sourceType, logSource, lifetime
     * );
     * <p>
     * $lifetime_hist = (select id1Type, id2Type, sourceType, logSource,
     * ToDict(LIST(AsTuple(lifetime, edges_count))) as lifetime_hist_dict
     * from $lifetime_hist_tmp
     * group by id1Type, id2Type, sourceType, logSource);
     * <p>
     * insert into [//home/crypta/team/artembelov/edge_stats] WITH TRUNCATE
     * select
     * a.id1Type, a.id2Type, a.sourceType, a.logSource,
     * a.dates_count_hist_dict, b.lifetime_hist_dict
     * from $dates_count_hist as a
     * full join $lifetime_hist as b
     * using (id1Type, id2Type, sourceType, logSource);
     */

    /**
     * Map stage: converts every {@link Edge} into an {@link EdgeTypeActivityStats} record
     * (edge type, number of activity dates, lifetime in days) and pre-aggregates those records
     * per {@link EdgeType} in memory before yielding them.
     * <p>
     * The in-memory buffer is flushed once more than {@link #MAX_RECORDS} edges have been
     * processed since the previous flush, and one final time in {@link #finish}.
     */
    public static class Mapper implements ru.yandex.inside.yt.kosher.operations.map.Mapper<Edge, EdgeTypeActivityStats> {

        /** Number of processed edges after which the buffered stats are flushed. */
        static final int MAX_RECORDS = 10000;
        /** Stats accumulated since the last flush, one merged entry per edge type. */
        private MapF<EdgeType, EdgeTypeActivityStats> preCalculatedStats = Cf.hashMap();
        /** Number of edges processed since the last flush. */
        private int recordsCount = 0;

        /**
         * Builds the stats record for a single edge and merges it into the per-type buffer.
         *
         * @param edge       input edge; its dates are parsed with {@link LocalDate#parse(CharSequence)}
         * @param yield      output sink, written to only when the buffer is flushed
         * @param statistics not used by this mapper
         */
        @Override
        public void map(Edge edge, Yield<EdgeTypeActivityStats> yield, Statistics statistics) {
            EdgeType edgeType = edge.calculateEdgeType();

            // Dates are sorted as strings first and parsed afterwards, so the first/last
            // elements are the earliest/latest dates only if string order matches date order
            // (true for ISO-8601 yyyy-MM-dd strings, which is what LocalDate.parse accepts).
            ListF<LocalDate> activityDates = edge.getDates().sorted().map(LocalDate::parse);
            int activityDatesCount = activityDates.size();
            int edgeLifetimeDays = getEdgeLifetime(activityDates);

            EdgeTypeActivityStats stats = new EdgeTypeActivityStats(
                    edgeType,
                    activityDatesCount,
                    edgeLifetimeDays
            );

            preCalculatedStats.merge(edgeType, stats, EdgeTypeActivityStats::merge);
            recordsCount++;

            if (recordsCount > MAX_RECORDS) {
                yieldStats(yield);
            }
        }

        /**
         * Emits every buffered per-type stats record, then resets the buffer and the counter.
         */
        private void yieldStats(Yield<EdgeTypeActivityStats> yield) {
            for (EdgeTypeActivityStats stats : preCalculatedStats.values()) {
                yield.yield(stats);
            }
            preCalculatedStats.clear();
            recordsCount = 0;
        }

        /**
         * Flushes whatever is still buffered when the input is exhausted.
         */
        @Override
        public void finish(Yield<EdgeTypeActivityStats> yield, Statistics statistics) {
            yieldStats(yield);
        }

        /**
         * Returns the inclusive number of days between the first and the last activity date.
         *
         * @param sortedActivityDates activity dates in ascending order
         * @return 0 for an empty list, 1 when first and last dates coincide,
         *         otherwise (last - first) in days plus one
         * @throws ArithmeticException if the span does not fit into an {@code int}
         */
        private int getEdgeLifetime(ListF<LocalDate> sortedActivityDates) {
            if (sortedActivityDates.isEmpty()) {
                return 0;
            }

            LocalDate start = sortedActivityDates.first();
            LocalDate end = sortedActivityDates.last();

            // Period.between(...).getDays() is only the "days" component of a
            // years/months/days period (e.g. 4 for "2 months 4 days"), so it silently
            // truncates any lifetime longer than a month. The epoch-day difference is the
            // total number of days between the two dates.
            long daysBetween = end.toEpochDay() - start.toEpochDay();

            // inclusive: hit at some day means 1 day of activity
            return Math.toIntExact(daysBetween + 1);
        }
    }


    /**
     * Reduce stage: groups the mapper output by {@link EdgeType} and folds every group
     * into a single {@link EdgeTypeActivityStats} record.
     */
    public static class Reducer implements ReducerWithKey<EdgeTypeActivityStats, EdgeTypeActivityStats, EdgeType> {

        /**
         * Grouping key of a stats record: its edge type.
         */
        @Override
        public EdgeType key(EdgeTypeActivityStats edge) {
            return edge.getEdgeType();
        }

        /**
         * Merges all stats records of one edge type into a fresh accumulator and yields it.
         *
         * @param edgeType   key shared by all {@code entries}
         * @param entries    partial stats records for {@code edgeType}
         * @param yield      output sink, receives exactly one record per call
         * @param statistics not used by this reducer
         */
        @Override
        public void reduce(EdgeType edgeType, IteratorF<EdgeTypeActivityStats> entries, Yield<EdgeTypeActivityStats> yield, Statistics statistics) {
            EdgeTypeActivityStats accumulator = new EdgeTypeActivityStats(edgeType);

            // The value returned by merge() is deliberately ignored; the accumulator
            // instance itself is what gets yielded.
            while (entries.hasNext()) {
                accumulator.merge(entries.next());
            }

            yield.yield(accumulator);
        }


    }
}
