package ru.yandex.webmaster3.worker.hoststat;

import java.util.Map;
import java.util.stream.Collectors;

import NWebmaster.NProto.Hostinfo;
import com.google.common.collect.ImmutableMap;

import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.hoststat.HostStatisticsCHDao;
import ru.yandex.webmaster3.storage.hoststat.HostStatisticsCHDao.F;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHField;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.yql.YqlFunctions;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.turbo.AbstractYqlPrepareImportTask;

/**
 * Periodic task that prepares, via a YQL query, the intermediate table used to load
 * host statistics into the ClickHouse table {@link HostStatisticsCHDao#TABLE}.
 *
 * <p>Each source row is rendered as one text line: the per-column YQL expressions from
 * {@link #SOURCE_EXPRESSIONS} are concatenated with {@code '\t'} and terminated with {@code '\n'}.
 * Lines are then grouped by {@code (ShardId, RowId)}, concatenated and gzip-compressed.
 *
 * <p>Created by Oleg Bazdyrev on 12/03/2021.
 */
public class ImportHostStatisticsTask extends AbstractYqlPrepareImportTask {

    /** Modulus used for {@code RowId}: upper bound on the number of distinct row buckets per shard. */
    private static final int LINES_COUNT = 1024;

    /**
     * YQL expression producing the textual value of each ClickHouse column, keyed by column name.
     * Numeric columns fall back to {@code 0} when the source value is null; sample columns are
     * extracted from the {@code Samples} proto and serialized to JSON by {@code $objToJson}
     * with {@code 'null'} / {@code '[]'} as the default for a missing value.
     */
    private static final Map<String, String> SOURCE_EXPRESSIONS = new ImmutableMap.Builder<String, String>()
            .put(F.HOST_ID, "$url2HostId(Host)")
            .put(F.DOCS, "cast(nvl(Docs, 0) as String)")
            .put(F.DOCS_ON_SEARCH, "cast(nvl(DocsOnSearch, 0) as String)")
            .put(F.HTML_DOCS, "cast(nvl(HtmlDocs, 0) as String)")
            .put(F.GOOD_DOCS_ON_SEARCH, "cast(nvl(GoodDocsOnSearch, 0) as String)")
            .put(F.EMPTY_TITLES, "cast(nvl(EmptyTitles, 0) as String)")
            .put(F.EMPTY_DESCRIPTIONS, "cast(nvl(EmptyDescriptions, 0) as String)")
            .put(F.SLOW_PAGES, "cast(nvl(SlowPages, 0) as String)")
            .put(F.PAGES_WITH_FETCH_TIME, "cast(nvl(PagesWithFetchTime, 0) as String)")
            .put(F.TOTAL_FETCH_TIME, "cast(nvl(TotalFetchTime, 0) as String)")
            .put(F.UNIQUE_TITLES, "cast(nvl(UniqueTitles, 0) as String)")
            .put(F.UNIQUE_DESCRIPTIONS, "cast(nvl(UniqueDescriptions, 0) as String)")
            .put(F.DUPLICATE_TITLES, "cast(nvl(DuplicateTitles, 0) as String)")
            .put(F.DUPLICATE_DESCRIPTIONS, "cast(nvl(DuplicateDescriptions, 0) as String)")
            .put(F.MORDA_SAMPLE, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).MordaSample, 'null')")
            .put(F.SLOW_PAGE_SAMPLES, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).SlowPageSamples, '[]')")
            .put(F.EMPTY_TITLE_SAMPLES, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).EmptyTitleSamples, '[]')")
            .put(F.EMPTY_DESCRIPTION_SAMPLES, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).EmptyDescriptionSamples, '[]')")
            .put(F.PREVAILING_TITLES, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).PrevailingTitles, '[]')")
            .put(F.PREVAILING_DESCRIPTIONS, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).PrevailingDescriptions, '[]')")
            .put(F.DUPLICATE_PAGE_SAMPLES, "$objToJson($parse_NWebmaster_NProto_THostSamples(Samples).DuplicatePageSamples, '[]')")
            .build();

    /**
     * Looks up the YQL expression for a ClickHouse column.
     *
     * @param fieldName column name as declared in the target table
     * @return the YQL expression registered in {@link #SOURCE_EXPRESSIONS}
     * @throws IllegalStateException if the column has no registered expression; without this check
     *                               the literal text {@code null} would be spliced into the query
     */
    private static String sourceExpression(String fieldName) {
        String expression = SOURCE_EXPRESSIONS.get(fieldName);
        if (expression == null) {
            throw new IllegalStateException(
                    "No YQL source expression defined for host statistics field: " + fieldName);
        }
        return expression;
    }

    /** @return the ClickHouse table whose field list drives the generated row format */
    @Override
    protected CHTable getTable() {
        return HostStatisticsCHDao.TABLE;
    }

    /**
     * Builds the YQL query that fills {@code INTERMEDIATE_TABLE} from the import's source table.
     *
     * @param imprt import descriptor; only its source table is read here
     * @return builder holding the complete query text
     * @throws IllegalStateException if a field of {@link #getTable()} has no entry in
     *                               {@link #SOURCE_EXPRESSIONS}
     */
    @Override
    protected YqlQueryBuilder prepareIntermediateTable(YtClickhouseDataLoad imprt) {
        // One expression per table field, in table order, joined into "(f1 || '\t' || f2 ... || '\n')".
        String fields = getTable().getFields().stream()
                .map(CHField::getName)
                .map(ImportHostStatisticsTask::sourceExpression)
                .collect(Collectors.joining(" || '\\t' || \n", "\n(", " || '\\n')\n"));

        YqlQueryBuilder yqlQueryBuilder = YqlQueryBuilder.newBuilder()
                .cluster(tablePath)
                .appendFDefinition(YqlFunctions.URL_2_HOST_ID)
                .appendFDefinition(YqlFunctions.protoDefinition("NWebmaster.NProto.THostSamples", YqlFunctions.protoMeta(Hostinfo.getDescriptor())))
                // $objToJson: JSON-serializes a value (or returns the default when it is null) and doubles
                // every backslash — presumably so the text survives escape processing on load; TODO confirm.
                .appendText("$objToJson = ($l, $def) -> { return if($l is null, $def, String::ReplaceAll(cast(Yson::SerializeJson(Yson::From($l)) as String), @@\\@@, @@\\\\@@)); };\n")
                .appendText("PRAGMA yt.MaxRowWeight = '128M';\n")
                .appendText("INSERT INTO " + INTERMEDIATE_TABLE)
                // Outer query: concatenate all lines of one (ShardId, RowId) bucket and gzip the result.
                .appendText("SELECT ShardId, RowId, Compress::Gzip(String::JoinFromList(AGGREGATE_LIST(data), ''), 6) as data FROM (\n")
                // Inner query: bucket each host by the Fnv64 hash of its host id.
                .appendText("SELECT (Digest::Fnv64($url2HostId(Host)) % " + getShardsCount() + ") as ShardId,")
                .appendText("((Digest::Fnv64($url2HostId(Host)) / " + getShardsCount() + ") % " + LINES_COUNT + ") as RowId,")
                .appendText(fields).appendText("as data ")
                .appendText("FROM")
                .appendTable(imprt.getSourceTable())
                .appendText(") \n GROUP BY ShardId, RowId;")
                .appendText("COMMIT;\n\n");

        return yqlQueryBuilder;
    }

    /** @return the ClickHouse table type handled by this import */
    @Override
    protected TableType getTableType() {
        return TableType.HOST_STATISTICS;
    }

    /** @return the periodic task type identifying this task */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_HOST_STATISTICS;
    }

    /** @return the YT-to-ClickHouse load type handled by this import */
    @Override
    protected YtClickhouseDataLoadType getImportType() {
        return YtClickhouseDataLoadType.HOST_STATISTICS;
    }

    /**
     * @return the cron-based schedule of this task
     */
    @Override
    public TaskSchedule getSchedule() {
        // NOTE(review): six-field cron; presumably seconds-first (second 30 of minute 19, every hour)
        // — confirm against the cron dialect TaskSchedule uses.
        return TaskSchedule.startByCron("30 19 * * * *");
    }

    /**
     * Debug entry point: prints the generated YQL function definition for the
     * {@code NWebmaster.NProto.THostSamples} proto.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(YqlFunctions.protoDefinition("NWebmaster.NProto.THostSamples", YqlFunctions.protoMeta(Hostinfo.getDescriptor())).getFunctionDef());
    }

}
