package ru.yandex.webmaster3.worker.searchurl;

import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Strings;
import lombok.AllArgsConstructor;
import lombok.Value;
import org.joda.time.Instant;
import org.joda.time.LocalDate;

import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.proto.Urltree;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.jupiter.JupiterUtils;
import ru.yandex.webmaster3.storage.searchurl.history.dao.LastSiteStructureCHDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.util.yt.YtNode;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.yql.YqlFunctions;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadState;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.turbo.AbstractYqlPrepareImportTask;

/**
 * Periodic task that imports the root-node site structure counters into the ClickHouse table
 * {@link LastSiteStructureCHDao#TABLE}.
 *
 * <p>On each run {@link #init} picks the next not-yet-processed {@code ready-<digits>} table under
 * {@code tablePath}, and {@link #prepareIntermediateTable} builds the YQL query that converts it into
 * gzip-compressed, tab-separated rows in the intermediate table.
 *
 * <p>The import's {@code data} field carries the state between steps in the form
 * {@code <sourceTableName>_<baseDateMillis>}.
 *
 * Created by Oleg Bazdyrev on 04/03/2021.
 */
public class ImportLastSiteStructuresTask extends AbstractYqlPrepareImportTask {

    /** Name of the node attribute from which the base date of a source table is derived. */
    private static final String ATTR_SOURCE = "acceptance_table";
    /** Accepts only source tables whose whole name is {@code ready-<digits>}. */
    private static final Predicate<String> SOURCE_TABLE_PREDICATE = Pattern.compile("(ready-[0-9]+)").asMatchPredicate();
    /** Number of {@code RowId} buckets the hosts are distributed over inside a shard. */
    private static final int ROW_COUNT = 1024;
    /** Separator between the source table name and the base date millis in the import data. */
    private static final String DATA_SEPARATOR = "_";

    /**
     * @return the number of shards used for the {@code ShardId} computation; always {@code 1} here
     */
    @Override
    protected int getShardsCount() {
        return 1;
    }

    /**
     * Selects the next source table to import.
     *
     * <p>Lists {@code tablePath}, keeps names matching {@code ready-<digits>} and takes the first one that is
     * greater than the table recorded in {@code latestImport}'s data. If there is none, the import is returned
     * in the {@code DONE} state. Otherwise the import data is set to {@code <tableName>_<baseDateMillis>} and
     * the source table is set with today's date for both date arguments.
     *
     * @param latestImport the latest known import; its data may be {@code null} when nothing was processed yet
     * @return the updated import descriptor
     * @throws NullPointerException if the selected table has no {@code acceptance_table} attribute
     */
    @Override
    protected YtClickhouseDataLoad init(YtClickhouseDataLoad latestImport) throws Exception {
        return ytService.withoutTransactionQuery(cypressService -> {
            List<YtPath> tables = cypressService.list(tablePath);
            String previousData = latestImport.getData();
            String lastProcessedTable = previousData == null ? "" : previousData.split(DATA_SEPARATOR)[0];
            // NOTE(review): names are compared lexicographically, which orders numeric suffixes correctly
            // only while they all have the same number of digits - confirm against the table naming scheme.
            Optional<String> tableName = tables.stream()
                    .sorted()
                    .map(YtPath::getName)
                    .filter(SOURCE_TABLE_PREDICATE)
                    .filter(name -> name.compareTo(lastProcessedTable) > 0)
                    .findFirst();
            if (tableName.isEmpty()) {
                return latestImport.withState(YtClickhouseDataLoadState.DONE);
            }

            YtPath sourceTable = YtPath.path(tablePath, tableName.get());
            YtNode node = cypressService.getNode(sourceTable);
            // The attribute lookup may yield null when the attribute is absent; fail with a message that
            // names the table instead of an anonymous NullPointerException.
            String acceptanceTable = Objects.requireNonNull(
                    node.getNodeMeta().get(ATTR_SOURCE),
                    "Attribute '" + ATTR_SOURCE + "' is missing on table " + sourceTable
            ).asText();
            Instant baseDate = JupiterUtils.getBaseDateFromPath(acceptanceTable);

            // Read the current date once so both date arguments are guaranteed to be equal.
            LocalDate today = LocalDate.now();
            return latestImport.withData(tableName.get() + DATA_SEPARATOR + baseDate.getMillis())
                    .withSourceTable(sourceTable, today, today);
        });
    }

    /**
     * Extracts the table suffix from the import data written by {@link #init}.
     *
     * @param imprt the import whose data has the form {@code <tableName>_<baseDateMillis>}
     * @return the second {@code _}-separated component of the data (the base date millis)
     * @throws IllegalStateException if the data is missing or does not contain a second component
     */
    @Override
    protected String getTableSuffix(YtClickhouseDataLoad imprt) {
        String data = imprt.getData();
        String[] parts = data == null ? new String[0] : data.split(DATA_SEPARATOR);
        if (parts.length < 2) {
            throw new IllegalStateException(
                    "Unexpected import data, expected '<table>" + DATA_SEPARATOR + "<baseDateMillis>' but got: " + data);
        }
        return parts[1];
    }

    /**
     * Builds the YQL query that fills the intermediate table from the import's source table.
     *
     * <p>The query parses the {@code value} column as a {@code NWebmaster.proto.urltree.HostInfo} message,
     * flattens its {@code nodes} and keeps only the node with {@code name == '/'}, {@code node_id == 1},
     * {@code parent_id = 0}, {@code shard_id = 0} and {@code search_source_id = 0}. For each such node it emits
     * one tab-separated line: host id, {@code num_of_docs}, {@code num_of_docs_on_search},
     * {@code num_of_new_search_docs}, {@code num_of_gone_search_docs}. Lines are grouped by
     * {@code (ShardId, RowId)}, concatenated and gzip-compressed into the {@code data} column.
     *
     * @param imprt the import providing the source table
     * @return the query builder holding the complete query text
     */
    @Override
    protected YqlQueryBuilder prepareIntermediateTable(YtClickhouseDataLoad imprt) {
        int shardCount = getShardsCount();
        YqlQueryBuilder queryBuilder = new YqlQueryBuilder();
        queryBuilder
                .cluster(tablePath)
                .inferSchema(YqlQueryBuilder.InferSchemaMode.INFER)
                .appendText("PRAGMA yt.MaxRowWeight = '128M';\n")
                .appendText("PRAGMA yt.DefaultMemoryLimit = '4G';\n\n")
                // Explicit line break so the table reference never runs into the following SELECT keyword.
                .appendText("INSERT INTO " + INTERMEDIATE_TABLE + "\n")
                .appendText("SELECT ShardId, RowId, Compress::Gzip(String::JoinFromList(AGGREGATE_LIST(data), ''), 6) as data FROM\n")
                .appendText("(\n")
                .appendText("  SELECT\n")
                // ShardId and RowId use different hash functions of the same host id.
                .appendText("    (Digest::Fnv64(").appendFCall(YqlFunctions.url2HostId("host")).appendText(") % " + shardCount + ") as ShardId,\n")
                .appendText("    (Digest::CityHash(").appendFCall(YqlFunctions.url2HostId("host")).appendText(") % " + ROW_COUNT + ") as RowId,\n")
                // One tab-separated, newline-terminated text line per host.
                .appendText("    (")
                .appendText("    ").appendFCall(YqlFunctions.url2HostId("host")).appendText(" || '\\t' ||\n")
                .appendText("    cast(n.num_of_docs as String) || '\\t' ||\n")
                .appendText("    cast(n.num_of_docs_on_search as String) || '\\t' ||\n")
                .appendText("    cast(n.num_of_new_search_docs as String) || '\\t' ||\n")
                .appendText("    cast(n.num_of_gone_search_docs as String) || '\\n'\n")
                .appendText("    ) as data\n")
                .appendText("  FROM\n")
                .appendText("  (\n")
                .appendText("    SELECT host, message.nodes as nodes FROM \n")
                .appendText("    (\n")
                .appendText("      SELECT key as host, ")
                .appendFCall(YqlFunctions.parseProto("NWebmaster.proto.urltree.HostInfo", YqlFunctions.protoMeta(Urltree.getDescriptor()), "value"))
                .appendText(" as message\n")
                .appendText("      FROM").appendTable(imprt.getSourceTable()).appendText("\n")
                .appendText("    )\n")
                .appendText("  )\n")
                .appendText("  FLATTEN LIST BY nodes as n\n")
                // Keep only the root ('/') node of each host.
                .appendText("  WHERE n.shard_id = 0 and n.search_source_id = 0 and n.name == '/' and n.node_id == 1 and n.parent_id = 0\n")
                .appendText(")\n")
                .appendText("GROUP BY ShardId, RowId;\n")
                .appendText("COMMIT;\n\n");

        return queryBuilder;
    }

    /**
     * @return the ClickHouse table definition this task loads data into
     */
    @Override
    protected CHTable getTable() {
        return LastSiteStructureCHDao.TABLE;
    }

    /**
     * @return the table type of the imported data
     */
    @Override
    protected TableType getTableType() {
        return TableType.LAST_SITE_STRUCTURE;
    }

    /**
     * @return the data load type identifying this import
     */
    @Override
    protected YtClickhouseDataLoadType getImportType() {
        return YtClickhouseDataLoadType.LAST_SITE_STRUCTURE;
    }

    /**
     * @return the periodic task type of this task
     */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_LAST_SITE_STRUCTURES;
    }

    /**
     * @return the schedule built from the cron expression {@code 0 17 * * * *}
     *         (presumably seconds-first, i.e. minute 17 of every hour - confirm against {@code TaskSchedule})
     */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("0 17 * * * *");
    }

}
