package ru.yandex.webmaster3.storage.searchquery.importing;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import com.datastax.driver.core.utils.UUIDs;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.lang3.text.StrSubstitutor;
import org.joda.time.Duration;
import org.joda.time.LocalDate;
import org.joda.time.LocalTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.searchquery.SpecialGroup;
import ru.yandex.webmaster3.core.util.TimeUtils;
import ru.yandex.webmaster3.storage.clickhouse.TableProvider;
import ru.yandex.webmaster3.storage.clickhouse.TableState;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.clickhouse.dao.ClickhouseTablesRepository;
import ru.yandex.webmaster3.storage.clickhouse.replication.MdbClickhouseReplicationManager;
import ru.yandex.webmaster3.storage.clickhouse.replication.data.ClickhouseReplicationCommand;
import ru.yandex.webmaster3.storage.clickhouse.replication.data.ClickhouseReplicationPriority;
import ru.yandex.webmaster3.storage.clickhouse.system.dao.ClickhouseSystemTablesCHDao;
import ru.yandex.webmaster3.storage.searchquery.SearchQueriesConstants;
import ru.yandex.webmaster3.storage.util.clickhouse2.AbstractClickhouseDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseHost;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseQueryContext;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseServer;
import ru.yandex.webmaster3.storage.util.yt.YtException;
import ru.yandex.webmaster3.storage.util.yt.YtNodeAttributes;
import ru.yandex.webmaster3.storage.util.yt.YtOperationId;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.util.yt.YtUtils;
import ru.yandex.webmaster3.storage.ytimport.ImportPriority;
import ru.yandex.webmaster3.storage.ytimport.MdbYtClickhouseImportManager;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseImportCommand;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseTableRelation;

/**
 * @author avhaliullin
 */
public class QueriesImportService {
    // Class-wide SLF4J logger.
    private static final Logger log = LoggerFactory.getLogger(QueriesImportService.class);

    // Memory limits handed to the YT map-reduce builder by prepareFav, prepareTopQueries and prepareTopUrls.
    protected static final long TOP_MAPPER_MEMORY_LIMIT = 2L * 1024 * 1024 * 1024; // 2Gb
    protected static final long TOP_REDUCER_MEMORY_LIMIT = 2L * 1024 * 1024 * 1024; // 2Gb
    // Formats the value of the "--date" mapper argument (yyyy-MM-dd) in prepareWeek and prepareGroups.
    protected static final DateTimeFormatter CH_INSERTION_DATE_FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd");
    // Database name passed to the import and replication commands built by the import*/replicate* methods below.
    protected static final String DB = AbstractClickhouseDao.DB_WEBMASTER3_QUERIES;
    // 23:59:59; prepareFav combines it with dateTo to compute the "--date-to" mapper argument.
    protected static final LocalTime END_DAY = new LocalTime(23, 59, 59);

    // Target table name handed to mergeTables by renameFav.
    private static final String FAVORITE_FULL_TABLE_NAME = "favorite_values";
    // Target table name handed to mergeTables by renameGroup.
    private static final String GROUP_FULL_TABLE_NAME = "group_statistics";
    // NOTE(review): not referenced by the methods directly below; presumably used further down the file.
    private static final String HOST_REGIONS_TABLE_NAME = "host_regions";
    // NOTE(review): yyyyMM pattern; not referenced by the methods directly below, presumably used further down the file.
    private static final DateTimeFormatter PARTITION_MONTH_FORMAT = DateTimeFormat.forPattern("yyyyMM");

    // Collaborators below are injected through Lombok-generated setters.

    // Receives the YT -> ClickHouse import commands (startImport).
    @Setter
    protected MdbYtClickhouseImportManager ytClickhouseImportManager;
    // Receives the replication commands (enqueueReplication).
    @Setter
    protected MdbClickhouseReplicationManager clickhouseReplicationManager;
    // Opens the YT transactions in which the prepare* methods run their map-reduce operations.
    @Setter
    protected YtService ytService;
    // Supplies the shard count and executes statements on all ClickHouse hosts.
    @Setter
    protected ClickhouseServer clickhouseServer;
    // NOTE(review): not referenced by the methods directly below.
    @Setter
    protected ClickhouseSystemTablesCHDao clickhouseSystemTablesCHDao;
    // YT directory that roots the transactions and the "week", "favorite", "groups", "top" working tables.
    @Setter
    protected YtPath workdir;
    // YT path passed to the map-reduce builder through setBinary.
    @Setter
    protected YtPath mrBinary;
    // NOTE(review): not referenced by the methods directly below.
    @Setter
    protected TableProvider tableStorage;
    // Repository updated with table metadata by renameWeek, renameTop and renameTopUrls.
    @Setter
    protected ClickhouseTablesRepository clickhouseTablesCDao;

    /**
     * Table definition used by the week import steps (prepare/import/replicate/rename).
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_WEEK_TABLE}
     */
    protected CHTable getWeekTable() {
        return QueryTablesDefinitions.MDB_WEEK_TABLE;
    }

    /**
     * Table definition used by the favorite-values import steps.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_FAVORITE_VALUES_TABLE}
     */
    protected CHTable getFavoriteValuesTable() {
        return QueryTablesDefinitions.MDB_FAVORITE_VALUES_TABLE;
    }

    /**
     * Table definition used by the group-statistics import steps.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_GROUPS_TABLE}
     */
    protected CHTable getGroupsTable() {
        return QueryTablesDefinitions.MDB_GROUPS_TABLE;
    }

    /**
     * Table definition for the values half of the top-queries import.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_TOP_QUERIES_VALUES_TABLE}
     */
    protected CHTable getTopQueriesValuesTable() {
        return QueryTablesDefinitions.MDB_TOP_QUERIES_VALUES_TABLE;
    }

    /**
     * Table definition for the texts half of the top-queries import.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_TOP_QUERIES_TEXTS_TABLE}
     */
    protected CHTable getTopQueriesTextsTable() {
        return QueryTablesDefinitions.MDB_TOP_QUERIES_TEXTS_TABLE;
    }

    /**
     * Table definition for the values half of the top-urls import.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_TOP_URLS_VALUES_TABLE}
     */
    protected CHTable getTopUrlsValuesTable() {
        return QueryTablesDefinitions.MDB_TOP_URLS_VALUES_TABLE;
    }

    /**
     * Table definition for the texts half of the top-urls import.
     * Protected and non-final, so a subclass can substitute a different definition.
     *
     * @return {@code QueryTablesDefinitions.MDB_TOP_URLS_TEXTS_TABLE}
     */
    protected CHTable getTopUrlsTextsTable() {
        return QueryTablesDefinitions.MDB_TOP_URLS_TEXTS_TABLE;
    }

    /**
     * Enqueues an offline replication task for the week tables of the load's date range
     * (one entry per shard and part) and then runs the week table's merge statement on
     * all ClickHouse hosts.
     *
     * @param importData load descriptor supplying dateFrom/dateTo
     * @return the result of {@code importData.withReplicationTaskIds} for the new task id
     */
    public YtClickhouseDataLoad replicateWeek(YtClickhouseDataLoad importData) {
        final String fromSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom());
        final String toSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        final int shardCount = clickhouseServer.getShardsCount();
        final UUID replicationId = UUIDs.timeBased();
        log.info("Replication id " + replicationId);

        final CHTable weekTable = getWeekTable();
        final List<ClickhouseReplicationCommand.TableInfo> replicatedTables = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < weekTable.getParts(); partNo++) {
                replicatedTables.add(new ClickhouseReplicationCommand.TableInfo(
                        weekTable.replicatedMergeTreeTableName(partNo, fromSuffix, toSuffix),
                        weekTable.createReplicatedMergeTreeSpec(partNo, fromSuffix, toSuffix),
                        shardNo));
            }
        }

        final ClickhouseReplicationCommand replication = new ClickhouseReplicationCommand(
                replicationId, DB, ClickhouseReplicationPriority.OFFLINE, replicatedTables);
        clickhouseReplicationManager.enqueueReplication(replication);

        // Create the merge and distributed tables right away.
        final ClickhouseQueryContext.Builder queryContext = ClickhouseQueryContext.useDefaults();
        clickhouseServer.executeOnAllHosts(queryContext, weekTable.createMerge(fromSuffix, toSuffix));

        return importData.withReplicationTaskIds(replicationId);
    }

    /**
     * Publishes the week tables of the load's date range by writing their table info,
     * already in state {@code ON_LINE}, to the tables repository.
     *
     * @param importData load descriptor supplying dateFrom/dateTo
     * @return the same {@code importData} instance, unchanged
     */
    public YtClickhouseDataLoad renameWeek(YtClickhouseDataLoad importData) {
        final String fromSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom());
        final String toSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());

        // Register the new tables in Cassandra (online right away).
        clickhouseTablesCDao.update(
                getWeekTable()
                        .toClickhouseTableInfo(
                                TableType.WEEK_QUERIES_SMALL,
                                clickhouseServer.getShardsCount(),
                                fromSuffix,
                                toSuffix)
                        .withState(TableState.ON_LINE));
        return importData;
    }

    /**
     * Starts an offline YT -> ClickHouse import of the week working tables: the paths from
     * {@link #weekTables} are paired, in shard-then-part order, with the matching
     * replicated merge-tree table of the week table definition.
     *
     * @param importData load descriptor supplying dateFrom/dateTo
     * @return the result of {@code importData.withImportTaskIds} for the new task id
     */
    public YtClickhouseDataLoad importWeek(YtClickhouseDataLoad importData) {
        final LocalDate rangeStart = importData.getDateFrom();
        final LocalDate rangeEnd = importData.getDateTo();
        final String fromSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(rangeStart);
        final String toSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(rangeEnd);
        final int shardCount = clickhouseServer.getShardsCount();
        final List<YtPath> sourceTables = weekTables(rangeStart, rangeEnd);
        final UUID importId = UUIDs.timeBased();
        log.info("Week import " + importId);

        final CHTable weekTable = getWeekTable();
        final List<YtClickhouseTableRelation> relations = new ArrayList<>();
        // Walks sourceTables in the same shard/part order in which weekTables produced them.
        int cursor = 0;
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < weekTable.getParts(); partNo++) {
                final YtPath source = sourceTables.get(cursor);
                cursor++;
                relations.add(new YtClickhouseTableRelation(
                        source,
                        shardNo,
                        weekTable.replicatedMergeTreeTableName(partNo, fromSuffix, toSuffix),
                        weekTable.createReplicatedMergeTreeSpec(partNo, fromSuffix, toSuffix)));
            }
        }

        ytClickhouseImportManager.startImport(new YtClickhouseImportCommand(
                importId,
                relations,
                DB,
                weekTable.importSpec(),
                ImportPriority.OFFLINE));
        return importData.withImportTaskIds(importId);
    }

    /**
     * Recreates the per-shard/per-part YT working tables for the week and fills them by
     * running the {@code SEARCH_QUERIES_WEEK} map-reduce task over the load's source table,
     * all inside one YT transaction rooted at {@code workdir}.
     *
     * @param importData load descriptor supplying the date range and the source table
     * @return the result of {@code importData.withPreparedTables} for the prepared tables
     * @throws WebmasterException declared by the signature; NOTE(review): presumably raised
     *                            by the YT layer when the transaction or operation fails — confirm
     */
    public YtClickhouseDataLoad prepareWeek(YtClickhouseDataLoad importData) throws WebmasterException {
        LocalDate dateFrom = importData.getDateFrom();
        LocalDate dateTo = importData.getDateTo();
        List<YtPath> tables = weekTables(dateFrom, dateTo);

        ytService.inTransaction(workdir).execute(cypressService -> {
            // Output tables are dropped and created anew with compression disabled.
            YtUtils.recreateTables(cypressService, tables, new YtNodeAttributes().setCompressionCodec("none"));
            YtOperationId operationId = cypressService.mapReduce(
                    YtUtils.newPrepareTablesForImportBuilder()
                            .addInputTable(importData.getSourceTable())
                            .setOutputTables(tables)
                            .setBinary(mrBinary)
                            .setTask("SEARCH_QUERIES_WEEK")
                            .addMapperArg("--date", dateTo.toString(CH_INSERTION_DATE_FORMATTER))
                            .buildV2());

            if (!cypressService.waitFor(operationId)) {
                // The message used to say "favorite" (copy-paste), misreporting which step failed.
                throw new YtException("Prepare week search queries failed. See " + operationId);
            }
            return true;
        });
        return importData.withPreparedTables(tables);
    }

    /**
     * Builds the YT paths of the week working tables under {@code workdir/week}, one per
     * shard and part, ordered by shard and then by part. Names have the form
     * {@code <dateFrom>_<dateTo>_shard-<shard>_part-<part>}.
     *
     * @param dateFrom first day of the range, used in the table name
     * @param dateTo   last day of the range, used in the table name
     * @return the list of paths; empty when there are no shards or no parts
     */
    protected List<YtPath> weekTables(LocalDate dateFrom, LocalDate dateTo) {
        List<YtPath> result = new ArrayList<>();
        YtPath wd = YtPath.path(workdir, "week");
        // Loop invariants are read once, as favTables/groupsTables do for the shard count.
        int shards = clickhouseServer.getShardsCount();
        CHTable weekTable = getWeekTable();
        String datePrefix = dateFrom.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER) + "_"
                + dateTo.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER);
        for (int shard = 0; shard < shards; shard++) {
            for (int part = 0; part < weekTable.getParts(); part++) {
                result.add(YtPath.path(wd, datePrefix + "_shard-" + shard + "_part-" + part));
            }
        }
        return result;
    }

    /**
     * Recreates the per-shard YT working tables for favorite queries and fills them by
     * running the {@code SEARCH_QUERY_STATS} map-reduce task (without query texts) over the
     * load's source table, inside one YT transaction rooted at {@code workdir}.
     *
     * @param importData load descriptor supplying the date range and the source table
     * @return the result of {@code importData.withPreparedTables} for the prepared tables
     * @throws WebmasterException declared by the signature; NOTE(review): presumably raised
     *                            by the YT layer on failure — confirm
     */
    public YtClickhouseDataLoad prepareFav(YtClickhouseDataLoad importData) throws WebmasterException {
        final LocalDate rangeStart = importData.getDateFrom();
        final LocalDate rangeEnd = importData.getDateTo();
        final List<YtPath> shardTables = favTables(rangeStart, rangeEnd);

        ytService.inTransaction(workdir).execute(cypress -> {
            YtUtils.recreateTables(cypress, shardTables, new YtNodeAttributes().setCompressionCodec("none"));

            // Range bounds go to the mapper as epoch seconds: start of dateFrom and END_DAY of dateTo,
            // both resolved in TimeUtils.EUROPE_MOSCOW_ZONE.
            final long fromEpochSeconds =
                    rangeStart.toDateTimeAtStartOfDay(TimeUtils.EUROPE_MOSCOW_ZONE).getMillis() / 1000L;
            final long toEpochSeconds =
                    rangeEnd.toDateTime(END_DAY, TimeUtils.EUROPE_MOSCOW_ZONE).getMillis() / 1000L;

            final YtOperationId opId = cypress.mapReduce(
                    YtUtils.newPrepareTablesForImportBuilder()
                            .addInputTable(importData.getSourceTable())
                            .setOutputTables(shardTables)
                            .setMapperMemoryLimit(TOP_MAPPER_MEMORY_LIMIT)
                            .setReducerMemoryLimit(TOP_REDUCER_MEMORY_LIMIT)
                            .setBinary(mrBinary)
                            .setTask("SEARCH_QUERY_STATS")
                            .addMapperArg("--date-from", String.valueOf(fromEpochSeconds))
                            .addMapperArg("--date-to", String.valueOf(toEpochSeconds))
                            .addMapperArg("--query-text", "false")
                            .buildV2());

            if (cypress.waitFor(opId)) {
                return true;
            }
            throw new YtException("Prepare favorite search queries failed. See " + opId);
        });
        return importData.withPreparedTables(shardTables);
    }

    /**
     * Starts an online YT -> ClickHouse import of the prepared favorite tables: the i-th
     * prepared table is loaded into shard i, into the temp-prefixed favorite-values table
     * named after the load's date range.
     *
     * @param importData load descriptor supplying the date range and the prepared tables
     * @return the result of {@code importData.withImportTaskIds} for the new task id
     */
    public YtClickhouseDataLoad importFav(YtClickhouseDataLoad importData) {
        final String fromSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom());
        final String toSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        final int shardCount = clickhouseServer.getShardsCount();
        final UUID importId = UUIDs.timeBased();
        log.info("Fav import " + importId);

        final CHTable favoriteTable = getFavoriteValuesTable();
        final List<YtClickhouseTableRelation> relations = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            final YtClickhouseTableRelation relation = new YtClickhouseTableRelation(
                    importData.getPreparedTables().get(shardNo),
                    shardNo,
                    favoriteTable.replicatedMergeTreeTableName(
                            -1, SearchQueriesConstants.TEMP_PREFIX, fromSuffix, toSuffix),
                    favoriteTable.createMergeTreeSpec(-1));
            relations.add(relation);
        }

        ytClickhouseImportManager.startImport(new YtClickhouseImportCommand(
                importId,
                relations,
                DB,
                favoriteTable.importSpec(),
                ImportPriority.ONLINE));
        return importData.withImportTaskIds(importId);
    }


    /**
     * Replication step for favorite queries. No replication command is issued here; the
     * method only returns {@code importData.withNextState()}.
     *
     * @param importData load descriptor
     * @return the result of {@code importData.withNextState()}
     */
    public YtClickhouseDataLoad replicateFav(YtClickhouseDataLoad importData) {
        return importData.withNextState();
    }

    /**
     * Hands the temp-prefixed favorite-values table of the load's date range to
     * {@code mergeTables}, targeting the {@code favorite_values} table.
     *
     * @param importData load descriptor supplying dateFrom/dateTo
     * @return the same {@code importData} instance
     * @throws Exception whatever {@code mergeTables} propagates
     */
    public YtClickhouseDataLoad renameFav(YtClickhouseDataLoad importData) throws Exception {
        final String fromSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom());
        final String toSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        final String temporaryName = getFavoriteValuesTable().replicatedMergeTreeTableName(
                -1, SearchQueriesConstants.TEMP_PREFIX, fromSuffix, toSuffix);

        mergeTables(
                getFavoriteValuesTable().getDatabase(),
                FAVORITE_FULL_TABLE_NAME,
                temporaryName,
                importData,
                false);
        return importData;
    }

    /**
     * Builds the YT paths of the favorite working tables under {@code workdir/favorite},
     * one per shard, named {@code <dateFrom>_<dateTo>_shard-<shard>}.
     *
     * @param dateFrom first day of the range, used in the table name
     * @param dateTo   last day of the range, used in the table name
     * @return one path per shard, in shard order
     */
    protected List<YtPath> favTables(LocalDate dateFrom, LocalDate dateTo) {
        final YtPath favoriteDir = YtPath.path(workdir, "favorite");
        final int shardCount = clickhouseServer.getShardsCount();
        final List<YtPath> paths = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            final String leaf = dateFrom.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                    + "_"
                    + dateTo.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                    + "_shard-" + shardNo;
            paths.add(YtPath.path(favoriteDir, leaf));
        }
        return paths;
    }

    // Groups
    /**
     * Recreates the per-shard YT working tables for group statistics and fills them by
     * running the {@code SEARCH_QUERY_GROUPS} map-reduce task over the load's source table,
     * inside one YT transaction rooted at {@code workdir}. Only {@code dateFrom} is used.
     *
     * @param importData load descriptor supplying dateFrom and the source table
     * @return the result of {@code importData.withPreparedTables} for the prepared tables
     * @throws WebmasterException declared by the signature; NOTE(review): presumably raised
     *                            by the YT layer on failure — confirm
     */
    public YtClickhouseDataLoad prepareGroups(YtClickhouseDataLoad importData) throws WebmasterException {
        final List<YtPath> shardTables = groupsTables(importData.getDateFrom());

        ytService.inTransaction(workdir).execute(cypress -> {
            YtUtils.recreateTables(cypress, shardTables, new YtNodeAttributes().setCompressionCodec("none"));

            final YtOperationId opId = cypress.mapReduce(
                    YtUtils.newPrepareTablesForImportBuilder()
                            .addInputTable(importData.getSourceTable())
                            .setOutputTables(shardTables)
                            .setBinary(mrBinary)
                            .setTask("SEARCH_QUERY_GROUPS")
                            .addMapperArg("--date", importData.getDateFrom().toString(CH_INSERTION_DATE_FORMATTER))
                            .buildV2());

            if (cypress.waitFor(opId)) {
                return true;
            }
            throw new YtException("Prepare group search queries failed. See " + opId);
        });
        return importData.withPreparedTables(shardTables);
    }

    /**
     * Starts an online YT -> ClickHouse import of the prepared group tables: the i-th
     * prepared table is loaded into shard i, into the temp-prefixed groups table named
     * after the load's {@code dateFrom}.
     *
     * @param importData load descriptor supplying dateFrom and the prepared tables
     * @return the result of {@code importData.withImportTaskIds} for the new task id
     */
    public YtClickhouseDataLoad importGroups(YtClickhouseDataLoad importData) {
        final int shardCount = clickhouseServer.getShardsCount();
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom());
        final UUID importId = UUIDs.timeBased();
        log.info("Groups import " + importId);

        final CHTable groupsTable = getGroupsTable();
        final List<YtClickhouseTableRelation> relations = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            final YtClickhouseTableRelation relation = new YtClickhouseTableRelation(
                    importData.getPreparedTables().get(shardNo),
                    shardNo,
                    groupsTable.replicatedMergeTreeTableName(-1, SearchQueriesConstants.TEMP_PREFIX, dateSuffix),
                    groupsTable.createMergeTreeSpec(-1));
            relations.add(relation);
        }

        ytClickhouseImportManager.startImport(new YtClickhouseImportCommand(
                importId,
                relations,
                DB,
                groupsTable.importSpec(),
                ImportPriority.ONLINE));
        return importData.withImportTaskIds(importId);
    }

    /**
     * Replication step for group statistics. No replication command is issued here; the
     * method only returns {@code importData.withNextState()}.
     *
     * @param importData load descriptor
     * @return the result of {@code importData.withNextState()}
     */
    public YtClickhouseDataLoad replicateGroups(YtClickhouseDataLoad importData) {
        return importData.withNextState();
    }

    /**
     * Hands the temp-prefixed groups table of the load's {@code dateFrom} to
     * {@code mergeTables}, targeting the {@code group_statistics} table.
     *
     * @param importData load descriptor supplying dateFrom
     * @return the same {@code importData} instance
     * @throws Exception whatever {@code mergeTables} propagates
     */
    public YtClickhouseDataLoad renameGroup(YtClickhouseDataLoad importData) throws Exception {
        String tempTableName = getGroupsTable().replicatedMergeTreeTableName(-1, SearchQueriesConstants.TEMP_PREFIX,
                SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateFrom()));
        // The database must come from the groups table being merged; it was previously read from the
        // favorite-values table (copy-paste from renameFav), which is only right while both share a database.
        // NOTE(review): the trailing `true` presumably asks mergeTables to also update host regions
        // (per the original inline note) — mergeTables is defined outside this view, confirm there.
        mergeTables(getGroupsTable().getDatabase(), GROUP_FULL_TABLE_NAME, tempTableName, importData, true);
        return importData;
    }

    /**
     * Builds the YT paths of the group working tables under {@code workdir/groups},
     * one per shard, named {@code <date>_shard-<shard>}.
     *
     * @param date day used in the table name
     * @return one path per shard, in shard order
     */
    protected List<YtPath> groupsTables(LocalDate date) {
        final YtPath groupsDir = YtPath.path(workdir, "groups");
        final int shardCount = clickhouseServer.getShardsCount();
        final List<YtPath> paths = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            final String leaf =
                    date.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER) + "_shard-" + shardNo;
            paths.add(YtPath.path(groupsDir, leaf));
        }
        return paths;
    }

    /**
     * Recreates the YT working tables for top queries (texts and values) and fills them by
     * running the {@code SEARCH_QUERY_STATS} map-reduce task with query texts enabled,
     * inside one YT transaction rooted at {@code workdir}. Table names are derived from
     * the load's {@code dateTo}.
     *
     * @param importData load descriptor supplying dateTo and the source table
     * @return the result of {@code importData.withPreparedTables} for all prepared tables
     * @throws WebmasterException declared by the signature; NOTE(review): presumably raised
     *                            by the YT layer on failure — confirm
     */
    public YtClickhouseDataLoad prepareTopQueries(YtClickhouseDataLoad importData) throws WebmasterException {
        final QueryTables topTables = topQueriesTables(importData.getDateTo());

        ytService.inTransaction(workdir).execute(cypress -> {
            YtUtils.recreateTables(
                    cypress,
                    topTables.getAllTables(),
                    new YtNodeAttributes().setCompressionCodec("none"));

            final YtOperationId opId = cypress.mapReduce(
                    YtUtils.newPrepareTablesForImportBuilder()
                            .addInputTable(importData.getSourceTable())
                            .setOutputTables(topTables.getAllTables())
                            .setMapperTableCount(topTables.getValueTables().size())
                            .setTextsTableCount(topTables.getQueryTables().size())
                            .setMapperMemoryLimit(TOP_MAPPER_MEMORY_LIMIT)
                            .setReducerMemoryLimit(TOP_REDUCER_MEMORY_LIMIT)
                            .setBinary(mrBinary)
                            .setTask("SEARCH_QUERY_STATS")
                            .addMapperArg("--query-text", "true")
                            .buildV2());

            if (cypress.waitFor(opId)) {
                return true;
            }
            throw new YtException("Prepare top search queries failed. See " + opId);
        });
        return importData.withPreparedTables(topTables.getAllTables());
    }

    /**
     * Starts two offline YT -> ClickHouse imports for top queries, one for the texts tables
     * and one for the values tables. Working-table paths from {@link #topQueriesTables} are
     * paired, in shard-then-part order, with the replicated merge-tree tables for the
     * load's {@code dateTo}. The texts import is started before the values import.
     *
     * @param importData load descriptor supplying dateTo
     * @return the result of {@code importData.withImportTaskIds(valuesId, textsId)}
     */
    public YtClickhouseDataLoad importTopQueries(YtClickhouseDataLoad importData) {
        final LocalDate day = importData.getDateTo();
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(day);
        final int shardCount = clickhouseServer.getShardsCount();
        final QueryTables topTables = topQueriesTables(day);
        final UUID textsImportId = UUIDs.timeBased();
        final UUID valuesImportId = UUIDs.timeBased();
        log.info("Queries import " + textsImportId + ", values import " + valuesImportId);

        final CHTable valuesTable = getTopQueriesValuesTable();
        final CHTable textsTable = getTopQueriesTextsTable();

        final List<YtClickhouseTableRelation> valueRelations = new ArrayList<>();
        int valueCursor = 0;
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < valuesTable.getParts(); partNo++) {
                valueRelations.add(new YtClickhouseTableRelation(
                        topTables.getValueTables().get(valueCursor++),
                        shardNo,
                        valuesTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        valuesTable.createReplicatedMergeTreeSpec(partNo, dateSuffix)));
            }
        }

        final List<YtClickhouseTableRelation> textRelations = new ArrayList<>();
        int textCursor = 0;
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < textsTable.getParts(); partNo++) {
                textRelations.add(new YtClickhouseTableRelation(
                        topTables.getQueryTables().get(textCursor++),
                        shardNo,
                        textsTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        textsTable.createReplicatedMergeTreeSpec(partNo, dateSuffix)));
            }
        }

        final YtClickhouseImportCommand valuesImport = new YtClickhouseImportCommand(
                valuesImportId, valueRelations, DB, valuesTable.importSpec(), ImportPriority.OFFLINE);
        final YtClickhouseImportCommand textsImport = new YtClickhouseImportCommand(
                textsImportId, textRelations, DB, textsTable.importSpec(), ImportPriority.OFFLINE);

        ytClickhouseImportManager.startImport(textsImport);
        ytClickhouseImportManager.startImport(valuesImport);
        return importData.withImportTaskIds(valuesImportId, textsImportId);
    }

    /**
     * Replicates the top-queries tables of the load's {@code dateTo}. It first runs the
     * values table's merge statement on all ClickHouse hosts, then enqueues two offline
     * replication tasks: texts tables first, values tables second.
     *
     * @param importData load descriptor supplying dateTo
     * @return the result of {@code importData.withReplicationTaskIds(textsId, valuesId)}
     */
    public YtClickhouseDataLoad replicateTopQueries(YtClickhouseDataLoad importData) {
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        final int shardCount = clickhouseServer.getShardsCount();
        final UUID textsReplicationId = UUIDs.timeBased();
        final UUID valuesReplicationId = UUIDs.timeBased();
        log.info("Replication queries id " + textsReplicationId + " values id " + valuesReplicationId);

        final CHTable textsTable = getTopQueriesTextsTable();
        final CHTable valuesTable = getTopQueriesValuesTable();
        final List<ClickhouseReplicationCommand.TableInfo> textInfos = new ArrayList<>();
        final List<ClickhouseReplicationCommand.TableInfo> valueInfos = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < textsTable.getParts(); partNo++) {
                textInfos.add(new ClickhouseReplicationCommand.TableInfo(
                        textsTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        textsTable.createReplicatedMergeTreeSpec(partNo, dateSuffix),
                        shardNo));
            }
            for (int partNo = 0; partNo < valuesTable.getParts(); partNo++) {
                valueInfos.add(new ClickhouseReplicationCommand.TableInfo(
                        valuesTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        valuesTable.createReplicatedMergeTreeSpec(partNo, dateSuffix),
                        shardNo));
            }
        }

        final ClickhouseReplicationCommand textsReplication = new ClickhouseReplicationCommand(
                textsReplicationId, DB, ClickhouseReplicationPriority.OFFLINE, textInfos);
        final ClickhouseReplicationCommand valuesReplication = new ClickhouseReplicationCommand(
                valuesReplicationId, DB, ClickhouseReplicationPriority.OFFLINE, valueInfos);

        // Create the merge and distributed tables right away (before the replication is enqueued).
        final ClickhouseQueryContext.Builder queryContext = ClickhouseQueryContext.useDefaults();
        clickhouseServer.executeOnAllHosts(queryContext, valuesTable.createMerge(dateSuffix));

        clickhouseReplicationManager.enqueueReplication(textsReplication);
        clickhouseReplicationManager.enqueueReplication(valuesReplication);

        return importData.withReplicationTaskIds(textsReplicationId, valuesReplicationId);
    }

    /**
     * Publishes the top-queries texts and values tables of the load's {@code dateTo} by
     * writing their table info, already in state {@code ON_LINE}, to the tables repository
     * (texts first, then values).
     *
     * @param importData load descriptor supplying dateTo
     * @return the same {@code importData} instance, unchanged
     */
    public YtClickhouseDataLoad renameTop(YtClickhouseDataLoad importData) {
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());

        // Register the new tables in Cassandra (online right away).
        clickhouseTablesCDao.update(
                getTopQueriesTextsTable()
                        .toClickhouseTableInfo(TableType.TOP_3000__QUERIES, clickhouseServer.getShardsCount(), dateSuffix)
                        .withState(TableState.ON_LINE));
        clickhouseTablesCDao.update(
                getTopQueriesValuesTable()
                        .toClickhouseTableInfo(TableType.TOP_3000__VALUES, clickhouseServer.getShardsCount(), dateSuffix)
                        .withState(TableState.ON_LINE));
        return importData;
    }

    /**
     * Builds the YT paths of the top-queries working tables under {@code workdir/top}.
     * Texts tables are named {@code queries-<date>-shard<shard>-part<part>} and values
     * tables {@code values-<date>-shard<shard>-part<part>}; each list is ordered by shard
     * and then by part.
     *
     * @param date day used in the table names
     * @return a {@code QueryTables} holding the texts paths and the values paths
     */
    protected QueryTables topQueriesTables(LocalDate date) {
        final int shardCount = clickhouseServer.getShardsCount();
        final YtPath topDir = YtPath.path(workdir, "top");
        final List<YtPath> textPaths = new ArrayList<>();
        final List<YtPath> valuePaths = new ArrayList<>();

        // One pass over the shards fills both lists; each list keeps its shard-then-part order.
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < getTopQueriesTextsTable().getParts(); partNo++) {
                final String leaf = "queries-" + date.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                        + "-shard" + shardNo + "-part" + partNo;
                textPaths.add(YtPath.path(topDir, leaf));
            }
            for (int partNo = 0; partNo < getTopQueriesValuesTable().getParts(); partNo++) {
                final String leaf = "values-" + date.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                        + "-shard" + shardNo + "-part" + partNo;
                valuePaths.add(YtPath.path(topDir, leaf));
            }
        }
        return new QueryTables(textPaths, valuePaths);
    }

    /**
     * Recreates the YT working tables for top urls (texts and values) and fills them by
     * running the {@code SEARCH_QUERIES_TOP_URLS} map-reduce task with query texts enabled,
     * inside one YT transaction rooted at {@code workdir}. Table names are derived from
     * the load's {@code dateTo}.
     *
     * @param importData load descriptor supplying dateTo and the source table
     * @return the result of {@code importData.withPreparedTables} for all prepared tables
     * @throws WebmasterException declared by the signature; NOTE(review): presumably raised
     *                            by the YT layer on failure — confirm
     */
    public YtClickhouseDataLoad prepareTopUrls(YtClickhouseDataLoad importData) throws WebmasterException {
        final QueryTables urlTables = topUrlsTables(importData.getDateTo());

        ytService.inTransaction(workdir).execute(cypress -> {
            YtUtils.recreateTables(
                    cypress,
                    urlTables.getAllTables(),
                    new YtNodeAttributes().setCompressionCodec("none"));

            final YtOperationId opId = cypress.mapReduce(
                    YtUtils.newPrepareTablesForImportBuilder()
                            .addInputTable(importData.getSourceTable())
                            .setOutputTables(urlTables.getAllTables())
                            .setMapperTableCount(urlTables.getValueTables().size())
                            .setTextsTableCount(urlTables.getQueryTables().size())
                            .setMapperMemoryLimit(TOP_MAPPER_MEMORY_LIMIT)
                            .setReducerMemoryLimit(TOP_REDUCER_MEMORY_LIMIT)
                            .setBinary(mrBinary)
                            .setTask("SEARCH_QUERIES_TOP_URLS")
                            .addMapperArg("--query-text", "true")
                            .buildV2());

            if (cypress.waitFor(opId)) {
                return true;
            }
            throw new YtException("Prepare top urls queries failed. See " + opId);
        });
        return importData.withPreparedTables(urlTables.getAllTables());
    }

    /**
     * Starts two offline YT -> ClickHouse imports for top urls, one for the texts tables
     * and one for the values tables. Working-table paths from {@code topUrlsTables} are
     * paired, in shard-then-part order, with the replicated merge-tree tables for the
     * load's {@code dateTo}. The texts import is started before the values import.
     *
     * @param importData load descriptor supplying dateTo
     * @return the result of {@code importData.withImportTaskIds(valuesId, textsId)}
     */
    public YtClickhouseDataLoad importTopUrls(YtClickhouseDataLoad importData) {
        final LocalDate day = importData.getDateTo();
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(day);
        final int shardCount = clickhouseServer.getShardsCount();
        final QueryTables urlTables = topUrlsTables(day);
        final UUID textsImportId = UUIDs.timeBased();
        final UUID valuesImportId = UUIDs.timeBased();
        log.info("Queries top_urls import " + textsImportId + ", values import " + valuesImportId);

        final CHTable valuesTable = getTopUrlsValuesTable();
        final CHTable textsTable = getTopUrlsTextsTable();

        final List<YtClickhouseTableRelation> valueRelations = new ArrayList<>();
        int valueCursor = 0;
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < valuesTable.getParts(); partNo++) {
                valueRelations.add(new YtClickhouseTableRelation(
                        urlTables.getValueTables().get(valueCursor++),
                        shardNo,
                        valuesTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        valuesTable.createReplicatedMergeTreeSpec(partNo, dateSuffix)));
            }
        }

        final List<YtClickhouseTableRelation> textRelations = new ArrayList<>();
        int textCursor = 0;
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < textsTable.getParts(); partNo++) {
                textRelations.add(new YtClickhouseTableRelation(
                        urlTables.getQueryTables().get(textCursor++),
                        shardNo,
                        textsTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        textsTable.createReplicatedMergeTreeSpec(partNo, dateSuffix)));
            }
        }

        final YtClickhouseImportCommand valuesImport = new YtClickhouseImportCommand(
                valuesImportId, valueRelations, DB, valuesTable.importSpec(), ImportPriority.OFFLINE);
        final YtClickhouseImportCommand textsImport = new YtClickhouseImportCommand(
                textsImportId, textRelations, DB, textsTable.importSpec(), ImportPriority.OFFLINE);

        ytClickhouseImportManager.startImport(textsImport);
        ytClickhouseImportManager.startImport(valuesImport);
        return importData.withImportTaskIds(valuesImportId, textsImportId);
    }

    /**
     * Replicates the top-urls tables of the load's {@code dateTo}. It enqueues two offline
     * replication tasks (texts tables first, values tables second) and then runs the
     * values table's merge statement on all ClickHouse hosts.
     *
     * @param importData load descriptor supplying dateTo
     * @return the result of {@code importData.withReplicationTaskIds(textsId, valuesId)}
     */
    public YtClickhouseDataLoad replicateTopUrls(YtClickhouseDataLoad importData) {
        final String dateSuffix = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        final int shardCount = clickhouseServer.getShardsCount();
        final UUID textsReplicationId = UUIDs.timeBased();
        final UUID valuesReplicationId = UUIDs.timeBased();
        log.info("Replication queries top_urls id " + textsReplicationId + " values id " + valuesReplicationId);

        final CHTable textsTable = getTopUrlsTextsTable();
        final CHTable valuesTable = getTopUrlsValuesTable();
        final List<ClickhouseReplicationCommand.TableInfo> textInfos = new ArrayList<>();
        final List<ClickhouseReplicationCommand.TableInfo> valueInfos = new ArrayList<>();
        for (int shardNo = 0; shardNo < shardCount; shardNo++) {
            for (int partNo = 0; partNo < textsTable.getParts(); partNo++) {
                textInfos.add(new ClickhouseReplicationCommand.TableInfo(
                        textsTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        textsTable.createReplicatedMergeTreeSpec(partNo, dateSuffix),
                        shardNo));
            }
            for (int partNo = 0; partNo < valuesTable.getParts(); partNo++) {
                valueInfos.add(new ClickhouseReplicationCommand.TableInfo(
                        valuesTable.replicatedMergeTreeTableName(partNo, dateSuffix),
                        valuesTable.createReplicatedMergeTreeSpec(partNo, dateSuffix),
                        shardNo));
            }
        }

        final ClickhouseReplicationCommand textsReplication = new ClickhouseReplicationCommand(
                textsReplicationId, DB, ClickhouseReplicationPriority.OFFLINE, textInfos);
        final ClickhouseReplicationCommand valuesReplication = new ClickhouseReplicationCommand(
                valuesReplicationId, DB, ClickhouseReplicationPriority.OFFLINE, valueInfos);
        clickhouseReplicationManager.enqueueReplication(textsReplication);
        clickhouseReplicationManager.enqueueReplication(valuesReplication);

        // Create the merge and distributed tables right away.
        final ClickhouseQueryContext.Builder queryContext = ClickhouseQueryContext.useDefaults();
        clickhouseServer.executeOnAllHosts(queryContext, valuesTable.createMerge(dateSuffix));

        return importData.withReplicationTaskIds(textsReplicationId, valuesReplicationId);
    }

    /**
     * Registers the dated top-urls texts and values tables via {@code clickhouseTablesCDao},
     * both with state {@link TableState#ON_LINE}.
     *
     * @param importData load state; its {@code dateTo} is formatted into the table-name date
     * @return the same {@code importData} instance that was passed in
     */
    public YtClickhouseDataLoad renameTopUrls(YtClickhouseDataLoad importData) {
        final LocalDate loadedDate = importData.getDateTo();
        final String tableDate = SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER.print(loadedDate);
        final int shardCount = clickhouseServer.getShardsCount();

        // Record the new tables (per the original note: in Cassandra), immediately as online.
        clickhouseTablesCDao.update(
                getTopUrlsTextsTable()
                        .toClickhouseTableInfo(TableType.TOP_URLS_TEXTS, shardCount, tableDate)
                        .withState(TableState.ON_LINE));
        clickhouseTablesCDao.update(
                getTopUrlsValuesTable()
                        .toClickhouseTableInfo(TableType.TOP_URLS_VALUES, shardCount, tableDate)
                        .withState(TableState.ON_LINE));

        return importData;
    }

    /**
     * Builds the YT paths of the per-shard, per-part top-urls tables for the given date.
     * <p>
     * Paths live under {@code <workdir>/top_urls} and are named
     * {@code texts-<date>-shard<S>-part<P>} and {@code values-<date>-shard<S>-part<P>}, where
     * {@code <date>} is rendered with {@link SearchQueriesConstants#IN_TABLE_NAME_DATE_FORMATTER}.
     * Each list is ordered by shard and then by part.
     *
     * @param date date rendered into the table names
     * @return holder with the texts paths as query tables and the values paths as value tables
     */
    protected QueryTables topUrlsTables(LocalDate date) {
        final YtPath topUrlsDir = YtPath.path(workdir, "top_urls");
        final int shardCount = clickhouseServer.getShardsCount();
        final List<YtPath> textsPaths = new ArrayList<>();
        final List<YtPath> valuesPaths = new ArrayList<>();

        for (int shard = 0; shard < shardCount; shard++) {
            for (int part = 0; part < getTopUrlsTextsTable().getParts(); part++) {
                String tableName = "texts-" + date.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                        + "-shard" + shard + "-part" + part;
                textsPaths.add(YtPath.path(topUrlsDir, tableName));
            }
            for (int part = 0; part < getTopUrlsValuesTable().getParts(); part++) {
                String tableName = "values-" + date.toString(SearchQueriesConstants.IN_TABLE_NAME_DATE_FORMATTER)
                        + "-shard" + shard + "-part" + part;
                valuesPaths.add(YtPath.path(topUrlsDir, tableName));
            }
        }

        return new QueryTables(textsPaths, valuesPaths);
    }

    /**
     * Moves freshly loaded data from a temporary table into the full table, host by host.
     * <p>
     * For every host that has {@code tempTableName} in {@code database}, in this order:
     * rows of {@code fullTableName} with {@code date} between the load's {@code dateFrom} and
     * {@code dateTo} are deleted; optionally the host-regions script is run; one partition per
     * month from the month of {@code dateFrom} through the month of {@code dateTo} is attached
     * from the temporary table; the temporary table is dropped. Hosts without the temporary table
     * are skipped.
     *
     * @param database          database holding both the full and the temporary table
     * @param fullTableName     table receiving the data
     * @param tempTableName     table holding the freshly loaded data
     * @param importData        load state supplying {@code dateFrom} and {@code dateTo}
     * @param updateHostRegions whether to run {@code UPDATE_HOST_REGIONS_QUERY} before attaching
     * @throws Exception declared by the original signature; propagated from the calls made here
     */
    private void mergeTables(String database, String fullTableName, String tempTableName, YtClickhouseDataLoad importData, boolean updateHostRegions)
            throws Exception {
        for (ClickhouseHost host : clickhouseServer.getHosts()) {
            // Nothing to merge on hosts that do not have the temporary table.
            if (clickhouseSystemTablesCHDao.getTables(host, database, Collections.singleton(tempTableName)).isEmpty()) {
                continue;
            }

            ClickhouseQueryContext.Builder hostContext = ClickhouseQueryContext.useDefaults()
                    .setHost(host)
                    .setTimeout(Duration.standardMinutes(10L));
            log.info("Attach partitions on shard {} DC {}", host.getShard(), host.getDcName());

            // Remove the previously stored rows for the date range being reloaded.
            String deleteQuery = String.format("ALTER TABLE %s.%s DELETE WHERE date between '%s' and '%s'",
                    database, fullTableName, importData.getDateFrom(), importData.getDateTo());
            clickhouseServer.execute(hostContext, deleteQuery);

            // Refresh host regions from the temporary table before it is dropped below.
            if (updateHostRegions) {
                // NOTE(review): the script is rendered with DB rather than the `database` argument;
                // confirm the two always coincide for callers that pass updateHostRegions = true.
                StrSubstitutor placeholders = new StrSubstitutor(Map.of(
                        "database", DB,
                        "host_regions", HOST_REGIONS_TABLE_NAME,
                        "tmp_host_regions", "tmp_" + HOST_REGIONS_TABLE_NAME,
                        "tmp_groups", tempTableName,
                        "all_queries_group_id", SpecialGroup.ALL_QUERIES.getGroupId().toString()
                ));
                // Poor man's multi-query: statements are separated by a blank line and run one by one.
                String[] statements = placeholders.replace(UPDATE_HOST_REGIONS_QUERY).split("\n\n");
                for (String statement : statements) {
                    clickhouseServer.execute(hostContext, statement);
                }
            }

            // Attach the new data: one ATTACH PARTITION clause per month, comma-separated.
            LocalDate firstMonth = importData.getDateFrom().withDayOfMonth(1);
            LocalDate lastMonth = importData.getDateTo().withDayOfMonth(1);
            List<String> attachClauses = new ArrayList<>();
            for (LocalDate month = firstMonth; !month.isAfter(lastMonth); month = month.plusMonths(1)) {
                attachClauses.add(" ATTACH PARTITION '" + month.toString(PARTITION_MONTH_FORMAT) + "' FROM "
                        + database + "." + tempTableName);
            }
            String attachQuery = "ALTER TABLE " + database + "." + fullTableName + String.join(",", attachClauses);
            clickhouseServer.execute(hostContext, attachQuery);

            // The temporary table is no longer needed.
            clickhouseServer.execute(hostContext, String.format("DROP TABLE %s.%s", database, tempTableName));
        }
    }

    /**
     * Template of the multi-statement script that refreshes the host-regions table.
     * <p>
     * {@code mergeTables} fills the {@code ${...}} placeholders through a {@code StrSubstitutor},
     * splits the result on {@code "\n\n"} and executes each piece as a separate query, so the
     * statements below must stay separated by exactly that delimiter.
     */
    private static final String UPDATE_HOST_REGIONS_QUERY = "" +
            // 1. Drop the temporary host-regions table if it already exists.
            "DROP TABLE IF EXISTS ${database}.${tmp_host_regions} NO DELAY;\n\n" +

            // 2. Build the temporary table: per host_id, the sorted distinct union of the region ids
            //    already in host_regions and those found in tmp_groups for the all-queries group.
            "CREATE TABLE ${database}.${tmp_host_regions} ENGINE = MergeTree() ORDER BY host_id " +
            "AS SELECT host_id, arraySort(arrayDistinct(arrayConcat(prev.region_ids, curr.region_ids))) as region_ids FROM " +
            "${database}.${host_regions} as prev FULL OUTER JOIN (SELECT host_id, arraySort(groupUniqArray(region_id)) as region_ids FROM " +
            "${database}.${tmp_groups}  WHERE group_id = '${all_queries_group_id}' GROUP BY host_id) as curr USING host_id;\n\n" +

            // 3. Replace the tuple() partition of host_regions with the temporary table's data.
            "ALTER TABLE ${database}.${host_regions} REPLACE PARTITION tuple() FROM ${database}.${tmp_host_regions};\n\n" +

            // 4. Drop the temporary table again.
            "DROP TABLE IF EXISTS ${database}.${tmp_host_regions} NO DELAY;";

    /**
     * Holder for two lists of YT table paths plus their concatenation.
     * Getters are generated by Lombok's {@code @Getter}.
     */
    @Getter
    protected static class QueryTables {
        /** The list passed as the first constructor argument, stored as-is. */
        private final List<YtPath> queryTables;
        /** The list passed as the second constructor argument, stored as-is. */
        private final List<YtPath> valueTables;
        /** A new list containing {@code queryTables} followed by {@code valueTables}. */
        private final List<YtPath> allTables;

        /**
         * @param queryTables paths stored as the query tables
         * @param valueTables paths stored as the value tables
         */
        public QueryTables(List<YtPath> queryTables, List<YtPath> valueTables) {
            List<YtPath> combined = new ArrayList<>(queryTables);
            combined.addAll(valueTables);

            this.queryTables = queryTables;
            this.valueTables = valueTables;
            this.allTables = combined;
        }
    }

}
