package ru.yandex.webmaster3.worker.regions;

import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.host.dao.HostRegionsCHDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.yql.YqlFunctions;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.turbo.AbstractYqlPrepareImportTask;

/**
 * Periodic task that builds the YQL query filling the intermediate table for the
 * {@link TableType#HOST_REGIONS} data load backed by {@link HostRegionsCHDao#TABLE}.
 * <p>
 * How the intermediate table is consumed afterwards is decided by
 * {@link AbstractYqlPrepareImportTask} and is not visible in this class.
 * <p>
 * Created by Oleg Bazdyrev on 28/05/2021.
 */
public class ImportHostRegionsTask extends AbstractYqlPrepareImportTask {

    /** Modulus applied to the host hash (after dividing by the shards count) to obtain {@code RowId}. */
    private static final int LINES_COUNT = 256;

    /** YQL expression hashing the host id; shared by the {@code ShardId} and {@code RowId} columns. */
    private static final String HOST_HASH = "Digest::Fnv64($url2HostId(Host))";

    /**
     * YQL lambda that returns {@code $def} for a null value and otherwise the JSON
     * serialization of the value with every backslash doubled.
     */
    private static final String OBJ_TO_JSON_LAMBDA =
            "$objToJson = ($l, $def) -> { return if($l is null, $def, String::ReplaceAll(cast(Yson::SerializeJson(Yson::From($l)) as String), @@\\@@, @@\\\\@@)); };\n";

    /** Outer select: concatenates all {@code data} lines of a group and gzips them (level 6). */
    private static final String AGGREGATING_SELECT =
            "SELECT ShardId, RowId, Compress::Gzip(String::JoinFromList(AGGREGATE_LIST(data), ''), 6) as data FROM (\n";

    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_HOST_REGIONS;
    }

    /**
     * @return schedule built from the cron expression {@code "30 44 * * * *"}
     *         (NOTE(review): presumably seconds-first, i.e. 44 min 30 s of every hour —
     *         confirm against {@link TaskSchedule#startByCron})
     */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("30 44 * * * *");
    }

    @Override
    protected YtClickhouseDataLoadType getImportType() {
        return YtClickhouseDataLoadType.HOST_REGIONS;
    }

    @Override
    protected TableType getTableType() {
        return TableType.HOST_REGIONS;
    }

    @Override
    protected CHTable getTable() {
        return HostRegionsCHDao.TABLE;
    }

    /**
     * Builds the query that populates {@code INTERMEDIATE_TABLE} from the source table
     * of the given data load.
     * <p>
     * The inner select emits one tab-separated, newline-terminated line per source row:
     * host id, {@code DictionaryRegions} as JSON, {@code WebmasterRegions} as JSON
     * (a null column is written as {@code []}). Rows are then grouped by
     * {@code (ShardId, RowId)}, both derived from the Fnv64 hash of the host id, and the
     * lines of each group are concatenated and gzip-compressed into a single {@code data} value.
     *
     * @param imprt data load state; only its source table is read here
     * @return builder containing the complete query, terminated by {@code COMMIT}
     */
    @Override
    protected YqlQueryBuilder prepareIntermediateTable(YtClickhouseDataLoad imprt) {
        return YqlQueryBuilder.newBuilder()
                .cluster(tablePath)
                .appendFDefinition(YqlFunctions.URL_2_HOST_ID)
                .appendText(OBJ_TO_JSON_LAMBDA)
                .appendText("PRAGMA yt.MaxRowWeight = '128M';\n")
                .appendText("INSERT INTO " + INTERMEDIATE_TABLE)
                .appendText(AGGREGATING_SELECT)
                // ShardId: host hash modulo the shards count.
                .appendText("SELECT (" + HOST_HASH + " % " + getShardsCount() + ") as ShardId,")
                // RowId: remaining part of the host hash folded into LINES_COUNT buckets.
                .appendText("((" + HOST_HASH + " / " + getShardsCount() + ") % " + LINES_COUNT + ") as RowId,")
                .appendText("($url2HostId(Host) || '\\t' ||\n")
                .appendText(jsonColumnExpr("DictionaryRegions") + " || '\\t' ||\n")
                .appendText(jsonColumnExpr("WebmasterRegions") + " || '\\n') as data\n")
                .appendText("FROM")
                .appendTable(imprt.getSourceTable())
                .appendText(") \n GROUP BY ShardId, RowId;")
                .appendText("COMMIT;\n\n");
    }

    /** Returns the YQL call serializing {@code column} to JSON, with {@code '[]'} as the null default. */
    private static String jsonColumnExpr(String column) {
        return "$objToJson(" + column + ", '[]')";
    }
}
