package ru.yandex.webmaster3.worker.sitemap;

import java.util.Map;
import java.util.stream.Collectors;

import com.google.common.collect.ImmutableMap;
import lombok.Getter;
import lombok.Setter;

import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.clickhouse.replication.data.ClickhouseReplicationPriority;
import ru.yandex.webmaster3.storage.sitemap.dao.SitemapsCHDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHField;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.yql.YqlFunctions;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.turbo.AbstractYqlPrepareImportTask;

import static ru.yandex.webmaster3.storage.sitemap.dao.SitemapsCHDao.F.*;

/**
 * Periodic task ({@link PeriodicTaskType#IMPORT_SITEMAPS}) that builds the YQL query which converts the
 * source sitemaps table ({@code tablePath}) into the intermediate table used to load
 * {@link SitemapsCHDao#TABLE}.
 *
 * <p>Every source row is serialized into one tab-separated, newline-terminated line whose columns follow the
 * field order of {@link SitemapsCHDao#TABLE}. The lines are then grouped by {@code (ShardId, RowId)},
 * concatenated and gzip-compressed.
 *
 * Created by Oleg Bazdyrev on 11/06/2021.
 */
public class ImportSitemapsTask extends AbstractYqlPrepareImportTask {

    /**
     * YQL expression producing the textual value of each ClickHouse field, keyed by field name.
     * In the generated query the source table is aliased as {@code s} and the {@code $childrenCount}
     * subquery as {@code c}.
     */
    private static final Map<String, String> SOURCE_EXPRESSIONS = new ImmutableMap.Builder<String, String>()
            .put(HOST_ID, YqlFunctions.url2HostId("s.Host").getApplicationCode())
            .put(ID, "$nameUUIDFromBytes(Digest::Md5Raw(s.Url))")
            // A sitemap whose referrer equals its host gets a fixed parent id instead of a derived one.
            .put(PARENT_ID, "if (s.Host == Referrer, '00000000-0000-4000-a000-000000000000', $nameUUIDFromBytes(Digest::Md5Raw(Referrer)))")
            .put(URL, "$escape(s.Url)")
            .put(REFERRER, "$escape(Referrer)")
            .put(HTTP_CODE, "cast(HttpCode as String)")
            .put(LAST_ACCESS, "cast(LastAccess as String)")
            .put(LAST_CHANGE, "cast(LastChange as String)")
            .put(LAST_URLS_CHANGE, "cast(LastUrlsChange as String)")
            .put(URL_COUNT, "cast(UrlCount as String)")
            .put(ERROR_COUNT, "cast(ErrorCount as String)")
            .put(SOURCE_ID, "$listToJson(SourceId)")
            .put(REDIR_TARGET, "$escape(RedirTarget)")
            .put(IS_SITEMAP_PARSED, "if(IsSitemapParsed, '1', '0')")
            .put(IS_SITEMAP_INDEX, "if(IsSitemapIndex, '1', '0')")
            .put(URLS, "$escape($listToJson(Urls))")
            // Errors whose text fails Unicode::IsUtf are dropped before serialization.
            .put(ERRORS, "$escape($listToJson(ListFilter(Errors, ($e) -> { return Unicode::IsUtf($e.Text); })))")
            .put(CHILDREN_COUNT, "cast(c.ChildrenCount as String)")
            .build();

    /**
     * Modulus applied to {@code Digest::Fnv64(s.Host || s.Url)} to compute {@code RowId}, i.e. the number of
     * distinct row buckets per shard. Must be positive; injected through the Lombok-generated setter.
     */
    @Setter
    private int rowCount;

    /**
     * Initializes the import by delegating to {@code initBySourceAttr} with the {@code TableTimestamp}
     * attribute name.
     */
    @Override
    protected YtClickhouseDataLoad init(YtClickhouseDataLoad imprt) throws Exception {
        return super.initBySourceAttr(imprt, "TableTimestamp");
    }

    /**
     * Builds the YQL query that fills {@code INTERMEDIATE_TABLE} from the source table.
     *
     * @param imprt current import state (not read by this implementation)
     * @return builder containing the complete query text
     * @throws IllegalStateException if {@code rowCount} is not positive, or if a field of
     *                               {@link SitemapsCHDao#TABLE} has no entry in {@link #SOURCE_EXPRESSIONS}
     */
    @Override
    protected YqlQueryBuilder prepareIntermediateTable(YtClickhouseDataLoad imprt) {
        // rowCount is concatenated into the query as a modulus; a zero or negative value would produce a
        // query that bucketizes rows incorrectly instead of failing visibly.
        if (rowCount <= 0) {
            throw new IllegalStateException("rowCount must be positive, but was " + rowCount);
        }
        String fields = buildDataExpression();

        return YqlQueryBuilder.newBuilder()
                .cluster(tablePath)
                .appendFDefinition(YqlFunctions.URL_2_HOST_ID)
                .appendFDefinition(YqlFunctions.ESCAPE)
                .appendText("PRAGMA yt.MaxRowWeight = '128M';\n")
                .appendText("PRAGMA yt.DefaultMemoryLimit = '6G';\n")
                // Serializes a list to a JSON string; a null list becomes '[]'.
                .appendText("$listToJson = ($l) -> { return if($l is null, '[]', cast(Yson::SerializeJson(Yson::From($l)) as String)); };\n\n")
                // Python UDF formatting 16 bytes as a UUID string: the high nibble of byte 6 is forced to 3
                // and the top two bits of byte 8 to 10. Applied to an MD5 digest, this matches what
                // java.util.UUID.nameUUIDFromBytes documents for name-based (version 3) UUIDs.
                .appendText("$script = @@\n" +
                        "def NameUUIDFromBytes(s):\n" +
                        "    return \"{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}\"\\\n" +
                        "     .format(s[0], s[1], s[2], s[3], s[4], s[5], (s[6] & 0x0f) | 0x30, s[7], \\\n" +
                        "      (s[8] & 0x3f) | 0x80, s[9], s[10], s[11], s[12], s[13], s[14], s[15])\n" +
                        "@@;\n" +
                        "$nameUUIDFromBytes = Python3::NameUUIDFromBytes(Callable<(String?)->String>, $script);\n\n")
                // Per (Host, Url): when RedirTarget is non-empty, the number of joined rows whose Url equals
                // RedirTarget; otherwise, for sitemap indexes, the number of rows whose Referrer is this Url;
                // otherwise 0.
                .appendText("$childrenCount = (\n")
                .appendText("  SELECT Host, Url, if(len(RedirTarget) > 0, count(r.Url), if(IsSitemapIndex, count(c.Url), 0)) as ChildrenCount\n")
                .appendText("  FROM ").appendTable(tablePath).appendText(" as s\n")
                .appendText("  LEFT JOIN ").appendTable(tablePath).appendText(" as r\n")
                .appendText("  ON s.Host == r.Host and r.Url == s.RedirTarget\n")
                .appendText("  LEFT JOIN ").appendTable(tablePath).appendText(" as c\n")
                .appendText("  ON s.Host == c.Host and s.Url == c.Referrer\n")
                .appendText("  GROUP BY s.Host as Host, s.Url as Url, s.RedirTarget as RedirTarget, s.IsSitemapIndex as IsSitemapIndex\n")
                .appendText(");\n\n")
                // Serialized rows are concatenated per (ShardId, RowId) bucket and gzip-compressed.
                .appendText("INSERT INTO " + INTERMEDIATE_TABLE)
                .appendText("SELECT ShardId, RowId, Compress::Gzip(String::JoinFromList(AGGREGATE_LIST(data), ''), 6) as data FROM (\n")
                .appendText("SELECT (Digest::Fnv64($url2HostId(s.Host)) % " + getShardsCount() + ") as ShardId,")
                .appendText("(Digest::Fnv64(s.Host || s.Url) % " + rowCount + ") as RowId,")
                .appendText(fields).appendText("as data ")
                .appendText(" FROM").appendTable(tablePath).appendText("as s\n")
                .appendText(" LEFT JOIN $childrenCount as c\n")
                .appendText(" ON s.Host == c.Host AND s.Url == c.Url\n")
                .appendText(") \n GROUP BY ShardId, RowId;")
                .appendText("COMMIT;\n\n");
    }

    /**
     * Builds the YQL expression for one serialized row: the per-field expressions in table field order,
     * concatenated with tab separators and terminated by a newline.
     */
    private String buildDataExpression() {
        return getTable().getFields().stream()
                .map(CHField::getName)
                .map(ImportSitemapsTask::sourceExpression)
                .collect(Collectors.joining(" || '\\t' || ", "(", " || '\\n')"));
    }

    /**
     * Looks up the YQL expression for a table field, failing fast when the mapping is missing so that the
     * literal text {@code null} never ends up inside the generated query.
     */
    private static String sourceExpression(String fieldName) {
        String expression = SOURCE_EXPRESSIONS.get(fieldName);
        if (expression == null) {
            throw new IllegalStateException("No YQL source expression defined for field '" + fieldName + "'");
        }
        return expression;
    }

    /** Returns {@link ClickhouseReplicationPriority#OFFLINE} as the replication priority of this import. */
    @Override
    protected ClickhouseReplicationPriority getReplicationPriority() {
        return ClickhouseReplicationPriority.OFFLINE;
    }

    /** Returns the target ClickHouse table, {@link SitemapsCHDao#TABLE}. */
    @Override
    protected CHTable getTable() {
        return SitemapsCHDao.TABLE;
    }

    /** Returns {@link TableType#SITEMAPS}. */
    @Override
    protected TableType getTableType() {
        return TableType.SITEMAPS;
    }

    /** Returns {@link PeriodicTaskType#IMPORT_SITEMAPS}. */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_SITEMAPS;
    }

    /** Returns {@link YtClickhouseDataLoadType#SITEMAPS}. */
    @Override
    protected YtClickhouseDataLoadType getImportType() {
        return YtClickhouseDataLoadType.SITEMAPS;
    }

    /**
     * Returns the cron-based schedule of this task.
     * NOTE(review): the expression has six fields; if the first one is seconds, the task fires at
     * minute 21, second 30 of every hour - confirm against {@code TaskSchedule.startByCron}.
     */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("30 21 * * * *");
    }
}
