package ru.yandex.webmaster3.worker.metrika;

import java.util.Optional;

import lombok.RequiredArgsConstructor;
import org.joda.time.DateTime;
import org.joda.time.LocalDate;
import org.springframework.beans.factory.annotation.Autowired;

import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.host.CommonDataState;
import ru.yandex.webmaster3.storage.metrika.dao.MetrikaCrawlSamplesCHDao;
import ru.yandex.webmaster3.storage.settings.dao.CommonDataStateYDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.util.yt.YtNode;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadState;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.turbo.AbstractYqlPrepareImportTask;

import static ru.yandex.webmaster3.storage.host.CommonDataType.METRIKA_CRAWL_SAMPLES_LAST_UPDATE;

/**
 * Scheduled task that imports Metrika crawl samples from a YT table into the
 * ClickHouse table {@link MetrikaCrawlSamplesCHDao#TABLE}.
 *
 * <p>An import is started only when the {@value #ATTR_UPDATE_TIMESTAMP} attribute of the
 * source YT node is newer than the timestamp stored in the previous import's data.
 * After the import is renamed, the processed timestamp is recorded under
 * {@code METRIKA_CRAWL_SAMPLES_LAST_UPDATE} in {@link CommonDataStateYDao}.
 *
 * <p>{@code tablePath}, {@code ytService}, {@code INTERMEDIATE_TABLE},
 * {@code IN_YQL_QUERY_DATE_FORMATTER} and {@code getShardsCount()} are not declared here;
 * presumably they are inherited from {@link AbstractYqlPrepareImportTask}.
 *
 * @author leonidrom
 */
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class ImportMetrikaCrawlSamplesTask extends AbstractYqlPrepareImportTask {
    /** Name of the YT node attribute that holds the time of the last source table update. */
    public static final String ATTR_UPDATE_TIMESTAMP = "update_timestamp";

    /** Number of row buckets the data is split into within every shard. */
    private static final int ROWS_COUNT = 128;

    /** Storage for the "last update" marker written after a successful rename. */
    private final CommonDataStateYDao commonDataStateYDao;

    /**
     * @return the ClickHouse table this task loads data into
     */
    @Override
    protected CHTable getTable() {
        return MetrikaCrawlSamplesCHDao.TABLE;
    }

    /**
     * @return the table type handled by this task
     */
    @Override
    protected TableType getTableType() {
        return TableType.METRIKA_CRAWL_SAMPLES;
    }

    /**
     * Decides whether a new import has to be started, see {@link #doInit(YtClickhouseDataLoad)}.
     *
     * @param imprt state of the latest import
     * @return the import state to continue with
     * @throws Exception if reading the source table metadata fails
     */
    @Override
    protected YtClickhouseDataLoad init(YtClickhouseDataLoad imprt) throws Exception {
        return doInit(imprt);
    }

    /**
     * Builds the YQL query that fills the intermediate table.
     *
     * <p>Every source row is rendered as one tab-separated, newline-terminated line
     * (date, normalized domain, counter id, escaped URL, escaped title, Base64 title).
     * Lines are grouped by {@code (ShardId, RowId)}, both derived from the FNV-64 digest
     * of the domain, then concatenated and gzip-compressed into a single {@code data} value.
     *
     * @param imprt import being prepared; its {@code dateTo} is written into every line
     * @return the query builder holding the complete query
     */
    @Override
    protected YqlQueryBuilder prepareIntermediateTable(YtClickhouseDataLoad imprt) {
        String dateString = IN_YQL_QUERY_DATE_FORMATTER.print(imprt.getDateTo());
        int shardCount = getShardsCount();

        return YqlQueryBuilder.newBuilder()
                .cluster(tablePath)
                .appendText("PRAGMA yt.MaxRowWeight = '128M';")
                .appendText("PRAGMA yt.ForceInferSchema;")
                .appendText("INSERT INTO " + INTERMEDIATE_TABLE)
                .appendText("SELECT ShardId, RowId, Compress::Gzip(String::JoinFromList(AGGREGATE_LIST(data), ''), 6)" +
                        " as data FROM (")
                .appendText("SELECT")
                // The same digest drives both keys: the remainder picks the shard,
                // the quotient picks the row bucket inside that shard.
                .appendText("(Digest::Fnv64(Domain) % " + shardCount + ") as ShardId,")
                .appendText("((Digest::Fnv64(Domain) / " + shardCount + ") % " + ROWS_COUNT + ") as RowId,")
                .appendText("(")
                .appendText("'" + dateString + "'")
                .appendText("|| '\\t' || Url::CutWWW(Url::HostNameToPunycode(String::ToLower(Domain))) ")
                .appendText("|| '\\t' || CAST(CounterId as String) ")
                .appendText("|| '\\t' || String::EscapeC(Url) ")
                .appendText("|| '\\t' || String::EscapeC(nvl(Title, '')) ")
                .appendText("|| '\\t' || String::Base64Encode(nvl(Title, '')) ")
                .appendText("|| '\\n') as data")
                .appendText("FROM")
                .appendTable(tablePath)
                .appendText(") GROUP BY ShardId, RowId;")
                .appendText("COMMIT;");
    }

    /**
     * Performs the parent rename step and then stores the imported source timestamp
     * (the import's data) together with the current time as the "last update" marker.
     *
     * @param imprt import being finalized
     * @return the import state returned by the parent implementation
     * @throws Exception if the parent rename step fails
     */
    @Override
    protected YtClickhouseDataLoad rename(YtClickhouseDataLoad imprt) throws Exception {
        YtClickhouseDataLoad result = super.rename(imprt);

        commonDataStateYDao.update(new CommonDataState(METRIKA_CRAWL_SAMPLES_LAST_UPDATE,
                String.valueOf(imprt.getData()), DateTime.now()));

        return result;
    }

    /**
     * @return the import type handled by this task
     */
    @Override
    protected YtClickhouseDataLoadType getImportType() {
        return YtClickhouseDataLoadType.METRIKA_CRAWL_SAMPLES;
    }

    /**
     * @return the cron schedule of the task; the expression presumably uses a leading
     *         seconds field, i.e. minute 5 of every sixth hour - confirm against TaskSchedule
     */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("0 05 */6 * * *");
    }

    /**
     * Compares the update timestamp of the source YT table with the timestamp stored in
     * the previous import and starts a new import only when the table is newer.
     *
     * <p>A missing node, missing node metadata or a missing
     * {@value #ATTR_UPDATE_TIMESTAMP} attribute is treated as "nothing new".
     *
     * @param lastImport state of the latest import; its data holds the previously
     *                   imported timestamp as a decimal string, or {@code null}
     * @return {@code lastImport} pointed at the source table with the new timestamp as data,
     *         or {@code lastImport} in state {@code DONE} when there is nothing to import
     * @throws Exception if the YT transaction fails
     */
    private YtClickhouseDataLoad doInit(YtClickhouseDataLoad lastImport) throws Exception {
        long lastImportTimestamp = lastImport.getData() == null ? 0L : Long.parseLong(lastImport.getData());

        // Check when the source table was last updated.
        return ytService.inTransaction(tablePath).query(cypressService -> {
            YtNode node = cypressService.getNode(tablePath);
            if (node != null) {
                // Null-safe lookup: dereferencing the attribute directly would fail with a
                // NullPointerException when it is absent, before the fallback to 0 applies.
                // NOTE(review): assumes get() returns null for a missing attribute - confirm.
                String updateTimestampAsText = Optional.ofNullable(node.getNodeMeta())
                        .map(meta -> meta.get(ATTR_UPDATE_TIMESTAMP))
                        .map(attr -> attr.asText())
                        .orElse(null);
                // The attribute is multiplied by 1000, presumably converting seconds to the
                // milliseconds expected by the DateTime(long) constructor below.
                long updateTimestamp = updateTimestampAsText == null ? 0L :
                        Long.parseLong(updateTimestampAsText) * 1000;
                if (updateTimestamp > lastImportTimestamp) {
                    LocalDate dateFrom = new DateTime(updateTimestamp).toLocalDate();
                    return lastImport.withData(String.valueOf(updateTimestamp))
                            .withSourceTable(tablePath, dateFrom, dateFrom);
                }
            }

            // Nothing new.
            return lastImport.withState(YtClickhouseDataLoadState.DONE);
        });
    }
}
