package ru.yandex.webmaster3.storage.links;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.datastax.driver.core.utils.UUIDs;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import lombok.Setter;
import org.joda.time.LocalDate;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.storage.clickhouse.TableType;
import ru.yandex.webmaster3.storage.clickhouse.dao.ClickhouseTablesRepository;
import ru.yandex.webmaster3.storage.clickhouse.replication.MdbClickhouseReplicationManager;
import ru.yandex.webmaster3.storage.clickhouse.replication.data.ClickhouseReplicationCommand;
import ru.yandex.webmaster3.storage.clickhouse.replication.data.ClickhouseReplicationPriority;
import ru.yandex.webmaster3.storage.util.clickhouse2.AbstractClickhouseDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHPrimitiveType;
import ru.yandex.webmaster3.storage.util.clickhouse2.CHTable;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseException;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseQueryContext;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseServer;
import ru.yandex.webmaster3.storage.util.ydb.exception.WebmasterYdbException;
import ru.yandex.webmaster3.storage.util.yt.YtException;
import ru.yandex.webmaster3.storage.util.yt.YtNodeAttributes;
import ru.yandex.webmaster3.storage.util.yt.YtOperationId;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.util.yt.YtUtils;
import ru.yandex.webmaster3.storage.ytimport.ImportPriority;
import ru.yandex.webmaster3.storage.ytimport.MdbYtClickhouseImportManager;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoad;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseImportCommand;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseTableRelation;

import static ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType.EXTERNAL_GONE_LINKS;
import static ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType.EXTERNAL_LINKS;
import static ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType.INTERNAL_LINKS;
import static ru.yandex.webmaster3.storage.ytimport.YtClickhouseDataLoadType.LINK_STATISTICS;

/**
 * Drives the three steps of loading link data from YT into ClickHouse:
 * <ol>
 *     <li>{@link #prepareLinks} runs a YT map-reduce that writes the source table into per-shard
 *     (and, where configured, per-part) tables under {@code workDir};</li>
 *     <li>{@link #importLinks} submits an import command for those tables;</li>
 *     <li>{@link #replicateLinks} enqueues replication of the imported tables.</li>
 * </ol>
 * Only the load types present in {@link #ROWS_PER_LOAD_TYPE} are accepted.
 *
 * Created by Oleg Bazdyrev on 15/05/2017.
 */
public class LinksImportService {

    private static final Logger log = LoggerFactory.getLogger(LinksImportService.class);

    /** Source table name of link statistics; the captured number is read as seconds since the epoch. */
    private static final Pattern STATISTICS_TABLE_NAME_PATTERN = Pattern.compile("report_(\\d+)");
    /** Date format used inside YT and ClickHouse table names. */
    private static final DateTimeFormatter IN_TABLE_NAME_DATE_FORMATTER = DateTimeFormat.forPattern("yyyyMMdd");
    /** Date format passed to the mapper via the {@code --date} argument. */
    private static final DateTimeFormatter CH_INSERTION_DATE_FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd");
    /** Value passed to {@code setLines} of the prepare operation; its key set also defines the supported types. */
    private static final Map<YtClickhouseDataLoadType, Integer> ROWS_PER_LOAD_TYPE = ImmutableMap.of(
            INTERNAL_LINKS, 512,
            EXTERNAL_LINKS, 512,
            EXTERNAL_GONE_LINKS, 512,
            LINK_STATISTICS, 32
    );
    /** Number of parts created per shard for every load type. */
    private static final Map<YtClickhouseDataLoadType, Integer> PARTS_PER_LOAD_TYPE = ImmutableMap.of(
            INTERNAL_LINKS, 6,
            EXTERNAL_LINKS, 6,
            EXTERNAL_GONE_LINKS, 1,
            LINK_STATISTICS, 1
    );
    /** Task name passed to {@code setTask} of the prepare operation. */
    private static final Map<YtClickhouseDataLoadType, String> MAPPER_TASK_PER_LOAD_TYPE = ImmutableMap.of(
            INTERNAL_LINKS, "LINK_SAMPLES",
            EXTERNAL_LINKS, "LINK_SAMPLES",
            EXTERNAL_GONE_LINKS, "LINK_SAMPLES",
            LINK_STATISTICS, "LINK_STATS"
    );
    // NOTE: these table constants read PARTS_PER_LOAD_TYPE, so they must stay declared after it.
    public static final CHTable INTERNAL_LINKS_TABLE = linksBuilder().name("int_%s").parts(PARTS_PER_LOAD_TYPE.get(INTERNAL_LINKS)).build();
    public static final CHTable EXTERNAL_LINKS_TABLE = linksBuilder().name("ext_%s").parts(PARTS_PER_LOAD_TYPE.get(EXTERNAL_LINKS)).build();
    public static final CHTable EXTERNAL_GONE_LINKS_TABLE = linksBuilder().name("ext_gone_%s").parts(PARTS_PER_LOAD_TYPE.get(EXTERNAL_GONE_LINKS)).build();
    /** Link statistics table; unlike the link sample tables it configures neither parts nor a part name suffix. */
    private static final CHTable LINK_STATISTICS_TABLE = CHTable.builder()
            .database(AbstractClickhouseDao.DB_WEBMASTER3_LINKS)
            .name("tmp_link_statistics_part_%s")
            .partitionBy("toYYYYMM(date)")
            .keyField("date", CHPrimitiveType.Date)
            .keyField("host_id", CHPrimitiveType.String)
            .field("data", CHPrimitiveType.String)
            .build();

    private static final Map<YtClickhouseDataLoadType, CHTable> TABLES_PER_LOAD_TYPE = ImmutableMap.of(
            INTERNAL_LINKS, INTERNAL_LINKS_TABLE,
            EXTERNAL_LINKS, EXTERNAL_LINKS_TABLE,
            EXTERNAL_GONE_LINKS, EXTERNAL_GONE_LINKS_TABLE,
            LINK_STATISTICS, LINK_STATISTICS_TABLE
    );
    /**
     * Table types registered after replication. LINK_STATISTICS has no entry, so
     * {@link #replicateLinks} skips the merge-table creation and registration for it.
     */
    private static final Map<YtClickhouseDataLoadType, TableType> LOAD_TYPE_TO_TABLE_TYPE = ImmutableMap.of(
            INTERNAL_LINKS, TableType.INTERNAL_LINK_SAMPLES,
            EXTERNAL_LINKS, TableType.EXTERNAL_LINK_SAMPLES,
            EXTERNAL_GONE_LINKS, TableType.EXTERNAL_DELETED_LINK_SAMPLES
    );

    @Setter
    private YtService ytService;
    @Setter
    private ClickhouseServer clickhouseServer;
    @Setter
    private MdbYtClickhouseImportManager ytClickhouseImportManager;
    @Setter
    private MdbClickhouseReplicationManager clickhouseReplicationManager;
    @Setter
    private ClickhouseTablesRepository clickhouseTablesCDao;
    @Setter
    private YtPath mrBinary;
    @Setter
    private YtPath workDir;

    /**
     * Creates a builder preconfigured with the columns shared by all link sample tables
     * (internal, external and external gone links); callers only add the name and parts.
     */
    private static CHTable.Builder linksBuilder() {
        return CHTable.builder()
                .database(AbstractClickhouseDao.DB_WEBMASTER3_LINKS)
                .partNameSuffix("_shard_")
                .partitionBy("toYYYYMM(date)")
                .keyField("date", CHPrimitiveType.Date)
                .keyField("host_id", CHPrimitiveType.String)
                .field("host_path", CHPrimitiveType.String)
                .field("host_url_utf", CHPrimitiveType.String)
                .field("host_http_code", CHPrimitiveType.Int16)
                .field("host_last_access", CHPrimitiveType.Int32)
                .field("source_host_id", CHPrimitiveType.String)
                .field("source_path", CHPrimitiveType.String)
                .field("source_url_utf", CHPrimitiveType.String)
                .field("source_last_access", CHPrimitiveType.Int32)
                .field("source_tci", CHPrimitiveType.Int32)
                .keyField("link_date", CHPrimitiveType.Int32)
                .field("link_text", CHPrimitiveType.String)
                .field("source_iks", CHPrimitiveType.Int32)
                .field("link_delete_date", CHPrimitiveType.Int32);
    }

    /**
     * Prepares link data in YT for the subsequent import into ClickHouse.
     * <p>
     * Recreates the output tables returned by {@link #linksTables} and runs a map-reduce
     * operation over the source table, waiting for it to finish.
     *
     * @param importData description of the load; its type must be one of the supported load types
     * @return {@code importData} with the prepared tables attached
     * @throws IllegalArgumentException if the load type is not supported
     * @throws RuntimeException         wrapping the {@link YtException} if the YT operation fails
     */
    public YtClickhouseDataLoad prepareLinks(YtClickhouseDataLoad importData) {
        checkSupportedType(importData);
        LocalDate date = getTableDate(importData);
        List<YtPath> tables = linksTables(importData);
        String insertionDate = date.toString(CH_INSERTION_DATE_FORMATTER);
        log.info("IMPORT LINKS STATISTICS: {}", insertionDate);
        try {
            ytService.inTransaction(YtPath.fromString("arnold://tmp")).execute(cypressService -> {

                YtUtils.recreateTables(cypressService, tables, new YtNodeAttributes().setCompressionCodec("none"));
                YtOperationId operationId = cypressService.mapReduce(
                        YtUtils.newPrepareTablesForImportBuilder()
                                .addInputTable(importData.getSourceTable())
                                .setOutputTables(tables)
                                .setBinary(mrBinary) //TODO
                                .setTask(MAPPER_TASK_PER_LOAD_TYPE.get(importData.getType()))
                                .setLines(ROWS_PER_LOAD_TYPE.get(importData.getType()))
                                .addMapperArg("--date", insertionDate)
                                .build());

                if (!cypressService.waitFor(operationId)) {
                    throw new YtException("Prepare links failed. See " + operationId);
                }
                return true;
            });
        } catch (YtException e) {
            throw new RuntimeException("Failed to prepare links tables for " + importData.getType(), e);
        }
        return importData.withPreparedTables(tables);
    }

    /**
     * Returns the date passed to the mapper for the given load.
     * <p>
     * Normally this is {@code importData.getDateTo()}. As a dirty hack, for LINK_STATISTICS the date
     * is taken from the source table name when it matches {@code report_<number>}; the number is
     * multiplied by 1000 and used as the instant of a {@link LocalDate}. If the number cannot be
     * converted, the method falls back to {@code getDateTo()} and logs a warning.
     *
     * @param importData description of the load
     * @return the date derived from the table name, or {@code importData.getDateTo()}
     */
    public LocalDate getTableDate(YtClickhouseDataLoad importData) {
        LocalDate fallback = importData.getDateTo();
        if (importData.getType() != LINK_STATISTICS) {
            return fallback;
        }
        Matcher matcher = STATISTICS_TABLE_NAME_PATTERN.matcher(importData.getSourceTable().getName());
        if (!matcher.matches()) {
            return fallback;
        }
        try {
            // multiplyExact guards against a silently overflowed (and therefore bogus) instant
            long millis = Math.multiplyExact(Long.parseLong(matcher.group(1)), 1000L);
            return new LocalDate(millis);
        } catch (NumberFormatException | ArithmeticException e) {
            log.warn("Cannot derive date from statistics table {}, falling back to {}",
                    importData.getSourceTable(), fallback, e);
            return fallback;
        }
    }

    /**
     * Submits an import of the prepared YT tables into ClickHouse.
     * <p>
     * Every YT table produced by {@link #linksTables} is paired, in shard-major order, with the
     * replicated merge tree table of the corresponding part.
     *
     * @param importData description of the load; its type must be one of the supported load types
     * @return {@code importData} with the id of the started import task attached
     * @throws IllegalArgumentException if the load type is not supported
     */
    public YtClickhouseDataLoad importLinks(YtClickhouseDataLoad importData) {
        checkSupportedType(importData);
        String dateString = IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        int shards = getShardsCount(importData);
        int parts = PARTS_PER_LOAD_TYPE.get(importData.getType());
        List<YtPath> tables = linksTables(importData);
        UUID taskId = UUIDs.timeBased();
        log.info("Links import {}", taskId);
        CHTable table = TABLES_PER_LOAD_TYPE.get(importData.getType());
        List<YtClickhouseTableRelation> tablesRels = new ArrayList<>();
        // linksTables() emits tables in the same shard-major / part-minor order, so a running index matches
        int idx = 0;
        for (int shard = 0; shard < shards; shard++) {
            for (int part = 0; part < parts; part++) {
                tablesRels.add(new YtClickhouseTableRelation(
                        tables.get(idx++),
                        shard,
                        table.replicatedMergeTreeTableName(part, dateString),
                        table.createReplicatedMergeTreeSpec(part, dateString)
                ));
            }
        }
        YtClickhouseImportCommand command = new YtClickhouseImportCommand(
                taskId,
                tablesRels,
                AbstractClickhouseDao.DB_WEBMASTER3_LINKS,
                table.importSpec(),
                ImportPriority.OFFLINE
        );
        ytClickhouseImportManager.startImport(command);
        return importData.withImportTaskIds(taskId);
    }

    /**
     * Enqueues replication of the imported tables and, for load types that have an entry in
     * {@link #LOAD_TYPE_TO_TABLE_TYPE}, creates the merge table on all hosts and records the
     * new tables through {@code clickhouseTablesCDao}.
     *
     * @param importData description of the load; its type must be one of the supported load types
     * @return {@code importData} with the id of the enqueued replication task attached
     * @throws IllegalArgumentException if the load type is not supported
     * @throws WebmasterException       if creating the merge table or storing the table info fails
     */
    public YtClickhouseDataLoad replicateLinks(YtClickhouseDataLoad importData) {
        checkSupportedType(importData);
        String dateString = IN_TABLE_NAME_DATE_FORMATTER.print(importData.getDateTo());
        int shards = getShardsCount(importData);
        int parts = PARTS_PER_LOAD_TYPE.get(importData.getType());
        CHTable table = TABLES_PER_LOAD_TYPE.get(importData.getType());
        UUID taskId = UUIDs.timeBased();
        log.info("Replication id {}", taskId);
        List<ClickhouseReplicationCommand.TableInfo> tables = new ArrayList<>();

        for (int shard = 0; shard < shards; shard++) {
            for (int part = 0; part < parts; part++) {
                tables.add(new ClickhouseReplicationCommand.TableInfo(
                        table.replicatedMergeTreeTableName(part, dateString),
                        table.createReplicatedMergeTreeSpec(part, dateString),
                        shard
                ));
            }
        }
        ClickhouseReplicationCommand command = new ClickhouseReplicationCommand(
                taskId,
                AbstractClickhouseDao.DB_WEBMASTER3_LINKS,
                ClickhouseReplicationPriority.OFFLINE,
                tables
        );
        clickhouseReplicationManager.enqueueReplication(command);
        // create the merge and distributed tables right away
        // NOTE(review): only createMerge is visible here; presumably it covers both - confirm
        try {
            if (LOAD_TYPE_TO_TABLE_TYPE.containsKey(importData.getType())) {
                ClickhouseQueryContext.Builder context = ClickhouseQueryContext.useDefaults();
                clickhouseServer.executeOnAllHosts(context, table.createMerge(dateString));
                // record the new tables in the tables repository
                // NOTE(review): the error message below still says "Cassandra" although a YDB exception is caught
                clickhouseTablesCDao.update(table.toClickhouseTableInfo(
                        LOAD_TYPE_TO_TABLE_TYPE.get(importData.getType()), shards, dateString));
            }
        } catch (ClickhouseException e) {
            throw new WebmasterException("Error creating merge and distributed tables",
                    new WebmasterErrorResponse.ClickhouseErrorResponse(getClass(), null, e), e);
        } catch (WebmasterYdbException e) {
            throw new WebmasterException("Error writing table info to Cassandra",
                    new WebmasterErrorResponse.YDBErrorResponse(getClass(), e), e);
        }
        return importData.withReplicationTaskIds(taskId);
    }

    /**
     * Builds the YT paths of the prepared tables, in shard-major / part-minor order.
     * <p>
     * Tables live in {@code workDir/<lower-cased load type>} and are named
     * {@code <yyyyMMdd>_shard_<shard>_part_<part>}, or just {@code <yyyyMMdd>_shard_<shard>}
     * when the load type has a single part.
     */
    private List<YtPath> linksTables(YtClickhouseDataLoad importData) {
        int parts = PARTS_PER_LOAD_TYPE.get(importData.getType());
        List<YtPath> tables = new ArrayList<>();
        // Locale.ROOT keeps the directory name independent of the JVM default locale
        YtPath dir = YtPath.path(workDir, importData.getType().name().toLowerCase(Locale.ROOT));
        // TODO temporary workaround, link_statistics needs to be resharded
        int shardCount = getShardsCount(importData);
        String datePrefix = importData.getDateTo().toString(IN_TABLE_NAME_DATE_FORMATTER);
        for (int shard = 0; shard < shardCount; shard++) {
            String shardName = datePrefix + "_shard_" + shard;
            if (parts > 1) {
                for (int part = 0; part < parts; part++) {
                    tables.add(YtPath.path(dir, shardName + "_part_" + part));
                }
            } else {
                tables.add(YtPath.path(dir, shardName));
            }
        }
        return tables;
    }

    /**
     * Returns the number of ClickHouse shards. The {@code importData} argument is currently
     * unused: the count always comes from {@code clickhouseServer}.
     */
    private int getShardsCount(YtClickhouseDataLoad importData) {
        return clickhouseServer.getShardsCount();
    }

    /** Rejects loads whose type is not one of the keys of {@link #ROWS_PER_LOAD_TYPE}. */
    private static void checkSupportedType(YtClickhouseDataLoad importData) {
        Preconditions.checkArgument(ROWS_PER_LOAD_TYPE.containsKey(importData.getType()),
                "Unsupported load type: %s", importData.getType());
    }

}
