package ru.yandex.webmaster3.worker.url.checker3.fetchers;

import NUrlChecker.Response;
import com.google.common.primitives.UnsignedLong;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.jetbrains.annotations.NotNull;
import org.joda.time.DateTime;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.url.checker3.UrlCheckRequestParams;
import ru.yandex.webmaster3.core.util.ArcUtils;
import ru.yandex.webmaster3.core.util.CityHash102;
import ru.yandex.webmaster3.storage.jupiter.JupiterUtils;
import ru.yandex.webmaster3.storage.url.checker3.data.blocks.PageTextContentData;
import ru.yandex.webmaster3.storage.url.checker3.data.UrlCheckDataBlockType;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtRowMapper;
import ru.yandex.webmaster3.worker.url.checker3.AbstractUrlCheckDataBlockFetcher;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * Fetches the text archive of a page from the Jupiter "walrus" YT tables and exposes it
 * as a {@link PageTextContentData} block.
 *
 * <p>The table to read is derived from the search base date and from a shard number
 * computed from the URL (see {@link #getShard(String)}).
 *
 * @author leonidrom
 */
@Component
@Slf4j
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class PageTextContentDataFetcher extends AbstractUrlCheckDataBlockFetcher<PageTextContentData> {
    /** Table path template; the placeholders are the formatted base date and the shard number. */
    private static final String WALRUS_TABLE_PATH_PATTERN = "//home/jupiter/walrus/%s/%s/data.0";

    /** Name of the YT cluster passed to {@link YtPath#create}. */
    private static final String YT_CLUSTER = "arnold";

    /** Divisor used to map a URL hash to a shard number. */
    private static final UnsignedLong SHARD_COUNT = UnsignedLong.valueOf(18L);

    /**
     * Reads the walrus table for the given base date and returns the text extracted from
     * the first matching row.
     *
     * @param searchBaseDate date used to build the table path
     * @param requestParams  request parameters; only the URL is used here
     * @return a block holding the extracted text, or a block holding {@code null} when no
     *         row was returned or the row carries no archive data
     * @throws Exception whatever the table read or the archive extraction throws
     */
    @Override
    public PageTextContentData doFetchBlock(DateTime searchBaseDate, UrlCheckRequestParams requestParams) throws Exception {
        String url = requestParams.getUrl();
        String tablePath = WALRUS_TABLE_PATH_PATTERN.formatted(
                searchBaseDate.toString(JupiterUtils.STATE_DATE_FORMAT),
                getShard(url));

        List<YtRow> rows = readTable(YtPath.create(YT_CLUSTER, tablePath), url, new YtWalrusRowMapper());
        if (rows.isEmpty()) {
            return new PageTextContentData(null);
        }

        // Only the first row is used; any further rows are ignored.
        YtRow row = rows.get(0);
        if (row.arcData == null) {
            // The mapper never saw an "Arc" field for this row: treat it like a missing row
            // instead of handing null to the extractor.
            return new PageTextContentData(null);
        }

        return new PageTextContentData(ArcUtils.extractDocText(row.arcData));
    }

    /**
     * This fetcher does not build its block from the url checker response.
     *
     * @return always {@code null}
     */
    @Override
    public PageTextContentData doFetchBlock(Response.TUrlCheckResponse response) throws Exception {
        return null;
    }

    @Override
    public Class<PageTextContentData> getDataBlockClass() {
        return PageTextContentData.class;
    }

    /** Holder for the single column value this fetcher keeps from a walrus table row. */
    private static class YtRow {
        private static final String F_HOST = "Host";
        private static final String F_PATH = "Path";
        private static final String F_ARC = "Arc";

        /** Raw bytes of the "Arc" column; {@code null} until that field has been read. */
        private byte[] arcData;
    }

    /**
     * Row mapper that keeps only the "Arc" column of each row.
     * The other requested columns are not stored.
     */
    private static class YtWalrusRowMapper implements YtRowMapper<YtRow> {
        /** Row currently being filled; replaced by a fresh instance on every {@link #rowEnd()}. */
        private YtRow row = new YtRow();

        @Override
        public void nextField(String name, InputStream data) {
            if (!YtRow.F_ARC.equals(name)) {
                return;
            }

            try {
                row.arcData = IOUtils.toByteArray(data);
            } catch (IOException e) {
                throw new WebmasterException("Error reading arc data", new WebmasterErrorResponse.YTServiceErrorResponse(getClass(), e));
            }
        }

        @Override
        public YtRow rowEnd() {
            YtRow finished = row;
            row = new YtRow();
            return finished;
        }

        @Override
        public List<String> getColumns() {
            return List.of(YtRow.F_HOST, YtRow.F_PATH, YtRow.F_ARC);
        }
    }

    /**
     * Computes the shard number for a URL: CityHash64 of the URL bytes, interpreted as an
     * unsigned 64-bit value, modulo {@link #SHARD_COUNT}.
     *
     * <p>The bytes are taken in UTF-8 explicitly so the result does not depend on the JVM
     * default charset.
     * NOTE(review): assumes the table producer shards by the UTF-8 bytes of the URL — confirm.
     *
     * @param url URL to hash
     * @return shard number in the range {@code [0, 18)}
     */
    private static int getShard(String url) {
        byte[] urlBytes = url.getBytes(StandardCharsets.UTF_8);
        long hash = CityHash102.cityHash64(urlBytes, 0, urlBytes.length);
        return UnsignedLong.fromLongBits(hash).mod(SHARD_COUNT).intValue();
    }

    @Override
    public @NotNull UrlCheckDataBlockType getBlockType() {
        return UrlCheckDataBlockType.TEXT_CONTENT;
    }
}
