package ru.yandex.webmaster.viewer.normalizer;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URL;

import com.sun.jna.Library;
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.IntByReference;
import com.sun.jna.ptr.PointerByReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;

import ru.yandex.wmconsole.service.error.WMCUserProblem;
import ru.yandex.wmtools.common.Constants;
import ru.yandex.wmtools.common.error.UserException;
import ru.yandex.wmtools.common.service.IService;

/**
 * Service that normalizes HTML documents by delegating to the native
 * {@code htnormso} library through JNA.
 *
 * <p>{@link #setDictionaryPath(File)} must be called before {@link #init()},
 * and {@link #init()} must be called before {@link #normalizeHtml(byte[], URL)}:
 * the native binding is only created in {@link #init()}.
 */
public class HtmlNormalizerService implements IService, Constants {
    private static final Logger log = LoggerFactory.getLogger(HtmlNormalizerService.class);

    /** JNA proxy for the native library; assigned in {@link #init()}. */
    private IHTNorm ihtNorm;

    /** Location of the recognizer dictionary handed to the native library. */
    private File dictionaryPath;

    /**
     * Loads the native library and passes it the dictionary location.
     *
     * @throws RuntimeException if the configured dictionary path does not exist
     */
    public void init() {
        if (!dictionaryPath.exists()) {
            throw new RuntimeException("Normalizer dictionary not found: " + dictionaryPath.getAbsoluteFile());
        }
        ihtNorm = (IHTNorm) Native.loadLibrary("htnormso", IHTNorm.class);
        ihtNorm.SetRecognizerDictPath(dictionaryPath.getAbsolutePath());
    }

    /**
     * Normalizes the given document and returns the native reply decoded as UTF-8.
     *
     * @param data raw bytes of the document
     * @param url  address of the document; used to build the base passed to the native call
     * @return the reply buffer produced by the native library, decoded as UTF-8
     * @throws UserException if {@code data} is null or empty, {@code url} is null,
     *                       the native call reports a non-zero result, or it returns no reply buffer
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    public String normalizeHtml(byte[] data, URL url) throws UnsupportedEncodingException, UserException {
        if (data == null || data.length == 0 || url == null) {
            throw new UserException(WMCUserProblem.HTML_PARSE_ERROR, "Empty document can't be normalized");
        }

        PointerByReference replyPtr = new PointerByReference();
        IntByReference replySize = new IntByReference();

        int result = ihtNorm.NormalizeHtml(data, data.length, getBase(url), replyPtr, replySize);

        Pointer reply = replyPtr.getValue();
        if (reply == null) {
            // The native side did not fill the out-pointer: there is nothing to read and
            // nothing to release, so report a parse error instead of dereferencing null.
            log.error("Unable to normalize html: native library returned no reply buffer, result code {}",
                    result);
            throw new UserException(WMCUserProblem.HTML_PARSE_ERROR, "Error normalizing html");
        }

        try {
            // The same buffer carries the normalized document on success and,
            // on a non-zero result, the text that gets logged as the error.
            String text = new String(reply.getByteArray(0, replySize.getValue()), "UTF-8");
            if (result != 0) {
                log.error("Unable to normalize html: {}", text);
                throw new UserException(WMCUserProblem.HTML_PARSE_ERROR, "Error normalizing html");
            }
            return text;
        } finally {
            // The buffer is handed back to the native library on every path.
            ihtNorm.ReleaseNormalized(reply);
        }
    }

    /**
     * Builds the base address passed to the native normalizer: scheme, authority and path.
     *
     * <p>When the last path segment contains a dot, that segment is dropped so the base
     * ends at the enclosing directory (or {@code "/"} when the segment sits directly
     * under the root). Paths whose last segment has no dot are used unchanged.
     * NOTE(review): treating a dot in the last segment as a file extension is a heuristic
     * inferred from the original code — confirm against what the native library expects.
     *
     * @param url address of the document being normalized
     * @return base address string
     */
    private String getBase(URL url) {
        String path = url.getPath();
        int lastSlash = path.lastIndexOf('/');
        int lastDot = path.lastIndexOf('.');

        // Only a dot located after the last slash belongs to the final segment;
        // a dot inside a directory name (e.g. "/v1.0/docs") must not trigger stripping.
        if (lastDot > lastSlash) {
            path = lastSlash > 0 ? path.substring(0, lastSlash) : "/";
        }

        return url.getProtocol() + SCHEME_DELIMITER + url.getAuthority() + path;
    }

    /** JNA mapping of the functions this service calls in the native library. */
    private interface IHTNorm extends Library {
        /**
         * Normalizes {@code source}. The caller treats a non-zero return value as failure
         * and reads {@code replySize} bytes from the pointer stored in {@code replyPtr}.
         */
        int NormalizeHtml(byte[] source, int sourceLen, String base, PointerByReference replyPtr, IntByReference replySize);

        /** Hands a reply buffer obtained from {@code NormalizeHtml} back to the native library. */
        void ReleaseNormalized(Pointer resultDoc);

        /** Passes the recognizer dictionary location to the native library. */
        void SetRecognizerDictPath(String path);
    }

    /**
     * Sets the recognizer dictionary location; required before {@link #init()}.
     *
     * @param dictionaryPath dictionary file or directory
     */
    @Required
    public void setDictionaryPath(File dictionaryPath) {
        this.dictionaryPath = dictionaryPath;
    }
}
