// package com.yandex.arcadia.htnorm;

import com.sun.jna.Library;
import com.sun.jna.Native;
import com.sun.jna.Platform;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.PointerByReference;
import com.sun.jna.ptr.IntByReference;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Thin Java wrapper around the native "htnormso" shared library (loaded through JNA)
 * that normalizes HTML documents, plus a small command-line driver.
 */
public class Normalize {
    /**
     * JNA mapping of the native library.
     * The declarations reflect the shared library - do not modify without updating htnorm.
     */
    public interface IHTNorm extends Library {
        /**
         * Normalizes an HTML document.
         *
         * @param source    raw document bytes; byte[] is used instead of String to avoid
         *                  implicit conversion
         * @param sourceLen number of bytes in {@code source}
         * @param base      base URL of the document
         * @param replyPtr  receives a pointer to the reply, a UTF-8 encoded byte buffer
         * @param replySize receives the size of the reply in bytes
         * @return -1 on error, otherwise 0
         */
        int NormalizeHtml(byte[] source, int sourceLen, String base, PointerByReference replyPtr, IntByReference replySize);

        /**
         * Hands a reply buffer obtained from {@link #NormalizeHtml} back to the library.
         * NOTE(review): presumably frees the native memory - confirm against htnorm.
         */
        void ReleaseNormalized(Pointer resultDoc);

        /** Passes the recognizer dictionary path to the native library. */
        void SetRecognizerDictPath(String dictDictPath);
    }

    /**
     * Thrown by {@link Normalize#normalize} when the native call returns a non-zero code.
     * NOTE(review): this is a non-static inner class, so every instance keeps a reference
     * to its enclosing Normalize; kept as is to preserve the existing constructor interface.
     */
    public class NormalizeException extends Exception {
        public NormalizeException(String s) {
            super(s);
        }
    }

    /**
     * Reads the whole file into memory.
     *
     * @param file file to read
     * @return the file content
     * @throws IOException if the file is larger than Integer.MAX_VALUE bytes, cannot be
     *                     opened or read, or ends before the expected number of bytes
     */
    private static byte[] getBytesFromFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // A Java array cannot hold this many bytes; the (int) cast below would overflow.
            throw new IOException("File is too large: " + file.getName());
        }

        byte[] bytes = new byte[(int) length];

        // try-with-resources closes the stream on every path, including failures.
        try (InputStream is = new FileInputStream(file)) {
            int offset = 0;
            while (offset < bytes.length) {
                int numRead = is.read(bytes, offset, bytes.length - offset);
                if (numRead < 0) {
                    break; // end of stream reached before the buffer was filled
                }
                offset += numRead;
            }

            // Ensure all the bytes have been read in
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
        }
        return bytes;
    }

    /**
     * Command-line entry point: normalizes the given file and prints the result to stdout.
     *
     * @param args args[0] - file to normalize (required);
     *             args[1] - base URL (optional, defaults to "http://localhost/");
     *             args[2] - recognizer dictionary path (optional)
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            // Without this guard args[0] below fails with ArrayIndexOutOfBoundsException.
            System.err.println("usage: Normalize <file> [base-url] [recognizer-dict-path]");
            return;
        }

        String filename = args[0];
        String base = "http://localhost/";
        if (args.length >= 2) {
            base = args[1];
        }

        try {
            byte[] content = getBytesFromFile(new File(filename));
            Normalize obj = new Normalize();
            if (args.length >= 3) {
                obj.norm.SetRecognizerDictPath(args[2]);
            }
            String r = obj.normalize(content, base);
            System.out.println(r);
        } catch (IOException e) {
            System.err.println("cannot read file: " + e.toString());
        } catch (NormalizeException e) {
            System.err.println("cannot normalize: " + e.toString());
        }
    }

    private final IHTNorm norm;

    /** Loads the native library "htnormso" through JNA. */
    public Normalize() {
        norm = (IHTNorm) Native.loadLibrary("htnormso", IHTNorm.class);
    }

    /** Forwards the recognizer dictionary path to the native library. */
    public void setRecognizerDictPath(String dictDictPath) {
        norm.SetRecognizerDictPath(dictDictPath);
    }

    /**
     * Normalizes an HTML document with the native library.
     *
     * @param doc  raw document bytes, must not be null
     * @param base base URL of the document
     * @return the library's reply decoded as UTF-8; empty if the library produced no reply
     * @throws NormalizeException if the native call returns a non-zero code; the message is
     *                            the library's reply, or the return code if the reply is empty
     */
    public String normalize(byte[] doc, String base) throws NormalizeException {
        if (doc == null) {
            throw new NullPointerException("doc must not be null");
        }

        PointerByReference replyPtr = new PointerByReference();
        IntByReference replySize = new IntByReference();

        int result = norm.NormalizeHtml(doc, doc.length, base, replyPtr, replySize);

        String reply = "";
        Pointer p = replyPtr.getValue();
        // The pointer stays null if the library did not set it; nothing to read or release then.
        if (p != null) {
            try {
                int size = replySize.getValue();
                if (size > 0) {
                    byte[] buffer = p.getByteArray(0, size);
                    reply = new String(buffer, java.nio.charset.StandardCharsets.UTF_8);
                }
            } finally {
                // Hand the buffer back even if copying or decoding fails.
                norm.ReleaseNormalized(p);
            }
        }

        if (result != 0) {
            throw new NormalizeException(
                    reply.isEmpty() ? "normalization failed with code " + result : reply);
        }

        return reply;
    }
}
