# 
# -*- Encoding: UTF-8 -*-
#
import sys
from ctypes import *

# This module calls functions from the disamblemmer_c_binding shared library through its callback interface.

class TaggedWord(object):
    """Plain record describing one analysis of a word.

    The constructor stores its arguments unchanged:

    word                -- stored as ``self.word``
    lemma               -- stored as ``self.lemma``
    lex_f               -- stored as ``self.lexical_features``
    infl_f              -- stored as ``self.inflection_features``
    weight              -- stored as ``self.weight``
    """

    def __init__(self, word, lemma, lex_f, infl_f, weight):
        self.word = word
        self.lemma = lemma
        self.lexical_features = lex_f
        self.inflection_features = infl_f
        self.weight = weight

    def __repr__(self):
        # Mirrors the constructor argument order so the output is readable
        # when a list of analyses is printed while debugging.
        return "%s(%r, %r, %r, %r, %r)" % (
            type(self).__name__,
            self.word,
            self.lemma,
            self.lexical_features,
            self.inflection_features,
            self.weight,
        )

class Session(object):
    """One session of the C library, reporting results through a callback.

    lib      -- loaded ctypes library object exposing the DL_* functions
    cbfunc   -- Python callable invoked as ``cbfunc(text, results)`` where
                ``text`` is the decoded word text and ``results`` is a list
                of TaggedWord objects (possibly empty)
    language -- value passed to DL_CreateSession as its last argument;
                text is encoded as UTF-8, bytes and None pass through as is
    """

    def __init__(self, lib, cbfunc, language):
        self.lib = lib
        # Set before anything can fail so that __del__ is always safe to run.
        self.session = None

        # The C function takes a char*; ctypes rejects text strings for
        # c_char_p on Python 3, so hand it bytes.
        if language is not None and not isinstance(language, bytes):
            language = language.encode("utf-8")

        def result_callback(_, rawresults):
            # Deliberately uses the local ``lib`` instead of ``self.lib``:
            # capturing ``self`` here would create a reference cycle
            # (self -> callback object -> closure -> self) and delay, or on
            # interpreters without PEP 442 prevent, the call to __del__.
            rescount = lib.DL_GetAnalysisCount(rawresults)
            text = lib.DL_GetWordText(rawresults)
            results = []
            for i in range(rescount):
                # Re-wrap the returned address so it is passed on as a
                # pointer-sized value.
                analysis = c_void_p(lib.DL_GetAnalysisAt(rawresults, i))
                form = lib.DL_GetWordForm(analysis).decode("utf-8")
                lemma = lib.DL_GetLemma(analysis).decode("utf-8")
                lexfeatures = lib.DL_GetLexicalFeatures(analysis).decode("utf-8")
                inflfeatures = lib.DL_GetInflectionFeatures(analysis).decode("utf-8")
                weight = lib.DL_GetWeight(analysis)
                results.append(TaggedWord(form, lemma, lexfeatures, inflfeatures, weight))
            cbfunc(text.decode("utf-8"), results)

        # The ctypes callback object must stay referenced for as long as the
        # C side may call it, hence it is stored on the instance.
        self.result_callback = CallbackTagger.result_callback_proto(result_callback)
        self.session = lib.DL_CreateSession(self.result_callback, 0, language)

    def __del__(self):
        # Tolerate partially constructed instances and repeated calls, and
        # never hand a NULL handle to the library.
        session = getattr(self, "session", None)
        if session is None:
            return
        self.session = None
        self.lib.DL_DestroySession(session)

    def NextWord(self, text):
        """Pass the next word (a text string) to the library, UTF-8 encoded."""
        self.lib.DL_NextWord(self.session, c_char_p(text.encode("utf-8")))

    def Flush(self):
        """Call DL_Flush on this session."""
        self.lib.DL_Flush(self.session)

class CallbackTagger(object):
    """Loads the disamblemmer C binding and declares its function signatures.

    libpath -- optional path of the shared library to load.  When omitted,
               ``./libdisamblemmer_c_binding.<ext>`` is used, with the
               extension chosen from ``sys.platform`` (the original
               behaviour; note that this path is relative to the current
               working directory).
    """

    # C callback type: returns nothing, takes two pointer arguments.
    result_callback_proto = CFUNCTYPE(None, c_void_p, c_void_p)

    # (function name, restype, argtypes) for every library function used.
    # Declaring them lets ctypes check arguments and keeps pointer-sized
    # return values from being treated as C ints.
    _SIGNATURES = (
        ("DL_CreateSession", c_void_p, [result_callback_proto, c_void_p, c_char_p]),
        ("DL_DestroySession", None, [c_void_p]),
        ("DL_NextWord", None, [c_void_p, c_char_p]),
        ("DL_Flush", None, [c_void_p]),
        ("DL_GetWordText", c_char_p, [c_void_p]),
        ("DL_GetAnalysisCount", c_int, [c_void_p]),
        ("DL_GetAnalysisAt", c_void_p, [c_void_p, c_int]),
        ("DL_GetWordForm", c_char_p, [c_void_p]),
        ("DL_GetLemma", c_char_p, [c_void_p]),
        ("DL_GetLexicalFeatures", c_char_p, [c_void_p]),
        ("DL_GetInflectionFeatures", c_char_p, [c_void_p]),
        ("DL_GetWeight", c_float, [c_void_p]),
    )

    def __init__(self, libpath=None):
        if libpath is None:
            libpath = self._default_library_path()
        self.lib = CDLL(libpath)

        # Type checks
        for name, restype, argtypes in self._SIGNATURES:
            func = getattr(self.lib, name)
            func.restype = restype
            func.argtypes = argtypes

    @staticmethod
    def _default_library_path():
        """Return the default, current-directory-relative library path."""
        if sys.platform == 'darwin':
            soext = "dylib"
        elif sys.platform.lower().startswith('win'):
            soext = "dll"
        else:
            soext = 'so'
        return "./libdisamblemmer_c_binding." + soext

    def CreateSession(self, cbfunc, language):
        """Return a new Session on the loaded library.

        cbfunc and language are passed to Session unchanged.
        """
        return Session(self.lib, cbfunc, language)

def main():
    """Command-line test driver.

    Reads UTF-8 text from standard input, feeds every word of each line to
    a tagging session created for the language named by the first
    command-line argument, and prints for each reported word either its
    first analysis (lemma and lexical features) or ``<unknown>``.
    Runs on both Python 2 and Python 3.
    """
    # Just testing, nothing serious
    import codecs
    import re

    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s LANGUAGE < input-text\n" % sys.argv[0])
        sys.exit(2)

    # The library expects a byte string; on Python 3 argv holds text.
    language = sys.argv[1]
    if not isinstance(language, bytes):
        language = language.encode("utf-8")

    # The codecs wrappers need byte streams.  On Python 3 those are the
    # ``buffer`` attributes of the standard streams; on Python 2 the
    # streams themselves are byte streams and have no such attribute.
    src = codecs.getreader("utf-8")(getattr(sys.stdin, "buffer", sys.stdin), "ignore")
    out = codecs.getwriter("utf-8")(getattr(sys.stdout, "buffer", sys.stdout), "xmlcharrefreplace")
    word_re = re.compile(u"([a-zA-Z\u00C0-\u01FF\u0401-\u04FF]+(?:[-][a-zA-Z\u00C0-\u01FF\u0401-\u04FF]+)?)")

    def PrintResult(text, analyses):
        # Only the first analysis in the list is shown.
        if not analyses:
            out.write(u"%s: <unknown>\n" % text)
        else:
            out.write(u"%s: %s (%s)\n" % (text, analyses[0].lemma, analyses[0].lexical_features))

    tagger = CallbackTagger()
    session = tagger.CreateSession(PrintResult, language)
    for line in src:
        # split() with a capturing group alternates separators and matches;
        # the odd positions are the matched words.
        for word in word_re.split(line.strip())[1::2]:
            session.NextWord(word)
        # Flush once per input line.
        session.Flush()

# Run the command-line test driver only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
