#pragma once

#include "token.h"
#include "unperson_listener.h"

#include <util/generic/hash_set.h>
#include <util/generic/noncopyable.h>
#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>

// Forward declaration only: this header refers to TTextDeobfuscator solely
// through a pointer (see TContext), so its full definition is not needed here.
namespace NTextDeobfuscate {
    class TTextDeobfuscator;
}

namespace NUnperson {
    // Forward declarations: within this header both caches are only held by
    // reference or pointer (see TContext), so their definitions are not required.
    class TLemmerCache;
    class TDeobfuscatorCache;

    // Returns true when `c` is alphanumeric, or when it is a math symbol whose
    // code point lies above '~' (U+007E).
    static inline bool IsAlnumOrSimilar(const wchar32 c) {
        if (IsAlnum(c)) {
            return true;
        }
        // Math symbols at or below '~' are deliberately not accepted.
        const bool aboveTilde = c > u'~';
        return aboveTilde && IsMath(c);
    }

    // Working state handed by reference to TTokenizer::Tokenize: the
    // deobfuscator, the caches, and temporary buffers used by the helper
    // methods declared below.
    class TContext {
    private:
        // Raw pointers supplied by whoever builds the context; TContext declares
        // no destructor, so it never frees them.
        const NTextDeobfuscate::TTextDeobfuscator* Deobfuscator;
        TDeobfuscatorCache* DeobfuscatorCache;

        // Buffers released through TFree; both size fields start at zero.
        // NOTE(review): presumably scratch space for ToLower / WideToUTF8 /
        // UTF8ToWide, with the size fields tracking capacity — confirm in the .cpp.
        THolder<wchar16, TFree> TmpWBuf;
        size_t WBufSize = 0;
        THolder<char, TFree> TmpBuf;
        size_t BufSize = 0;

    public:
        // Lemmer cache bound at construction; publicly accessible.
        TLemmerCache& LemmerCache;
        // Receives the output of Deobfuscate().
        TString TmpStr;

        // Stores the given deobfuscator, deobfuscator cache and lemmer cache;
        // the temporary buffers start out empty.
        TContext(
            const NTextDeobfuscate::TTextDeobfuscator* deobfuscator,
            TDeobfuscatorCache* deobfuscatorCache,
            TLemmerCache& lemmerCache)
            : Deobfuscator(deobfuscator)
            , DeobfuscatorCache(deobfuscatorCache)
            , LemmerCache(lemmerCache)
        {
        }

        // Writes its result into TmpStr instead of returning it.
        void Deobfuscate(TWtringBuf str);

        // Conversion helpers that return string views.
        // NOTE(review): the returned views presumably alias the internal
        // buffers and are invalidated by the next call — confirm in the .cpp.
        TWtringBuf ToLower(TWtringBuf str);

        TStringBuf WideToUTF8(TWtringBuf str);

        TWtringBuf UTF8ToWide(TStringBuf str);
    };

    // Abstract base class of every tokenizer declared in this header.
    // Inherits TNonCopyable, so tokenizers cannot be copied.
    class TTokenizer: public TNonCopyable {
    public:
        // Virtual so that derived tokenizers can be destroyed through a base
        // pointer. Only declared here; the definition lives outside this header.
        virtual ~TTokenizer();

        // Processes `str` with access to the shared `context` and `listener`.
        // The method is const, so implementations cannot modify their own
        // non-mutable members.
        // NOTE(review): results are presumably reported through `listener` —
        // confirm against TUnpersonListener.
        virtual void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const = 0;
    };

    // Tokenizer configured by a single OneSentencePerLine flag.
    // NOTE(review): judging by the names, it splits text into sentences and the
    // flag makes every line its own sentence — confirm in the implementation.
    class TSentenceTokenizer: public TTokenizer {
    public:
        // Set once at construction; const, so it is never reassigned.
        const bool OneSentencePerLine;

    public:
        // explicit: pointers, integers and other bool-convertible values must
        // not silently convert into a tokenizer.
        explicit TSentenceTokenizer(bool oneSentencePerLine)
            : OneSentencePerLine(oneSentencePerLine)
        {
        }

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Tokenizer configured by a single OneSentencePerLine flag.
    // NOTE(review): the name suggests a more lenient variant of sentence
    // splitting — confirm how it differs in the implementation.
    class TBestEffortSentenceTokenizer: public TTokenizer {
    public:
        // Set once at construction; const, so it is never reassigned.
        const bool OneSentencePerLine;

    public:
        // explicit: pointers, integers and other bool-convertible values must
        // not silently convert into a tokenizer.
        explicit TBestEffortSentenceTokenizer(bool oneSentencePerLine)
            : OneSentencePerLine(oneSentencePerLine)
        {
        }

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably splits a sentence into smaller parts, judging
    // by the name — confirm in the implementation.
    class TSubSentenceTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Tokenizer whose internals are hidden behind a private TImpl class.
    // NOTE(review): presumably recognises phone numbers, judging by the name —
    // confirm in the implementation.
    class TPhoneTokenizer: public TTokenizer {
    public:
        // Defined outside this header, where TImpl is a complete type.
        TPhoneTokenizer();

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // TImpl is only declared here; Impl owns the instance.
        class TImpl;
        THolder<TImpl> Impl;
    };

    // Tokenizer configured with a separator string that is copied at
    // construction.
    // NOTE(review): presumably recognises numbers, and the separator is used
    // when tokens are fused together — confirm in the implementation.
    class TNumberTokenizer: public TTokenizer {
    public:
        // explicit: a plain string must not silently convert into a tokenizer.
        // Stores its own copy of `fusedTokensSeparator`.
        explicit TNumberTokenizer(const TUtf16String& fusedTokensSeparator)
            : FusedTokensSeparator(fusedTokensSeparator)
        {
        }

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // Immutable copy of the separator passed to the constructor.
        const TUtf16String FusedTokensSeparator;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably splits on whitespace, judging by the name —
    // confirm in the implementation.
    class TSpaceTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably splits on underscores, judging by the name —
    // confirm in the implementation.
    class TUnderscoreTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably handles bracketed text, judging by the name —
    // confirm in the implementation.
    class TBracketsTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Tokenizer whose internals are hidden behind a private TImpl class.
    // NOTE(review): presumably recognises URIs, judging by the name — confirm
    // in the implementation.
    class TUriTokenizer: public TTokenizer {
    public:
        // Defined outside this header, where TImpl is a complete type.
        TUriTokenizer();

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // TImpl is only declared here; Impl owns the instance.
        class TImpl;
        THolder<TImpl> Impl;
    };

    // Tokenizer whose internals are hidden behind a private TImpl class and
    // which is configured entirely through its constructor arguments.
    class TDateTokenizer: public TTokenizer {
    public:
        // Defined outside this header, where TImpl is a complete type.
        // NOTE(review): `pattern` and `flags` presumably describe a regular
        // expression, `minLen` a minimum match length, and `type` the token
        // type assigned to matches — confirm in the implementation.
        TDateTokenizer(const wchar16* pattern, int flags, size_t minLen, ETokenType type);

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // TImpl is only declared here; Impl owns the instance.
        class TImpl;
        THolder<TImpl> Impl;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably splits on individual separator characters,
    // judging by the name — confirm in the implementation.
    class TSingleSeparatorTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Tokenizer configured with a separator string and a minimum password
    // length, both fixed at construction.
    // NOTE(review): presumably recognises password-like tokens of at least
    // MinPasswordLength characters — confirm in the implementation.
    class TPasswordTokenizer: public TTokenizer {
    public:
        // Stores a copy of `fusedTokensSeparator` and the value of
        // `minPasswordLength`.
        TPasswordTokenizer(
            const TUtf16String& fusedTokensSeparator,
            size_t minPasswordLength)
            : FusedTokensSeparator(fusedTokensSeparator)
            , MinPasswordLength(minPasswordLength)
        {
        }

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // Immutable copies of the constructor arguments.
        const TUtf16String FusedTokensSeparator;
        const size_t MinPasswordLength;
    };

    // Tokenizer configured with a set of strings that is copied at
    // construction.
    // NOTE(review): presumably tokens found in the set are skipped by later
    // processing — confirm in the implementation.
    class TIgnoreListTokenizer: public TTokenizer {
    public:
        // explicit: a hash set must not silently convert into a tokenizer.
        // Stores its own copy of `ignoreList`.
        explicit TIgnoreListTokenizer(const THashSet<TUtf16String>& ignoreList)
            : IgnoreList(ignoreList)
        {
        }

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // Immutable copy of the set passed to the constructor.
        const THashSet<TUtf16String> IgnoreList;
    };

    // Stateless tokenizer (no data members, implicit default constructor).
    // NOTE(review): presumably relies on the deobfuscation facilities of
    // TContext, judging by the name — confirm in the implementation.
    class TDeobfuscatorTokenizer: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // Tokenizer whose internals are hidden behind a private TImpl class.
    // NOTE(review): presumably splits text into words, judging by the name —
    // confirm in the implementation.
    class TWordTokenizer: public TTokenizer {
    public:
        // Defined outside this header, where TImpl is a complete type.
        TWordTokenizer();

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // TImpl is only declared here; Impl owns the instance.
        class TImpl;
        THolder<TImpl> Impl;
    };

    // Stateless TTokenizer implementation (no data members, implicit default
    // constructor).
    // NOTE(review): presumably drops one-letter words, judging by the name —
    // confirm in the implementation.
    class TSingleLetterWordEraser: public TTokenizer {
    public:
        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;
    };

    // TTokenizer implementation whose internals are hidden behind a private
    // TImpl class.
    // NOTE(review): presumably drops stop words, judging by the name — confirm
    // in the implementation.
    class TStopWordsEraser: public TTokenizer {
    public:
        // Defined outside this header, where TImpl is a complete type.
        TStopWordsEraser();

        // Implements TTokenizer::Tokenize; defined outside this header.
        void Tokenize(
            TWtringBuf str,
            TContext& context,
            TUnpersonListener& listener) const override;

    private:
        // TImpl is only declared here; Impl owns the instance.
        class TImpl;
        THolder<TImpl> Impl;
    };
}

