#include "tokens.h"

#include <security/ant-secret/internal/string_utils/hash.h>
#include <security/ant-secret/secret-search/internal/searchers/token_validators/all.h>
#include <security/ant-secret/internal/regex_storage/storage.h>

#include <util/generic/set.h>
#include <util/generic/strbuf.h>
#include <util/folder/path.h>
#include <util/stream/file.h>
#include <util/stream/buffer.h>
#include <util/string/strip.h>

namespace NSSInternal {
    namespace NSearchers {
        namespace {
            // Maps a matched token (a view into the scanned line) to the set of indices
            // into `validators` whose patterns matched that token.
            // Keys are TStringBuf, so the map must not outlive the line it was built from.
            using TSpottedMap = THashMap<TStringBuf, TSet<size_t, std::less<>>>;

            // Keyword patterns compiled into the "tokens_keyword_db" database.
            // CheckLineTo() skips a line unless one of them matches the line itself
            // (or, for line 1, the file name).
            // The database is compiled with HS_FLAG_CASELESS, so entries that start with
            // "(?-i)" switch case-insensitivity back off for that pattern (PCRE inline flag).
            const TVector<TStringBuf> kSecretKeywords = {
                "pass",
                "token",
                "auth",
                "session",
                "secret",
                "[\\w\\-]key",
                "(?-i)AQAD-",
                "Session_id",
                "yc_session",
                "(?-i)(AQVN|YC|c1\\.|t1\\.)"
            };

            // TODO(buglloc): ugly!
            // Literal strings that suppress a match: IsExcludedByPrefix() rejects a match
            // when the line contains one of these strings ending at (and including) the
            // reported match start position.
            const TVector<TStringBuf> kExcludedPrefixes = {
                "request_id=",
            };

            // Converts a validator result into the TSecret record reported to callers.
            //
            // The core fields are copied as-is. `Additional` is seeded with the SHA-1 of
            // the secret, the token user and the validation URL, and then extended with
            // the validator's own `Additional` entries.
            //
            // `result` is dereferenced unconditionally and must not be null.
            TSecret ValidationResultToSecret(const NTokenValidators::TValidatorResult* result) {
                const auto& found = *result;

                TSecret converted = {
                    .Type = found.Type,
                    .Secret = found.Secret,
                    .Validated = found.Validated,
                    .ValidationError = found.ValidationError,
                    .Owners = found.Owners,
                    .Additional = {
                        {"sha1", NStringUtils::Sha1(found.Secret)},
                        {"secret_users", found.TokenUser},
                        {"secret_validate_url", found.ValidateUrl},
                    }};

                // Range insert: merge in the validator-specific extras after the seeded entries.
                converted.Additional.insert(found.Additional.begin(), found.Additional.end());
                return converted;
            }

            // Returns true when one of kExcludedPrefixes occupies the window of `line`
            // that ends at index `pos` (inclusive), i.e. characters
            // [pos - len + 1, pos] equal the prefix.
            //
            // NOTE(review): the window includes the character at `pos` itself, so the
            // reported match start is expected to coincide with the prefix's last
            // character (e.g. a delimiter consumed by the pattern) - confirm against
            // the validator patterns.
            bool IsExcludedByPrefix(TStringBuf line, size_t pos) {
                for (const TStringBuf excluded : kExcludedPrefixes) {
                    const auto prefixLen = excluded.Size();

                    // The window must start inside the line: pos - prefixLen + 1 >= 0.
                    const bool windowFits = pos >= prefixLen - 1;
                    if (windowFits && line.SubStr(pos - prefixLen + 1, prefixLen) == excluded) {
                        return true;
                    }
                }
                return false;
            }

        }

        // Registers every token validator and obtains three regex databases from
        // TRegexStorage:
        //   - "tokens_prefilter_db": validator patterns, used as a cheap yes/no check;
        //   - "tokens_token_db":     validator patterns with start-of-match reporting,
        //                            used to extract token substrings;
        //   - "tokens_keyword_db":   kSecretKeywords, used to reject irrelevant lines.
        TTokens::TTokens(TContext& ctx)
            : TBase(ctx)
        {
            // Registration order is significant: validator indices are derived from it.
            validators.push_back(MakeHolder<NTokenValidators::TYandexOAuth>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TYandexSession>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TYCApiKey>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TYCToken>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TYCCookie>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TYCStaticCred>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TTvm>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TXiva>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TGithub>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TTelegram>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TSlack>(ctx));
            validators.push_back(MakeHolder<NTokenValidators::TJwt>(ctx));

            // Prefilter database: one expression per validator pattern, ids starting at 1.
            {
                TVector<const char*> expressions;
                TVector<unsigned int> ids;
                TVector<unsigned int> flags;

                const unsigned int prefilterFlags = HS_FLAG_DOTALL | HS_FLAG_PREFILTER | HS_FLAG_SINGLEMATCH;
                unsigned int lastId = 0;
                for (const auto& validator : validators) {
                    const auto& patterns = validator->PrefilterPatterns();
                    for (const auto& pattern : patterns) {
                        ++lastId;
                        expressions.push_back(pattern.data());
                        ids.push_back(lastId);
                        flags.push_back(prefilterFlags);
                    }
                }

                prefilterRe = TRegexStorage::Instance().GetOrCompile("tokens_prefilter_db", expressions, flags, ids);
            }

            // Token database: same expressions and ids, but compiled with
            // HS_FLAG_SOM_LEFTMOST; each id is also recorded in tokenToValidator so a
            // match can be traced back to the validator that owns the pattern.
            // NOTE(review): this database is built from PrefilterPatterns() as well -
            // confirm that is intended.
            {
                TVector<const char*> expressions;
                TVector<unsigned int> ids;
                TVector<unsigned int> flags;

                const unsigned int tokenFlags = HS_FLAG_DOTALL | HS_FLAG_SOM_LEFTMOST;
                unsigned int lastId = 0;
                for (size_t validatorIdx = 0; validatorIdx < validators.size(); ++validatorIdx) {
                    const auto& patterns = validators[validatorIdx]->PrefilterPatterns();
                    for (const auto& pattern : patterns) {
                        ++lastId;
                        tokenToValidator[lastId] = validatorIdx;
                        expressions.push_back(pattern.data());
                        ids.push_back(lastId);
                        flags.push_back(tokenFlags);
                    }
                }

                tokensRe = TRegexStorage::Instance().GetOrCompile("tokens_token_db", expressions, flags, ids);
            }

            // Keyword database: kSecretKeywords, ids starting at 0.
            {
                TVector<const char*> expressions;
                TVector<unsigned int> ids;
                TVector<unsigned int> flags;

                const unsigned int keywordFlags = HS_FLAG_PREFILTER | HS_FLAG_CASELESS | HS_FLAG_SINGLEMATCH | HS_FLAG_DOTALL;
                unsigned int keywordId = 0;
                for (const auto& keyword : kSecretKeywords) {
                    expressions.push_back(keyword.data());
                    ids.push_back(keywordId++);
                    flags.push_back(keywordFlags);
                }

                keywordsRe = TRegexStorage::Instance().GetOrCompile("tokens_keyword_db", expressions, flags, ids);
            }
        }

        // Scans one line for tokens and appends every accepted finding to `secrets`.
        //
        //   line    - text to scan; token views taken from it are only used within this call
        //   lineNo  - line number stored in each reported secret; the value 1 additionally
        //             enables the keyword check against the file name
        //   path    - file being scanned; only its name is inspected
        //   secrets - output list, appended to (never cleared)
        //
        // Pipeline: keyword filter -> prefilter database -> token extraction ->
        // per-token validation by the validators whose patterns matched.
        void TTokens::CheckLineTo(TStringBuf line, size_t lineNo, const TFsPath& path, TSecretList& secrets) {
            // Reject any line w/o "keyword" phrases, except when the keyword is in the
            // file name (e.g. .conductor_auth); the file name is consulted for line 1 only
            // and only if the line itself did not match.
            const bool hasKeyword =
                NHyperscan::MatchesPtr(keywordsRe.db, keywordsRe.scratch, line) ||
                (lineNo == 1 && NHyperscan::MatchesPtr(keywordsRe.db, keywordsRe.scratch, path.GetName()));
            if (!hasKeyword) {
                return;
            }

            if (!NHyperscan::MatchesPtr(prefilterRe.db, prefilterRe.scratch, line)) {
                // nothing matched :(
                return;
            }

            //TODO(buglloc): pass validator regex ID!
            // Collect, per matched substring, the validators whose patterns produced it.
            TSpottedMap spottedValidators;
            auto matcher = [&spottedValidators, line, this](unsigned int id, unsigned long long from,
                                                            unsigned long long to) {
                if (!IsExcludedByPrefix(line, from)) {
                    spottedValidators[line.SubStr(from, to - from)].insert(tokenToValidator[id]);
                }
            };
            NHyperscan::ScanPtr(tokensRe.db, tokensRe.scratch, line, matcher);

            for (auto&& [token, validatorIds] : spottedValidators) {
                bool haveSecret = false;
                for (const auto& validatorId : validatorIds) {
                    const auto& validator = validators[validatorId];
                    auto&& secret = ValidateBy(token, validator.Get());
                    if (!secret) {
                        continue;
                    }

                    if (haveSecret && !secret->Validated) {
                        // A finding is already recorded for this token; another
                        // non-validated one adds nothing, so only a validated result
                        // may still be appended.
                        continue;
                    }

                    if (ctx.WithSourceLine) {
                        secret->SourceLine = StripString(line);
                    }
                    secret->LineNo = lineNo;

                    secrets.push_back(secret.GetRef());
                    haveSecret = true;

                    if (!ctx.Validate) {
                        // Match-only mode: the first finding for the token is enough.
                        break;
                    }

                    if (secret->Validated) {
                        // If we found something valid - stop iteration to prevent valid token leakage
                        break;
                    }
                }
            }
        }

        // Runs a single validator against `data` and converts an accepted result to a TSecret.
        //
        // With ctx.Validate set the validator's Validate() is used, otherwise its Match().
        // Returns Nothing() when the validator yields no result, or when ctx.ValidOnly is
        // set and the result is not marked as validated.
        TMaybe<TSecret> TTokens::ValidateBy(const TStringBuf data, NTokenValidators::IValidator* validator) {
            const auto outcome = [&]() -> TMaybe<NTokenValidators::TValidatorResult> {
                if (ctx.Validate) {
                    return validator->Validate(data);
                }
                return validator->Match(data);
            }();

            const bool rejected = !outcome || (ctx.ValidOnly && !outcome->Validated);
            if (rejected) {
                return Nothing();
            }

            return ValidationResultToSecret(outcome.Get());
        }

    }
}
