#include "archive_reader.h"
#include <util/generic/ptr.h>
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/codepage.h>
#include <library/cpp/charset/recyr.hh>
#include <contrib/libs/libarchive/libarchive/archive.h>
#include <contrib/libs/libarchive/libarchive/archive_entry.h>
#include <dict/recognize/docrec/recognizer.h>
#include <algorithm>

#include <iostream>
#include <util/folder/dirut.h>

#ifndef MAX_PATHNAME_LEN
#define MAX_PATHNAME_LEN 1024
#endif

#include <mail/so/spamstop/tools/so-common/ArjReader.h>
#include <mail/so/spamstop/tools/so-common/sputil.h>
#include <mail/so/spamstop/sp/spamstop.h>

// File-name suffixes (leading dot included) that is_archive() reports as
// archives. has_extension() compares them case-insensitively against the tail
// of a file name and the first matching entry wins.
const char* supported_archive_extensions[] = {
    // p7zip-supported formats:
    ".zip",
    ".bz2",
    ".rar",
    ".arj",
    ".z",
    ".lzh",
    ".7z",
    ".cab",
    ".nsis",
    ".lzma",
    ".xz",
    ".iso",
    ".rpm",
    ".deb",
    ".cpio",
    ".tar",
    ".gz",

    // the formats below are supported via libarchive
    ".ar",
    ".lha",
    ".xar"};

using std::cout;
using std::endl;

namespace {
    template <class T, size_t N>
    size_t array_size(T (&)[N]) {
        return N;
    }

    class TArchiveDeleter {
    public:
        static inline void Destroy(archive* arc) noexcept {
            if (arc) {
                if (archive_read_free(arc) != ARCHIVE_OK) {
                    Syslog(TLOG_ERR) << "archive_free failed";
                }
            }
        }
    };

    int has_extension(const TString& name, const char** extensions, size_t extensions_size) {
        int result = -1;
        for (size_t i = 0; i < extensions_size; ++i) {
            const char* suffix = extensions[i];
            size_t suffix_len = strlen(suffix);
            if (suffix_len < name.size()) {
                if (strcasecmp(name.substr(name.size() - suffix_len).c_str(), suffix) == 0) {
                    result = i;
                    break;
                }
            }
        }
        return result;
    }

} // namespace

void recode_file_names(const TString& all_files, std::vector<TArciveItem>* result, ECharset charset) {
    if (all_files.empty())
        return;
    if (charset == CODES_UNKNOWN || charset == CODES_KOI8)
        return;
    for (std::vector<TArciveItem>::iterator it = result->begin(); it != result->end(); ++it) {
        try {
            it->name = Recode(charset, CODES_KOI8, it->name);
        } catch(...) {
            Syslog(TLOG_WARNING) << "cannot recode " << it->name << ':' << CurrentExceptionMessage();
        }
    }
}

// Lists the regular files stored in an in-memory archive.
//
// Stage 1 feeds the buffer to libarchive and appends one item (path, size) per
// regular-file entry to `result`. At most 100 entries are examined. For zip
// archives the collected names are additionally passed through `recoder`
// (when given) to detect their encoding and recode them.
//
// Stage 2 runs only when `result` is still empty afterwards: the "LIBA_FAIL"
// rule is set on `gsp`, and if `suffix` equals "arj" the buffer is parsed with
// NArj::ReadTraits; in that case only the last path component of each stored
// name is kept. "ARJ_FAIL" is set when that parser yields nothing.
//
// Returns an NArchive status value: ArchInitFail if libarchive could not be
// set up, otherwise ArchParseOk possibly OR-ed with ArchParseNameDecodeError
// (an entry had no readable path) and ArchParseNoEof (iteration did not end
// with ARCHIVE_EOF, which includes stopping at the entry limit).
ui64 GetArchiveContents(const TLog& logger, const char* data, size_t data_size, const char* suffix, const char* qidLabel, void* gsp, std::vector<TArciveItem>* result, TRecognizer* recoder) {
    ui64 status = NArchive::ArchInitFail;

    // Stage 1: libarchive.
    {
        // libarchive is handed a zero-padded copy of the input.
        // TODO fix libarchive zip module to get rid of this padding, dirty_hax++
        const size_t kPaddingSize = 500;
        std::vector<char> padded(data_size + kPaddingSize, '\0');
        std::copy(data, data + data_size, padded.begin());

        THolder<archive, TArchiveDeleter> reader_guard(archive_read_new());
        archive* const reader = reader_guard.Get();

        // Run the set-up steps in order; the first one that fails is reported.
        const char* init_error = nullptr;
        if (reader == nullptr) {
            init_error = "arhive_new fail";
        } else if (archive_read_support_filter_all(reader) != ARCHIVE_OK) {
            init_error = "arhive_support_filter fail";
        } else if (archive_read_support_format_all(reader) != ARCHIVE_OK) {
            init_error = "arhive_support_format fail";
        } else if (archive_read_open_memory(reader, padded.data(), data_size) != ARCHIVE_OK) {
            init_error = "arhive_open_memory fail";
        }

        if (init_error != nullptr) {
            logger << (TLOG_ERR) << qidLabel << init_error;
        } else {
            const size_t kMaxEntries = 100;

            archive_entry* entry = nullptr;
            int rc = archive_read_next_header(reader, &entry);
            // The format is queried after the first header has been read.
            const bool is_zip = ((archive_format(reader) & ARCHIVE_FORMAT_BASE_MASK) == ARCHIVE_FORMAT_ZIP);

            // Newline-separated names, gathered for zip archives only; used as
            // the sample for encoding detection.
            TString zip_names;
            if (is_zip) {
                zip_names.reserve(2048);
            }

            status = NArchive::ArchParseOk;

            size_t entries_seen = 0;
            for (; rc == ARCHIVE_OK || rc == ARCHIVE_WARN; rc = archive_read_next_header(reader, &entry)) {
                // Leaving via break keeps rc at OK/WARN, so the EOF check
                // below flags the archive as not fully read.
                if (++entries_seen > kMaxEntries) {
                    break;
                }
                if (!S_ISREG(archive_entry_mode(entry))) {
                    continue;
                }

                const char* path = archive_entry_pathname(entry);
                ui64 size = archive_entry_size(entry);
                if (path == nullptr) {
                    status |= NArchive::ArchParseNameDecodeError;
                    continue;
                }

                result->emplace_back(path, size);
                if (is_zip) {
                    zip_names += path;
                    zip_names += "\n";
                }
            }

            if (rc != ARCHIVE_EOF) {
                status |= NArchive::ArchParseNoEof;
            }

            if (is_zip && recoder) {
                recode_file_names(zip_names, result, recoder->RecognizeEncoding(zip_names.c_str(), zip_names.size()));
            }
        }
    }

    // libarchive produced at least one item: done.
    if (!result->empty()) {
        return status;
    }

    // Stage 2: nothing was listed; record that and fall back to the ARJ reader.
    TRengine* const engine = static_cast<TRengine*>(gsp);
    SpSetRule(engine, "LIBA_FAIL");

    if (strcmp(suffix, "arj") != 0) {
        return status;
    }

    NArj::TTraitsList filesTraits;
    if (!NArj::ReadTraits(data, data_size, filesTraits) || filesTraits.empty()) {
        SpSetRule(engine, "ARJ_FAIL");
        return status;
    }

    status = NArchive::ArchParseOk;
    result->reserve(filesTraits.size());

    for (const NArj::TFileTraits& traits : filesTraits) {
        TString filename(traits.filename);

        // Keep only what follows the last path separator of either kind.
        size_t base_offset = 0;

        const size_t last_backslash = filename.find_last_of('\\');
        if (last_backslash != TString::npos) {
            base_offset = last_backslash + 1;
        }

        const size_t last_slash = filename.find_last_of('/');
        if (last_slash != TString::npos && base_offset <= last_slash) {
            base_offset = last_slash + 1;
        }

        if (base_offset) {
            filename = filename.substr(base_offset);
        }

        result->push_back(TArciveItem(filename, traits.size));
    }

    return status;
}

// Returns the entry of supported_archive_extensions that `name` ends with
// (case-insensitive), or nullptr when the name matches none of them.
const char* is_archive(const TString& name) {
    const int idx = has_extension(name, supported_archive_extensions, array_size(supported_archive_extensions));
    return (idx < 0) ? nullptr : supported_archive_extensions[idx];
}

bool is_unsupported_archive(const TString& name) {
    const char* extensions[] = {
        ".ace",
        ".uue",
    };
    return has_extension(name, extensions, array_size(extensions)) >= 0;
}

bool is_executable(const TString& name) {
    const char* extensions[] = {
        ".exe",
        ".scr",
        ".com",
        ".bat",
        ".sys",
        ".dll",
        ".vbs",
        ".pif",
        ".ps1",
        ".ttf",
        ".cpl",
        ".hta",
        ".cmd",
        ".chm",
        ".jar",
        ".js",
    };
    return has_extension(name, extensions, array_size(extensions)) >= 0;
}

bool is_image(const TString& name) {
    const char* extensions[] = {
        ".jpg",
        ".jpe",
        ".jpeg",
        ".gif",
        ".png",
        ".bmp",
    };
    return has_extension(name, extensions, array_size(extensions)) >= 0;
}

// Produces a sanitized, length-limited copy of a file name.
//
// file_name    - the name (or path) to normalize.
// full_path    - when true the whole string is processed; otherwise only the
//                part returned by GetFileNameComponent() (presumably the last
//                path component inside the same buffer).
// max_name_len - length above which the name is shortened.
//
// Every whitespace character and every control character in the range
// 0x01..0x1F is replaced with '_'. If the processed part is longer than
// max_name_len it is cut and the marker "XXX" is appended, followed by the
// original extension (text from the last '.') when marker plus extension fit
// into max_name_len. The extension is appended verbatim, without the
// character replacement applied to the rest of the name.
TString normalize_name(const TString& file_name, bool full_path, size_t max_name_len) {
    const char* const start = (full_path ? file_name.c_str() : GetFileNameComponent(file_name.c_str()));
    const char* const end = file_name.c_str() + file_name.length();
    const size_t name_len = static_cast<size_t>(end - start);

    TString result;
    result.reserve(std::min(name_len, max_name_len));

    // Compare lengths rather than forming `start + max_name_len`, which may
    // point far beyond the buffer for a large limit.
    const bool trim_name = name_len > max_name_len;

    size_t ext_len = 0;                  // room to keep free for "XXX" + extension
    const char* last_dot_pos = nullptr;  // extension to re-append, if any

    if (trim_name) {
        last_dot_pos = strrchr(start, '.');
        if (last_dot_pos != nullptr) {
            ext_len = static_cast<size_t>(end - last_dot_pos) + 3;
        }
        // No dot at all, or marker + extension would not fit: keep only the marker.
        if (last_dot_pos == nullptr || ext_len > max_name_len) {
            ext_len = std::min(size_t(3), max_name_len);
            last_dot_pos = nullptr;
        }
    }

    for (const char* it = start; it != end; ++it) {
        // isspace() is only defined for values representable as unsigned char
        // (or EOF), so non-ASCII bytes must not be passed as a negative char.
        const unsigned char ch = static_cast<unsigned char>(*it);
        if (isspace(ch) || ((ch < 0x20) && (ch > 0))) {
            result += '_';
        } else {
            result += *it;
        }
        if (trim_name && ((result.size() + ext_len) >= max_name_len)) {
            result += "XXX";
            if (last_dot_pos) {
                result += last_dot_pos;
            }
            break;
        }
    }
    return result;
}
