#include "CdictServer.h"
#include "Index.h"
#include "MultiIndex.h"

#include <irt/common/lib/logger/logger.h>

#include <rt-research/broadmatching/scripts/cpp-source/common/norm/NormDict.h>
#include <rt-research/broadmatching/scripts/cpp-source/common/HuffmanTree.h>

#include <util/generic/string.h>
#include <library/cpp/getopt/small/last_getopt.h>

#include <clocale>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>

#include <getopt.h>
#include <sys/stat.h>

/**
 * Interactive console loop: reads commands from stdin and prints lookup results.
 *
 * Each command is a line of the form "<field0>\t<field1>"; both fields are passed,
 * in that order, to Index::FindValue. The literal command "exit" or the end of the
 * input stream terminates the loop.
 *
 * @param index  index to query.
 */
void DoNoServerMode(const Index& index) {
    std::string cmd;

    while (std::cin.good()) {
        std::cout << "> ";

        // Stop on EOF / read error instead of treating the failed read as an
        // empty (and therefore "bad") command.
        if (!std::getline(std::cin, cmd)) {
            break;
        }

        if (cmd == "exit") {
            break;
        }

        TVector<TString> data = StringSplitter(cmd).Split('\t');
        if (data.size() != 2) {
            std::cout << "bad command" << std::endl;
            continue;
        }

        // value_type is filled in by FindValue.
        IndexValueType value_type;
        const unsigned char* value_ptr = index.FindValue(data[0].c_str(), data[1].c_str(), value_type);
        if (!value_ptr) {
            std::cout << "no data" << std::endl;
        } else {
            std::cout << index.GetValue(value_ptr, value_type) << std::endl;
        }
    }
}

/**
 * Scans a tab-separated data file and infers, per dictionary, how its keys and
 * values can be stored.
 *
 * Every non-empty line is expected to look like "<dict_name>\t<key>[\t<value>]";
 * lines without a tab after the dictionary name are ignored.
 *
 * Key type:   starts as IKT_UINT32 and is downgraded to IKT_COMPRESSED as soon as
 *             one key contains a non-digit character.
 * Value type: for version >= 2 starts as IVT_UINT32; becomes IVT_UINT32V when a
 *             value contains a space, and IVT_COMPRESSED when it contains any other
 *             non-digit character. For version < 2 it is always IVT_COMPRESSED.
 *             After the scan, any dictionary still marked IVT_UINT32 is switched
 *             to IVT_COMPRESSED (plain IVT_UINT32 values are not used).
 *
 * @param output     map updated in place; existing entries are refined, new
 *                   dictionaries are inserted.
 * @param file_name  path of the data file; if it cannot be opened the function
 *                   returns without touching `output`.
 * @param version    index format version.
 */
void DetermineDictsProperties(TMap<TString, DictProperties>& output, const char *file_name, int version) {
    std::ifstream stream(file_name);

    if (stream.fail()) {
        return;
    }

    // isdigit() is undefined for negative char values (e.g. bytes of UTF-8 text),
    // so always go through unsigned char.
    const auto is_digit = [](char c) {
        return isdigit(static_cast<unsigned char>(c)) != 0;
    };

    // std::string instead of a fixed 1 MiB stack buffer: no large stack frame and
    // no silent stop of the scan at the first over-long line.
    std::string line;
    while (std::getline(stream, line)) {
        if (line.empty() || line[0] == '\0') {
            continue;
        }

        // The line is parsed in place with C-string semantics; std::string
        // guarantees a terminating NUL after the last character.
        char* dict_name = &line[0];
        char* key = dict_name;

        // Split off the dictionary name at the first tab.
        while (*key && *key != '\t') {
            ++key;
        }
        if (!*key) {
            continue;
        }
        *key = 0;
        ++key;

        TMap<TString, DictProperties>::iterator it = output.find(dict_name);
        if (it == output.end()) {
            DictProperties dp;
            dp.key_type = IKT_UINT32;
            dp.value_type = (version >= 2 ? IVT_UINT32 : IVT_COMPRESSED);
            it = output.insert(std::pair<std::string, DictProperties>(dict_name, dp)).first;
        }
        DictProperties& props = it->second;

        // Check that the key can be represented as an integer.
        if (props.key_type != IKT_COMPRESSED) {
            for (; *key && *key != '\t'; ++key) {
                if (!is_digit(*key)) {
                    props.key_type = IKT_COMPRESSED;
                    break;
                }
            }
        }

        // Check that the value can be represented as integer(s).
        if (version >= 2 && props.value_type != IVT_COMPRESSED) {
            // `key` points somewhere inside the key field (or at its end);
            // advance to the separator that starts the value field.
            const char* value = key;
            while (*value && *value != '\t') {
                ++value;
            }
            // Step over the tab only if there is one: a line without a value
            // field must not be read past its terminating NUL.
            if (*value == '\t') {
                ++value;
            }

            for (; *value; ++value) {
                if (is_digit(*value)) {
                    continue;
                }
                if (*value == ' ') {
                    props.value_type = IVT_UINT32V;
                } else {
                    props.value_type = IVT_COMPRESSED;
                    break;
                }
            }
        }
    }

    // Values of type IVT_UINT32 are not used: fall back to compressed storage.
    for (auto& entry : output) {
        if (entry.second.value_type == IVT_UINT32) {
            entry.second.value_type = IVT_COMPRESSED;
        }
    }
}

int GenerateData(const TString& data_file, bool debug, bool gen_dict, bool gen_index, int version, const TString& outputFile, const TString& dictFile) {
    TMap<TString, DictProperties> dicts_properties;

    // определяем свойства словарей
    if(version > 0) {
        INFO_LOG << "checking dict properties (version " << version << ")";
        DetermineDictsProperties(dicts_properties, data_file.data(), version);
        for (auto it = dicts_properties.begin(); it != dicts_properties.end(); it++) {
            TStringBuilder log_buf;
            log_buf << "dict " << it->first << ": ";

            if(it->second.key_type == IKT_COMPRESSED) {
                log_buf << "compressed ";
            } else if(it->second.key_type == IKT_UINT32) {
                log_buf << "uint32 ";
            }

            if(it->second.value_type == IVT_COMPRESSED) {
                log_buf << "compressed ";
            } else if(it->second.value_type == IVT_UINT32) {
                log_buf << "uint32 ";
            } else if(it->second.value_type == IVT_UINT32V) {
                log_buf << "uint32v ";
            }

            INFO_LOG << log_buf;
        }
    }

    TString dict_file = data_file + ".dict";
    if(gen_dict && !gen_index && outputFile) {
        if (outputFile) {
            dict_file = outputFile;
        }
    } else {
        if (dictFile) {
            dict_file = dictFile;
        }
    }

    TString index_file = data_file + ".index";
    if(gen_index && !gen_dict) {
        if (outputFile) {
            index_file = outputFile;
        }
    }

    HuffmanTree huffman;

    // генерация кодов Хаффмана
    if(gen_dict) {
//        huffman.GenerateFromFile(data_file, dicts_properties); // old single-thread version
        huffman.GenerateFromFileParallel(data_file.data(), dicts_properties); // multithread version
        if(!gen_index) {
            huffman.Save(dict_file.c_str());
        }
    }

    // генерация индекса
    if(gen_index) {
        if(!gen_dict && !huffman.Load(dict_file.c_str())) {
            ERROR_LOG << "can't load dict " << dict_file;
            return 1;
        }

        Index index(debug, huffman);
        index.SetDictsProperties(dicts_properties);
//        if(!index.Load(data_file)) { // old single-thread version
        if(!index.LoadParallel(data_file.data())) { // multithread version
            ERROR_LOG << "can't open " << data_file;
            return 1;
        }
        INFO_LOG << index.NumDicts() << " dicts totally";

        if(!gen_dict) {
            index.SaveBinary(index_file.c_str(), version);
        } else {
            std::string cdict_file = data_file + ".cdict";
            if (outputFile) {
                index_file = outputFile;
            }

            std::ofstream stream(cdict_file, std::ios::binary);

            huffman.Save(stream);
            index.SaveBinary(stream, version);
        }
    }

    return 0;
}

/**
 * Collects the argument of every `--single-file` option found in the command line.
 *
 * The command line is walked with getopt_long using a table that knows only
 * `--single-file`; every other option result is skipped, and getopt's own error
 * messages are turned off.
 *
 * @param argc  argument count as received by main().
 * @param argv  argument vector as received by main().
 * @return the option arguments in the order getopt_long reports them.
 */
std::vector<std::string> GetSingleFiles(int argc, char** argv) {
    // Options outside of our table are expected here, keep getopt quiet about them.
    opterr = 0;

    static struct option known_options[] = {
        {"single-file", required_argument, nullptr, 0},
        {nullptr, 0, nullptr, 0},
    };

    std::vector<std::string> files;
    int matched_index = 0;

    for (;;) {
        const int rc = getopt_long(argc, argv, "", known_options, &matched_index);
        if (rc == -1) {
            break;
        }
        // 0 is the `val` of the only table entry, i.e. a recognised --single-file.
        if (rc == 0) {
            files.emplace_back(optarg);
        }
    }

    return files;
}

/**
 * Copies a data file into a backup directory by invoking the `cp` utility.
 *
 * @param data_file         path of the file to copy.
 * @param backup_directory  destination; must be an existing directory.
 * @return true when the copy command exits with status 0, false when the
 *         directory is missing / not a directory or the copy fails.
 */
bool DumpDataFile(const char* data_file, const char* backup_directory) {
    INFO_LOG << "Dump data file " << data_file << " into " << backup_directory;

    // The stat() result must be checked: on failure `sb` is left uninitialised
    // and S_ISDIR would inspect garbage.
    struct stat sb;
    if (stat(backup_directory, &sb) != 0 || !S_ISDIR(sb.st_mode)) {
        ERROR_LOG << "directory '" << backup_directory << "' doesn't exist";
        return false;
    }

    // Wrap an argument in single quotes for the shell; an embedded single quote
    // is emitted as '\'' (close quote, escaped quote, reopen quote). Without this
    // paths with spaces or shell metacharacters break (or alter) the command.
    const auto shell_quote = [](const char* arg) {
        std::string quoted = "'";
        for (const char* p = arg; *p; ++p) {
            if (*p == '\'') {
                quoted += "'\\''";
            } else {
                quoted += *p;
            }
        }
        quoted += '\'';
        return quoted;
    };

    // "--" ends option parsing so a path starting with '-' is not taken as a flag.
    const std::string copy_system_call = "cp -- " + shell_quote(data_file) + " " + shell_quote(backup_directory);
    const int copy_status_code = std::system(copy_system_call.c_str());
    if (copy_status_code) {
        WARNING_LOG << "can't copy (" << copy_system_call << ")" << "; status code " << copy_status_code;
        return false;
    }

    return true;
}

/**
 * Backs up the raw data file into the given directory.
 *
 * Logs the target directory, delegates the copy to DumpDataFile and logs a
 * warning when that call reports failure.
 *
 * @param data_file      path of the file to back up.
 * @param raw_files_dir  directory receiving the copy.
 * @return the result of DumpDataFile.
 */
bool DumpData(const TString& data_file, const TString& raw_files_dir) {
    INFO_LOG << "raw files dir: " << raw_files_dir;

    const bool dumped = DumpDataFile(data_file.data(), raw_files_dir.data());
    if (!dumped) {
        WARNING_LOG << "failed to dump single file: " << data_file;
    }
    return dumped;
}

int main(int argc, char** argv) {
    NIRT::InitLog<NIRT::TTabLoggerFormatter>({.ConsoleOnly=true});
    std::setlocale(LC_ALL, "ru_RU.utf8");

    TString data_file;
    TString norm_config;
    TString single_file;
    TString logging_level;
    TString mode;
    TString raw_files_dir;
    TString output_file;
    TString dict_file;
    TString test_file;

    bool keep_raw_files = false;
    bool debug = false;

    uint32_t port;
    uint32_t version;

    NLastGetopt::TOpts opts;
    opts.AddLongOption("data-file").Optional().StoreResult(&data_file);
    opts.AddLongOption("norm-config").Optional().StoreResult(&norm_config);
    opts.AddLongOption("single-file").Optional().StoreResult(&single_file);
    opts.AddLongOption("logging-level").Optional().StoreResult(&logging_level);
    opts.AddLongOption("port").Optional().DefaultValue("0").StoreResult(&port);
    opts.AddLongOption("debug").Optional().StoreTrue(&debug);
    opts.AddLongOption("mode").Optional().DefaultValue("server").StoreResult(&mode);
    opts.AddLongOption("keep-raw-files").Optional().StoreTrue(&keep_raw_files);
    opts.AddLongOption("raw-files-dir").Optional().StoreResult(&raw_files_dir);
    opts.AddLongOption("output-file").Optional().StoreResult(&output_file);
    opts.AddLongOption("dict-file").Optional().StoreResult(&dict_file);
    opts.AddLongOption("version").Optional().DefaultValue("2").StoreResult(&version);
    opts.AddLongOption("test-file").Optional().StoreResult(&test_file);

    NLastGetopt::TOptsParseResult (&opts, argc, argv);

    if(!data_file && !single_file) {
        ERROR_LOG << "no data file";
        return 0;
    }

    INFO_LOG << "mode: " << mode;

    if(mode == "server" || mode == "console" || mode == "info") {
        INFO_LOG << "loading data... ";
        std::vector<Index*> indeces;
        if (single_file) {
            std::vector<std::string> single_files = GetSingleFiles(argc, argv);
            for (const std::string& single_file : single_files) {
                std::ifstream single_stream(single_file, std::ios::binary);
                if (single_stream.fail()) {
                    ERROR_LOG << "can't open " << single_file;
                    return 1;
                }
                HuffmanTree* huffman_tree_ptr = new HuffmanTree();;
                if (!huffman_tree_ptr->Load(single_stream)) {
                    ERROR_LOG << "can't load dict " << single_file;
                    return 1;
                }
                Index* index_ptr = new Index(debug, *huffman_tree_ptr);
                if (!index_ptr->LoadBinary(single_stream)) {
                    ERROR_LOG << "can't open " << data_file;
                    return 1;
                }
                indeces.push_back(index_ptr);
                INFO_LOG << index_ptr->NumDicts() << " dicts totally";
            }
        } else {
            // словарь Хаффмана
            HuffmanTree* huffman_ptr = new HuffmanTree();
            std::string default_dict_file = std::string(data_file);
            size_t pos;
            if((pos = default_dict_file.find(".index")) != std::string::npos) {
                default_dict_file = default_dict_file.substr(0, pos);
            }
            default_dict_file += ".dict";

            if(!dict_file) {
                dict_file = default_dict_file;
            }

            if(!huffman_ptr->Load(dict_file.c_str())) {
                ERROR_LOG << "can't load dict " << dict_file;
                return 1;
            }
            // индексы
            Index* index_ptr = new Index(debug, *huffman_ptr);
            if(!index_ptr->LoadBinary(data_file.data())) {
                ERROR_LOG << "can't open " << data_file;
                return 1;
            }
            indeces.push_back(index_ptr);
            INFO_LOG << index_ptr->NumDicts() << " dicts totally";
        }
        // данные для нормализации
        NormDict norm;
        if(!norm_config || !norm.LoadConfig(norm_config.data())) {
            ERROR_LOG << "can't load norm config " << norm_config;
            return 1;
        }
        INFO_LOG << norm.NumLanguages() << " languages";
        MultiIndex multi_index(indeces.begin(), indeces.end());
        if(mode == "console") {
            DoNoServerMode(*indeces[0]);
        } else if(mode == "server") {
            CdictServer server(multi_index, norm, port);
            if (logging_level) {
                if (logging_level == "Full")
                    server.setLoggingLevel(Full);
                else if (logging_level == "None")
                    server.setLoggingLevel(None);
            }
            server.Run();
        } else if(mode == "info") {
            indeces[0]->DumpStats();
        }
    } else if(mode == "gen-index" || mode == "gen-dict" || mode == "gen") {
        bool gen_index = mode == "gen-index" || mode == "gen";
        bool gen_dict = mode == "gen-dict" || mode == "gen";

        if (keep_raw_files && !DumpData(data_file, raw_files_dir)) {
            WARNING_LOG << "failed to dump data";
        }
        return GenerateData(data_file, debug, gen_dict, gen_index, version, output_file, dict_file);
    } else if(mode == "test-dict") {
        HuffmanTree tree;

        if(!tree.Load(data_file.data())) {
            ERROR_LOG << "can't load dict";
            return 1;
        }

        unsigned BUFFER_SIZE = 1024 * 1024;
        unsigned char buffer[BUFFER_SIZE + 1];

        if(test_file) {
            char cmd_buffer[BUFFER_SIZE];

            std::ifstream stream(test_file.data());
            while(stream.good()) {
                stream.getline(cmd_buffer, BUFFER_SIZE);

                if(!cmd_buffer[0]) {
                    continue;
                }

                TVector<TString> fields = StringSplitter(cmd_buffer).Split('\t');
                for(unsigned i = 1; i < 3; i++) {
                    tree.Compress(fields[i], &buffer[0], &buffer[BUFFER_SIZE]);
                    const auto buf = tree.Decompress(buffer);
                    if(buf != fields[i]) {
                        ERROR_LOG << fields[i] << " -> " << buf;
                    }
                }
            }
        } else {
            while(1) {
                std::string str;

                std::cout << "> ";
                std::getline(std::cin, str);

                unsigned len = tree.Compress(str, &buffer[0], &buffer[BUFFER_SIZE - 1]);
                std::cout << len << std::endl;
                for(unsigned i = 0; i < len; i++) {
                    for(unsigned j = 0; j < 8; j++) {
                        std::cout << (unsigned)((buffer[i] >> j) & 1);
                    }
                }
                std::cout << std::endl;
            }
        }
    } else {
        ERROR_LOG << "unknown mode";
    }

    return 0;
}
