#include <maps/libs/log8/include/log8.h>
#include <maps/libs/cmdline/include/cmdline.h>
#include <maps/libs/common/include/exception.h>

#include <mapreduce/yt/interface/client.h>

#include <util/charset/utf8.h>
#include <util/charset/wide.h>

#include <algorithm>
#include <cstdlib>
#include <cwctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>

using namespace NYT;

namespace {
    void ReadNumbers(const TNode &node, std::vector<TUtf16String> &numbers) {
        static const TString OBJECT_TYPE_NAME = "house_number_sign";

        const TVector<TNode>& objectsNodes = node.AsList();
        for (size_t i = 0; i < objectsNodes.size(); i++) {
            const TNode &objectNode = objectsNodes[i];

            if (objectNode["type"].AsString() != OBJECT_TYPE_NAME)
                continue;

            numbers.emplace_back( UTF8ToWide(objectNode["num"].AsString()) );
        }
    }
} //namespace

int main(int argc, const char** argv) try {
    constexpr size_t MAX_CHARS_CNT = 5;

    Initialize(argc, argv);

    maps::cmdline::Parser parser("Calculate statistic of house number sign dataset");

    maps::cmdline::Option<std::string> inputTable = parser.string("input")
        .required()
        .help("Path to YT table with dataset");

    parser.parse(argc, const_cast<char**>(argv));

    IClientPtr client = CreateClient("hahn");
    TTableReaderPtr<TNode> reader = client->CreateTableReader<TNode>(inputTable.c_str());

    INFO() << "Process items";
    std::vector<TUtf16String> numbers;
    std::map<wchar16, size_t> charsCount;
    std::vector<size_t> withAmountChars(MAX_CHARS_CNT + 1, 0);
    std::vector<size_t> withAmountDigits(MAX_CHARS_CNT + 1, 0);
    for (int processedItems = 0; reader->IsValid(); reader->Next(), processedItems++) {
        const TNode& inpRow = reader->GetRow();
        numbers.clear();
        ReadNumbers(inpRow["objects"], numbers);
        for (size_t i = 0; i < numbers.size(); i++) {
            size_t digitsCount = 0;
            const TUtf16String& number = numbers[i];
            for (size_t chIdx = 0; chIdx < number.size(); chIdx++) {
                const wchar16 ch = number[chIdx];
                charsCount[ch]++;
                if (std::iswdigit(ch))
                    digitsCount++;
            }
            if (digitsCount >= MAX_CHARS_CNT)
                digitsCount = MAX_CHARS_CNT;
            withAmountDigits[digitsCount]++;
            size_t charsCount = numbers[i].size();
            if (charsCount >= MAX_CHARS_CNT)
                charsCount = MAX_CHARS_CNT;
            withAmountChars[charsCount]++;
        }
        if ((processedItems + 1) % 100 == 0)
            INFO() << "Processed items: " << processedItems + 1;
    }
    INFO() << "House Numbers with chars count:";
    for (size_t i = 0; i < MAX_CHARS_CNT; i++)
        INFO() << "       " << i << ": " << withAmountChars[i];
    INFO() << "    >= " << MAX_CHARS_CNT << ": " << withAmountChars[MAX_CHARS_CNT];

    INFO() << "House Numbers with digits count:";
    for (size_t i = 0; i < MAX_CHARS_CNT; i++)
        INFO() << "       " << i << ": " << withAmountDigits[i];
    INFO() << "    >= " << MAX_CHARS_CNT << ": " << withAmountDigits[MAX_CHARS_CNT];

    INFO() << "Amount chars:";
    for (auto cit = charsCount.cbegin(); cit != charsCount.cend(); cit++) {
        const TUtf16String wch(&cit->first, 1);
        INFO() << "       " << WideToUTF8(wch) << " | "
               << std::hex << "0x" << std::setw(4) << std::setfill('0') << (int)cit->first << " | "
               << std::dec << std::setw(5) << std::setfill(' ') << (int)cit->first << " | "
               << cit->second;
    }
    return EXIT_SUCCESS;
}
catch (const maps::Exception& e) {
    FATAL() << "Worker failed: " << e;
    return EXIT_FAILURE;
}
catch (const std::exception& e) {
    FATAL() << "Worker failed: " << e.what();
    return EXIT_FAILURE;
}
