#include "static_zmap.h"

#include <library/cpp/logger/global/global.h>

#include <util/system/env.h>
#include <util/string/split.h>

#include <fcntl.h>
#include <sys/mman.h>

// Builds a header from a NUL-terminated signature string and a one-byte
// format version.
StaticZmap::Header::Header(const char* _signature, char _version) {
  // NOTE(review): strcpy is unbounded, so `_signature` (including its
  // terminating NUL) must fit into `signature`; the capacity of that member is
  // declared in the header and is not visible here -- confirm at call sites.
  strcpy(signature, _signature);
  // Assigned after the copy, so the version byte always holds `_version`.
  version = _version;
}

// Default header: empty signature string and version 0.
StaticZmap::Header::Header() {
  signature[0] = '\0';  // only the first byte is written
  version = '\0';
}

// Reads a header from `stream` as a raw memory image: exactly sizeof(Header)
// bytes are copied straight over the object. Returns the stream so the caller
// can inspect its state.
std::istream& operator>>(std::istream& stream, StaticZmap::Header& header) {
  char* const raw_bytes = reinterpret_cast<char*>(&header);
  return stream.read(raw_bytes, sizeof(StaticZmap::Header));
}

// Opens `data_file`, reads its header and Huffman tree, then memory-maps the
// whole file read-only and prepares the binary-search bounds.
//
// Parameters:
//   data_file              path of the dictionary file.
//   words_delim            default word delimiter used by At(key).
//   keep_key_unencoded     when true, keys are looked up as raw bytes instead
//                          of being Huffman-encoded.
//   elements_delim         byte that terminates every record.
//   fields_delim           byte that separates key and value inside a record.
//   escape                 escape byte used inside encoded data.
//   escaped_elements_delim byte that follows `escape` for an elements_delim.
//   escaped_fields_delim   byte that follows `escape` for a fields_delim.
//
// Failures are reported through ERROR_LOG (the constructor does not throw).
// After the first failure loading stops and the object is left in a defined
// "empty" state: storage_ == nullptr, storage_size_ == 0,
// left_bound_ == nullptr, search_range_ == 0. Previously execution continued
// and ended up dereferencing MAP_FAILED.
StaticZmap::StaticZmap(
    const char* data_file,
    char words_delim,
    bool keep_key_unencoded,
    char elements_delim,
    char fields_delim,
    char escape,
    char escaped_elements_delim,
    char escaped_fields_delim)
    : data_file_descriptor(-1),  // -1 == "no descriptor"; 1 would be stdout
      words_delim_(words_delim),
      keep_key_unencoded_(keep_key_unencoded),
      elements_delim_(elements_delim),
      fields_delim_(fields_delim),
      escape_(escape),
      escaped_elements_delim_(escaped_elements_delim),
      escaped_fields_delim_(escaped_fields_delim) {
  // Puts every storage-related member into the well-defined empty state.
  const auto mark_unloaded = [this]() {
    storage_ = nullptr;
    storage_size_ = 0;
    dict_offset_ = 0;
    left_bound_ = nullptr;
    search_range_ = 0;
  };

  // Pass 1: stream the header and the Huffman tree; the stream position right
  // after the tree is where the dictionary records start.
  std::ifstream data_stream(data_file);
  if (!data_stream.is_open()) {
    ERROR_LOG << "can't read \"" << data_file << "\"";
    mark_unloaded();
    return;
  }
  StaticZmap::Header header;
  data_stream >> header;
  if (!huffman_tree_.Load(data_stream, false)) {
    ERROR_LOG << "can't load huffman tree from \"" << data_file << "\"";
    mark_unloaded();
    return;
  }
  dict_offset_ = data_stream.tellg();
  data_stream.close();

  // Pass 2: map the whole file so lookups can scan it in place.
  data_file_descriptor = open(data_file, O_RDONLY);
  if (data_file_descriptor == -1) {
    ERROR_LOG << "can't read \"" <<  data_file << "\"";
    mark_unloaded();
    return;
  }
  storage_size_ = FileSizeInBytes(data_file);
  void* const mapping = mmap(nullptr, storage_size_, PROT_READ,
                             MAP_SHARED, data_file_descriptor, 0);
  if (mapping == MAP_FAILED) {
    ERROR_LOG << "can't map \"" << data_file << "\"";
    mark_unloaded();
    return;
  }
  storage_ = static_cast<const char*>(mapping);

  // Pinning the mapping in RAM is opt-in: MR_BROADMATCH must be set to a
  // non-empty value other than "0". A failed mlock is only informational.
  TString env_mr_broadmatch = GetEnv("MR_BROADMATCH");

  if ( env_mr_broadmatch && env_mr_broadmatch != "0" ){
    int mlock_result = mlock(storage_, storage_size_);
    if (mlock_result != 0) {
      INFO_LOG << "can't mlock \"" << data_file << "\", message \"" << strerror(errno) << "\"";
    }
  }
  SetSearchParams();
}

// Releases the memory mapping and the file descriptor owned by the object.
StaticZmap::~StaticZmap() {
    // A failed mmap() yields MAP_FAILED rather than a null pointer, so both
    // values mean "nothing is mapped".
    if (storage_ != nullptr && storage_ != MAP_FAILED) {
        // Remove the RAM lock while the range is still mapped; calling
        // munlock() on an already unmapped range can only fail.
        munlock(storage_, storage_size_);
        munmap(const_cast<char*>(storage_), storage_size_);
    }
    // Close the descriptor if it is still open.
    if (data_file_descriptor != -1 && fcntl(data_file_descriptor, F_GETFD) != -1) {
        close(data_file_descriptor);
    }
}

std::string StaticZmap::At(const char* key, char words_delim) const {
  std::vector<std::string> key_words = StringSplitter(key).Split(words_delim);
  std::vector<char> encoded_key;
  if (keep_key_unencoded_) {
    encoded_key = std::vector<char>(key, key + strlen(key));
  } else if (!huffman_tree_.Encode(key_words, encoded_key)) {
    return "";
  }
  encoded_key = ApplyEscape(encoded_key);
  encoded_key.push_back(fields_delim_);
  const char* left_bound = left_bound_;
  size_t search_range = search_range_;
  while (search_range != 0) {
    size_t shift = (search_range + 1) >> 1;
    const char* middle_bound = left_bound + shift;
    const char* candidate = middle_bound;
    while (*candidate != elements_delim_) {
      ++candidate;
    }
    ++candidate;
    if (memcmp(encoded_key.data(), candidate, encoded_key.size()) < 0) {
      search_range = --shift;
    } else {
      left_bound = middle_bound;
      search_range -= shift;
    }
  }
  ++left_bound;
  if (memcmp(encoded_key.data(), left_bound, encoded_key.size()) != 0) {
    return "";
  }
  const char* value_first = left_bound + encoded_key.size();
  const char* value_last = value_first;
  while (*value_last != elements_delim_) {
    ++value_last;
  }
  std::vector<char> encoded_value(value_first, value_last);
  encoded_value = ApplyUnescape(encoded_value);
  return huffman_tree_.Decode(encoded_value, words_delim);
}

std::string StaticZmap::At(const char* key) const {
  return At(key, words_delim_);
}

// Returns the size of `file` in bytes, or 0 when the file cannot be opened or
// its size cannot be determined.
size_t StaticZmap::FileSizeInBytes(const char* file) {
  FILE* file_handle = fopen(file, "rb");
  if (file_handle == nullptr) {
    return 0;
  }
  size_t result = 0;
  if (fseek(file_handle, 0, SEEK_END) == 0) {
    // ftell reports failure as -1L; converting that straight to size_t would
    // produce an enormous bogus size, so only positive offsets are accepted.
    const long end_position = ftell(file_handle);
    if (end_position > 0) {
      result = static_cast<size_t>(end_position);
    }
  }
  fclose(file_handle);
  return result;
}

// Derives the binary-search window from the mapped file: left_bound_ is placed
// one byte before the dictionary offset, and search_range_ is shrunk until
// left_bound_[search_range_] is an elements_delim_ byte other than the one
// (if any) that ends the file.
void StaticZmap::SetSearchParams() {
  left_bound_ = storage_ + dict_offset_ - 1;
  search_range_ = storage_size_ - dict_offset_;
  // Skip a delimiter sitting at the very end of the window...
  if (left_bound_[search_range_] == elements_delim_) {
    --search_range_;
  }
  // ...then walk back to the delimiter that precedes it.
  for (; left_bound_[search_range_] != elements_delim_; --search_range_) {
  }
}

std::vector<char> StaticZmap::ApplyEscape(
    const std::vector<char>& encoded_data) const {
  std::vector<char> result;
  for (char byte : encoded_data) {
    if (byte == elements_delim_) {
      result.push_back(escape_);
      result.push_back(escaped_elements_delim_);
    } else if (byte == fields_delim_) {
      result.push_back(escape_);
      result.push_back(escaped_fields_delim_);
    } else if (byte == escape_) {
      result.push_back(escape_);
      result.push_back(escape_);
    } else {
      result.push_back(byte);
    }
  }
  return result;
}

// Reverses ApplyEscape: every two-byte sequence that starts with escape_ is
// turned back into the single byte it stands for; all other bytes are copied
// unchanged.
//
// An escape_ followed by an unrecognised byte produces no output (both bytes
// are dropped). An escape_ that is the very last byte is dropped as well;
// previously this case read one element past the end of `encoded_data`.
std::vector<char> StaticZmap::ApplyUnescape(
    const std::vector<char>& encoded_data) const {
  std::vector<char> result;
  result.reserve(encoded_data.size());  // the output is never longer
  const size_t size = encoded_data.size();
  for (size_t byte_index = 0; byte_index < size; ++byte_index) {
    const char byte = encoded_data[byte_index];
    if (byte != escape_) {
      result.push_back(byte);
      continue;
    }
    // Consume the byte that follows the escape, if there is one.
    ++byte_index;
    if (byte_index == size) {
      break;  // dangling escape at the end of the input
    }
    const char next_byte = encoded_data[byte_index];
    // Exactly one branch may fire per escape sequence (the original lacked
    // the `else` before the fields-delimiter test).
    if (next_byte == escaped_elements_delim_) {
      result.push_back(elements_delim_);
    } else if (next_byte == escaped_fields_delim_) {
      result.push_back(fields_delim_);
    } else if (next_byte == escape_) {
      result.push_back(escape_);
    }
  }
  return result;
}
