import os
import logging
from datetime import datetime

from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter, SandboxBoolParameter
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.sandboxapi import RELEASE_PRESTABLE

from sandbox.projects.geosuggest.resources import (
    MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY_TSV,
    MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY_BIN,
    MAPS_GEO_SUGGEST_GEO_SEARCH_NGRAMS_TSV,
    MAPS_GEO_SUGGEST_GEO_SEARCH_NGRAMS_BIN,
)
from sandbox.projects.geosuggest.common.parameters import (
    GeoSuggestSandboxBinParameter,
    SuggestBuilderParameter,
    SandboxVaultOwner,
    SandboxVaultYtTokenName,
)
from sandbox.projects.geosuggest.common.preview import create_task_preview_resource, get_task_preview_resource
from sandbox.projects.geosuggest.common.utils import get_or_default_with_ctx_update
from sandbox.projects.geosuggest.common import qp
from sandbox.projects.common.utils import get_or_default


class DaysCount(SandboxIntegerParameter):
    """Integer task parameter: how many recent days of geo search history to process.

    Defaults to 7.
    """

    default_value = 7
    name = 'days_count'
    description = 'Days count of recent geo search history to process'


class MinQueryCount(SandboxIntegerParameter):
    """Integer task parameter: minimal query count threshold.

    Per its description, queries whose count in a region is below this value
    are discarded. Defaults to 10.
    """

    default_value = 10
    name = 'min_query_count'
    description = 'Queries with less count value in region will be discarded'


class ReleaseOnSuccess(SandboxBoolParameter):
    """Boolean task parameter: release the output resources after execution.

    Disabled by default.
    """

    default_value = False
    name = 'release_on_success'
    description = 'If true, the output resources will be released after execution'


class BuildMapsGeoSuggestGeoSearchHistory(qp.GeoSuggestQPTask):
    """Sandbox task that builds the Geo Search history dictionary and n-grams.

    The task runs two external tools taken from the geo suggest binaries
    resource (``make_dictionary`` and ``make_trie``), publishes each tool's
    output directories as a TSV/binary resource pair, stores a short text
    preview of the TSV output and, when requested, spawns a ``RELEASE_ANY``
    subtask for itself.
    """

    type = 'BUILD_MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY'

    execution_space = 32 * 1024
    disk_space = 32 * 1024
    required_ram = 60 * 1024  # 60 Gb
    TIMEOUT = 36 * 3600

    input_parameters = [
        GeoSuggestSandboxBinParameter,
        DaysCount,
        MinQueryCount,
        SuggestBuilderParameter,
        SandboxVaultOwner,
        SandboxVaultYtTokenName,
        ReleaseOnSuccess,
    ]

    environment = (
        environments.PipEnvironment("yandex-yt", "0.7.34-0"),
        environments.PipEnvironment("yandex-yt-yson-bindings-skynet"),
    )

    def on_execute(self):
        """Build both resource pairs, save the preview and optionally release."""
        geo_suggest_sandbox_bin = self.sync_resource(get_or_default_with_ctx_update(self.ctx, GeoSuggestSandboxBinParameter))

        vault_owner = get_or_default(self.ctx, SandboxVaultOwner)
        vault_name = get_or_default(self.ctx, SandboxVaultYtTokenName)
        yt_token = self.get_vault_data(vault_owner, vault_name)

        release_on_success = get_or_default(self.ctx, ReleaseOnSuccess)

        days_count = get_or_default(self.ctx, DaysCount)
        min_query_count = get_or_default(self.ctx, MinQueryCount)
        suggest_data_builder_path = self.sync_resource(get_or_default(self.ctx, SuggestBuilderParameter))

        task_dir = self.abs_path()

        output_dictionary_tsv_dir = os.path.join(task_dir, "output_dictionary_tsv")
        output_dictionary_bin_dir = os.path.join(task_dir, "output_dictionary_bin")

        self.make_dictionary(
            yt_token=yt_token,
            make_dictionary_executable=os.path.join(geo_suggest_sandbox_bin, "make_dictionary"),
            min_query_count=min_query_count,
            days_count=min(days_count, 7),  # NOTE: temporary, we do not use this dictionary, so restrict the history length
            output_tsv_dir=output_dictionary_tsv_dir,
            output_bin_dir=output_dictionary_bin_dir,
            suggest_data_builder_executable=suggest_data_builder_path
        )

        output_ngrams_tsv_dir = os.path.join(task_dir, "output_ngrams_tsv")
        output_ngrams_bin_dir = os.path.join(task_dir, "output_ngrams_bin")

        self.make_ngrams(
            yt_token=yt_token,
            make_trie_executable=os.path.join(geo_suggest_sandbox_bin, "make_trie"),
            min_query_count=min_query_count,
            days_count=days_count,
            output_tsv_dir=output_ngrams_tsv_dir,
            output_bin_dir=output_ngrams_bin_dir,
            trie_compiler_executable=os.path.join(geo_suggest_sandbox_bin, "triecompiler"),
        )

        # The base names of these files must stay in sync with the list in
        # load_preview(), which looks the previews up by "<name>.preview".
        filepaths_for_preview = [
            os.path.join(output_dictionary_tsv_dir, "requests.tsv"),
            os.path.join(output_dictionary_tsv_dir, "streams.dat"),
            os.path.join(output_ngrams_tsv_dir, "ngrams.tsv"),
        ]

        self.save_preview(filepaths_for_preview)

        if release_on_success:
            self.create_subtask(
                task_type="RELEASE_ANY",
                inherit_notifications=True,
                input_parameters={"release_task_id": self.id, "release_status": RELEASE_PRESTABLE},
                description="Autorelease task #{}".format(self.id))
            logging.info("Task release started")
        else:
            logging.info("Task release skipped")

    def make_dictionary(self, yt_token, make_dictionary_executable, min_query_count, days_count, output_tsv_dir, output_bin_dir, suggest_data_builder_executable):
        """Run ``make_dictionary`` and publish its TSV and binary output.

        :param yt_token: YT token passed to the tool via ``--yt-token``
        :param make_dictionary_executable: path to the tool to run
        :param min_query_count: value for ``--min-query-count``
        :param days_count: value for ``--days-count``
        :param output_tsv_dir: directory for the TSV output (created here)
        :param output_bin_dir: directory for the binary output (created here)
        :param suggest_data_builder_executable: value for ``--dictionary-builder``
        """
        make_folder(output_tsv_dir)
        make_folder(output_bin_dir)

        # Taken before the tool runs, so "until_date" is the start date of the build.
        current_date = datetime.now().strftime("%Y-%m-%d")

        # NOTE(review): the token is passed as a command-line argument, so it
        # may be visible in the process list and in whatever run_process logs.
        # Confirm whether the tool can read it from the environment instead.
        cmd = [
            make_dictionary_executable,
            "--yt-token", yt_token,
            "--min-query-count", str(min_query_count),
            "--days-count", str(days_count),
            "--output-tsv-directory", output_tsv_dir,
            "--output-dictionary-directory", output_bin_dir,
            "--dictionary-builder", suggest_data_builder_executable
        ]

        run_process(cmd, log_prefix="make_dictionary")

        self._publish_resource_pair(
            attributes={
                "min_query_count": min_query_count,
                "days_count": days_count,
                "until_date": current_date
            },
            tsv_description="User friendly Geo Search history",
            tsv_dir=output_tsv_dir,
            tsv_type=MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY_TSV,
            bin_description="Geo Search history binary dictionary",
            bin_dir=output_bin_dir,
            bin_type=MAPS_GEO_SUGGEST_GEO_SEARCH_HISTORY_BIN,
        )

    def make_ngrams(self, yt_token, make_trie_executable, min_query_count, days_count, output_tsv_dir, output_bin_dir, trie_compiler_executable):
        """Run ``make_trie`` and publish the n-grams TSV and trie output.

        :param yt_token: YT token passed to the tool via ``--yt-token``
        :param make_trie_executable: path to the tool to run
        :param min_query_count: value for ``--min-query-count``
        :param days_count: value for ``--days-count``
        :param output_tsv_dir: directory that receives ``ngrams.tsv`` (created here)
        :param output_bin_dir: directory that receives ``ngrams.trie`` (created here)
        :param trie_compiler_executable: value for ``--trie-compiler``
        """
        make_folder(output_tsv_dir)
        make_folder(output_bin_dir)

        # Taken before the tool runs, so "until_date" is the start date of the build.
        current_date = datetime.now().strftime("%Y-%m-%d")

        # NOTE(review): the token is passed as a command-line argument, so it
        # may be visible in the process list and in whatever run_process logs.
        # Confirm whether the tool can read it from the environment instead.
        cmd = [
            make_trie_executable,
            "--yt-token", yt_token,
            "--min-query-count", str(min_query_count),
            "--days-count", str(days_count),
            "--output-tsv-file", os.path.join(output_tsv_dir, "ngrams.tsv"),
            "--output-bin-file", os.path.join(output_bin_dir, "ngrams.trie"),
            "--trie-compiler", trie_compiler_executable,
        ]

        run_process(cmd, log_prefix="make_trie")

        self._publish_resource_pair(
            attributes={
                "min_query_count": min_query_count,
                "days_count": days_count,
                "until_date": current_date
            },
            tsv_description="User friendly Geo Search history n-grams",
            tsv_dir=output_tsv_dir,
            tsv_type=MAPS_GEO_SUGGEST_GEO_SEARCH_NGRAMS_TSV,
            bin_description="Geo Search history n-grams trie",
            bin_dir=output_bin_dir,
            bin_type=MAPS_GEO_SUGGEST_GEO_SEARCH_NGRAMS_BIN,
        )

    def _publish_resource_pair(self, attributes, tsv_description, tsv_dir, tsv_type, bin_description, bin_dir, bin_type):
        """Create and mark ready a TSV resource and then its binary counterpart.

        The TSV resource is published first; its id is then added to
        ``attributes`` (the dict is modified in place) under
        ``"tsv_resource_id"`` before the binary resource is created with the
        same dict.
        """
        tsv_resource = self.create_resource(
            tsv_description,
            tsv_dir,
            tsv_type,
            attributes=attributes)
        self.mark_resource_ready(tsv_resource.id)

        attributes["tsv_resource_id"] = tsv_resource.id
        bin_resource = self.create_resource(
            bin_description,
            bin_dir,
            bin_type,
            attributes=attributes)
        self.mark_resource_ready(bin_resource.id)

    @property
    def footer(self):
        """Return the HTML produced by :meth:`load_preview`."""
        return self.load_preview()

    @staticmethod
    def _read_file_head(filename, lines_count_limit, line_length_limit):
        """Return at most ``lines_count_limit`` stripped lines from the start of a file.

        Lines longer than ``line_length_limit`` are cut and end with "...".
        If the file has more lines than the limit, a final "..." line is
        appended. Every returned item ends with a newline.
        """
        lines = []
        # The context manager closes the file even when we stop reading early.
        with open(filename, "r") as source:
            for index, line in enumerate(source):
                if index >= lines_count_limit:
                    lines.append("..." + "\n")
                    break
                line = line.strip()
                if len(line) > line_length_limit:
                    line = line[0:line_length_limit - 3] + "..."
                lines.append(line + "\n")
        return lines

    @staticmethod
    def _escape_html(text):
        """Escape ``&``, ``<`` and ``>`` so that ``text`` is safe inside an HTML text node."""
        try:
            from html import escape  # Python 3
        except ImportError:
            from cgi import escape  # Python 2
        # Quotes are left as is: the result is only placed between tags.
        return escape(text, False)

    def save_preview(self, filepaths):
        """Write a short ``<name>.preview`` file per input file and publish them.

        For every path in ``filepaths`` the first 5 lines (each limited to 80
        characters) are written to ``preview/<basename>.preview`` under the
        task directory; the directory is then passed to
        ``create_task_preview_resource``.
        """
        preview_dir = os.path.join(self.abs_path(), "preview")
        make_folder(preview_dir)
        for filepath in filepaths:
            preview_content = self._read_file_head(filepath, 5, 80)
            preview_path = os.path.join(preview_dir, os.path.basename(filepath) + ".preview")
            with open(preview_path, "w") as preview_file:
                preview_file.writelines(preview_content)
        create_task_preview_resource(self, "Geo Search History preview", preview_dir)

    def load_preview(self):
        """Build the HTML preview from the task preview resource.

        :return: a plain message when the task has no preview resource,
            otherwise one ``<h4>`` section per expected preview file joined by
            spaces; a section whose content cannot be obtained or rendered
            shows "Not available".
        """
        preview_resource = get_task_preview_resource(self)
        if not preview_resource:
            return "No preview information available"

        previews = []
        for name in ["requests.tsv", "streams.dat", "ngrams.tsv"]:
            preview = "<h4>{:s}</h4>".format(name)
            try:
                content = qp.download_content(preview_resource, name + ".preview")
                # The content comes from search history data, so it must not
                # be interpreted as markup.
                preview += "<pre><small>{:s}</small></pre>".format(self._escape_html(content))
            except Exception:
                # Deliberate best effort: one missing preview must not break the footer.
                preview += "Not available"
            previews.append(preview)

        return " ".join(previews)


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up — confirm against the SDK.
__Task__ = BuildMapsGeoSuggestGeoSearchHistory
