import hashlib
import json
import logging
import os
import io
from os.path import normpath, basename, join
from datetime import datetime, timedelta
from sandbox import sdk2
from sandbox.projects.common.vcs import arc, util
from sandbox.sdk2.vcs.svn import Arcadia
from sandbox.projects.mediaanalyst.libs.common.base import MaSandboxBaseTask
from sandbox.projects.mediaanalyst.libs.common.arc_helpers import arc_create_remote_branch, arc_delete_remote_branch


class MaAutoDocs(MaSandboxBaseTask):
    """Sandbox task that generates Markdown schema docs for YT tables.

    The task reads a YAML config, resolves the schema of every configured
    table, writes schema/description/page files and the docs TOC into a
    mounted Arc working copy, pushes them to ``ARC_DOCS_BRANCH`` and creates
    a pull request when none is reported for the branch.
    """
    # NOTE(review): not referenced anywhere in this file; presumably consumed
    # by MaSandboxBaseTask to locate the task binary - confirm.
    BINARY_TASK_ATTR_TARGET = "mediaanalyst/tasks/MaAutoDocs"
    # Arc URL handed to util.extract_path_details to decide what on_execute mounts.
    ARC_TRUNK_URL = "arcadia-arc:/#trunk"
    # Docs root; joined onto "/arc/trunk/arcadia/" for Arcadia URLs and used
    # as-is for file paths relative to the mount point.
    ARC_DOCS_BASEDIR = "ms/data/docs"
    # Remote Arc branch that on_execute deletes, recreates and pushes to.
    ARC_DOCS_BRANCH = "users/robot-msdata-infra/autodocs"

    class Parameters(MaSandboxBaseTask.Parameters):
        # Location of the main YAML config; parse_config reads it via Arcadia.cat.
        config_path = sdk2.parameters.String(
            "Path to config",
            default="",
            required=True,
        )
        # Secret with robot credentials; parse_config reads its "yt.token" key.
        yav_secret = sdk2.parameters.YavSecret(
            "Robot secrets",
            required=True,
        )

    def parse_config(self):
        """Load the main config and resolve a YT schema for every documented table.

        Reads the YAML file at ``Parameters.config_path`` via ``Arcadia.cat``,
        stores the derived docs paths and project info in ``self.Context`` and
        then, for every entry under ``docs.projects.<project>``, fetches the
        table schema with ``get_yt_table_schema``.

        Returns:
            A list of dicts, one per configured doc entry, with the keys
            ``name``, ``schema``, ``project``, ``toc`` and
            ``md_schema_arc_path``.

        Raises:
            ValueError: if an entry's ``format`` is neither ``date`` nor
                ``static_path``.
        """
        from ms.infra.libs.schemas import get_yt_table_schema
        from yaml import load
        try:
            from yaml import CLoader as Loader
        except ImportError:
            from yaml import Loader

        logging.info('reading main configuration...')
        txt_config = Arcadia.cat(':'.join([Arcadia.ARCADIA_SCHEME,
                                 self.Parameters.config_path]))
        # NOTE(review): this is the full (non-safe) YAML loader. It is fine
        # only as long as the config comes from a trusted repository path.
        config = load(txt_config.strip(), Loader=Loader)
        self.Context.arc_docs_dir = join("/arc/trunk/arcadia/", self.ARC_DOCS_BASEDIR)
        self.Context.arc_schema_dir = join(self.Context.arc_docs_dir, "_schemas")
        self.Context.rel_schema_dir = join(self.ARC_DOCS_BASEDIR, "_schemas")
        self.Context.arc_schema_descriptions_dir = join(self.Context.arc_schema_dir, "descriptions")
        self.Context.rel_schema_descriptions_dir = join(self.Context.rel_schema_dir, "descriptions")
        self.Context.arc_toc_path = join("/arc/trunk/arcadia", self.ARC_DOCS_BASEDIR, "toc.yaml")
        self.Context.rel_toc_path = join(self.ARC_DOCS_BASEDIR, "toc.yaml")
        self.Context.projects_info = config["projects"]
        self.Context.projects = list(config['projects'].keys())

        # Materialise once: the listing may be a one-shot iterator, which
        # repeated ``in`` tests would silently exhaust after the first lookup.
        known_descriptions = set(self.list_schema_descriptions())
        project_docs = config["docs"]["projects"]
        # A single timestamp for the whole run, so every date-formatted table
        # resolves to the same day even if the loop crosses midnight.
        yesterday = datetime.now() + timedelta(days=-1)
        # Fetched lazily and only once; nothing is requested when no entry needs it.
        yt_token = None

        result = []
        logging.info("processing per project configuration...")
        for project_name in self.Context.projects:
            logging.info(project_name)

            # Projects without a docs section (or with an empty one) are skipped.
            for entry in project_docs.get(project_name) or []:
                name_format = entry["format"]
                cur_doc_item_name = entry["item"]
                cur_doc_item_cluster = entry["cluster"]
                cur_doc_item_toc_section = entry["toc_section"]

                if name_format == 'date':
                    doc_table_path = normpath(entry["in"]) + yesterday.strftime(entry["format_string"])
                elif name_format == 'static_path':
                    doc_table_path = entry["in"]
                else:
                    # Without this branch the path would be unbound, or worse,
                    # silently inherited from the previous entry.
                    raise ValueError(
                        "unsupported format %r for doc item %r of project %r" % (
                            name_format, cur_doc_item_name, project_name))

                schema_description_filename = cur_doc_item_name + ".yaml"
                if schema_description_filename in known_descriptions:
                    td = self.parse_table_description(
                        join(self.Context.arc_schema_descriptions_dir, schema_description_filename))
                else:
                    td = None

                if yt_token is None:
                    yt_token = self.Parameters.yav_secret.data()["yt.token"]
                schema_obj = get_yt_table_schema(
                    path=doc_table_path,
                    cluster=cur_doc_item_cluster,
                    token=yt_token,
                    schema_description=td
                )
                markdown_schema_path = normpath(join(self.Context.arc_schema_dir, cur_doc_item_name + ".md"))
                result.append({"name": cur_doc_item_name,
                               "schema": schema_obj,
                               "project": project_name,
                               "toc": cur_doc_item_toc_section,
                               "md_schema_arc_path": markdown_schema_path})
        return result

    def parse_table_description(self, arc_url):
        """Read a column-description YAML file and return it as a mapping.

        Args:
            arc_url: Arcadia path of the description file (without the scheme
                prefix, which is prepended here).

        Returns:
            dict mapping each stripped column name to its stripped comment.
            A missing or empty (null) comment yields an empty string.
        """
        from yaml import load
        try:
            from yaml import CLoader as Loader
        except ImportError:
            from yaml import Loader

        logging.info('reading config %s', arc_url)
        txt_config = Arcadia.cat(':'.join([Arcadia.ARCADIA_SCHEME, arc_url]))
        config = load(txt_config.strip(), Loader=Loader)

        result = {}
        for field in config["fields"]:
            # ``comment:`` with no value parses to None, and hand-edited files
            # may drop the key altogether; treat both as "no description".
            comment = field.get("comment") or ""
            result[field["name"].strip()] = comment.strip()
        return result

    def dump_schema_to_md_file(self, schema, path):
        logging.info("dumping schema to: %s" % path)
        with open(path, 'w') as f:
            markdown_schema = schema.markdown()
            f.write(markdown_schema)

    def dump_description_to_yaml(self, name, schema):
        """Write a description skeleton (empty comment per column) for ``schema``.

        The file ``name`` is created inside ``Context.rel_schema_descriptions_dir``
        and contains one ``{"name": ..., "comment": ""}`` entry for every key
        of the JSON produced by ``schema.json()``.
        """
        import yaml
        columns = json.loads(schema.json())
        skeleton = {
            "fields": [{"name": column, "comment": ""} for column in columns],
        }
        target_path = join(self.Context.rel_schema_descriptions_dir, name)
        with open(target_path, 'w') as out:
            out.write(yaml.safe_dump(skeleton, sort_keys=False))

    def list_schema_descriptions(self):
        """Return the normalised names listed in the schema descriptions directory.

        Returns:
            list of entries from ``Arcadia.list`` on
            ``Context.arc_schema_descriptions_dir``, each passed through
            ``normpath``.
        """
        arc_url = ':'.join([Arcadia.ARCADIA_SCHEME, self.Context.arc_schema_descriptions_dir])
        configs = Arcadia.list(arc_url, as_list=True)
        # Return a real list: on Python 3 ``map`` is a single-use iterator, so
        # a second membership test by the caller would always come back False.
        return [normpath(config) for config in configs]

    def render_template(self, template_path, schema_filename, page_title=None):
        """Render the docs page template into ``template_path``.

        Args:
            template_path: destination file; it is overwritten.
            schema_filename: value substituted for ``schema_filename`` in the template.
            page_title: page title; "Default title" is used when empty or None.
        """
        from ms.infra.libs.templates import docs_page_template
        title = page_title or "Default title"
        # Render first so a formatting error cannot leave a truncated page on disk.
        rendered_template = docs_page_template.format(
            title=title,
            schema_filename=schema_filename
        )
        # A project documented for the first time has no directory yet.
        parent_dir = os.path.dirname(template_path)
        if parent_dir and not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)
        logging.info("rendering template to: %s", template_path)
        with open(template_path, 'w') as f:
            f.write(rendered_template)

    def get_toc(self):
        """Fetch the docs TOC at ``Context.arc_toc_path`` and return it parsed from YAML."""
        from yaml import load as parse_yaml
        try:
            from yaml import CLoader as yaml_loader
        except ImportError:
            from yaml import Loader as yaml_loader

        toc_url = ':'.join((Arcadia.ARCADIA_SCHEME, self.Context.arc_toc_path))
        raw_toc = Arcadia.cat(toc_url)
        return parse_yaml(raw_toc.strip(), Loader=yaml_loader)

    def toc_section_exist(self, data, section_name, page_title):
        section = [item for item in data.get('items') if item.get('name') == section_name]
        if not section:
            return False
        section_item = [x for x in section[0]['items'] if x.get('name') == page_title]
        if section_item:
            return True
        return False

    def update_toc(self, data, section_name, link, title):
        section = [item for item in data.get('items') if item.get('name') == section_name]
        if not section:
            section = [{'name': section_name, 'items': []}]

        new_item = [{'href': link, 'name': title}]

        section[0]['items'] = [x for x in section[0]['items'] if x.get('name') != title] + new_item
        data['items'] = [x for x in data['items'] if x.get('name') != section_name] + section

    def sync_toc(self, data):
        """Serialise ``data`` as block-style YAML into ``Context.rel_toc_path`` (UTF-8)."""
        import yaml
        dump_options = dict(default_flow_style=False, allow_unicode=True, sort_keys=False)
        with io.open(self.Context.rel_toc_path, 'w', encoding='utf-8') as toc_file:
            yaml.safe_dump(data, toc_file, **dump_options)

    def on_execute(self):
        """Generate the docs in a mounted Arc working copy and publish them.

        Steps:
          1. Parse the config and fetch the current TOC.
          2. Mount Arc, then delete and recreate the ``ARC_DOCS_BRANCH`` remote branch.
          3. For each configured doc: create a missing description skeleton,
             (re)write the schema Markdown when it is new or differs from
             trunk, render a missing page and register it in the TOC.
          4. If anything was written, commit, push and create a pull request
             unless ``pr_status`` succeeds for the branch.

        ``Context.docs_changed`` and ``Context.toc_changed`` record whether
        doc files and the TOC, respectively, were modified.
        """
        def sha1_hex(text):
            # hashlib only accepts bytes; encode text so hashing works
            # regardless of which string type the caller got back.
            if not isinstance(text, bytes):
                text = text.encode('utf-8')
            return hashlib.sha1(text).hexdigest()

        self.Context.docs_changed = False
        self.Context.toc_changed = False
        data = self.parse_config()
        clone_path, commit_id, vcs_type = util.extract_path_details(self.ARC_TRUNK_URL)
        logging.info("clone_path: %s commit_id: %s vcs_type: %s", clone_path, commit_id, vcs_type)
        logging.info("retrieving toc.yaml")
        toc = self.get_toc()
        logging.info("mounting arcadia...")

        with arc.Arc().mount_path(clone_path, commit_id, fetch_all=False) as mount_point:
            logging.info("mounted to %s", mount_point)
            # All relative paths below are resolved against the mount point.
            previous_cwd = os.getcwd()
            os.chdir(mount_point)
            try:
                # Start from a fresh branch; a failed delete (e.g. the branch
                # does not exist yet) is only logged.
                try:
                    arc_delete_remote_branch(mount_point, self.ARC_DOCS_BRANCH)
                except arc.ArcCommandFailed as e:
                    logging.info(e)
                arc_create_remote_branch(mount_point, branch_name=self.ARC_DOCS_BRANCH)

                for doc_info in data:
                    doc_obj_name = doc_info["name"]
                    doc_schema_filename = doc_obj_name + ".md"
                    doc_schema_description_filename = doc_obj_name + ".yaml"
                    doc_schema_object = doc_info["schema"]
                    doc_project = doc_info["project"]
                    doc_schema_absolute_path = doc_info["md_schema_arc_path"]
                    doc_schema_relative_path = join(self.Context.rel_schema_dir, doc_schema_filename)
                    doc_schema_description_absolute_path = join(self.Context.arc_schema_descriptions_dir,
                                                                doc_schema_description_filename)
                    doc_project_dir = self.Context.projects_info[doc_project][0]["arc"]
                    doc_page_title = doc_info["toc"]
                    doc_toc_section_name = self.Context.projects_info[doc_project][0]["toc_section"]
                    doc_template_page_relative_path = join(self.ARC_DOCS_BASEDIR, basename(doc_project_dir),
                                                           doc_schema_filename)
                    doc_template_page_absolute_path = join(self.Context.arc_docs_dir, basename(doc_project_dir),
                                                           doc_schema_filename)

                    logging.info(
                        "document %s project: %s\nschema: %s\n\t%s\n\t%s\npage template:\n\t%s\n\t%s",
                        doc_obj_name, doc_project, doc_schema_filename,
                        doc_schema_absolute_path, doc_schema_relative_path,
                        doc_template_page_absolute_path,
                        doc_template_page_relative_path)

                    # Description skeleton: created only when trunk has none.
                    if not Arcadia.check(":".join([Arcadia.ARCADIA_SCHEME, doc_schema_description_absolute_path])):
                        self.Context.docs_changed = True
                        self.dump_description_to_yaml(doc_schema_description_filename, doc_schema_object)

                    # Schema Markdown: rewritten when new or different from trunk.
                    if Arcadia.check(":".join([Arcadia.ARCADIA_SCHEME, doc_schema_absolute_path])):
                        logging.info("schema already exist...checking changes")
                        trunk_schema_txt = Arcadia.cat(":".join([Arcadia.ARCADIA_SCHEME,
                                                       doc_schema_absolute_path])).strip()
                        current_schema = doc_schema_object.markdown().strip()
                        trunk_hash = sha1_hex(trunk_schema_txt)
                        cur_hash = sha1_hex(current_schema)
                        logging.info("SHA1: trunk %s current %s", trunk_hash, cur_hash)
                        schema_outdated = trunk_hash != cur_hash
                    else:
                        schema_outdated = True
                    if schema_outdated:
                        self.Context.docs_changed = True
                        self.dump_schema_to_md_file(doc_schema_object, doc_schema_relative_path)

                    # Page: rendered only when trunk has none. It counts as a
                    # docs change so that the new file actually gets committed.
                    if not Arcadia.check(":".join([Arcadia.ARCADIA_SCHEME, doc_template_page_absolute_path])):
                        self.Context.docs_changed = True
                        self.render_template(doc_template_page_relative_path,
                                             doc_schema_filename, doc_page_title)

                    if not self.toc_section_exist(toc, doc_toc_section_name, doc_page_title):
                        page_link = join(basename(doc_project_dir), doc_schema_filename)
                        self.Context.toc_changed = True
                        self.update_toc(toc, doc_toc_section_name, page_link, doc_page_title)

                if self.Context.toc_changed:
                    self.sync_toc(toc)

                # A TOC-only update is a change too; gating on docs_changed
                # alone would write toc.yaml and then drop it on unmount.
                if self.Context.docs_changed or self.Context.toc_changed:
                    arc.Arc().add(mount_point, all_changes=True)
                    arc.Arc().commit(mount_point, message="autogenerated YT table schema docs", all_changes=True)
                    arc.Arc().push(mount_point, upstream=self.ARC_DOCS_BRANCH)
                    try:
                        arc.Arc().pr_status(mount_point)
                    except arc.ArcCommandFailed as e:
                        # pr_status failing is taken to mean "no PR yet".
                        logging.info(e)
                        arc.Arc().pr_create(mount_point, message="autodocs: YT table schema docs",
                                            publish=True, auto=False)
            finally:
                # Leave the mount before it goes away, so the process is not
                # left with a dangling working directory.
                os.chdir(previous_cwd)
