from sandbox.projects.common.vcs import arc

from collections import defaultdict
from requests import post
from os import walk, makedirs, getcwd
from os.path import join

from sandbox.projects.impulse.ProjectsCollector.common import ext2lang


class ProjectInsights:
    """Collect project directories from a CI gateway and guess their languages.

    The instance mounts a working tree under ``<cwd>/arcadia`` via
    ``arc.Arc().mount_path`` and talks to the HTTP API rooted at ``upstream``
    using an OAuth token.
    """

    # Directory (relative to the current working directory) used as mount point.
    arcadia_base_path = "arcadia"
    # Project directories skipped by fetch_projects; compared by exact match
    # against the reported "dir" value, not as prefixes.
    path_exclude_list = [
        "frontend", "direct", "taxi",
    ]

    def __init__(self, upstream, token):
        """Create the mount point, mount the repository and prepare HTTP headers.

        :param upstream: base URL of the API; endpoint paths are appended to it.
        :param token: OAuth token placed into the ``Authorization`` header.
        """
        self.mount_point = join(getcwd(), self.arcadia_base_path)
        makedirs(self.mount_point)
        # NOTE(review): the returned mount handle is only stored and never
        # released in this class -- confirm who is responsible for unmounting.
        self.mp = arc.Arc().mount_path(None, changeset=None, mount_point=self.mount_point, fetch_all=False)

        self.headers = {
            "Authorization": "OAuth {}".format(token),
            "Content-Type": "application/json"
        }
        self.upstream = upstream

    def fetch_arcanum_project_dirs(self, project_id):
        """Return the ``configs`` list of one project, or None on a non-200 reply.

        :param project_id: project identifier sent as ``projectId``.
        """
        url = self.upstream + "/gateway/ci/GetProject"
        payload = {
            "includeInvalidConfigs": True,
            "projectId": project_id,
        }

        res = post(url, headers=self.headers, json=payload)
        if res.status_code != 200:
            return None

        return res.json()["configs"]

    def fetch_arcanum_projects(self):
        """Return the ids of all projects, paging through ``GetProjects``.

        Pages of up to 100 projects are requested until an empty page comes
        back; each following request passes the last seen id as
        ``offsetProjectId``. Returns None as soon as any request answers with
        a non-200 status (ids collected so far are discarded).
        """
        url = self.upstream + "/gateway/ci/GetProjects"
        payload = {
            "includeInvalidConfigs": True,
            "onlyFavorite": False,
            "limit": 100,
        }
        project_ids = []

        while True:
            res = post(url, headers=self.headers, json=payload)
            if res.status_code != 200:
                return None
            page = res.json()["projects"]
            if not page:
                break
            project_ids.extend(item["project"]["id"] for item in page)
            # Continue the listing after the last project of this page.
            payload["offsetProjectId"] = project_ids[-1]

        return project_ids

    def get_languages_for_dir(self, ayaml_rel_path):
        """Return languages covering more than 10% of recognised files in a dir.

        Walks ``ayaml_rel_path`` (relative to the mount point), maps each file
        extension through ``ext2lang`` and counts files per language. Files
        whose extension maps to None are ignored. The walk stops once more
        than 50000 files have been seen, so huge trees are only sampled.

        :param ayaml_rel_path: directory path relative to the mount point.
        :return: list of language names; empty when nothing was recognised.
        """
        per_language = defaultdict(int)
        seen_files = 0
        known_total = 0

        for _root, _dirs, files in walk(join(self.mount_point, ayaml_rel_path)):
            seen_files += len(files)
            if seen_files > 50000:
                # Give up on oversized trees; the files of this last directory
                # are not counted.
                break
            for file_name in files:
                lang = ext2lang(file_name.rsplit('.', 1)[-1])
                if lang is None:
                    continue
                known_total += 1
                per_language[lang] += 1

        # known_total is zero only when per_language is empty, so the division
        # below is never evaluated with a zero denominator. float() keeps true
        # division under Python 2 as well.
        return [
            lang
            for lang, count in per_language.items()
            if float(count) / known_total > 0.1
        ]

    def fetch_projects(self):
        """Build project descriptors for every eligible project directory.

        Directories that are empty, start with ``junk/`` or are listed in
        ``path_exclude_list`` are skipped, as are projects whose directory
        lookup failed. If the project listing itself failed (None), an empty
        list is returned instead of raising ``TypeError``.

        :return: list of dicts with ``full_name``, ``slug``, ``path``, ``url``
            and ``languages`` keys.
        """
        projects = []
        # fetch_arcanum_projects() yields None on an HTTP failure; treat that
        # as "no projects" the same way a failed per-project lookup is skipped.
        arcanum_projects = self.fetch_arcanum_projects() or []

        for project_id in arcanum_projects:
            project_dirs = self.fetch_arcanum_project_dirs(project_id)
            if project_dirs is None:
                continue
            for a_dir in project_dirs:
                path = a_dir["dir"]
                if not path or path.startswith("junk/") or path in self.path_exclude_list:
                    continue
                url = "https://a.yandex-team.ru/arc/trunk/arcadia/{}".format(path)
                full_name = a_dir["title"][:128]
                # Keep the tail of the path so the slug stays within 128 chars
                # including the "pi-" prefix.
                slug = "pi-" + path.replace("/", "-")[-125:]
                languages = self.get_languages_for_dir(path)
                print(project_id, path, full_name, languages)
                projects.append(dict(full_name=full_name, slug=slug, path=path, url=url, languages=languages))

        return projects
