from sandbox.projects.common.vcs import arc

from os import walk, listdir, makedirs, getcwd
from os.path import join, isdir

from sandbox.projects.impulse.ProjectsCollector.common import ext2lang


class Arcadia:
    """Mounts an arc working copy and splits its directory tree into projects.

    A directory becomes a single project when it contains fewer than
    ``threshold`` files (counted recursively); otherwise it is split and its
    sub-directories are examined in turn. Directories listed in
    ``force_split`` are always split, and those in ``exclude_list`` are
    skipped entirely.
    """

    # Name of the mount directory, created under the current working directory.
    arcadia_base_path = 'arcadia'
    # A directory with at least this many files (recursively) is split further.
    threshold = 30000
    # Mount-relative paths that are never reported and never descended into.
    exclude_list = ['sandbox', 'contrib', 'packages', 'build', 'dict', 'vendor',
                    'autocheck', 'analytics', 'fuzzing', 'frontend', 'junk',
                    'infra/awacs/mirror', 'infra/awacs/templates']
    force_split_maps = [
        "maps/analytics", "maps/analyzer", "maps/automotive", "maps/carparks",
        "maps/factory", "maps/geoq", "maps/infra", "maps/libs", "maps/masstransit",
        "maps/photos", "maps/poi", "maps/pylibs", "maps/realty", "maps/search",
        "maps/tools"
    ]

    force_split_market = [
        "market/access", "market/ammo_api", "market/amore", "market/backctld",
        "market/buker", "market/cataloger", "market/click_n_collect",
        "market/debpkg", "market/devtools", "market/dmock",
        "market/dynamic_pricing", "market/forecaster", "market/giffany",
        "market/infra", "market/kombat", "market/library", "market/mbi",
        "market/mbo", "market/mobile_validator", "market/model", "market/qpipe",
        "market/quoter", "market/recommender", "market/redirector",
        "market/replenishment", "market/report", "market/robots", "market/sailor",
        "market/seo", "market/shade", "market/shapi", "market/sitemap",
        "market/tarantino", "market/ugc"
    ]

    force_split_yweb = [
        "yweb/blogs", "yweb/clones", "yweb/crawlrank", "yweb/disk", "yweb/erf",
        "yweb/incproc", "yweb/mail", "yweb/mango", "yweb/music", "yweb/querydata",
        "yweb/rca", "yweb/reg_word_host", "yweb/serpapi", "yweb/structhtml",
        "yweb/trender", "yweb/webdaemons", "yweb/webutil"
    ]

    force_split_quality = [
        "quality/commercial", "quality/dssm_tools", "quality/gigaminer",
        "quality/miscdata", "quality/offline", "quality/query_pool",
        "quality/related-search", "quality/seinfo_monitor",
        "quality/session_metrics", "quality/tool",
        "quality/tsnet", "quality/usf_beta"
    ]

    force_split_search = [
        "search/alice", "search/base_search", "search/begemot", "search/formula",
        "search/fuzzing", "search/lingboost", "search/meta", "search/panther",
        "search/plutonium", "search/priemka", "search/report", "search/scraper_over_yt"
    ]

    force_split_alice = [
        "alice/bass", "alice/boltalka", "alice/cuttlefish", "alice/hollywood",
        "alice/megamind", "alice/nlu", "alice/paskills", "alice/tests",
    ]

    force_split_others = [
        "FactExtract", "adfox", "ads/tensor_transport", "adv", "antiadblock", "arc",
        "balancer", "billing", "bindings", "captcha", "catboost", "cloud/logs", "cmnt",
        "datacloud", "direct/apps", "disk", "drive", "ether", "games", "geosuggest",
        "haas", "htf", "infra/pod_agent", "infra/yp", "infra/yp_dns",
        "infra/yp_service_discovery", "ipreg", "juggler", "kernel", "lbs", "library",
        "mail/iex", "mail/search", "mail/so", "mapreduce", "mds", "media", "mediapers",
        "mlportal", "mssngr", "orgvisits", "passport", "quasar", "rt-research", "rtline",
        "security", "serp", "smart_devices", "solomon", "specsearches", "statbox", "strm",
        "sup", "taxi", "taxi/logistic-dispatcher", "travel/avia", "travel/hotels",
        "travel/rasp", "velocity", "weather", "web", "yabs/nanpu", "yabs/qa", "yabs/stat",
        "ydf", "ydo", "yql", "ysite", "zen", "zora"]

    # Union of all per-area lists: mount-relative paths that are always split
    # into their sub-directories, regardless of how many files they contain.
    force_split = set(
        force_split_maps + force_split_market + force_split_yweb +
        force_split_quality + force_split_search + force_split_alice + force_split_others
    )

    def __init__(self):
        """Create the mount directory under the CWD and mount arc into it.

        ``makedirs`` raises if the directory already exists. The value
        returned by ``mount_path`` is kept in ``self.mp``.
        """
        self.mount_point = join(getcwd(), self.arcadia_base_path)
        makedirs(self.mount_point)
        self.mp = arc.Arc().mount_path(None, changeset=None, mount_point=self.mount_point, fetch_all=False)

    def fetch_projects(self, root='', exclude_list=None, threshold=None, force_split=None):
        """Recursively collect project descriptions below ``root``.

        Args:
            root: path relative to the mount point whose sub-directories are
                examined.
            exclude_list: relative paths to skip; defaults to the class
                ``exclude_list``.
            threshold: file-count limit; a directory with fewer files is
                reported as one project, otherwise it is split. Defaults to
                the class ``threshold``.
            force_split: relative paths that are always split; defaults to
                the class ``force_split``.

        Returns:
            A list of dicts with keys ``name``, ``full_name``, ``url``,
            ``slug`` and ``languages`` (the non-None results of ``ext2lang``
            over the text after the last dot of every file name).
        """
        if exclude_list is None:
            exclude_list = self.exclude_list
        if threshold is None:
            threshold = self.threshold
        if force_split is None:
            force_split = self.force_split

        projects = []

        for directory in listdir(join(self.mount_point, root)):
            rel_path = join(root, directory)
            if directory.startswith('.') or rel_path in exclude_list:
                continue
            path = join(self.mount_point, root, directory)
            if not isdir(path):
                continue

            if rel_path in force_split:
                print("force split", rel_path)
                # Forward the effective settings so caller overrides apply at
                # every depth instead of reverting to the class defaults.
                projects += self.fetch_projects(
                    root=rel_path, exclude_list=exclude_list,
                    threshold=threshold, force_split=force_split)
                continue

            # Count every file in the subtree and gather the detected languages.
            total = 0
            languages = set()
            for _dirpath, _dirnames, files in walk(path):
                total += len(files)
                languages.update(ext2lang(f.rsplit('.', 1)[-1]) for f in files)
            # ext2lang yields None for extensions it does not recognise.
            languages.discard(None)
            languages = list(languages)

            if total < threshold:
                if rel_path[0] == '/':
                    rel_path = rel_path[1:]
                url = 'https://a.yandex-team.ru/arc/trunk/arcadia/{}'.format(rel_path)
                slug = 'a-' + rel_path.replace('/', '-')
                name = rel_path.rsplit('/', 1)[-1]
                full_name = rel_path
                projects.append(dict(name=name, full_name=full_name, url=url, slug=slug, languages=languages))
                print(full_name, url, slug, total)
            else:
                print(rel_path, total)
                # Too large for a single project: split it, keeping the same
                # settings for the nested levels.
                projects += self.fetch_projects(
                    root=rel_path, exclude_list=exclude_list,
                    threshold=threshold, force_split=force_split)

        return projects
