#!/usr/bin/env python
# -*- coding: utf-8 -*-
from nile.api.v1 import clusters
import argparse
import datetime
import re
import pdb
import json
from videolog_common import get_cluster, get_date


class DummySearcher(object):
    """Stand-in searcher whose ``search`` never reports a match."""

    def search(self, *_positional, **_named):
        """Accept any arguments and always return ``False``."""
        return False


def _build_filters(element, threshold):
    """Build the search filters for one config element.

    Args:
        element: one config entry; the optional keys "whitelist",
            "blacklist" and "date_from_path" are read here.
        threshold: cut-off date; only nodes dated before it are selected.

    Returns:
        A ``(path_filter, object_filter, description)`` tuple. Either filter
        may be ``None``. ``description`` is the text appended to the log
        line describing the whitelist/blacklist in effect (possibly empty).
    """
    path_filter = None
    description = ""

    whitelist = element.get("whitelist")
    blacklist = element.get("blacklist")
    # A whitelist takes precedence: when both are configured the blacklist
    # is ignored.
    if whitelist:
        whitelist_re = re.compile(whitelist)
        # Paths matching the whitelist are protected, i.e. not selected.
        path_filter = lambda path: not whitelist_re.search(path)
        description = ' and whitelist "{}"'.format(whitelist)
    elif blacklist:
        blacklist_re = re.compile(blacklist)
        # Only paths matching the blacklist are selected.
        path_filter = lambda path: blacklist_re.search(path)
        description = ' and blacklist "{}"'.format(blacklist)

    if not element.get("date_from_path"):
        # Age is decided by the node's "access_time" attribute.
        object_filter = (
            lambda obj: get_date(obj.attributes["access_time"]) < threshold
        )
        return path_filter, object_filter, description

    name_filter = path_filter

    def dated_path_filter(path):
        # Age is decided by the date extracted from the path itself; paths
        # for which get_date() yields nothing are never selected.
        if name_filter is not None and not name_filter(path):
            return False
        path_date = get_date(path)
        return bool(path_date) and path_date < threshold

    return dated_path_filter, None, description


def _remove_nodes(yt, nodes, kind, dry_run):
    """Remove ``nodes`` via ``yt.remove`` and report the outcome per node.

    Args:
        yt: client object providing ``remove(path)``.
        nodes: iterable of nodes to remove.
        kind: noun used in the printed messages ("table" or "folder").
        dry_run: when true, nothing is removed; each node is only reported.

    Returns:
        The number of nodes actually removed.
    """
    removed = 0
    for node in nodes:
        if dry_run:
            print("NOT removed {} {}".format(kind, node))
            continue
        try:
            yt.remove(node)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print("couldnt remove {} {}: {}".format(kind, node, e))
        else:
            print("removed {} {}".format(kind, node))
            removed += 1
    print("\nremoved {} {}s in total".format(removed, kind))
    return removed


def main():
    """Clean old tables/files and empty folders as described by a config.

    Command line:
        --config/-c  path to a JSON file holding a list of objects (required)
        --debug      drop into pdb after each search
        --dry-run    only print what would be removed

    Each config object is read for the keys:
        "cluster"         cluster name (title-cased to form the proxy)
        "root"            folder to clean; skipped if it does not exist
        "days"            age threshold in days (30 when missing or falsy)
        "whitelist"       regex; matching paths are kept
        "blacklist"       regex; only matching paths are removed
                          (ignored when a whitelist is given)
        "date_from_path"  when truthy, a node's date is taken from its path
                          instead of its "access_time" attribute
    """
    parser = argparse.ArgumentParser()
    # The config is opened unconditionally below, so insist on it up front
    # rather than failing later on open(None).
    parser.add_argument("--config", "-c", required=True)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    with open(args.config, "r") as f:
        config = json.load(f)

    # One cluster object per cluster name, shared by all config elements.
    clusters_by_name = {}

    for element in config:
        cluster_name = element["cluster"]
        if cluster_name not in clusters_by_name:
            clusters_by_name[cluster_name] = get_cluster(
                clusters,
                {
                    "proxy": cluster_name.title(),
                    "job_root": "//tmp",
                    "title": "YT Folder Cleaner",
                    "no_yql": True,
                },
            )
        cluster = clusters_by_name[cluster_name]
        threshold = datetime.date.today() - datetime.timedelta(
            days=element.get("days") or 30
        )

        yt = cluster.driver.client
        root = element["root"]

        if not yt.exists(root):
            print("folder {} doesn't exist, skipping".format(root))
            continue

        path_filter, object_filter, description = _build_filters(
            element, threshold
        )

        log_line = "\n---\n\n[{}] cleaning root folder {}".format(
            cluster_name.title(), root
        )
        log_line += description
        log_line += " and threshold of {}\n---\n\n".format(threshold)
        print(log_line)

        tables = list(
            yt.search(
                root=root,
                attributes=["access_time"],
                node_type=["table", "file"],
                object_filter=object_filter,
                path_filter=path_filter,
            )
        )

        if args.debug:
            pdb.set_trace()

        _remove_nodes(yt, tables, "table", args.dry_run)

        print("now going for empty folders")

        folders = list(
            yt.search(
                root=root,
                attributes=["count"],
                node_type="map_node",
                object_filter=lambda x: (x.attributes["count"] == 0),
            )
        )

        if args.debug:
            pdb.set_trace()

        # Honours --dry-run: previously folders were removed even then.
        _remove_nodes(yt, folders, "folder", args.dry_run)


# Run the cleaner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
