import os
import time
import yaml
import pathlib
import subprocess
from copy import copy
from json import loads as json_loads

import yt.wrapper as yt
from infra.dostavlyator.lib.misc.misc import (
    safe_remove,
    safe_stat,
    safe_file_md5,
    get_instance_details,
    GetLogger,
)
from infra.dostavlyator.lib.db.tables import (
    LookupBoxAssignedTable,
    LookupBoxAppliedTable,
    InsertBoxAppliedTable,
    GetPathBoxAppliedTable,
)
from infra.dostavlyator.proto import main_pb2, tables_pb2
from infra.dostavlyator.proto.main_pb2 import EFileStatus
from infra.dostavlyator.lib.fetcher import cleanup


# Module-level logger used by every helper and by Fetcher below.
log = GetLogger("infra.dostavlyator.lib.fetcher")

# Pause between two iterations of Fetcher.run(), in seconds.
ITERATION_WAIT_TIME = 5

# Details of the current instance, resolved once at import time.
INSTANCE_DETAILS = get_instance_details()

# Lookup/lock filter passed to the box tables: a single entry carrying the
# pod FQDN and box id of this instance.
INSTANCE_FILTER = [
    {key: INSTANCE_DETAILS[key] for key in ("DeployPodPersistentFqdn", "DeployBoxId")}
]


def check_rbtorrent(r, storage_dir):
    """Compare locally stored files of a resource with the skynet file listing.

    Runs ``sky files --json`` for ``r.Source.Skynet.Url`` and, for every file
    in the listing, compares size and MD5 with the local copy located under
    ``storage_dir / r.ResourceCandidateId``.

    Args:
        r: resource record; ``ResourceCandidateId`` and ``Source.Skynet.Url``
            are read from it.
        storage_dir: path-like root directory (must support the ``/`` operator).

    Returns:
        True when every listed file matches, False on the first mismatch
        (the mismatch is logged as an error).

    Raises:
        subprocess.CalledProcessError: when the ``sky files`` call exits with
            a non-zero status.
    """
    rc_id = r.ResourceCandidateId
    rc_dir = storage_dir / rc_id
    args = ["/usr/local/bin/sky", "files", "--json", r.Source.Skynet.Url]
    files = json_loads(subprocess.check_output(args, shell=False).decode("utf-8"))
    for f in files:
        local_path = rc_dir / f["name"]
        f_stat = safe_stat(local_path)
        if f_stat["size"] != f["size"]:
            log.error(
                f"Size check failed for file {f} in resource candidate with id = \"{rc_id}\", local = {f_stat['size']}, sky = {f['size']}"
            )
            return False
        # Keep the hex digest itself: logging the hash object would only print
        # its repr, not the value that was actually compared.
        local_md5 = safe_file_md5(local_path).hexdigest()
        if local_md5 != f["md5sum"]:
            log.error(
                f"MD5 check failed for file {f} in resource candidate with id = \"{rc_id}\", local = {local_md5}, sky = {f['md5sum']}"
            )
            return False
    return True


def fetch_rbtorrent(resource, storage_dir, network_limit, direct_io):
    """Download a resource with ``sky get`` into its resource-candidate directory.

    The target directory is ``storage_dir / resource.ResourceCandidateId``.
    On any failure after the target directory is known, the directory is
    removed via ``safe_remove`` and the original exception is re-raised.

    Args:
        resource: resource record; ``ResourceCandidateId`` and
            ``Source.Skynet.Url`` are read from it.
        storage_dir: path-like root directory (must support the ``/`` operator).
        network_limit: when truthy, passed to sky as ``--max-dl-speed``.
        direct_io: when truthy, ``direct_write``/``direct_read`` options are
            passed to sky via ``--opts``.

    Raises:
        subprocess.CalledProcessError: when ``sky get`` exits with a non-zero
            status.
    """
    # Resolved before the ``try`` so the cleanup handler never touches an
    # unbound name (which would mask the real error with UnboundLocalError).
    rc_dir = storage_dir / resource.ResourceCandidateId
    try:
        args = [
            "/usr/local/bin/sky",
            "get",
            "-u",
            "-d",
            str(rc_dir),
            # TODO: check progress and drop stale processes
            # "--progress",
            # "--progress-format", "json",
            # "--progress-version", "1",
        ]
        if network_limit:
            args += ["--max-dl-speed", str(network_limit)]
        if direct_io:
            args += ["--opts", "\n".join(["direct_write: 1", "direct_read: 1"]) + "\n"]
        args += [resource.Source.Skynet.Url]
        log.info(f"Starting skynet.copier process: {repr(' '.join(args))}")
        # TODO: read process stdout/stderr
        subprocess.check_call(args, shell=False)
    except BaseException:
        # BaseException on purpose: the target directory is dropped on
        # interruption (e.g. KeyboardInterrupt) as well, then the error propagates.
        safe_remove(rc_dir)
        raise


def check_direct_io():
    """Return the ``seed_direct_io`` setting from the skynet copier config.

    Reads ``/Berkanavt/supervisor/services/copier/etc/config.yaml``.

    Returns:
        The value stored under ``seed_direct_io``; False when the key is
        absent, the file is empty, or the file cannot be read or parsed
        (the error is logged at debug level).
    """
    try:
        with open("/Berkanavt/supervisor/services/copier/etc/config.yaml", "r") as f:
            # safe_load instead of yaml.load(f): calling load() without a
            # Loader is unsafe and raises TypeError on PyYAML >= 6, which the
            # handler below would silently turn into a permanent False.
            config = yaml.safe_load(f)
        # An empty YAML document parses to None.
        return (config or {}).get("seed_direct_io", False)
    except Exception as error:
        log.debug(f"in check_direct_io(): {error}")
    return False


class Fetcher:
    """Keeps local storage in sync with the resources assigned to this box.

    The assigned state is read from the box-assigned table, the applied state
    is kept in the box-applied table (both located under ``yt_dir``), and
    resources are downloaded with skynet into ``storage_dir``.

    In shadow mode the box-applied table is neither locked, read nor written;
    the applied state lives only in memory.
    """

    def __init__(self, yt_cluster, yt_dir, storage_dir, size_limit, network_limit, direct_io: bool, verify_checksum: bool, shadow_mode: bool, once: bool):
        """Store settings, create the YT client and run ``initialize()``.

        Args:
            yt_cluster: cluster passed to ``yt.YtClient``.
            yt_dir: directory passed to the table helpers.
            storage_dir: local root directory for downloaded resources.
            size_limit: forwarded to ``cleanup.run_cleanup``.
            network_limit: forwarded to ``fetch_rbtorrent``.
            direct_io: forwarded to ``fetch_rbtorrent``; also enables the
                skynet config check in ``run()``.
            verify_checksum: when true, ``check_rbtorrent`` is run after a fetch.
            shadow_mode: when true, nothing is written to the box-applied table.
            once: when true, ``run()`` returns as soon as nothing is left to fetch.
        """
        self._yt_cluster = yt_cluster
        self._yt_dir = yt_dir
        # TODO - pass token explicitly? (currently taken from the YT_TOKEN env variable)
        self._yt_client = yt.YtClient(self._yt_cluster, token=os.getenv("YT_TOKEN"), config={"backend": "rpc"})
        self._storage_dir = pathlib.Path(storage_dir)
        self._size_limit = size_limit
        self._network_limit = network_limit
        self._direct_io = direct_io
        self._verify_checksum = verify_checksum
        self._shadow_mode = shadow_mode
        self._once = once
        self._box_assigned = None
        self._box_applied = None
        # Derived state, filled by read_box_assigned_applied() and update_*_paths();
        # declared here so every attribute exists right after construction.
        self._box_assigned_resources_dict = {}
        self._whitelisted_paths = set()
        self._broken_paths = set()

        self.initialize()

    def get_box_assigned(self):
        """Return the box-assigned record of this instance.

        Raises:
            Exception: when the lookup by INSTANCE_FILTER returns nothing.
        """
        box_assigned_list = LookupBoxAssignedTable(self._yt_client, self._yt_dir, INSTANCE_FILTER)
        if not box_assigned_list:
            raise Exception(f"Can't find TBoxAssignedTable record using instance_filter: \"{INSTANCE_FILTER}\"")
        return box_assigned_list[0]

    def lock_box_applied(self):
        """Take an exclusive row lock on this instance's box-applied row."""
        self._yt_client.lock_rows(GetPathBoxAppliedTable(self._yt_dir), INSTANCE_FILTER, lock_type="exclusive")

    def get_box_applied(self):
        """Return the box-applied record of this instance, or None when absent."""
        box_applied_list = LookupBoxAppliedTable(self._yt_client, self._yt_dir, INSTANCE_FILTER)
        return box_applied_list[0] if box_applied_list else None

    def update_box_applied(self, box_applied):
        """Write ``box_applied`` to the box-applied table (no-op in shadow mode)."""
        if self._shadow_mode:
            return
        InsertBoxAppliedTable(self._yt_client, self._yt_dir, box_applied)

    def update_box_applied_file_status(self):
        """Push the in-memory FileStatus values into the stored box-applied row.

        Within one tablet transaction the row is locked and re-read, FileStatus
        of every resource also present in the in-memory state is overwritten,
        and the row is written back only if it changed. No-op in shadow mode.
        """
        if self._shadow_mode:
            return
        local_statuses = {lr.ResourceId: lr.FileStatus for lr in self._box_applied.Spec.Resource}
        # transaction and lock start here
        with self._yt_client.Transaction(type="tablet"):
            self.lock_box_applied()
            original_box_applied = self.get_box_applied()
            if original_box_applied is None:
                # The row is missing: start from the local state instead of
                # failing on None, so the row gets re-created below.
                box_applied = copy(self._box_applied)
            else:
                # NOTE(review): the change detection below relies on copy()
                # yielding a message independent of the original - confirm.
                box_applied = copy(original_box_applied)
            for r in box_applied.Spec.Resource:
                if r.ResourceId in local_statuses:
                    r.FileStatus = local_statuses[r.ResourceId]
            if original_box_applied != box_applied:
                self.update_box_applied(box_applied)

    def read_box_assigned_applied(self):
        """Reload the assigned state and reconcile the applied state with it.

        Resources no longer assigned are dropped from the applied spec, newly
        assigned ones are appended with FILE_NOT_FOUND, and every resource
        whose candidate directory is missing is reset to FILE_NOT_FOUND. The
        result is written back when it differs from the stored row.
        """
        self._box_assigned = self.get_box_assigned()
        self._box_assigned_resources_dict = {r.Id: r for r in self._box_assigned.Spec.Resource}

        original_box_applied = None
        with self._yt_client.Transaction(type="tablet"):
            if not self._shadow_mode:
                self.lock_box_applied()
                original_box_applied = self.get_box_applied()
                self._box_applied = copy(original_box_applied)

            # No stored row (or shadow mode on the first call): start from an empty spec.
            if not self._box_applied:
                self._box_applied = tables_pb2.TBoxAppliedTable(
                    DeployPodPersistentFqdn=self._box_assigned.DeployPodPersistentFqdn,
                    DeployPodId=self._box_assigned.DeployPodId,
                    DeployNodeDC=self._box_assigned.DeployNodeDC,
                    DeployProjectId=self._box_assigned.DeployProjectId,
                    DeployStageId=self._box_assigned.DeployStageId,
                    DeployUnitId=self._box_assigned.DeployUnitId,
                    DeployBoxId=self._box_assigned.DeployBoxId,
                    Spec=main_pb2.TBoxAppliedSpec(Resource=[], ActiveResourceSet=[])
                )

            # sync main_pb2.TBoxAppliedSpec.Resource with TBoxAssignedSpec.Resource (add and delete resources)
            filtered_by_box_assigned = [r for r in self._box_applied.Spec.Resource if r.ResourceId in self._box_assigned_resources_dict]
            del self._box_applied.Spec.Resource[:]
            self._box_applied.Spec.Resource.extend(filtered_by_box_assigned)
            box_applied_resource_ids = {r.ResourceId for r in self._box_applied.Spec.Resource}
            for r_id, assigned_resource in self._box_assigned_resources_dict.items():  # order is preserved here
                if r_id not in box_applied_resource_ids:
                    self._box_applied.Spec.Resource.append(
                        main_pb2.TBoxAppliedResourceSpec(
                            ResourceId=r_id,
                            ValidationStatus=assigned_resource.ValidationStatus,
                            FileStatus=main_pb2.EFileStatus.FILE_NOT_FOUND,
                        )
                    )

            # set main_pb2.TBoxAppliedSpec.Resource.FileStatus = main_pb2.EFileStatus.FILE_NOT_FOUND for definitely not fetched resources
            for r in self._box_applied.Spec.Resource:
                rc_dir = self._storage_dir / self._box_assigned_resources_dict[r.ResourceId].ResourceCandidateId
                if not rc_dir.exists() or not rc_dir.is_dir():
                    r.FileStatus = main_pb2.EFileStatus.FILE_NOT_FOUND

            if original_box_applied != self._box_applied:
                self.update_box_applied(self._box_applied)

    def update_whitelisted_paths(self):
        """Rebuild the set of paths handed to cleanup as ``whitelisted_paths``.

        The set holds the candidate directory of every assigned resource plus
        every file listed in the applied ActiveResourceSet entries.
        """
        self._whitelisted_paths = {self._storage_dir / resource.ResourceCandidateId for resource in self._box_assigned_resources_dict.values()}
        for resource_set in self._box_applied.Spec.ActiveResourceSet:
            for active_file in resource_set.ActiveFiles:
                self._whitelisted_paths.add(self._storage_dir / active_file)

    def update_broken_paths(self):
        """Rebuild the set of candidate directories whose resource is BAD_FILE_CHECKSUMM."""
        self._broken_paths = {
            self._storage_dir / self._box_assigned_resources_dict[r.ResourceId].ResourceCandidateId
            for r in self._box_applied.Spec.Resource
            if r.FileStatus in {EFileStatus.BAD_FILE_CHECKSUMM, }
        }

    def cleanup(self):
        """Run storage cleanup with the current whitelisted/broken paths and size limit."""
        cleanup.run_cleanup(
            storage_dir=self._storage_dir,
            whitelisted_paths=self._whitelisted_paths,
            broken_paths=self._broken_paths,
            size_limit=self._size_limit,
        )

    def initialize(self):
        """Create the storage directory, load the state and queue READY resources for verification."""
        self._storage_dir.mkdir(parents=True, exist_ok=True)
        self.read_box_assigned_applied()
        self.mark_ready_verify()

    def mark_ready_verify(self):
        """Switch every READY resource to READY_VERIFY and persist the statuses."""
        log.debug('Mark READY resources as READY_VERIFY')
        for r in self._box_applied.Spec.Resource:
            if r.FileStatus == main_pb2.EFileStatus.READY:
                r.FileStatus = main_pb2.EFileStatus.READY_VERIFY
        self.update_box_applied_file_status()

    def run(self):
        """Run the fetch loop.

        Each iteration reloads the state (unless ``once`` is set), runs cleanup
        and processes at most one resource that is not READY, then sleeps for
        ITERATION_WAIT_TIME seconds. Returns on KeyboardInterrupt, or, when
        ``once`` is set, as soon as no resource is left to process. Other
        exceptions raised inside an iteration are logged and the loop continues.
        """
        # The skynet config only matters when direct IO was requested; reporting
        # a failed check otherwise would be a false alarm.
        if not self._direct_io:
            log.debug("DirectIO is disabled, skipping skynet config check")
        elif check_direct_io():
            log.info("DirectIO check successful")
        else:
            log.error("DirectIO check failed, add \"seed_direct_io: True\" to skynet host config")

        while True:
            try:
                if not self._once:
                    self.read_box_assigned_applied()
                log.info("Applied resources list (after sync):")
                for r_app in self._box_applied.Spec.Resource:
                    r = self._box_assigned_resources_dict[r_app.ResourceId]
                    log.info(f"- Id={r.Id} TResourceSpec.Id={r.ResourceSpecId} Name={r.Name} Path={r.Path} {main_pb2.EFileStatus.Name(r_app.FileStatus)}")
                self.update_whitelisted_paths()
                self.update_broken_paths()
                self.cleanup()

                # Resources are visited in ascending FileStatus order; only the
                # first one that is not READY is handled per iteration.
                for r_app in sorted(self._box_applied.Spec.Resource, key=lambda applied: applied.FileStatus):
                    if r_app.FileStatus == EFileStatus.READY:
                        continue
                    r = self._box_assigned_resources_dict[r_app.ResourceId]
                    if r_app.FileStatus == EFileStatus.READY_VERIFY:
                        log.info(f'Verify Id={r.Id}, Name={r.Name}')
                    else:
                        log.info(f'Fetch  Id={r.Id}, Name={r.Name}')
                        if r_app.FileStatus == EFileStatus.FILE_NOT_FOUND:
                            r_app.FileStatus = EFileStatus.DOWNLOADING
                            self.update_box_applied_file_status()
                    try:
                        fetch_rbtorrent(r, self._storage_dir, self._network_limit, self._direct_io)
                        if self._verify_checksum and not check_rbtorrent(r, self._storage_dir):
                            r_app.FileStatus = EFileStatus.BAD_FILE_CHECKSUMM
                        else:
                            r_app.FileStatus = EFileStatus.READY
                    except Exception as e:
                        log.error(f'Error while fetch resource: {e}')
                        r_app.FileStatus = EFileStatus.DOWNLOAD_FAILED
                    self.update_box_applied_file_status()
                    break
                else:
                    log.info("Nothing to fetch, all done")
                    if self._once:
                        return

            except KeyboardInterrupt:
                return
            except Exception:
                # Exception rather than a bare except: SystemExit must still
                # be able to stop the loop.
                log.exception("Exception during fetch iteration:")
            log.info(f"Iteration done, will sleep for {ITERATION_WAIT_TIME} seconds")
            time.sleep(ITERATION_WAIT_TIME)
