import logging
import os
import re
import stat
import tarfile

from crypta.dmp.yandex.bin.ftp_to_yt.lib import exceptions


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# File names that validate_archive_dir() accepts inside an archive directory;
# any other directory entry is reported as unexpected.
META_FILE = "meta"
BINDINGS_FILE = "bindings"


def _check_members_inside(tar, output_dir):
    """Raise DmpArchiveError if any archive member would be written outside output_dir.

    Guards against member names that are absolute or contain ".." components,
    which tarfile.extractall() would otherwise follow.
    """
    root = os.path.realpath(output_dir)
    # Trailing separator so that e.g. "/data/out-evil" is not mistaken for a
    # path inside "/data/out".
    root_prefix = os.path.join(root, "")
    for member in tar.getmembers():
        # os.path.join() discards `root` when member.name is absolute, so
        # absolute names resolve outside the root and are rejected as well.
        target = os.path.realpath(os.path.join(root, member.name))
        if not os.path.join(target, "").startswith(root_prefix):
            raise exceptions.DmpArchiveError("archive contains unsafe path: {}".format(member.name))


def extract(archive_path, output_dir):
    """Unpack a gzip-compressed tar archive into output_dir and normalize permissions.

    After extraction every directory below output_dir gets owner
    read/write/execute permissions and every file gets owner read/write
    permissions. output_dir itself is not chmod-ed.

    :param archive_path: path to the .tar.gz archive
    :param output_dir: directory to extract the archive into
    :raises exceptions.DmpArchiveError: if the archive is not a valid tar.gz,
        fails the gzip CRC check, or contains a member that would be
        extracted outside output_dir
    :raises IOError: any other I/O error is propagated unchanged
    """
    logger.info("Extract %s to %s", archive_path, output_dir)
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            # The archive is external input: refuse path traversal before
            # anything is written to disk.
            _check_members_inside(tar, output_dir)
            tar.extractall(path=output_dir)
    except tarfile.TarError:
        raise exceptions.DmpArchiveError("invalid archive format")
    except IOError as e:
        # str(e) rather than e.message: the `message` attribute does not exist
        # on Python 3 exceptions, while str() works on both Python 2 and 3.
        if "CRC check failed" in str(e):
            raise exceptions.DmpArchiveError("invalid archive format. CRC check failed")
        raise

    # os.walk() is top-down, so each directory is made accessible before the
    # walk descends into it.
    for dirpath, dirnames, filenames in os.walk(output_dir):
        for dirname in dirnames:
            path = os.path.join(dirpath, dirname)
            logger.info("Set permissions to %s", path)
            os.chmod(path, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)

        for filename in filenames:
            path = os.path.join(dirpath, filename)
            logger.info("Set permissions to %s", path)
            os.chmod(path, stat.S_IREAD | stat.S_IWRITE)


def get_ts(filename):
    """Return the integer encoded in an archive file name.

    The name must look like ``segments-<10 digits>.tar.gz``; the ten digits
    (presumably a Unix timestamp in seconds -- confirm with callers) are
    returned as an int.

    :param filename: archive file name, without any directory part
    :raises exceptions.DmpArchiveError: if the name does not match the format
    """
    # The pattern is anchored with "^", so matching from the start of the
    # string is equivalent to searching.
    matched = re.match(r'^(segments-)(\d{10})\.tar\.gz$', filename)
    if matched is None:
        raise exceptions.DmpArchiveError("wrong archive name format")

    _prefix, digits = matched.groups()
    return int(digits)


def validate_archive_dir(archive_dir):
    """Check that an archive directory holds nothing but the expected regular files.

    Every entry of archive_dir must be named META_FILE or BINDINGS_FILE and
    must be a regular file. The presence of both files is not checked here.

    :param archive_dir: directory to validate
    :raises exceptions.DmpArchiveError: if the directory contains an entry with
        another name, or an allowed entry is not a regular file
    """
    found_files = set(os.listdir(archive_dir))

    unexpected_files = found_files - {META_FILE, BINDINGS_FILE}
    if unexpected_files:
        # Sorted so the error message is deterministic regardless of set
        # iteration order.
        raise exceptions.DmpArchiveError(
            "archive contains unexpected file(s): {}".format(", ".join(sorted(unexpected_files)))
        )

    # Sorted so that the entry reported first is stable when several entries
    # are not regular files.
    for name in sorted(found_files):
        if not os.path.isfile(os.path.join(archive_dir, name)):
            raise exceptions.DmpArchiveError("{} is not a file".format(name))
