#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
import os
import sys
import fnmatch
import ctypes
import threading
import subprocess
import signal
import logging
import re
from os.path import join, getsize
from optparse import OptionParser


# Handle to the C library; set_pdeathsig() calls prctl() through it.
# NOTE(review): the hard-coded soname 'libc.so.6' presumably restricts this
# script to glibc-based Linux systems -- confirm before running elsewhere.
libc = ctypes.CDLL('libc.so.6')


# Root logger configuration: everything from DEBUG up is emitted, each record
# prefixed with a timestamp, the level name and the name of the logger.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S")


class LineExtractor(object):
    """
    Assembles lines from consecutive chunks of text.

    Lines are separated by `sep`, which may be longer than one character and
    may arrive split across two consecutive chunks. Line length is limited by
    `max_line_length`; any excess up to the next separator is skipped.
    Chunks must be fed into the `process_chunk` method.
    For each full line `on_line_ready` is called.

    There is no flush operation: data following the last separator stays
    buffered and is only reported once it overflows `max_line_length` or a
    later chunk delivers the separator.
    """

    def __init__(self, on_line_ready, max_line_length=200, sep="\n"):
        """
        `on_line_ready` is a function of 2 arguments: the line itself (without
        the separator) and a boolean flag indicating whether the line was
        truncated to `max_line_length` or not.

        Raises ValueError when `sep` is empty, because an empty separator
        matches everywhere and the chunk would never be consumed.
        """
        if not sep:
            raise ValueError("sep must not be empty")
        self.max_line = max_line_length
        self.sep = sep
        self.sink = on_line_ready
        # True while the remainder of an over-long line is being discarded.
        self.skip = False
        # Trailing piece of the previous chunk that may be the beginning of a
        # separator completed by the next chunk (multi-character `sep` only).
        self._pending = None
        self._reset()

    def _reset(self):
        """Start collecting a new line."""
        self.line_chunks = []
        self.line_left = self.max_line

    def _append(self, chunk):
        """Add `chunk` to the current line and account for its length."""
        self.line_chunks.append(chunk)
        self.line_left -= len(chunk)

    def _commit(self, truncated):
        """Hand the collected line over to the sink and start a new one."""
        self.sink("".join(self.line_chunks), truncated)
        self._reset()

    def _partial_sep_tail(self, chunk):
        """
        Return the longest tail of `chunk` that is a proper prefix of `sep`.

        The result is empty for single-character separators, so they are
        never held back.
        """
        longest = min(len(self.sep) - 1, len(chunk))
        for size in range(longest, 0, -1):
            tail = chunk[-size:]
            if self.sep.startswith(tail):
                return tail
        return chunk[:0]

    def process_chunk(self, chunk):
        """
        `chunk` is a string of arbitrary size,

        possibly containing none or more of `sep`,
        for example read from program stderr.
        """
        if self._pending:
            # Re-examine the held back separator candidate together with the
            # data that follows it.
            chunk = self._pending + chunk
            self._pending = None

        sep = self.sep
        sep_len = len(sep)
        while chunk:
            if self.skip:
                sep_pos = chunk.find(sep)
                if sep_pos == -1:
                    # Everything is excess; only a possible separator start
                    # has to survive until the next chunk.
                    self._pending = self._partial_sep_tail(chunk)
                    break
                self.skip = False
                chunk = chunk[sep_pos + sep_len:]
                continue

            # The separator may start right after the last allowed character,
            # hence the search window of `line_left + sep_len`.
            sep_pos = chunk.find(sep, 0, self.line_left + sep_len)
            if sep_pos != -1:
                self._append(chunk[:sep_pos])
                self._commit(False)
                chunk = chunk[sep_pos + sep_len:]
                continue

            tail = self._partial_sep_tail(chunk)
            body_len = len(chunk) - len(tail)
            if body_len <= self.line_left:
                # The line is not finished yet and still fits.
                self._append(chunk[:body_len])
                self._pending = tail
                break

            # Line is too long: emit what fits and drop the rest.
            # `line_left` is reset by `_commit`, so remember the cut first.
            cut = self.line_left
            self._append(chunk[:cut])
            self._commit(True)
            self.skip = True
            chunk = chunk[cut:]


class StreamLogger(object):
    """
    Copies the lines of a stream into a logger from a background thread.

    The logger used is a child of `parent_logger` named
    "<parent name>.<suffix>". Lines are assembled by `LineExtractor` with its
    default limits; lines it reports as truncated get a "...<TRUNCATED>"
    marker appended. Every line is logged at INFO level.
    """

    # Maximum number of bytes requested from the descriptor per read.
    READ_SIZE = 256

    def __init__(self, parent_logger, suffix):
        self.logger = logging.getLogger(parent_logger.name + "." + suffix)
        self.thread = None

    def start(self, file_object):
        """Start the reader thread on the descriptor of `file_object`."""
        def target():
            self._read_and_log(file_object.fileno())

        self.thread = threading.Thread(target=target, name=self.logger.name)
        self.thread.start()

    def _read_and_log(self, fd):
        """Read `fd` until end of file, logging every complete line."""
        def sink(s, truncated):
            if truncated:
                s = s + "...<TRUNCATED>"
            self.logger.info(s)

        # NOTE(review): LineExtractor only reports a line once its separator
        # arrives or the length limit is exceeded, so output after the last
        # newline is not logged.
        limiter = LineExtractor(sink)
        while True:
            chunk = os.read(fd, self.READ_SIZE)
            if not chunk:
                # An empty read means the other side closed the stream.
                break
            limiter.process_chunk(chunk)

    def wait(self):
        """Block until the reader thread ends; no-op if it was never started."""
        if self.thread is not None:
            self.thread.join()


def set_pdeathsig():
    """
    Call prctl(PR_SET_PDEATHSIG, SIGTERM) for the calling process.

    Takes no arguments so that it can be handed to `subprocess.Popen` as
    `preexec_fn`; the call goes through the module-level `libc` handle and
    its return value is not inspected.
    """
    option = 1  # numeric value of PR_SET_PDEATHSIG
    death_signal = signal.SIGTERM
    libc.prctl(option, death_signal)


def create_revisionapi_tool_process(cfg, user_id, offset):
    """
    Launch `revisionapi` in batch import mode and return the Popen object.

    `cfg` and `user_id` are strings placed verbatim into the `--cfg` and
    `--user-id` options; `offset` is converted with `str` for `--id-offset`.
    All three standard streams of the child are unbuffered pipes, and
    `set_pdeathsig` runs in the child before the program is executed.
    """
    command = [
        'revisionapi',
        '--cfg=' + cfg,
        '--cmd=import',
        '--batch-mode',
        '--id-offset=' + str(offset),
        '--user-id=' + user_id,
    ]
    pipe = subprocess.PIPE
    return subprocess.Popen(
        command,
        bufsize=0,
        stdin=pipe,
        stdout=pipe,
        stderr=pipe,
        preexec_fn=set_pdeathsig)


class ArgumentError(Exception):
    """Signals a problem with the command line options given to the script."""

class RevisionapiCommError(Exception):
    """Signals that the dialogue with a revisionapi process broke down."""


class RevisionapiRunner(object):
    """
    Feeds a list of files to one `revisionapi` import process on a thread.

    Dialogue as implemented here: each path is written to the tool's stdin on
    its own line and the runner waits for a "completed: <path>" line on the
    tool's stdout; an empty line then ends the batch and the tool is expected
    to answer with "commit-ids: <id>[,<id>...]". Progress, commit ids and
    failures are reported to `controller`; the tool's stderr is copied to the
    "<logger name>.err" logger.
    """

    # Final answer of the tool: a comma separated list of decimal ids.
    COMMIT_IDS_PATTERN = r"commit-ids: ((?:\d+)(?:,\d+)*)"

    def __init__(self, logger, path_size_list, controller):
        """
        `path_size_list` is a list of (path, size) pairs to import,
        `controller` is the shared object that receives progress and failures.
        """
        self.path_size_list = path_size_list
        self.controller = controller
        self.logger = logger
        self.thread = None

    @staticmethod
    def _completed_pattern(path):
        """Return the regex matching the tool's acknowledgement of `path`."""
        # The path is data, not a pattern: without escaping, characters such
        # as "+", "(" or "[" would change what is matched or fail to compile.
        return "completed: {0}".format(re.escape(path))

    def _read_matched_line(self, process, pattern):
        """
        Read the tool's stdout until a line matches `pattern` at its start.

        Lines that do not match are logged as warnings and skipped. Returns
        the match object; raises RevisionapiCommError when stdout reaches end
        of file first.
        """
        while True:
            line = process.stdout.readline()
            if not line:
                raise RevisionapiCommError("revisionapi closed stdout unexpectedly")
            match = re.match(pattern, line)
            if match:
                return match
            self.logger.warning("unexpected response: {0}".format(line))

    def _communicate(self, process):
        """Send every path to the tool, then request and record commit ids."""
        for path, size in self.path_size_list:
            if self.controller.must_stop():
                self.logger.info("stopped by request")
                break
            process.stdin.write(path + "\n")
            process.stdin.flush()
            self._read_matched_line(process, self._completed_pattern(path))
            self.controller.file_completed(path, size)

        # NOTE(review): the terminating empty line is sent even after a stop
        # request, presumably so the tool still reports the ids of what it
        # already imported -- confirm.
        process.stdin.write("\n")
        process.stdin.flush()
        match = self._read_matched_line(process, self.COMMIT_IDS_PATTERN)
        self.controller.process_commit_ids(match.group(1).split(","))
        self.logger.info("finished")

    def _terminate_quietly(self, process):
        """Best-effort termination of the tool; failures are only logged."""
        try:
            process.terminate()
        except Exception:
            self.logger.debug("could not terminate revisionapi", exc_info=True)

    def _loop(self):
        """Thread body: start the tool, run the dialogue, check the exit code."""
        try:
            process = create_revisionapi_tool_process(
                self.controller.cfg,
                self.controller.user_id,
                self.controller.offset)
            err_logger = StreamLogger(self.logger, "err")
            err_logger.start(process.stderr)
        except Exception:
            self.logger.exception("failed to start revisionapi:")
            self.controller.fail()
            return

        try:
            self._communicate(process)
        except RevisionapiCommError as e:
            # Expected kind of failure: the message alone is enough.
            self.controller.fail()
            self.logger.error(e)
            self._terminate_quietly(process)
        except Exception:
            self.controller.fail()
            self.logger.exception("revisionapi communication error: ")
            self._terminate_quietly(process)

        exit_code = process.wait()
        err_logger.wait()
        if exit_code != 0:
            self.logger.error("revisionapi tool failed with code {0}".format(exit_code))
            self.controller.fail()

    def run(self):
        """Start processing on a new thread and return immediately."""
        self.thread = threading.Thread(target=self._loop, name=self.logger.name)
        self.thread.start()

    def wait(self):
        """Wait for the thread started by `run`; no-op if it was not started."""
        if self.thread is not None:
            try:
                self.thread.join()
            except KeyboardInterrupt:
                # Deliberately ignored, as in the original behaviour.
                pass


class Controller(object):
    """
    State shared by all runners: tool settings, progress and failure flag.

    `cfg`, `user_id` and `offset` are copied from `options`. One instance is
    used by several runner threads at once, so the progress counters and the
    commit id list are updated under a lock.
    """

    def __init__(self, logger, options, total_files, total_bytes):
        """
        `total_files` and `total_bytes` describe the whole job and are only
        used to compute the percentages in progress messages.
        """
        self.logger = logger
        self.cfg = options.cfg
        self.user_id = options.user_id
        self.offset = options.offset
        self.files = 0
        self.files_total = total_files
        self.bytes = 0
        self.bytes_total = total_bytes
        self._failed = False
        self.commit_ids = []
        # Guards `files`, `bytes` and `commit_ids`.
        self._lock = threading.Lock()

    def fail(self):
        """Mark the whole job as failed."""
        self._failed = True

    def failed(self):
        """Return True once any participant has called `fail`."""
        return self._failed

    def must_stop(self):
        """Return True when runners should stop sending further files."""
        return self._failed

    def file_completed(self, path, size):
        """Count one more imported file of `size` bytes and log progress."""
        with self._lock:
            self.files += 1
            self.bytes += size
        # Logged outside the lock; report_progress takes its own snapshot.
        self.report_progress()

    def process_commit_ids(self, commit_ids):
        """Remember the commit ids reported by one runner."""
        with self._lock:
            self.commit_ids.extend(commit_ids)

    @staticmethod
    def _percent(done, total):
        """Integer percentage of `done` in `total`; 100 when `total` is 0."""
        if not total:
            # Nothing to do counts as complete and avoids dividing by zero.
            return 100
        # Floor division keeps the value an integer on Python 3 as well.
        return done * 100 // total

    def report_progress(self):
        """Log the number of files and bytes completed so far."""
        with self._lock:
            files_done = self.files
            bytes_done = self.bytes
        self.logger.info("completed: files: {0}/{1} ({2}%), bytes: {3}/{4} ({5}%)".format(
                files_done, self.files_total,
                self._percent(files_done, self.files_total),
                bytes_done, self.bytes_total,
                self._percent(bytes_done, self.bytes_total)))


def check_options(options):
    """
    Verify that all mandatory options are present.

    Checks `input_dir`, `cfg` and `user_id` in that order and raises
    ArgumentError for the first one that is None.
    """
    mandatory = (
        ("input_dir", "input dir is not specified"),
        ("cfg", "config path is not specified"),
        ("user_id", "user id is not specified"),
    )
    for attribute, complaint in mandatory:
        if getattr(options, attribute) is None:
            raise ArgumentError(complaint)


def divide_job(path_size_list, size_threshold):
    """
    Split `path_size_list` into consecutive groups of roughly equal size.

    `path_size_list` is a sequence of (path, size) pairs. Entries are added
    to the current group until its accumulated size exceeds `size_threshold`;
    the group is then closed and a new one is started.

    Returns a list of (group, group_size) pairs, where `group` is a list of
    the original (path, size) pairs. Every input entry ends up in exactly one
    group, including trailing entries whose sizes add up to zero.
    """
    result = []
    current_size = 0
    current_list = []

    for path_size in path_size_list:
        current_list.append(path_size)
        current_size += path_size[1]
        if current_size > size_threshold:
            result.append((current_list, current_size))
            current_list = []
            current_size = 0

    # Test the list, not the size: a tail consisting only of empty files has
    # size 0 but still has to be processed.
    if current_list:
        result.append((current_list, current_size))
    return result


def find_paths_sizes(dir_path, pattern):
    """
    Collect the files below `dir_path` whose names match `pattern`.

    The tree is traversed with `os.walk`; `pattern` is an `fnmatch` style
    pattern applied to file names only. Returns a list of (path, size) pairs
    in traversal order, with the size taken from `os.path.getsize`.
    """
    found = []
    for folder, _subdirs, file_names in os.walk(dir_path):
        matching = (join(folder, file_name)
                    for file_name in fnmatch.filter(file_names, pattern))
        found.extend((full_path, getsize(full_path)) for full_path in matching)
    return found


def load_max_id(input_dir):
    """
    Return the integer stored on the first line of `<input_dir>/maxid.txt`.

    Surrounding whitespace is ignored. Errors from opening the file and the
    ValueError raised for a non-numeric line propagate to the caller.
    """
    # The context manager closes the file even when the conversion fails.
    with open(os.path.join(input_dir, "maxid.txt"), "r") as max_id_file:
        return int(max_id_file.readline().strip())

def clear_all(input_dir, cfg):
    """
    Run `revisionapi --cmd=clearAll` and wait for it to finish.

    The value read by `load_max_id(input_dir)` is passed as `--sequence-id`.
    The tool's stderr and stdout are copied line by line to the
    "json2tds.clear_all.err" and "json2tds.clear_all.out" loggers.
    Raises Exception when the tool exits with a non-zero code.
    """
    sequence_id = load_max_id(input_dir)
    command = [
        "revisionapi",
        "--cfg=" + cfg,
        "--cmd=clearAll",
        "--sequence-id=" + str(sequence_id),
    ]
    pipe = subprocess.PIPE
    tool = subprocess.Popen(
        command, bufsize=0,
        stdin=pipe, stdout=pipe, stderr=pipe,
        preexec_fn=set_pdeathsig)

    parent_logger = logging.getLogger("json2tds.clear_all")
    stderr_copier = StreamLogger(parent_logger, "err")
    stdout_copier = StreamLogger(parent_logger, "out")
    stderr_copier.start(tool.stderr)
    stdout_copier.start(tool.stdout)

    status = tool.wait()
    # Let both copier threads finish before judging the result.
    stderr_copier.wait()
    stdout_copier.wait()
    if status != 0:
        raise Exception("revisionapi --cmd=clearAll failed with code {0}".format(status))

def reserve_ids(input_dir, cfg):
    """
    Run `revisionapi --cmd=reserveObjectIds` and return the number it prints.

    The value read by `load_max_id(input_dir)` is passed as `--id-count`.
    The tool's stderr is copied to the "json2tds.reserve_ids.err" logger and
    its whole stdout is converted with `int`.
    Raises Exception when the tool exits with a non-zero code.
    """
    # json ids must start from 1
    id_count = load_max_id(input_dir)
    command = [
        "revisionapi",
        "--cfg=" + cfg,
        "--cmd=reserveObjectIds",
        "--id-count=" + str(id_count),
    ]
    pipe = subprocess.PIPE
    tool = subprocess.Popen(
        command, bufsize=0,
        stdin=pipe, stdout=pipe, stderr=pipe,
        preexec_fn=set_pdeathsig)

    stderr_copier = StreamLogger(logging.getLogger("json2tds.reserve_ids"), "err")
    stderr_copier.start(tool.stderr)

    # Reading until end of file drains stdout before the exit code is taken.
    printed = tool.stdout.read()
    status = tool.wait()
    stderr_copier.wait()
    if status != 0:
        raise Exception("revisionapi --cmd=reserveObjectIds failed with code {0}".format(status))
    return int(printed)
    

def main():
    """
    Command line entry point.

    Finds the input files, splits them into at most `--threads` groups of
    similar total size and imports every group with its own revisionapi
    process. On success the collected commit ids are written, comma
    separated, to `--commit-ids-file`.

    Exit codes: 2 for invalid command line options, 1 when the import fails
    or any other error occurs.
    """
    parser = OptionParser()
    parser.add_option("--input-dir", help="directory to read json files from")
    parser.add_option("--cfg", help="path to config file")
    parser.add_option("--user-id", help="user id to pass to revisionapi tool")
    parser.add_option("--file-name-pattern", help="only files matching pattern processed")
    parser.add_option("--threads", type="int", help="maximum revisionapi processes to start")
    parser.add_option("--append", action="store_true", default=False)
    parser.add_option("--commit-ids-file", help="path to store commit ids")

    # `offset` has no command line option; it is only changed below when
    # --append is given.
    parser.set_defaults(
        file_name_pattern="*.json",
        threads=15,
        offset=0,
        commit_ids_file="commit-ids.txt")

    logger = logging.getLogger("json2tds")

    try:
        options, _positional = parser.parse_args()
        check_options(options)
        # Zero would divide by zero below, and a negative value would yield a
        # negative threshold and therefore one process per file.
        if options.threads < 1:
            raise ArgumentError("threads must be a positive number")

        paths_sizes = find_paths_sizes(options.input_dir, options.file_name_pattern)
        total = sum([ps[1] for ps in paths_sizes])

        divided = divide_job(paths_sizes, total / options.threads)

        if options.append:
            # Appending: the number printed by the tool becomes the id offset.
            options.offset = reserve_ids(options.input_dir, options.cfg)
        else:
            clear_all(options.input_dir, options.cfg)

        controller = Controller(logger, options, len(paths_sizes), total)
        runners = []
        for n, (group, _group_size) in enumerate(divided):
            runner = RevisionapiRunner(
                logging.getLogger("json2tds.{0}".format(n)), group, controller)
            runner.run()
            runners.append(runner)
        for runner in runners:
            runner.wait()
        if controller.failed():
            sys.exit(1)
        with open(options.commit_ids_file, "w") as f:
            f.write(",".join(controller.commit_ids))

    except ArgumentError as e:
        logger.error("program argument error: {0}".format(e))
        sys.exit(2)
    except Exception as e:
        logger.error(e)
        sys.exit(1)


# Run the importer only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
