import hashlib
import os
import random
import string
import traceback

import gevent
try:
    import gevent.coros as coros
except ImportError:
    import gevent.lock as coros
import gevent.pool

import py

from ..greenlet import LinkedFailed
from ..utils import Path


class IO(object):
    """
    Base class for asynchronous piece I/O.

    Requests are pushed onto ``self.jobs`` as plain tuples by the ``async_*`` methods and are
    meant to be consumed by ``_worker`` greenlets. ``_worker`` itself must be provided by
    subclasses.
    """

    def __init__(self, log):
        """
        :param log: logger-like object (only ``debug`` is used by this class)
        """
        self.log = log

        # Number of worker greenlets spawned by start(); may be changed before start()
        self.workers_count = 16

        self.piecemap = None
        self.jobs = gevent.queue.Queue()
        self.workers = []

        self._started = False

    def set_piecemap(self, pmap):
        """Attach a piece map; its ``loaded`` event must already be set."""
        assert pmap.loaded.isSet()
        self.piecemap = pmap

    def _worker(self):
        """Job-processing loop executed by every worker greenlet (subclass responsibility)."""
        raise NotImplementedError()

    def start(self):
        """Spawn ``workers_count`` worker greenlets; does nothing if already started."""
        if self._started:
            return

        for _ in range(self.workers_count):
            self.workers.append(gevent.spawn(self._worker))

        self.log.debug('Started %d workers', len(self.workers))
        self._started = True

    def stop(self):
        """Kill all worker greenlets (waiting for each one) and allow start() to be called again."""
        while self.workers:
            wrk = self.workers.pop(0)
            wrk.kill(block=True)

        # Without resetting the flag a subsequent start() would return early and leave
        # the object with no workers at all.
        self._started = False

        self.log.debug('Stopped all workers')

    def async_read(self, idx, mem, cb, ecb):
        """
        Queue a read of piece ``idx`` into ``mem``.

        The job is queued as ``('READ', idx, mem, cb, ecb)``.
        """
        self.jobs.put(('READ', idx, mem, cb, ecb))

    def async_read_path(self, idx, path, mem, cb, ecb, real_path=None):
        """
        Queue a read of piece ``idx`` from an explicit ``path`` into ``mem``.

        ``real_path`` defaults to ``path`` when omitted (or falsy). The job is queued as
        ``('READ_PATH', idx, path, real_path, mem, cb, ecb)``.
        """
        if not real_path:
            real_path = path
        self.jobs.put(('READ_PATH', idx, path, real_path, mem, cb, ecb))

    def async_write(self, idx, mem, cb, ecb):
        """
        Queue a write of ``mem`` as piece ``idx``.

        The job is queued as ``('WRITE', idx, mem, cb, ecb)``.
        """
        self.log.debug('WRT: PIECE %d (%d bytes async...)', idx, mem.size)
        self.jobs.put(('WRITE', idx, mem, cb, ecb))


class SeedIO(IO):
    """
    Read-only I/O used while seeding: every READ job is answered through ``seed_cb``.
    """

    def __init__(self, log):
        super(SeedIO, self).__init__(log)

        # Seeding is served by only two workers.
        self.workers_count = 2

        # Data provider, invoked by the worker as
        #   seed_cb(md5, start, length, mem, sha1)
        # and expected to return a truthy value once ``mem`` is filled.
        self.seed_cb = None

    def set_seed_cb(self, cb):
        """Install the callback used to fetch piece data."""
        self.seed_cb = cb

    def _worker(self):
        """
        Serve READ jobs forever.

        On success ``cb(block_idx)`` is called; any failure other than ``GreenletExit`` is
        reported through ``ecb(block_idx, exception, formatted_traceback)``.
        """
        while True:
            job = self.jobs.get()

            assert job[0] == 'READ'

            block_idx, mem, cb, ecb = job[1], job[2], job[3], job[4]

            try:
                data = self.piecemap.data_by_idx[block_idx]

                # Index of the piece inside its own data
                file_block_idx = block_idx - data.offset

                md5 = data.md5hash
                piece = data.pieces[file_block_idx]

                # The requested range is clipped to the data size
                start = piece.length * file_block_idx
                length = min(start + piece.length, data.size) - start

                assert self.seed_cb(md5, start, length, mem, piece.sha1hash)
                assert mem.size == length
                mem.rewind()

                cb(block_idx)

            except gevent.GreenletExit:
                raise

            except BaseException as ex:
                ecb(block_idx, ex, traceback.format_exc())

    def async_write(self, idx, mem, cb, ecb):
        """Writing is not supported in seeding mode."""
        raise NotImplementedError('Writes is not possible with SeedIO')


class DlIO(IO):
    def __init__(self, log):
        """Create a download I/O object; callbacks and piece map are attached later."""
        super(DlIO, self).__init__(log)

        # Download root, assigned together with the callbacks by set_dl_cbs()
        self.dl_path = None

        # Writer callback: stores data received from the network.
        # _worker() invokes it as dl_put_cb(fn, start, mem, sha1, quickhash).
        self.dl_put_cb = None

        # Reader callback: grabs data from files, both to send it to other peers and to
        # check files which already exist while the initial piece map is constructed.
        # _worker() invokes it as dl_get_cb(fn, start, length, mem, sha1).
        self.dl_get_cb = None

        # path => pathinfo
        self.pathmap = {}
        # data => [paths]
        self.path_by_data = {}

        self._dedup = None
        self._partial = None

        self.stats = dict(total_bytes=0, done_bytes=0)

    def set_deduplicate(self, mode):
        assert mode in (False, 'symlink', 'hardlink', 'hardlinknocheck'), 'Invalid deduplicate mode: %r' % (mode, )

        self._dedup = mode

    def set_dl_cbs(self, path, get_cb, put_cb):
        self.dl_path = path
        self.dl_get_cb = get_cb
        self.dl_put_cb = put_cb

    def set_piecemap(self, pmap):
        """
        Attach the piece map and build per-path bookkeeping for every path of every data.

        Requires the download root (``dl_path``) to be configured already. Each entry of
        ``self.pathmap`` is a mutable list, see the index comments below.
        """
        super(DlIO, self).set_piecemap(pmap)

        assert self.dl_path

        for _, data in pmap.data.itervalues():
            for rel_path in pmap.path_by_data[data]:
                abs_path = self.dl_path.join(rel_path)
                self.path_by_data.setdefault(data, []).append(abs_path)

                # Every path gets its own slice of the piece flags of its data
                first_piece = data.offset
                own_pieces = pmap.piece_array.pieces[first_piece:first_piece + len(data.pieces)]

                self.stats['total_bytes'] += data.size

                self.pathmap[abs_path] = [
                    False,          # 0 file is not done yet
                    data.offset,    # 1 data offset
                    own_pieces,     # 2 per-piece done flags
                    0,              # 3 read bytes
                    0,              # 4 written bytes
                    None,           # 5 mtime once done
                    None,           # 6 size once done
                    None,           # 7 inode once done
                    None,           # 8 mode once done
                ]

    def set_partial(self, partial):
        self._partial = partial

    def _check_file_done(self, md5hash, path, pathinfo, origin):
        if not pathinfo[0] and all(pathinfo[2]):
            stat = path.stat()

            self.log.info(
                '%s: file done %s (%s, read %d, wrote %d) [inode: %d, blocks: %d, mtime: %d]',
                md5hash, path, origin, pathinfo[3], pathinfo[4], stat.ino, stat.blocks, stat.mtime
            )

            pathinfo[5] = stat.mtime
            pathinfo[6] = stat.size

            pathinfo[7] = stat.ino
            pathinfo[8] = stat.mode
            pathinfo[0] = True

            return True

    def _worker(self):
        while True:
            job = self.jobs.get()
            if job[0] in ('READ', 'READ_PATH'):
                block_idx = job[1]

                if job[0] == 'READ_PATH':
                    fn = job[2]
                    rfn = job[3]
                    mem = job[4]
                    cb = job[5]
                    ecb = job[6]
                else:
                    fn = rfn = None
                    mem = job[2]
                    cb = job[3]
                    ecb = job[4]

                try:
                    data = self.piecemap.data_by_idx[block_idx]

                    file_block_idx = block_idx - data.offset

                    base_path = self.dl_path

                    piece = data.pieces[file_block_idx]
                    start = piece.length * file_block_idx
                    end = start + piece.length
                    end = min(end, data.size)
                    length = end - start

                    if fn is None:
                        fn = rfn = base_path.join(data.name).strpath

                    pathinfo = self.pathmap.get(rfn, None)

                    try:
                        assert self.dl_get_cb(fn, start, length, mem, piece.sha1hash)
                        assert mem.size == length
                        mem.rewind()

                        if pathinfo:
                            pathinfo[3] += length
                    except BaseException as ex:
                        self.log.error('Unable to call dl_get_cb: %s', ex)
                        raise

                    cb(block_idx)

                except gevent.GreenletExit:
                    raise

                except BaseException as ex:
                    tb = traceback.format_exc()
                    try:
                        ecb(block_idx, ex, tb)
                    except BaseException as ex:
                        self.log.critical('cb unhandled error: %s', tb)
                        self.log.critical(
                            'during handling above error, another error occurred: %s', traceback.format_exc()
                        )
                        os._exit(1)
                    continue

            elif job[0] == 'WRITE':
                block_idx = job[1]
                mem = job[2]
                cb = job[3]
                ecb = job[4]

                try:
                    base_path = self.dl_path

                    data = self.piecemap.data_by_idx[block_idx]
                    file_block_idx = block_idx - data.offset
                    piece = data.pieces[file_block_idx]

                    start = piece.length * file_block_idx

                    written_inodes = set()

                    for path in self.piecemap.path_by_data[data]:
                        if self._partial and path not in self._partial:
                            continue

                        path = base_path.join(path)
                        pathinfo = self.pathmap[path]

                        if pathinfo[0]:
                            # already done
                            continue

                        if pathinfo[2][file_block_idx]:
                            # block already done
                            continue

                        if pathinfo[7] not in written_inodes:
                            assert self.dl_put_cb(path.strpath, start, mem, piece.sha1hash, piece.quickhash)
                            stat = path.stat()
                            self.log.debug(
                                'WRT: PIECE %d fn %s [ino: %d, size: %d, blocks: %d] '
                                'chunk [off: %d, size: %d, sha1: %s] done',
                                block_idx, path.strpath, stat.ino, stat.size, stat.blocks,
                                start, mem.size, str(piece.sha1hash)
                            )

                            pathinfo[4] += mem.size  # add written bytes
                            written_inodes.add(pathinfo[7])
                            pathinfo[2][file_block_idx] = True
                            self._check_file_done(data.md5hash, path, pathinfo, 'wrote (direct)')
                        else:
                            # we already wrote to this inode, so just mark as done
                            pathinfo[2][file_block_idx] = True
                            self._check_file_done(data.md5hash, path, pathinfo, 'wrote (indirect)')

                        self.stats['done_bytes'] += mem.size

                    cb(block_idx)

                except gevent.GreenletExit:
                    raise

                except BaseException as ex:
                    tb = traceback.format_exc()
                    try:
                        ecb(block_idx, ex, tb)
                    except BaseException as ex:
                        self.log.critical('cb unhandled error: %s', tb)
                        self.log.critical(
                            'during handling above error, another error occurred: %s', traceback.format_exc()
                        )
                        os._exit(1)
                    continue

    def _check_path(self, shmem, data, path, alias_paths, on_piece_done):
        """
        Verify all not-yet-done pieces of ``path`` by reading them back and comparing sha1.

        :param shmem: provider of memory segments (``get_segment`` / ``put_segment``)
        :param data: data description the path belongs to
        :param path: path to read from; must be a key of ``self.pathmap``
        :param alias_paths: other paths sharing the inode of ``path``; every piece verified
                            for ``path`` is marked as done for them as well
        :param on_piece_done: called with the global block index for every piece which was
                              not yet done in the piece map

        Blocks until every scheduled read has reported back (successfully or not).
        """
        pathinfo = self.pathmap[path]

        # evfin, reading, left, still scheduling
        read_state = [gevent.event.Event(), 0, pathinfo[2].count(False), True]

        self.log.debug('%s: checking path %s', data.md5hash, path)

        memory_segments = {}

        def _read_finished():
            read_state[1] -= 1
            # While reads are still being scheduled a zero counter only means "nothing in
            # flight right now" (getting a memory segment may yield to the workers), so the
            # finish event must not be set yet.
            if read_state[1] == 0 and not read_state[3]:
                read_state[0].set()

        def _on_read(block_idx):
            memory_segment = memory_segments.pop(block_idx)
            datalen = memory_segment.size
            sha1hash = hashlib.sha1(memory_segment.peek()).hexdigest()
            shmem.put_segment(memory_segment)

            idx = block_idx - data.offset
            piece = data.pieces[idx]

            if piece.sha1hash == sha1hash:
                pathinfo[2][idx] = True

                read_state[2] -= 1
                if read_state[2] == 0:
                    self._check_file_done(data.md5hash, path, pathinfo, 'checked')

                if not self.piecemap.piece_array.pieces[block_idx]:
                    self.piecemap.piece_done(block_idx)
                    on_piece_done(block_idx)

                self.stats['done_bytes'] += datalen

                # Store blocks for paths with same inode
                for alias_path in alias_paths:
                    self.pathmap[alias_path][2][idx] = True
                    self.stats['done_bytes'] += datalen

            _read_finished()

        def _on_read_err(block_idx, ex, tb):
            memory_segment = memory_segments.pop(block_idx)
            shmem.put_segment(memory_segment)

            self.log.warning('%s: Unable to read %s: %s', data.md5hash, path, ex)

            _read_finished()

        for idx, done in enumerate(pathinfo[2]):
            if done:
                continue

            read_state[1] += 1

            real_idx = data.offset + idx
            memory_segment = shmem.get_segment(block=True)
            memory_segments[real_idx] = memory_segment
            self.async_read_path(real_idx, path.strpath, memory_segment, _on_read, _on_read_err)

        # Everything is scheduled: from now on the last finished read sets the event
        read_state[3] = False
        if read_state[1] == 0:
            read_state[0].set()

        read_state[0].wait()

        for alias_path in alias_paths:
            self._check_file_done(data.md5hash, alias_path, self.pathmap[alias_path], 'checked (alias)')

    def _copy_block(self, shmem, data, path, block_idx):
        """
        Read block ``block_idx`` from ``path`` and write it through the regular WRITE job.

        :param shmem: provider of memory segments (``get_segment`` / ``put_segment``)
        :param data: data description the block belongs to
        :param path: path which already contains the block
        :param block_idx: global block index

        Blocks until both the read and the write are finished and re-raises the error reported
        by either of them. The memory segment is always handed back to ``shmem``.
        """
        read_result = gevent.event.AsyncResult()

        memory_segments = {}

        def _on_read(block_idx):
            memory_segment = memory_segments.pop(block_idx)
            read_result.set(memory_segment)

        def _on_read_err(block_idx, ex, tb):
            memory_segment = memory_segments.pop(block_idx)
            shmem.put_segment(memory_segment)
            read_result.set_exception(ex)

        self.log.debug(
            '%s: copy block %d from %s to %d other paths',
            data.md5hash, block_idx, path,
            len(self.piecemap.path_by_data[data]) - 1
        )

        memory_segment = shmem.get_segment(block=True)
        memory_segments[block_idx] = memory_segment
        self.async_read_path(block_idx, path.strpath, memory_segment, _on_read, _on_read_err)
        memory_segment = read_result.get()

        write_result = gevent.event.AsyncResult()

        def _on_write(block_idx):
            write_result.set(True)

        # Workers report failures as ecb(block_idx, ex, tb); a callback with fewer parameters
        # would raise TypeError inside the worker, which terminates the whole process there.
        def _on_write_err(block_idx, ex, tb):
            write_result.set_exception(ex)

        self.async_write(block_idx, memory_segment, _on_write, _on_write_err)

        try:
            assert write_result.get()
        finally:
            shmem.put_segment(memory_segment)

    def _deduplicate(self, data, paths, stats, reduce_logging):
        """
        Replace in-resource duplicates of one data with links before any checking is done.

        Paths are grouped by their executable bit. Inside each group the first path in sort
        order is the link source and every other path is replaced by a hard link or symlink
        to it (depending on the configured mode). The link is created under a temporary name
        and then renamed over the target. Failures for a single target are logged at debug
        level and do not stop the processing of the remaining targets.
        """

        if not self._dedup:
            return

        grouped = {True: [], False: []}

        for candidate in paths:
            candidate_stat = candidate.stat()
            grouped[bool(candidate_stat.mode & 0o111)].append(candidate)

        for want_executable, group in grouped.iteritems():
            if len(group) <= 1:
                # nothing to link within this group
                continue

            ordered = sorted(group)
            src, targets = ordered[0], ordered[1:]

            log_prefix = '%s: dedup %s' % (data.md5hash, '[x]' if want_executable else '[o]')

            for tgt_real in targets:
                pathinfo = self.pathmap[tgt_real]

                # Temporary name next to the real target
                suffix = ''.join(
                    random.choice(string.ascii_letters)
                    for _ in range(8)
                )
                tgt = tgt_real.dirpath().join(tgt_real.basename + '_skbn_' + suffix)

                try:
                    if self._dedup in ('hardlink', 'hardlinknocheck'):
                        tgt.mklinkto(src)
                    else:
                        tgt.mksymlinkto(src)
                    tgt.chmod(tgt_real.stat().mode)

                    # Done blocks of the source are carried over to the target below
                    src_pathinfo = self.pathmap[src]

                    if not reduce_logging:
                        self.log.info(
                            '%s completed %s (from %s)',
                            log_prefix, tgt_real, src
                        )

                    tgt.rename(tgt_real)
                    stats['deduplicated'] += 1

                    if src_pathinfo[0]:
                        # source was marked as done
                        pathinfo[2][:] = src_pathinfo[2][:]
                        if self._check_file_done(data.md5hash, tgt_real, pathinfo, 'deduplicated (in-resource)'):
                            self.stats['done_bytes'] += tgt_real.stat().size
                    else:
                        for idx, done in enumerate(src_pathinfo[2]):
                            pathinfo[2][idx] = done

                except Exception as ex:
                    self.log.debug(
                        '%s failed %s (from %s) with %s: %s', log_prefix, tgt_real, src, type(ex).__name__, ex
                    )
                finally:
                    # The temporary name is gone after a successful rename
                    try:
                        tgt.remove()
                    except py.error.ENOENT:
                        pass

    def _check_data(self, shmem, data, paths, on_piece_done, nocheck, stats):
        """
        Check what is already present on disk for one data and equalize its paths.

        :param shmem: provider of memory segments, handed over to the readers/copiers
        :param data: data description
        :param paths: paths of this data to work on (keys of ``self.pathmap``)
        :param on_piece_done: callback for every piece which becomes done
        :param nocheck: paths which must not be read and verified (None means no such paths)
        :param stats: counters dictionary, 'checked', 'copied' and 'stated' are updated

        Steps:
          1. stat every path to check, remember its inode and verify one path per inode;
          2. if there is more than one path, copy blocks missing in some paths from a path
             which has them (errors of this step are raised);
          3. stat every path whose inode is still unknown.
        """
        if nocheck is None:
            nocheck = []

        if nocheck:
            # If we have some paths we should not even try to check -- drop
            # them from list here
            paths_to_check = list(set(paths) - set(nocheck))
        else:
            paths_to_check = paths

        checkers_pool = gevent.pool.Pool(size=2)

        paths_to_check_by_inode = {}

        for path in paths_to_check:
            stat = path.stat()
            self.pathmap[path][7] = stat.ino

            paths_to_check_by_inode.setdefault(stat.ino, []).append(path)

        for inode, xpaths in paths_to_check_by_inode.iteritems():
            path = xpaths[0]  # check only first one

            if not self.pathmap[path][0]:
                checkers_pool.spawn(self._check_path, shmem, data, path, xpaths[1:], on_piece_done)
                gevent.sleep()
                stats['checked'] += 1

        # Do not use raise_error here
        checkers_pool.join()

        if len(paths) > 1:
            # Per piece index: paths which have the piece / paths which miss it
            paths_by_done_pieces = []
            paths_by_miss_pieces = []

            for path_idx, path in enumerate(paths):
                pathinfo = self.pathmap[path]

                if path_idx == 0:
                    for _ in pathinfo[2]:
                        paths_by_done_pieces.append([])
                        paths_by_miss_pieces.append([])

                for idx, done in enumerate(pathinfo[2]):
                    if done:
                        paths_by_done_pieces[idx].append(path)
                    else:
                        paths_by_miss_pieces[idx].append(path)

            if paths_by_miss_pieces:
                copiers_pool = gevent.pool.Pool(2)

                # NOTE: the loop variable must not be called ``paths`` -- that would replace
                # the argument and break the inode filling below.
                for idx, miss_paths in enumerate(paths_by_miss_pieces):
                    if not miss_paths:
                        continue

                    real_idx = idx + data.offset

                    done_paths = paths_by_done_pieces[idx]
                    if done_paths:
                        copiers_pool.spawn(self._copy_block, shmem, data, done_paths[0], real_idx)
                        stats['copied'] += 1
                        gevent.sleep()

                # Since we MUST copy missing block to other files, we should raise an error here if any
                copiers_pool.join(raise_error=True)

        # Fill any missing inodes
        for path in paths:
            pathinfo = self.pathmap[path]
            if not pathinfo[7]:
                stat = path.stat()
                pathinfo[7] = stat.ino
                stats['stated'] += 1

    def check(self, on_piece_done, shmem, partial_abs=None, nocheck=None):
        """
        Deduplicate (if enabled) and then check every known data against the files on disk.

        :param on_piece_done: callback handed over to the per-data checkers
        :param shmem: provider of memory segments handed over to the per-data checkers
        :param partial_abs: optional container of paths; when truthy, only paths contained in
                            it are checked
        :param nocheck: optional iterable of paths which must not be verified

        Both phases run in a pool of 10 greenlets and errors of spawned greenlets are raised.
        """
        pool = gevent.pool.Pool(size=10)

        stats = dict.fromkeys(('deduplicated', 'checked', 'copied', 'stated'), 0)

        reduce_logging = len(self.path_by_data) > 1000

        if self._dedup:
            self.log.debug('Deduplicate %d datas in-resource (before checking)', len(self.path_by_data))

            for idx, item in enumerate(self.path_by_data.iteritems()):
                data, paths = item

                if len(paths) > 1:
                    pool.spawn(self._deduplicate, data, paths, stats, reduce_logging)
                    gevent.sleep()

                # progress report on every 1000th data
                if idx > 1000 and not idx % 1000:
                    self.log.debug('  deduplicated %d datas from %d', idx, len(self.path_by_data))
                    self.log.debug('    stats %r', stats)

        pool.join(raise_error=True)

        nocheck = set(nocheck) if nocheck else set()

        self.log.debug('Check existing %d datas ignoring %d nocheck paths', len(self.path_by_data), len(nocheck))

        for idx, item in enumerate(self.path_by_data.iteritems()):
            data, paths = item

            if partial_abs:
                paths = [candidate for candidate in paths if candidate in partial_abs]

            pool.spawn(self._check_data, shmem, data, paths, on_piece_done, nocheck, stats)
            gevent.sleep()

            # progress report on every 1000th data
            if idx > 1000 and not idx % 1000:
                self.log.debug('  checked %d datas from %d', idx, len(self.path_by_data))
                self.log.debug('    stats: %r', stats)

        pool.join(raise_error=True)

        self.log.info('Finished checking, stats: %r', stats)

    def _yield_blocks(self, shmem, data, path, indexes):
        """
        Generator which reads the given blocks from ``path`` and yields the verified ones.

        :param shmem: provider of memory segments (``get_segment`` / ``put_segment``)
        :param data: data description the blocks belong to
        :param path: path to read from
        :param indexes: list of global block indexes to read; it is consumed (emptied) here

        Yields ``('OK', block_idx, memory_segment)`` for every block whose sha1 matches; the
        consumer becomes responsible for the memory segment. After the first failure (read
        error or hash mismatch) a single ``('ERR', exception, traceback_text)`` is yielded and
        the generator stops. At most 4 reads are scheduled ahead of the consumer.
        """
        if not indexes:
            # Nothing to read: no result would ever arrive and the loop below would wait forever
            return

        max_parallel_reads = coros.Semaphore(4)
        result_queue = gevent.queue.Queue()

        state = [len(indexes)]

        # Becomes True once the generator is finished or abandoned: results arriving
        # afterwards have no consumer, so their memory segments are handed back right away.
        closed = [False]

        memory_segments = {}

        def _on_read(block_idx):
            memory_segment = memory_segments.pop(block_idx)

            if closed[0]:
                shmem.put_segment(memory_segment)
                return

            sha1hash = hashlib.sha1(memory_segment.peek()).hexdigest()
            idx = block_idx - data.offset

            piece = data.pieces[idx]

            if piece.sha1hash == sha1hash:
                result_queue.put(('OK', block_idx, memory_segment))
            else:
                shmem.put_segment(memory_segment)
                result_queue.put(('ERR', Exception('bad hash'), ''))
            state[0] -= 1

        def _on_read_err(block_idx, ex, tb):
            memory_segment = memory_segments.pop(block_idx)
            shmem.put_segment(memory_segment)
            if not closed[0]:
                result_queue.put(('ERR', ex, tb))

        def _schedule_reads():
            while indexes:
                max_parallel_reads.acquire()
                real_idx = indexes.pop(0)
                memory_segment = shmem.get_segment(block=True)
                memory_segments[real_idx] = memory_segment
                self.async_read_path(real_idx, str(path), memory_segment, _on_read, _on_read_err)

        def _grn_failure_notifier(grn, current=gevent.getcurrent()):
            if not current.ready():
                current.throw(LinkedFailed(grn))

        scheduler_grn = gevent.spawn(_schedule_reads)
        scheduler_grn.link_exception(_grn_failure_notifier)

        try:
            while True:
                result = result_queue.get()

                if result[0] == 'OK':
                    block_idx, memory_segment = result[1:]
                    yield 'OK', block_idx, memory_segment
                    max_parallel_reads.release()

                elif result[0] == 'ERR':
                    scheduler_grn.kill()
                    yield 'ERR', result[1], result[2]
                    return

                if not indexes and state[0] == 0:
                    scheduler_grn.kill()
                    return
        finally:
            # Also reached when the consumer abandons the generator in the middle
            closed[0] = True
            scheduler_grn.kill(block=False)

            # Hand back segments of results nobody is going to consume
            while not result_queue.empty():
                leftover = result_queue.get_nowait()
                if leftover[0] == 'OK':
                    shmem.put_segment(leftover[2])

    def _trycopy_data(self, shmem, data, pieces, paths, on_piece_done, partial=None):
        """
        Try to fill missing pieces of ``data`` by copying them from candidate local files.

        :param shmem: provider of memory segments (``get_segment`` / ``put_segment``)
        :param data: data description
        :param pieces: per-piece done flags of this data (falsy means missing)
        :param paths: iterable of 3-item candidate descriptions; the first item is the path to
                      read from, the other two are not used here
        :param on_piece_done: called with the global block index for every piece which was
                              not yet done in the piece map
        :param partial: accepted for interface compatibility, not used here

        Candidates are tried one by one until nothing is missing. A failure with one
        candidate is logged and the next candidate is tried.
        """
        log_prefix = '%s: %s' % (data.md5hash, 'trycopy')

        try:
            paths = set(paths)
        except TypeError:
            # Some lists around there
            paths = list(paths)

            for idx, infos in enumerate(paths):
                if isinstance(infos, list):
                    paths[idx] = tuple(infos)

            paths = set(paths)

        stats = {
            'done_blocks': 0,
            'files': set(),
            'missing_at_start': pieces.count(False)
        }

        pieces_done = set()

        for path, _, _ in paths:
            max_parallel_writes = coros.Semaphore(4)

            # Blocks which were missing at start and have not been copied from previous candidates
            missing_indexes = [
                data.offset + idx
                for idx, done in enumerate(pieces)
                if not done and data.offset + idx not in pieces_done
            ]

            if not missing_indexes:
                break

            try:
                self.log.debug('trycopy: trying path %s', path)

                scheduled_writes = [True, 0, gevent.event.AsyncResult()]  # still scheduling, scheduled count, result
                memory_segments = {}

                # The state of the current candidate is bound through default arguments: a
                # callback of a write which is still in flight when this candidate is given up
                # must keep working with its own state instead of the state of the next one.
                def _on_write(
                    block_idx, path=path, memory_segments=memory_segments,
                    scheduled_writes=scheduled_writes, max_parallel_writes=max_parallel_writes
                ):
                    memory_segment = memory_segments.pop(block_idx)
                    shmem.put_segment(memory_segment)

                    max_parallel_writes.release()
                    scheduled_writes[1] -= 1

                    stats['done_blocks'] += 1
                    stats['files'].add(path)

                    if not self.piecemap.piece_array.pieces[block_idx]:
                        self.piecemap.piece_done(block_idx)
                        pieces_done.add(block_idx)
                        on_piece_done(block_idx)

                    if not scheduled_writes[0] and scheduled_writes[1] == 0:
                        scheduled_writes[2].set(True)

                def _on_write_err(
                    block_idx, ex, tb, memory_segments=memory_segments,
                    scheduled_writes=scheduled_writes, max_parallel_writes=max_parallel_writes
                ):
                    memory_segment = memory_segments.pop(block_idx)
                    shmem.put_segment(memory_segment)

                    max_parallel_writes.release()

                    scheduled_writes[1] -= 1
                    scheduled_writes[2].set_exception(ex)

                for result in self._yield_blocks(shmem, data, path, missing_indexes[:]):
                    if scheduled_writes[2].ready():
                        # Quick check for any error here
                        scheduled_writes[2].get()

                    if result[0] == 'OK':
                        block_idx, memory_segment = result[1:]

                        max_parallel_writes.acquire()
                        scheduled_writes[1] += 1

                        memory_segments[block_idx] = memory_segment
                        self.async_write(block_idx, memory_segment, _on_write, _on_write_err)

                    elif result[0] == 'ERR':
                        ex = result[1]
                        self.log.debug(
                            '%s failed to grab %d missing blocks from %s: %s: %s',
                            log_prefix, len(missing_indexes), path, type(ex).__name__, ex
                        )
                        break

                # Wait for writes which are still in flight
                scheduled_writes[0] = False
                if scheduled_writes[1] != 0:
                    scheduled_writes[2].get()

            except Exception as ex:
                self.log.warning('%s failed with unhandled exception: %s: %s', log_prefix, type(ex).__name__, ex)
                self.log.warning(traceback.format_exc())

        self.log.info(
            '%s done %d from %d blocks from %d files',
            log_prefix,
            stats['done_blocks'],
            stats['missing_at_start'],
            len(stats['files'])
        )


    def trycopy(self, paths, on_piece_done, shmem):
        for data_idx, data in self.piecemap.data.values():
            pieces = self.piecemap.piece_array.pieces[data.offset:data.offset + len(data.pieces)]

            if all(pieces):
                continue

            if data.md5hash not in paths:
                continue

            self._trycopy_data(shmem, data, pieces, paths[data.md5hash], on_piece_done)

    def _deduplicate_data(self, shmem, data, paths, on_piece_done, partial=None, reduce_logging=False):
        target_paths = self.piecemap.path_by_data[data]
        target_paths_by_executable = {True: [], False: []}

        paths_stat_cache = {}

        do_not_log_failed = reduce_logging or len(target_paths) >= 1000

        for path in target_paths:
            if partial and path not in partial:
                continue

            path = self.dl_path.join(path)

            stat = path.stat()
            want_executable = bool(stat.mode & 0o111)

            target_paths_by_executable[want_executable].append(path)

        for want_executable, target_paths in target_paths_by_executable.iteritems():
            if not target_paths:
                continue

            target_paths = set(target_paths)
            done = False

            log_prefix = '%s: dedup %s' % (data.md5hash, '[x]' if want_executable else '[o]')

            for src_path, src_inode, src_mtime in paths:
                src_path = Path(src_path)

                if src_path in target_paths:
                    # If we will want do dl/deduplicate this file -- ignore it
                    # we cant make sym/hard links to itself
                    continue

                try:
                    src_stat = paths_stat_cache.get(src_path, None)

                    if not src_stat:
                        src_stat = src_path.stat()

                    is_executable = bool(src_stat.mode & 0o111)

                    if is_executable != want_executable:
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s ignored file %s, executable bit mismatch (want: %s, file: %s)',
                                log_prefix, src_path, want_executable, is_executable
                            )
                        continue

                    if src_stat.uid != os.getuid():
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s ignored file %s, owner mismatch (we: %d, file: %d)',
                                log_prefix, src_path, os.getuid(), src_stat.uid
                            )
                        continue

                    if src_stat.ino != src_inode:
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s ignored file %s, inode mismatch (should be %d, got %d)',
                                log_prefix, src_path, src_inode, src_stat.ino
                            )
                        continue

                    # Choose one target file, by name
                    tgt_real = sorted(target_paths)[0]
                    tgt_stat = tgt_real.stat()

                    # Check maybe file is bigger/lower than we want (stupid check, but...)
                    if src_stat.size != tgt_stat.size:
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s ignored file %s, size mismatch (we: %d, file: %d)',
                                log_prefix, src_path, tgt_real.stat().size, src_stat.size
                            )
                        continue

                    if src_stat.dev != tgt_stat.dev:
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s ignored file %s, different device (we: %d, file: %d)',
                                log_prefix, src_path, src_stat.dev, tgt_stat.dev
                            )
                        continue

                    pathinfo = self.pathmap[tgt_real]

                    try:
                        # Grab temporary file name
                        tgt = tgt_real.dirpath().join(
                            tgt_real.basename + '_skbn_' +
                            ''.join(
                                random.choice(string.ascii_letters)
                                for _ in range(8)
                            )
                        )

                        # First, try to make link
                        try:
                            no_check = self._dedup == 'hardlinknocheck'

                            if self._dedup == 'hardlink' or self._dedup == 'hardlinknocheck':
                                tgt.mklinkto(src_path)
                                tgt.chmod(tgt_real.stat().mode)
                            else:
                                tgt.mksymlinkto(src_path)
                                tgt.chmod(tgt_real.stat().mode)

                            # Next, check resulting file checksum
                            blocks_to_read = pathinfo[2][:]
                            read_state = [gevent.event.AsyncResult(), 0, len(pathinfo[2])]
                            memory_segments = {}

                            def _on_read(block_idx):
                                memory_segment = memory_segments.pop(block_idx)

                                try:
                                    if read_state[0].ready():
                                        return

                                    sha1hash = hashlib.sha1(memory_segment.peek()).hexdigest()
                                finally:
                                    shmem.put_segment(memory_segment)

                                idx = block_idx - data.offset

                                piece = data.pieces[idx]
                                if piece.sha1hash == sha1hash:
                                    blocks_to_read[idx] = True
                                else:
                                    read_state[0].set_exception(Exception('bad data at block %r' % (idx, )))
                                    return

                                read_state[1] -= 1
                                read_state[2] -= 1

                                if read_state[1] == 0:
                                    read_state[0].set(True)
                                else:
                                    self.log.debug(
                                        '%s %s checking blocks (%d left)',
                                        log_prefix, src_path, read_state[2]
                                    )

                            def _on_read_err(block_idx, ex, tb):
                                memory_segment = memory_segments.pop(block_idx)
                                shmem.put_segment(memory_segment)

                                if read_state[0].ready():
                                    return

                                read_state[0].set_exception(ex)

                            for idx, done in enumerate(pathinfo[2]):
                                assert not done, 'block marked as done!?'
                                real_idx = data.offset + idx

                                if no_check:
                                    blocks_to_read[idx] = True
                                    read_state[2] -= 1
                                else:
                                    read_state[1] += 1

                                    memory_segment = shmem.get_segment(block=True)
                                    memory_segments[real_idx] = memory_segment

                                    self.async_read_path(
                                        real_idx, tgt.strpath, memory_segment,
                                        _on_read, _on_read_err, tgt_real.strpath
                                    )

                            if no_check:
                                self.log.debug(
                                    '%s %s bypassing block checking', log_prefix, src_path
                                )
                                read_state[0].set(True)

                            if read_state[0].get():
                                if all(blocks_to_read):
                                    # Finally, rename to our target name
                                    # This should not fail :)
                                    tgt.rename(tgt_real)

                                    assert len(blocks_to_read) == len(pathinfo[2])

                                    self.log.info(
                                        '%s completed %s (from %s)',
                                        log_prefix, tgt_real, src_path
                                    )

                                    for idx, done in enumerate(blocks_to_read):
                                        real_idx = data.offset + idx

                                        pathinfo[2][idx] = True

                                        if not self.piecemap.piece_array.pieces[real_idx]:
                                            self.piecemap.piece_done(real_idx)
                                            on_piece_done(real_idx)

                                    if self._check_file_done(
                                        data.md5hash, tgt_real, pathinfo, 'deduplicated (outside)'
                                    ):
                                        self.stats['done_bytes'] += tgt_real.stat().size
                                else:
                                    self.log.debug(
                                        '%s %s failed -- some blocks are invalid',
                                        log_prefix, src_path
                                    )
                                    continue
                        except Exception as ex:
                            for idx, done in enumerate(pathinfo[2]):
                                pathinfo[2][idx] = False
                            raise

                    except Exception as ex:
                        if not do_not_log_failed:
                            self.log.debug(
                                '%s failed %s with %s: %s', log_prefix, src_path, type(ex).__name__, ex
                            )
                        continue
                    finally:
                        try:
                            tgt.remove()
                        except py.error.ENOENT:
                            pass

                    done = True

                except Exception as ex:
                    if not do_not_log_failed:
                        self.log.debug(
                            '%s ignored file %s, error %s', log_prefix, src_path, ex
                        )
                    continue

                if done:
                    break

    def deduplicate(self, alternatives, on_piece_done, shmem, partial=None):
        """
        Fan out deduplication jobs for every alternative known to the piecemap.

        Does nothing (returns immediately) when ``self._dedup`` is falsy.
        Otherwise walks ``alternatives`` -- a mapping of md5 hash to a
        collection of candidate paths -- and, for each hash present in
        ``self.piecemap.data``, spawns ``self._deduplicate_data`` inside a
        gevent pool created with size 10. The method returns only after the
        pool has been joined.

        :param alternatives: mapping ``md5hash -> paths``; must support
            ``len()`` and ``iteritems()``.
        :param on_piece_done: forwarded unchanged to ``_deduplicate_data``.
        :param shmem: forwarded unchanged to ``_deduplicate_data``.
        :param partial: forwarded unchanged to ``_deduplicate_data``.
        :return: None
        """
        if not self._dedup:
            return

        # With more than 1000 alternatives a "reduce logging" flag is handed
        # to every job as its last positional argument.
        quiet = len(alternatives) > 1000

        pool = gevent.pool.Pool(10)
        paths_seen = 0

        for position, (md5hash, paths) in enumerate(alternatives.iteritems()):
            if md5hash in self.piecemap.data:
                paths_seen += len(paths)
                entry = self.piecemap.data[md5hash][1]

                pool.spawn(
                    self._deduplicate_data,
                    shmem, entry, paths, on_piece_done, partial, quiet
                )

                # Yield so that already spawned greenlets get a chance to run.
                gevent.sleep()

                # Progress report on every 1000th position past the first
                # thousand (2000, 3000, ...); positions whose hash is not in
                # the piecemap never reach this point.
                if position > 1000 and not position % 1000:
                    self.log.debug(
                        '  deduplicated %d datas from %d using total %d alternatives',
                        position, len(alternatives), paths_seen
                    )

        pool.join()
