import msgpack
import time

from .file import ResourceItem

from ..component import Component
from ..utils import Path


class Cache(Component):
    def __init__(self, db, parent=None):
        """
        Create the cache component on top of database handle ``db``.

        The component registers itself under the ``cache`` log name and keeps
        one msgpack packer on the instance.

        :param db: database wrapper used for every query of this component
        :param parent: optional parent component, passed to ``Component``
        """
        super(Cache, self).__init__(parent=parent, logname='cache')

        self.packer = msgpack.Packer()
        self.db = db

    def load_info_from_db(self, items, cache_dict=None):
        """
        Load all possible data from database to resource.

        It will load info only for files whose mtime and size is the same
        as stored in db.
        """

        items_by_path = {}
        items_by_inode = {}

        for ritem in items:
            if isinstance(ritem, ResourceItem.file):
                if ritem.data is None:
                    items_by_path[ritem.path.strpath] = ritem
                    items_by_inode.setdefault(ritem.inode, []).append(ritem)

        if not items_by_path:
            return

        for path, inode, mtime, chktime, data, size, md5, sha1_blocks in self.db.query(
            'SELECT '
            '    f.path, f.inode, f.mtime, f.chktime, '
            '    d.id, d.size, d.md5, d.sha1_blocks '
            'FROM file f JOIN data d ON (d.id = f.data) '
            'WHERE f.path IN (??) -- Cache.load_info_from_db', [
                items_by_path.keys()
            ],
            log=False
        ):
            ritem = items_by_path[path]
            if ritem.mtime == mtime and ritem.inode == inode and ritem.size == size:
                ritem.md5 = md5.decode('hex')
                ritem.sha1_blocks = msgpack.loads(sha1_blocks)
                ritem.data = data.decode('hex')
                ritem.chktime = chktime

        if cache_dict:
            for name, info in cache_dict.iteritems():
                ritem = items_by_path[info['path']]
                ritem.data = info['md5']
                ritem.inode = info['inode']
                ritem.size = info['size']
                ritem.md5 = info['md5']
                ritem.sha1_blocks = info['sha1_blocks']
                ritem.chktime = info['chktime']
                ritem.size = info['size']

        missing_inodes = list(set([xritem.inode for xritem in items_by_path.values() if not xritem.data]))

        if missing_inodes:
            for inode, mtime, chktime, data, size, md5, sha1_blocks in self.db.query(
                'SELECT '
                '    f.inode, f.mtime, f.chktime, '
                '    d.id, d.size, d.md5, d.sha1_blocks '
                'FROM '
                '   file f JOIN data d ON (d.id = f.data) '
                'WHERE f.inode IN (??) '
                'GROUP BY f.inode', [
                    missing_inodes
                ],
                log=False
            ):
                for ritem in items_by_inode[inode]:
                    if ritem.data:
                        continue

                    if ritem.mtime == mtime and ritem.inode == inode and ritem.size == size:
                        ritem.md5 = md5.decode('hex')
                        ritem.sha1_blocks = msgpack.loads(sha1_blocks)
                        ritem.data = data.decode('hex')
                        ritem.inode = inode
                        ritem.chktime = chktime

    def get_item_check_time(self, item):
        return self.db.query_one_col('SELECT chktime FROM file WHERE path = ?', [item.path.strpath], log=False)

    def _store_resource_rbtorrent1(self, uid, rbtorrent):
        """
        Create or update the ``resource`` row of type ``rbtorrent1`` for ``uid``.

        An existing record is updated in place instead of being replaced,
        because many other tables have ON DELETE CASCADE set for the case of
        a resource being removed.

        :param uid: resource id
        :param rbtorrent: object providing ``torrents`` and ``dbdict()``
        """

        # Every child torrent plus the metatorrent itself
        torrents_count = 1 + len(rbtorrent.torrents)
        packed = buffer(self.packer.pack(rbtorrent.dbdict()))

        known = self.db.query_one_col('SELECT id FROM resource WHERE id = ?', [uid])

        if not known:
            self.db.query(
                'INSERT INTO resource ('
                '    id, type, data, torrents_count'
                ') VALUES (?, ?, ?, ?)',
                [uid, 'rbtorrent1', packed, torrents_count]
            )
            return

        self.db.query(
            'UPDATE resource SET '
            '    type = ?, data = ?, torrents_count = ? '
            'WHERE id = ?',
            ['rbtorrent1', packed, torrents_count, uid]
        )

    def _store_resource_data(self, items):
        already_in_db = set((
            md5.decode('hex') for md5 in self.db.query_col(
                'SELECT md5 FROM data WHERE ('
                '    legacy_id AND id IN (??)'
                ')', [
                    [item.md5.encode('hex') for item in items]
                ]
            )
        ))

        seen_datas = set()
        items_to_save = []

        for item in items:
            if item.md5 in already_in_db:
                continue
            if item.md5 in seen_datas:
                continue
            seen_datas.add(item.md5)
            items_to_save.append((
                item.md5.encode('hex'),
                True,
                item.size,
                item.md5.encode('hex'),
                buffer(self.packer.pack(item.sha1_blocks))
            ))

        if items_to_save:
            self.db.execute_many(
                'INSERT INTO data ('
                '    id, legacy_id, size, md5, sha1_blocks'
                ') VALUES (?, ?, ?, ?, ?)', items_to_save
            )

        self.log.info(
            'Stored data: %d added, %d already exist',
            len(items_to_save), len(already_in_db)
        )

    def _store_files(self, items):
        already_in_db = dict(
            (row[0], row[1])
            for row in self.db.query(
                'SELECT path, data FROM file WHERE path IN (??)',
                [[item.path.strpath for item in items]]
            )
        )

        new_files = []
        old_files = []
        changed_files_data = []

        for item in items:
            if item.path.strpath not in already_in_db:
                new_files.append(item)
            else:
                dbitem = already_in_db[item.path.strpath]
                newmd5 = item.md5.encode('hex')
                if newmd5 != dbitem:
                    self.log.debug(
                        'file changed %s: md5: %s -> %s', item.path,
                        dbitem, newmd5
                    )
                    changed_files_data.append(item.path)
                    new_files.append(item)
                else:
                    old_files.append(item)

        if changed_files_data:
            self.delete_paths(changed_files_data)

        if new_files:
            self.db.execute_many(
                'INSERT INTO file ('
                '    path, inode, data, mtime, chktime'
                ') VALUES (?, ?, ?, ?, ?)', [
                    [
                        item.path.strpath,
                        item.inode,
                        item.md5.encode('hex'),
                        item.mtime,
                        item.chktime,
                    ]
                    for item in new_files
                ]
            )

        if old_files:
            self.db.execute_many(
                'UPDATE file SET inode = ?, mtime = ?, chktime = ? '
                'WHERE path = ?', [
                    [
                        item.inode,
                        item.mtime,
                        item.chktime,
                        item.path.strpath,
                    ]
                    for item in old_files
                ]
            )

        self.log.info(
            'Stored files: %d added, %d changed data, %d updated',
            len(new_files) - len(changed_files_data), len(changed_files_data), len(old_files)
        )

    def _store_resource_map(self, uid, items, structure):
        self.db.query('DELETE FROM resource_data WHERE resource = ?', [uid])

        self.db.execute_many(
            'INSERT INTO resource_data ('
            '    resource, name, hash, data, symlink, directory, perms'
            ') VALUES (?, ?, ?, ?, ?, ?, ?)', (
                [
                    uid, name,
                    structure[name]['resource']['id'] if structure[name]['resource']['type'] == 'torrent' else None,
                    item.md5.encode('hex') if isinstance(item, ResourceItem.file) else None,
                    item.symlink if isinstance(item, ResourceItem.symlink) else None,
                    True if isinstance(item, ResourceItem.directory) else False,
                    item.perms if isinstance(item, ResourceItem.file) else -1,
                ] for name, item in items.iteritems()
            )
        )

    def store_resource(self, resource):
        """
        Store the whole resource: its record, file data, files and item map.

        Everything is done inside a single ``self.db(debug_sql=False)``
        context.

        :param resource: object providing ``uid``, ``data`` (with
            ``structure``) and ``items`` (mapping name -> item)
        """
        with self.db(debug_sql=False):
            # No other greenlets allowed to run here
            # So we must be quick!
            uid = resource.uid
            self._store_resource_rbtorrent1(uid, resource.data)

            file_items = set(
                item for item in resource.items.values()
                if isinstance(item, ResourceItem.file)
            )

            # Data rows go first, file rows reference them
            if file_items:
                self._store_resource_data(file_items)
                self._store_files(file_items)

            self._store_resource_map(uid, resource.items, resource.data.structure)
            self.log.info('[resid:%s] Stored resource', resource.uid[:8])

    def get_resource(self, uid):
        info = self.db.query_one(
            'SELECT type, data FROM resource WHERE id = ?',
            [uid],
            log=False
        )
        if info:
            return info[0], msgpack.loads(info[1])
        else:
            return None, None

    def get_resource_by_child_infohash(self, infohash):
        info = self.db.query_one(
            'SELECT r.id, r.type, r.data FROM resource r '
            'JOIN resource_data rd ON rd.resource = r.id '
            'WHERE rd.hash = ? '
            'LIMIT 1',
            [infohash],
            log=False
        )
        if info:
            return info[0], info[1], msgpack.loads(info[2])
        return None, None, None

    def get_resource_items(self, uid):
        items = {}
        items_by_data = {}

        for (
            name, data_id, symlink, directory, perms,
            size, md5, sha1_blocks, path, inode, mtime
        ) in self.db.query(
            'SELECT '
            '    rd.name, rd.data, rd.symlink, rd.directory, rd.perms, '
            '    d.size, d.md5, d.sha1_blocks, '
            '    f.path, f.inode, f.mtime '
            'FROM resource_data rd '
            'LEFT JOIN data d ON rd.data = d.id '
            'LEFT JOIN file f ON f.path = (SELECT path FROM file WHERE data = d.id LIMIT 1) '
            'WHERE rd.resource = ?',
            [uid],
            log=False
        ):
            if symlink:
                item = ResourceItem.symlink(symlink=symlink)
            elif directory:
                item = ResourceItem.directory()
            else:
                item = ResourceItem.file(
                    path=Path(path), inode=inode, data=data_id, size=size,
                    mtime=mtime, perms=perms,
                    md5=md5, sha1_blocks=msgpack.loads(sha1_blocks)
                )
                items_by_data[item.data] = item

            items[name] = item

        return items

    def get_paths_in_dir(self, directory):
        return self.db.query_col('SELECT path FROM file WHERE path GLOB ?', [directory.strpath + '/*'])

    def get_path_alternatives_one(self, data, limit=None):
        return self.db.query(
            'SELECT '
            '   path, mtime, chktime '
            'FROM file '
            'WHERE data = ? '
            'LIMIT ?',
            [data, limit]
        )

    def get_path_alternatives(self, datas):
        alternatives = {}
        for data in datas:
            data_alternatives = self.db.query(
                'SELECT '
                '    path, inode, mtime '
                'FROM file '
                'WHERE data = ? '
                'LIMIT 100',
                [data],
                log=False
            )

            if data_alternatives:
                alternatives[data] = data_alternatives

        return alternatives

    def get_files_to_check(self, count, max_chktime):
        return [
            ResourceItem.file(
                path=Path(path), inode=inode, data=data.decode('hex'), size=size, mtime=mtime,
                md5=md5.decode('hex'), sha1_blocks=msgpack.loads(sha1_blocks), chktime=chktime
            ) for path, inode, mtime, chktime, data, size, md5, sha1_blocks in
            self.db.query(
                'SELECT '
                '    f.path, f.inode, f.mtime, f.chktime, '
                '    d.id, d.size, d.md5, d.sha1_blocks '
                'FROM file f JOIN data d ON (d.id = f.data) '
                'WHERE chktime < ? '
                'ORDER BY chktime ASC '
                'LIMIT ?',
                [max_chktime, count]
            )
        ]

    def update_file_check_time(self, item):
        self.db.query(
            'UPDATE file SET chktime = ? WHERE path = ?',
            [int(time.time()), item.path.strpath],
            log=False
        )

    def delete_paths(self, paths):
        paths = [str(path) for path in paths]
        if not paths:
            return [0, 0]

        if len(paths) == 1:
            path_in = '= ?'
            paths = paths[0]
        else:
            path_in = 'IN (??)'

        # First of all -- find datas which may be left without any file pointing to it
        datas = self.db.query_col('SELECT DISTINCT data FROM file WHERE path %s' % (path_in, ), [paths])

        if len(datas) == 1:
            data_in = '= ?'
            datas = datas[0]
        else:
            data_in = 'IN (??)'

        # Next, drop files as requested
        dropped_paths = self.db.query('DELETE FROM file WHERE path %s' % (path_in, ), [paths], get_changed=True)

        # Tricky part: now we need to find all resources which will have data
        # without any files poiting to it. I.e. this resources are invalid anymore
        bad_resources = self.db.query_col(
            'SELECT DISTINCT resource FROM resource_data WHERE data IN ( '
            '    SELECT d.id '
            '        FROM data d '
            '        LEFT JOIN file f ON f.data = d.id '
            '    WHERE f.data IS NULL AND d.id %s '
            ')' % (data_in, ),
            [datas]
        )

        # Final part: remove bad resources if any. This will remove also resource_data record
        # and leave some data which is not in any resource anymore. But this is ok, becaouse we would
        # clean such data periodically.
        if bad_resources:
            for resource in bad_resources:
                self.log.debug('[resid:%s] removing (incomplete)', resource[:8])

            with self.db.deblock.lock('drop_bad_resources'):
                max_block = 1000
                for idx in range(
                    len(bad_resources) / max_block +
                    (1 if len(bad_resources) % max_block != 0 else 0)
                ):
                    start = idx * max_block
                    end = start + max_block

                    bad_resources_block = bad_resources[start:end]

                    self.db.query(
                        'INSERT OR REPLACE INTO announce_stop (tracker, hash, deadline) '
                        'SELECT tracker, resource, '
                        '   CASE WHEN timestamp <= ? '
                        '        THEN ? '
                        '        ELSE timestamp '
                        '   END '
                        'FROM announce WHERE resource IN (??)',
                        [int(time.time()), int(time.time() + 86400), bad_resources_block]
                    )
                    self.db.query('DELETE FROM resource WHERE id IN (??)', [bad_resources_block])

        self.log.debug('Removed %d files (and %d resources)', dropped_paths, len(bad_resources))
        return dropped_paths, len(bad_resources)

    def get_item_by_resource_and_item_name(self, uid, name):
        try:
            path, data, size, mtime, md5, sha1_blocks, chktime = self.db.query_one(
                'SELECT '
                '    f.path, f.data, d.size, f.mtime, d.md5, '
                '    d.sha1_blocks, f.chktime '
                'FROM file f '
                'JOIN data d ON d.id = f.data '
                'JOIN resource_data rd ON rd.data = d.id '
                'WHERE rd.resource = ? AND rd.name = ?',
                [uid, name]
            )
        except TypeError:
            return None
        else:
            return ResourceItem.file(
                path=Path(path), data=data.decode('hex'), size=size, mtime=mtime,
                md5=md5.decode('hex'), sha1_blocks=msgpack.loads(sha1_blocks), chktime=chktime
            )

    @Component.green_loop(logname='housekeep')
    def _drop_stale_data(self, log):
        """
        Clean stale data every hour.

        Data is stale if no ``resource_data`` row references it or if no
        ``file`` row points to it. This one is quite expensive if many files
        were removed during the last period. Since this can be VERY VERY
        expensive, stale datas are removed in blocks of ``max_block``
        (10000) records per DELETE.

        After the data, resources without any ``resource_data`` rows are
        dropped one by one.

        NOTE(review): the whole block loop runs under a single
        ``deblock.lock('drop_stale_data')`` context; whether other greenlets
        are able to run between blocks depends on the db/deblock
        implementation -- confirm there.

        :param log: logger to report to
        :return: 3600 -- presumably the delay in seconds before the next run
            of the loop (verify against ``Component.green_loop``)
        """

        max_block = 10000
        extended_log_if_lower_than = 10000

        with self.db.deblock.lock('drop_stale_data'):
            stale = {}
            # key is 1 for "no resource references the data" and 2 for
            # "no file points to the data"; it indexes the flags list below
            for data, md5, key in self.db.query(
                'SELECT DISTINCT '
                '    d.id, d.md5, 1 '
                'FROM data d '
                'LEFT JOIN resource_data rd ON rd.data = d.id '
                'WHERE rd.data IS NULL '
                'UNION '
                'SELECT '
                '    d.id, d.md5, 2 '
                'FROM data d '
                'WHERE d.id NOT IN (SELECT data FROM file)',
                log=False
            ):
                stale.setdefault(data, [md5, False, False])[key] = True

            # Real list is required, since it is sliced below
            stale = list(stale.items())

            total = len(stale)
            # Ceiling division; "//" keeps it an integer regardless of how
            # "/" behaves
            total_blocks = (total + max_block - 1) // max_block

            for idx in range(total_blocks):
                start = idx * max_block
                block = stale[start:start + max_block]

                self.db.query(
                    'DELETE FROM data WHERE id IN (??)',
                    [[s[0] for s in block]]
                )

                if total >= extended_log_if_lower_than:
                    log.debug('[%d/%d] Removed %d stale data', idx + 1, total_blocks, len(block))
                else:
                    # This is usually used as an audit
                    # Removed resource => incomplete (no data X) => search remove stale data X
                    for data, info in block:
                        log.debug(
                            '[md5:%s] Removed stale (no_resource: %r, no_file: %r)',
                            info[0], info[1], info[2]
                        )

        with self.db.deblock.lock('drop_stale_resources'):
            for resid in self.db.query_col(
                'SELECT id FROM resource WHERE id NOT IN ('
                '   SELECT DISTINCT resource FROM resource_data'
                ')',
                log=False
            ):
                log.debug('[resid:%s%s] Drop stale resource (no data stored)', resid[:8], '?' * 32)
                self.db.query('DELETE FROM resource WHERE id = ?', [resid])

        return 3600
