# -*- coding: utf-8 -*-
import copy
import operator
import re
import time
import types
from collections import defaultdict
from datetime import datetime, timedelta
from itertools import chain, groupby, ifilter, imap, starmap
from multiprocessing.pool import ThreadPool

import pymongo
from bson import SON

import mpfs.common.errors as errors
import mpfs.engine.process
from mpfs.common.util import filetypes, hashed
from mpfs.common.util.ordereddict import OrderedDict
from mpfs.config import settings
from mpfs.core import address
from mpfs.core.address import Address
from mpfs.core.filesystem.dao.legacy import is_new_fs_spec_required
from mpfs.core.filesystem.dao.recount import RecountDAO
from mpfs.core.filesystem.resources.base import Resource
from mpfs.core.services.reindex.models import QuickMoveReindexTask, QuickMoveReindexTaskStatus
from mpfs.core.user.constants import NOTES_AREA_PATH, PHOTOUNLIM_AREA_PATH, YATEAM_DIR_PATH
from mpfs.dao.base import BaseDAO
from mpfs.metastorage import Document, MResponse
from mpfs.metastorage.mongo.binary import Binary
from mpfs.metastorage.mongo.collections.base import DiskCollection
from mpfs.metastorage.mongo.collections.base import KeyValue, ShardedById, UserCollection
from mpfs.metastorage.mongo.collections.base import UserCollectionKeyValue
from mpfs.metastorage.mongo.collections.changelog import ChangelogCollection
from mpfs.metastorage.mongo.util import change_path_parent, compress_data, decompress_data, generate_version_number, \
    id_for_key, is_subpath, map_kv_for_read, map_kv_for_write, name_for_key, parent_for_key, parent_id_for_key, \
    propper_key, propper_uid

# Module-level snapshot of configuration: every constant below is read from
# `settings` once, when this module is imported.
FEATURE_TOGGLES_DISABLE_OLD_PREVIEWS = settings.feature_toggles['disable_old_previews']
FEATURE_TOGGLES_FULL_DIFF_FILE_LIMIT = settings.feature_toggles['full_diff_file_limit']
FEATURE_TOGGLES_FULL_DIFF_FETCHING_FILE_CHILDREN_BUG_FIX_ENABLED_UIDS = (
    settings.feature_toggles['full_diff_fetching_file_children_bug_fix_enabled_uids'])
# Quick-move switches; consulted by `is_quick_move_enabled`.
POSTGRES_QUICK_MOVE_ENABLED = settings.postgres['quick_move']['enabled']
POSTGRES_QUICK_MOVE_ALLOWED_UIDS = settings.postgres['quick_move']['allowed_uids']
POSTGRES_QUICK_MOVE_ALLOW_FOR_REINDEXED_USERS = settings.postgres['quick_move']['allow_for_reindexed_users']
# Hidden-data cleaner options.
HIDDEN_DATA_CLEANER_WORKER_CLEAN_DELAY_FOR_UNMARKED_FILES = (
    settings.hidden_data_cleaner['worker']['clean_delay_for_unmarked_files'])
HIDDEN_DATA_CLEANER_WORKER_CLEAN_DELAY = settings.hidden_data_cleaner['worker']['clean_delay']
HIDDEN_DATA_CLEANER_YAROVAYA_DTIME_DEPENDS_ON_MARK_ENABLED = (
    settings.hidden_data_cleaner['yarovaya']['dtime_depends_on_mark_enabled'])
HIDDEN_DATA_CLEANER_YAROVAYA_MARK_ACTIVE_SINCE_UTIME = (
    settings.hidden_data_cleaner['yarovaya']['mark_active_since_utime'])
SETPROP_SYMLINK_FIELDNAME = settings.system['setprop_symlink_fieldname']

# Snapshot-on-group-folder-invite options for deltas.
DELTAS_SNAPSHOT_ON_GROUP_FOLDER_INVITE_ENABLED = settings.deltas['snapshot_on_group_folder_invite']['enabled']
DELTAS_SNAPSHOT_ON_GROUP_FOLDER_INVITE_DRY_RUN = settings.deltas['snapshot_on_group_folder_invite']['dry_run']

# Log
# ===============================================================================
# Process-wide loggers obtained from the engine.
log = mpfs.engine.process.get_default_log()
error_log = mpfs.engine.process.get_error_log()
requests_log = mpfs.engine.process.get_requests_log()
# ===============================================================================
# Database controller obtained from the engine at import time.
dbctl = mpfs.engine.process.dbctl()


def _process_chanelog_items(src, skip_filter=False):
    """Filter and merge changelog deltas.

    Rules applied when `skip_filter` is false:
        * creation/change deltas are not returned if the resource ends up deleted
        * deltas of children are not returned if their parent was deleted
        * no deltas are returned for a resource that was created and then deleted
        * `new` and `changed` deltas of the same resource are glued together

    :param src: list of delta dicts; the code reads the keys `key`, `version`,
        `op` and `type`. The list is sorted IN PLACE.
    :param skip_filter: when true, only sort by (key, version) and strip
        service fields, without any filtering.
    :return: list of delta dicts without the `_id` and `shard_key` fields.
    """

    def get_result_dict(val):
        # Copy the delta without service fields; when the `broken` feature
        # toggle is off, the `broken` field is forced to 0.
        result = {}
        for k, v in val.iteritems():
            if k in ('_id', 'shard_key'):
                continue
            elif not settings.feature_toggles['broken'] and k == 'broken':
                result['broken'] = 0
            else:
                result[k] = v
        return result

    if skip_filter:
        src.sort(key=lambda d: (d['key'], d['version']))
        return map(get_result_dict, src)

    # Keys of resources for which no further deltas must be returned.
    processed = set()
    # Group by key; inside one key the newest version comes first.
    src.sort(key=lambda d: (d['key'], -d['version']))

    deleted = []

    # process the deletion deltas
    for element in ifilter(lambda x: x['op'] == 'deleted', src):
        if element['key'] in processed:
            continue
        else:
            # collect all creation (new) and deletion (deleted) deltas of the current resource
            new_records = []
            deleted_records = []
            for each in ifilter(lambda x: x['key'] == element['key'], src):
                if each['op'] == 'deleted':
                    deleted_records.append(each)
                elif each['op'] == 'new':
                    new_records.append(each)

            # find all deletion deltas of parents that are newer than the current delta;
            # if there are any, this delta is not returned (see further below)
            dead_parents = filter(
                lambda x:
                re.match('^%s/.*' % re.escape(x['key']), element['key'])
                and x['op'] == 'deleted'
                and x['version'] > element['version'],
                src
            )

            # as the input list is sorted, the first record is the newest creation version of the resource
            try:
                last_new_this_version = new_records[0]['version']
            except Exception:
                last_new_this_version = 0

            # find all creation deltas of the resource that are older than the current delta
            dead_this = filter(lambda x: x['version'] < element['version'], new_records)

            # seems to be equivalent to:
            #     if creation deltas exist and all of them are older
            #         (because last_new_this_version is the newest version)
            #     then add the current resource to processed (i.e. return no other deltas for it)
            #         (because dead_this is a subset of new_records, and new_records holds only deltas
            #          of the current resource)
            if dead_this and last_new_this_version < element['version']:
                processed.update(
                    imap(operator.itemgetter('key'), dead_this)
                )

            if element['type'] == 'dir':
                # find all deltas of children that are older than the current delta
                dead_children = filter(
                    lambda x:
                    re.match('^%s/.*' % (re.escape(element['key'])), x['key'])
                    and x['version'] < element['version'],
                    src
                )

                # no deltas will be returned for these resources,
                # even if there are newer creation deltas - a bug:
                #    1. create folder /folder
                #    2. create file /folder/file.jpg
                #    3. delete folder /folder
                #    4. create folder /folder
                #    5. create file /folder/file.jpg
                #    there will be no delta about the creation of /folder/file.jpg
                if dead_children:
                    processed.update(
                        imap(operator.itemgetter('key'), dead_children)
                    )

            # seems to be equivalent to:
            #   the delta is not returned if:
            #     1. there are newer deletions of a parent folder,
            #     2. creation deltas exist, all of them are older (i.e. the last one is a deletion)
            #        and there are as many of them as deletions
            #          the logic is apparently: the resource was created and deleted, nothing to return for it
            #     3. there is a newer creation delta
            if dead_parents or \
                (dead_this and last_new_this_version < element['version'] and len(new_records) == len(
                    deleted_records)) or \
                last_new_this_version > element['version']:
                continue
            # add the delta to the list of deltas that will be returned;
            # the remaining deltas of this resource are ignored
            else:
                deleted.append(element)
                processed.add(element['key'])

    all_changed = []
    all_new = []

    # take all deltas that were not marked as processed while handling deletion deltas
    for v in ifilter(lambda x: x['op'] != 'deleted'
                               and x['key'] not in processed
                               and x['key'] != '/disk', src):
        if v['op'] == 'new':
            all_new.append(v)
        elif v['op'] == 'changed':
            all_changed.append(v)

    # dict of the form key: delta, the most recent delta is taken
    # (groupby relies on src having been sorted by key above)
    new = dict([[v[0], sorted(v[1], key=lambda x: x['version'])[-1]]
                for v in
                groupby(all_new, lambda x: x['key'])])

    # dict of the form key: delta, the most recent delta is taken
    changed = dict([[v[0], sorted(v[1], key=lambda x: x['version'])[-1]]
                    for v in
                    groupby(all_changed, lambda x: x['key'])])

    # gluing of new and changed deltas:
    #     if one resource has both a new and a changed delta
    #     and the version of new is lower than that of changed
    #     then a single delta is returned: the changed one relabelled as new
    _changed = []
    for k in changed:
        try:
            k_in_new = new[k]
        except Exception:
            _changed.append(changed[k])
        else:
            if k_in_new['version'] < changed[k]['version']:
                new[k] = changed[k]
                new[k]['op'] = 'new'

    new = list(new.itervalues())

    return map(get_result_dict, new + _changed + deleted)


def get_version(uid):
    """Return the value of the user controller's `version(uid)` call."""
    user_controller = mpfs.engine.process.usrctl()
    return user_controller.version(uid)


def get_last_quick_move_version(uid):
    """Return the value of the user controller's `last_quick_move_version(uid)` call."""
    user_controller = mpfs.engine.process.usrctl()
    return user_controller.last_quick_move_version(uid)


def get_force_snapshot_version(uid):
    """Return the value of the user controller's `force_snapshot_version(uid)` call."""
    user_controller = mpfs.engine.process.usrctl()
    return user_controller.force_snapshot_version(uid)


def is_quick_move_enabled(uid):
    """Tell whether quick move may be used for `uid`.

    Quick move must be enabled globally; on top of that the uid has to be
    either explicitly allowed or, when reindexed users are allowed, already
    reindexed for quick move.

    :rtype: bool
    """
    return bool(
        POSTGRES_QUICK_MOVE_ENABLED
        and (
            uid in POSTGRES_QUICK_MOVE_ALLOWED_UIDS
            or (POSTGRES_QUICK_MOVE_ALLOW_FOR_REINDEXED_USERS and is_reindexed_for_quick_move(uid))
        )
    )


def is_reindexed_for_quick_move(uid):
    """Return the value of the user controller's `is_reindexed_for_quick_move(uid)` call."""
    user_controller = mpfs.engine.process.usrctl()
    return user_controller.is_reindexed_for_quick_move(uid)


def set_reindexed_for_quick_move(uid):
    """Mark `uid` as reindexed for quick move and record the outcome in its reindex task.

    If a `QuickMoveReindexTask` exists for the uid, its finish time is stamped
    up front and its status is saved on every exit path.

    :raises errors.UserAlreadyReindexedForQuickMove: the user is already reindexed.
    :raises errors.UserCannotBeReindexedForQuickMove: the user is not in postgres.
    :return: whatever the user controller's `set_reindexed_for_quick_move` returns.
    """
    task = QuickMoveReindexTask.controller.get(uid=uid)
    has_task = task is not None
    if has_task:
        task.reindex_finished_time = datetime.now()

    def store_status(status):
        # Persist the outcome only when there is a task to persist it to.
        if has_task:
            task.status = status
            task.save()

    if is_reindexed_for_quick_move(uid):
        store_status(QuickMoveReindexTaskStatus.REINDEX_FINISHED)
        raise errors.UserAlreadyReindexedForQuickMove()

    in_postgres = mpfs.engine.process.usrctl().is_user_in_postgres(uid)
    if not in_postgres:
        store_status(QuickMoveReindexTaskStatus.MONGO_USER)
        raise errors.UserCannotBeReindexedForQuickMove()

    outcome = mpfs.engine.process.usrctl().set_reindexed_for_quick_move(uid)
    store_status(QuickMoveReindexTaskStatus.REINDEX_FINISHED)
    return outcome


class UserCollectionFileSystem(UserCollection):
    """`UserCollection` subclass that adds no attributes or methods of its own."""
    pass


class LinkDataCollection(UserCollectionKeyValue):
    """Sharded per-user key-value collection stored under the name ``link_data``."""
    name = 'link_data'
    sharded = True

    def build_id(self, uid, key):
        """Return the hash of ``<normalised uid>:<normalised key>``."""
        uid_part = propper_uid(uid)
        key_part = self._propper_key(key)
        return hashed(uid_part + ':' + key_part)

    def _propper_key(self, key):
        """Normalise `key`: a single leading slash, no empty path segments."""
        segments = [segment for segment in key.split('/') if segment]
        return '/' + '/'.join(segments)

    def links_exist(self, uid):
        """
        Return the number of documents found for `uid` minus one.

        NOTE(review): the result is non-zero for every count except exactly
        one; presumably one service document always exists for a user, so this
        is "number of links" - confirm.
        """
        user_filter = {
            'uid': propper_uid(uid),
        }
        cursor = self.db[self.name].find(user_filter)
        return cursor.count() - 1

    def find_all(self, spec, **kwargs):
        """Run `find` as implemented after `DiskCollection` in the MRO.

        NOTE(review): this deliberately skips `DiskCollection.find` (if there
        is one) - confirm that is intended.
        """
        parent_find = super(DiskCollection, self).find
        return parent_find(spec, **kwargs)


class UserCollectionZipped(UserCollection):
    """User collection whose resource documents are split into a plain `data`
    part and a compressed `zdata` part (see `_zip_*` / `_unzip_resource_data`)."""
    # Read from settings once, when the class body is executed.
    compress_user_data = settings.mongo['options']['compress_user_data']

    """
        Zip conditions.
    """
    # File keys handled by the "data" branch of `_zip_file_data`: stored in the
    # plain `data` dict (or, for mulca keys / previews, as entries of `data['stids']`).
    FILE_DATA_FIELDS = {'mtime', 'utime', 'size', 'mimetype', 'visible', 'name', 'public', 'original_id', 'blocked',
                        'etime', 'download_counter', 'mt', 'file_id', 'file_mid', 'digest_mid', 'pmid', 'previews',
                        'hid_updated', 'modify_uid', 'is_live_photo', 'live_video_id', 'width', 'height', 'angle',
                        'yarovaya_mark', 'aesthetics', 'photoslice_album_type', 'albums_exclusions', 'ext_coordinates',
                        'office_access_state', 'office_doc_short_id'}

    # Folder keys that `_zip_folder_data` stores in the plain `data` dict.
    FOLDER_DATA_FIELDS = {'mtime', 'utime', 'visible', 'name', 'original_id', 'blocked', 'public', 'download_counter',
                          'file_id', 'modify_uid', 'yarovaya_mark'}

    # File data keys that are written as {'type': ..., 'stid': ...} entries of `data['stids']`.
    # NOTE(review): 'emid' is not in FILE_DATA_FIELDS, so `_zip_file_data` never reaches
    # the stids branch for it - confirm whether that is intended.
    FILE_MULCA_FIELDS = {'digest_mid', 'file_mid', 'pmid', 'emid'}

    # File keys that go to the compressed `zdata['meta']` section.
    FILE_META_FIELDS = {'labels', 'ctime', 'drweb', 'sha256', 'md5', 'source'}

    # Folder keys that go to the compressed `zdata['meta']` section.
    FOLDER_META_FIELDS = {'rights', 'labels', 'ctime', 'gid'}

    # Keys that go to the compressed `zdata['pub']` section (files and folders).
    PUBLICATION_FIELDS = {'public_hash', 'symlink', 'short_url'}

    # Keys copied from the document root into the returned data by the read methods.
    FILE_ROOT_FIELDS = {'hid'}

    """
        UnZip conditions.
    """
    # Keys that `_unzip_resource_data` never copies into the resulting `meta`.
    FIELDS_TO_SKIP = {'rights', 'gid'}

    # Keys that `_unzip_resource_data` drops from `meta` when their value is falsy.
    EMPTY_TO_SKIP = {'video_info'}

    # Per resource type: keys placed at the top level of the unzipped result;
    # every other (non-skipped) key ends up under `meta`.
    UNZIP_DATA_FIELDS = {
        'dir': {'ctime', 'mtime', 'name', 'utime', 'visible', 'labels', 'original_id'},
        'file': {'mimetype', 'size', 'source', 'ctime', 'mtime', 'utime', 'name', 'visible', 'labels', 'original_id',
                 'media_type'}
    }

    # Class-wide thread pool, created on first use by `_init_pool`.
    _pool = None

    @classmethod
    def _init_pool(cls):
        """Create the class-wide thread pool on first use; later calls do nothing."""
        if cls._pool is not None:
            return
        # 51 workers = 50 (the maximum number of "ОП" - presumably shared folders, confirm) + 1
        cls._pool = ThreadPool(processes=51)

    def get_version(self, uid):
        """Return the result of the module-level `get_version` for `uid`."""
        current_version = get_version(uid)
        return current_version

    @classmethod
    def unzip_resource_data(cls, rtype, data, zdata):
        """Public entry point: delegate to `_unzip_resource_data` with the same arguments."""
        unzipped = cls._unzip_resource_data(rtype, data, zdata)
        return unzipped

    @classmethod
    def _unzip_resource_data(cls, rtype, data, zdata):
        """Merge the plain `data` and the compressed `zdata` of a resource into one raw dict.

        Keys listed in `UNZIP_DATA_FIELDS[rtype]` are placed at the top level;
        `stids` lists are flattened into `meta` as type -> stid; every other
        key goes to `meta`, except those in `FIELDS_TO_SKIP` and falsy values
        of keys in `EMPTY_TO_SKIP`.

        Side effect: sets `data['file_id_zipped'] = True` on the passed `data`
        when it has no `file_id`.

        :raises errors.StorageError: `rtype` is not a known resource type.
        :rtype: dict
        """
        top_level_fields = cls.UNZIP_DATA_FIELDS.get(rtype)
        if top_level_fields is None:
            raise errors.StorageError('Unknown resource type: %s' % rtype)

        if 'file_id' not in data:
            data['file_id_zipped'] = True

        plain_scope = map_kv_for_read(data)
        unpacked = decompress_data(zdata)

        meta = {}
        rawdata = {'meta': meta}
        # Sections of zdata come first, the plain data last, so plain values win on conflicts.
        for scope in chain(unpacked.itervalues(), (plain_scope,)):
            for field, value in scope.iteritems():
                if field in top_level_fields:
                    rawdata[field] = value
                    continue
                if field == 'stids' and isinstance(value, list):
                    meta.update((entry['type'], entry['stid']) for entry in value)
                    continue
                if field in cls.FIELDS_TO_SKIP:
                    continue
                if value or field not in cls.EMPTY_TO_SKIP:
                    meta[field] = value
        return rawdata

    def _zip_resource_data(self, rtype, rawdata):
        """Pack raw resource data with the packer that matches `rtype`.

        :param rtype: 'dir' or 'file'.
        :param rawdata: raw resource dict, passed on to the packer as is.
        :return: the `(compressed zdata, plain data)` pair produced by
            `_zip_folder_data` / `_zip_file_data`.
        :raises errors.StorageError: `rtype` is neither 'dir' nor 'file'.
        """
        if rtype == 'dir':
            return self._zip_folder_data(rawdata)
        if rtype == 'file':
            return self._zip_file_data(rawdata)
        # Report the offending type, the same way `_unzip_resource_data` does.
        raise errors.StorageError('Unknown resource type: %s' % rtype)

    def _zip_file_data(self, rawdata):
        """Split raw file data into a plain `data` dict and a compressed `zdata` blob.

        Routing of every key of `rawdata` and of its `meta` sub-dict:
            * `FILE_DATA_FIELDS`   -> `data` (mulca keys and, unless old previews are
              disabled, the first available preview become `data['stids']` entries);
            * `FILE_META_FIELDS`   -> `zdata['meta']` (empty values are dropped);
            * `PUBLICATION_FIELDS` -> `zdata['pub']` (empty values are dropped);
            * everything else      -> `zdata['setprop']` (kept as is).

        :return: tuple `(compress_data(zdata), data)`.
        """
        rawdata = map_kv_for_write(rawdata)
        meta_data = dict(rawdata.pop('meta', {}))
        empty_values = ('', None, [], {}, ())
        preview_sizes = ('XXXL', 'XXL', 'XL', 'L', 'M', 'S', 'XS', 'XXS', 'XXXS')

        stids = []
        data = {
            'stids': stids,
        }
        meta_unit, pub_unit, setprop_unit = {}, {}, {}
        zdata = {
            'meta': meta_unit,
            'pub': pub_unit,
            'setprop': setprop_unit,
        }

        for scope in (rawdata, meta_data):
            for field, value in scope.iteritems():
                is_empty = value in empty_values
                if field in self.FILE_DATA_FIELDS:
                    if is_empty:
                        continue
                    if field in self.FILE_MULCA_FIELDS:
                        stids.append({
                            'type': field,
                            'stid': value
                        })
                    elif field != 'previews':
                        data[field] = value
                    elif not FEATURE_TOGGLES_DISABLE_OLD_PREVIEWS and 'pmid' not in scope:
                        # No explicit pmid: use the largest available preview as pmid.
                        for size in preview_sizes:
                            preview_stid = value.get(size)
                            if preview_stid:
                                stids.append({'type': 'pmid', 'stid': preview_stid})
                                break
                elif field in self.FILE_META_FIELDS:
                    if is_empty:
                        continue
                    if isinstance(value, dict):
                        # Drop empty members of a nested dict.
                        value = {
                            inner_key: inner_value
                            for inner_key, inner_value in value.iteritems()
                            if inner_value not in empty_values
                        }
                    if value or value == 0:
                        meta_unit[field] = value
                elif field in self.PUBLICATION_FIELDS:
                    if not is_empty:
                        pub_unit[field] = value
                else:
                    setprop_unit[field] = value

        # Remove the sections that stayed empty.
        for unit_name in [unit for unit, payload in zdata.iteritems() if not payload]:
            zdata.pop(unit_name)

        # NOTE(review): nothing above writes a 'data' key into `data`, so this
        # branch looks unreachable - confirm before relying on it.
        if 'data' in data and 'visible' in data['data']:
            data['data']['visible'] = int(data['data']['visible'])
        return compress_data(zdata), data

    def _zip_folder_data(self, rawdata):
        """Split raw folder data into a plain `data` dict and a compressed `zdata` blob.

        Routing of every key of `rawdata` and of its `meta` sub-dict:
            * `FOLDER_DATA_FIELDS` -> `data` (empty values are dropped);
            * `FOLDER_META_FIELDS` -> `zdata['meta']` (empty values are dropped, nested
              dicts are stripped of empty members);
            * `PUBLICATION_FIELDS` -> `zdata['pub']` (empty values are dropped);
            * everything else      -> `zdata['setprop']` (kept as is).

        Side effect: pops `meta` from the passed `rawdata`.

        :return: tuple `(compress_data(zdata), data)`.
        """
        meta_data = dict(rawdata.pop('meta', {}))
        empty_values = ('', None, [], {}, ())
        data = {}
        meta_unit, setprop_unit, pub_unit = {}, {}, {}
        zdata = {
            'meta': meta_unit,
            'setprop': setprop_unit,
            'pub': pub_unit,
        }

        for scope in (rawdata, meta_data):
            for field, value in scope.iteritems():
                is_empty = value in empty_values
                if field in self.FOLDER_DATA_FIELDS:
                    if not is_empty:
                        data[field] = value
                elif field in self.FOLDER_META_FIELDS:
                    if is_empty:
                        continue
                    if isinstance(value, dict):
                        value = {
                            inner_key: inner_value
                            for inner_key, inner_value in value.iteritems()
                            if inner_value not in empty_values
                        }
                    meta_unit[field] = value
                elif field in self.PUBLICATION_FIELDS:
                    if not is_empty:
                        pub_unit[field] = value
                else:
                    setprop_unit[field] = value

        # Remove the sections that stayed empty.
        for unit_name in [unit for unit, payload in zdata.iteritems() if not payload]:
            zdata.pop(unit_name)

        # NOTE(review): nothing above writes a 'data' key into `data`, so this
        # branch looks unreachable - confirm before relying on it.
        if 'data' in data and 'visible' in data['data']:
            data['data']['visible'] = int(data['data']['visible'])
        return compress_data(zdata), data

    def show(self, uid, key, version=None):
        """Fetch one resource (or several, when the normalised key is a list/tuple).

        :param uid: user id, normalised with `propper_uid`.
        :param key: resource key(s), normalised with `self._propper_key`.
        :param version: accepted but not used by this method.
        :return: `MResponse` whose value is a `Document`, a list of documents,
            or None when a single resource is not found (then the version is
            the string 'None').
        """
        uid = propper_uid(uid)
        key = self._propper_key(key)
        ver = self.get_cached_version(uid)

        def to_document(resource):
            # Convert a raw collection record into a Document; falsy records map to None.
            if not resource:
                return None
            fields = {
                'uid': uid,
                'version': str(resource.get('version', generate_version_number())),
                'key': resource['key'],
                'type': resource['type'],
            }
            if 'zdata' in resource:
                data = self._unzip_resource_data(
                    resource['type'],
                    resource.get('data', {}),
                    resource['zdata']
                )
            else:
                # Record without zdata: meta is stored as pairs, turn it into a dict.
                try:
                    metadata = resource['data']['meta']
                except KeyError:
                    pass
                else:
                    resource['data']['meta'] = dict(metadata)
                data = resource.get('data', {})

            data.update((tag, resource[tag]) for tag in self.FILE_ROOT_FIELDS if tag in resource)
            data['type'] = resource['type']
            data['name'] = name_for_key(resource['key'])
            fields['data'] = data
            return Document(**fields)

        if not isinstance(key, (list, tuple)):
            resource = self.db[self.name].find_one(self.query_for_show(uid, key))
            if resource:
                result = to_document(resource)
            else:
                result = None
                ver = None
            return MResponse(value=result, version=str(ver))

        lookup_by_path = is_new_fs_spec_required(self.name)
        query = self.query_for_find(uid=uid)
        if lookup_by_path:
            query['path'] = {'$in': [self._propper_key(item) for item in key]}
        else:
            query['_id'] = {'$in': [hashed(uid + ':' + self._propper_key(item)) for item in key]}
        result = [to_document(found) for found in self.db[self.name].find(query)]
        return MResponse(value=result, version=str(ver))

    def unpack_single_element(self, element):
        """Turn a raw collection record into a dict with `uid`, `version`, `key`, `type`, `data`.

        :param element: raw record (dict); a falsy value yields None.
        :return: dict, or None for a falsy `element`.
        """
        if not element:
            return None

        unpacked = {
            'uid': element['uid'],
            'version': str(element.get('version', generate_version_number())),
            'key': element['key'],
            'type': element['type'],
        }

        if 'zdata' in element:
            data = self._unzip_resource_data(
                element['type'],
                element.get('data', {}),
                element['zdata']
            )
        else:
            try:
                metadata = element['data']['meta']
            except KeyError:
                # Big workaround for root folders and postgres: they have no zdata, but
                # data carries a file_id that would otherwise be ignored later on.
                # In this case it is put into meta explicitly.
                source_data = element.get('data', {})
                element_address = Address.Make(element['uid'], element['key'])

                if element_address.is_storage and 'file_id' in source_data:
                    if 'data' not in element:
                        element['data'] = {}
                    element['data']['meta'] = {
                        'file_id': source_data['file_id']
                    }
            else:
                element['data']['meta'] = dict(metadata)
            data = element.get('data', {})

        data.update((tag, element[tag]) for tag in self.FILE_ROOT_FIELDS if tag in element)
        data['type'] = element['type']
        data['name'] = name_for_key(element['key'])
        data['version'] = unpacked['version']
        unpacked['data'] = data
        return unpacked

    def show_single(self, uid, key, version):
        """Fetch one raw record by key (and version, when given) with its data unpacked.

        :param version: when truthy, the record must have exactly this version
            (compared as int).
        :return: the raw record with `data` unpacked and `type`, `name` and the
            root fields added to `data`.
        :raises errors.StorageNotFound: no matching record.
        """
        uid = propper_uid(uid)
        self.assert_user(uid)
        key = self._propper_key(key)

        query = {'uid': uid}
        if is_new_fs_spec_required(self.name):
            query['path'] = key
        else:
            query['_id'] = hashed(uid + ':' + key)
        if version:
            query['version'] = int(version)

        record = self.db[self.name].find_one(query)
        if not record:
            raise errors.StorageNotFound(key)

        record.setdefault('data', {})
        if 'zdata' in record:
            record['data'] = self._unzip_resource_data(
                record['type'],
                record['data'],
                record.pop('zdata')
            )
        elif 'meta' in record['data']:
            record['data']['meta'] = dict(record['data']['meta'])

        payload = record['data']
        payload['type'] = record['type']
        payload['name'] = name_for_key(record['key'])
        for tag in self.FILE_ROOT_FIELDS:
            if tag in record:
                payload[tag] = record[tag]
        return record

    def update_where(self, uid, key, new_value, old_value, old_version):
        """Replace a resource's data with `new_value`, but only if it still equals `old_value`.

        The stored record is compared with `old_value` (packed the same way the
        record is stored); on a match the record is updated with a new version
        and a "changed" version entry is inserted.

        :param old_value: the caller's snapshot of the data; the keys `type`,
            `uid`, `hid` and `name` are popped from it IN PLACE.
        :return: `MResponse(value=True, version=<new version as str>)`.
        :raises errors.StorageUpdateWhereFailed: the stored data differs from
            `old_value`, the conditional update matched nothing, or re-reading
            the updated record failed with a TypeError.
        """
        uid = propper_uid(uid)
        self.assert_user(uid)
        key = self._propper_key(key)
        _id = hashed(uid + ':' + key)
        new_version = generate_version_number()

        if is_new_fs_spec_required(self.name):
            find_query = self.query_for_find(path=key, uid=uid)
        else:
            find_query = self.query_for_find(_id=_id, uid=uid)

        # Strip these keys from the caller's snapshot before comparing.
        for k in ('type', 'uid', 'hid', 'name'):
            old_value.pop(k, '')
        existing_val = self.db[self.name].find_one(find_query)
        if 'zdata' in existing_val:
            # Zipped record: pack the snapshot and compare both parts.
            zdata, data = self._zip_resource_data(existing_val['type'], copy.deepcopy(old_value))
            if existing_val['data'] != data or decompress_data(existing_val['zdata']) != decompress_data(zdata):
                raise errors.StorageUpdateWhereFailed()
        else:
            # Record without zdata: meta is stored as a key-sorted list of pairs.
            data = copy.deepcopy(old_value)
            if isinstance(old_value, dict) and 'meta' in old_value:
                data['meta'] = sorted(map(list, old_value['meta'].iteritems()), key=operator.itemgetter(0))
            if existing_val['data'] != data:
                raise errors.StorageUpdateWhereFailed()
        _, query = self.data_for_save(uid, key, {}, old_version, new_version, existing_val['type'])
        # Update only if the version has not changed since the record was read.
        query['version'] = existing_val['version']
        update = {"$set": {"version": new_version}}
        update["$set"]["zdata"], update["$set"]["data"] = self._zip_resource_data(existing_val['type'],
                                                                                  copy.deepcopy(new_value))
        if self.db[self.name].find_and_modify(query=query, update=update, **self._fsync_safe_w()):
            try:
                query = self.query_for_find(_id=_id, uid=uid)
                if is_new_fs_spec_required(self.name):
                    query.pop('_id')
                    query['path'] = key

                res = self.db[self.name].find_one(query)
            except TypeError:
                # The exception used to be constructed without `raise`, so this
                # failure was swallowed and the method returned None.
                raise errors.StorageUpdateWhereFailed()
            else:
                self.insert_version(uid, key, "changed", new_version, data=res)
                return MResponse(value=True, version=str(new_version))
        else:
            raise errors.StorageUpdateWhereFailed()

    def _change_folder(self, uid, key, query, data, new_version, **kwargs):
        """Apply `data` to the documents matching `query` and insert a 'changed' version entry.

        Extra keyword arguments are forwarded to `insert_version`, together
        with `rtype='dir'` and `data`.
        """
        write_options = self._fsync_safe_w()
        self.db[self.name].update(query, data, **write_options)
        kwargs.update(rtype='dir', data=data)
        self.insert_version(uid, key, 'changed', new_version, **kwargs)

    def flat_index(self, uid, key, mediatype=None):
        """Return a flat `{key: data}` mapping of the resources located under `key`.

        :param mediatype: optional iterable of media type names; each is mapped
            with `filetypes.getGroupNumber` and used to filter on `data.mt`.
        :return: `MResponse` with the mapping and the cached user version.
        """
        uid = propper_uid(uid)
        self.assert_user(uid)
        key = self._propper_key(key)
        self.assert_key(uid, key)
        version = self.get_cached_version(uid)
        query = self.query_for_find(uid=uid, normal_key=key)

        if mediatype:
            group_numbers = [filetypes.getGroupNumber(name) for name in mediatype]
            query['data.mt'] = {'$in': group_numbers}

        subtree_prefix = key + '/'
        result = {}
        for record in self.db[self.name].find(query):
            # Keep strict descendants of `key` only.
            if not record['key'].startswith(subtree_prefix):
                continue
            if 'zdata' in record:
                data = self._unzip_resource_data(record['type'], record['data'], record['zdata'])
            else:
                data = record['data']
                data['meta'] = dict(data.get('meta', {}))
            data['type'] = record['type']
            data['name'] = name_for_key(record['key'])
            data['version'] = record['version']
            result[record['key']] = data
        return MResponse(value=result, version=str(version))

    def tree_index(self, uid, key):
        """Build the nested tree of resources under `key`.

        :return: list of nodes `{'this': <record>, 'list': [<child nodes>]}`
            for the direct children of `key`; deeper levels are nested in `list`.
        """
        uid = propper_uid(uid)
        key = self._propper_key(key)

        # parent key -> nodes of its direct children
        children_by_parent = {}
        for record in self.iter_subtree(uid, key):
            if 'zdata' in record:
                record['data'] = self._unzip_resource_data(record['type'], record['data'], record['zdata'])
            else:
                record['data']['meta'] = dict(record['data'].get('meta', {}))
            payload = record['data']
            payload['type'] = record['type']
            payload['name'] = name_for_key(record['key'])

            siblings = children_by_parent.setdefault(parent_for_key(record['key']), [])
            siblings.append({'this': record})

        def attach_children(node_key):
            # Recursively fill the `list` of every node below `node_key`.
            nodes = children_by_parent.get(node_key, [])
            for node in nodes:
                node['list'] = attach_children(node['this']['key'])
            return list(nodes)

        return attach_children(key)

    def get_last_files(self, limit, args):
        """For every tuple in `args` get the last `limit` files of a user's folder,
        sorted by descending `data.mtime`.

        Each tuple of `args` is unpacked as `(collection, uid, key)`; the
        aggregates are run in parallel on the class thread pool.

        ..warning:: The first `FETCH_LIMIT` documents are selected first, and only
            then the ones located in folder `key` are picked from them. Not all
            files may be returned; this is done because there is no index with
            the `key` field.

        :type limit: int
        :type args: list[tuple]
        :rtype: collections.Iterable[tuple]
        :raises TypeError: `limit` is not an int.
        :raises errors.AggregateFailed: at least one aggregate did not succeed.
        """
        if not isinstance(limit, int):
            raise TypeError("`int` required, got `%s` instead" % type(limit))

        self._init_pool()

        FETCH_LIMIT = 1000  # How many documents are initially selected for filtering
        limit = min(limit, FETCH_LIMIT)

        def get_aggregate_args(collection, uid, key):
            folder_prefix = key if key.endswith('/') else key + '/'
            pipeline = [
                {"$match": {'uid': uid, 'type': 'file'}},
                {"$sort": SON([("data.mtime", pymongo.DESCENDING)])},
                {"$limit": FETCH_LIMIT},
                {"$match": {'key': {'$regex': '^%s' % re.escape(folder_prefix)}}},
                {"$limit": limit}
            ]
            return collection, pipeline

        def run_aggregate(job):
            collection, pipeline = job
            return collection.aggregate(pipeline)

        jobs = tuple(starmap(get_aggregate_args, args))
        cursors = self._pool.map(run_aggregate, jobs)

        # Responses that come back as dicts carry the documents under 'result'
        # and an 'ok' flag; anything else is used as is.
        if cursors and isinstance(cursors[0], dict):
            results = [response['result'] for response in cursors if response['ok'] == 1.0]
        else:
            results = cursors

        failed_results = len(cursors) - len(results)
        if failed_results > 0:
            error_log.error(
                "AggregateError: %d of %d aggregates were failed" % (failed_results, len(cursors)), exc_info=True
            )
            raise errors.AggregateFailed()

        return imap(self._iter_key_data, results)

    def _iter_key_data(self, cursor):
        """Yield `(key, data)` pairs for the raw records of `cursor`, with data unpacked."""
        for record in cursor:
            if 'zdata' in record:
                data = self._unzip_resource_data(record['type'], record['data'], record['zdata'])
            else:
                # TODO
                # remove this branch: after the migration no records without zdata will remain
                data = record['data']
                if isinstance(data.get('meta'), list):
                    data['meta'] = dict(data['meta'])
            data['type'] = record['type']
            data['name'] = name_for_key(record['key'])
            data.update((tag, record[tag]) for tag in self.FILE_ROOT_FIELDS if tag in record)
            yield record['key'], data

    #    TODO: Copy-paste, needs to be reworked
    # ===============================================================================

    def timeline(self, uid, key, version=None, filter_args={}, range_args={}, folders=False):
        """
        Flat list of a user's resources located under ``key``.

        The database query selects by ``uid`` (plus ``type == 'file'`` unless
        ``folders`` is set, plus ``filter_args``); the restriction to
        descendants of ``key`` is applied afterwards in Python.

        ``skip``/``limit`` from ``range_args`` are handed to the database only
        when ``key`` has a single path component or no non-zero ``skip`` was
        requested. Otherwise the window is cut in Python after the subtree
        filter, so that ``skip`` counts only resources under ``key``.

        :param uid: owner of the resources
        :param key: path of the folder whose descendants are listed
        :param version: not used; the cached user version is reported instead
        :param filter_args: extra query conditions (read only)
        :param range_args: may contain ``skip``, ``limit``, ``sort`` and
            ``order`` (read only)
        :param folders: when true, folders are not excluded by the query
        :return: ``MResponse`` with a list of ``Document`` and the cached
            version as a string
        """
        uid = propper_uid(uid)
        self.assert_user(uid)
        key = self._propper_key(key)
        self.assert_key(uid, key)
        result = []
        version = self.get_cached_version(uid)

        query = {
            'uid': uid,
        }
        if not folders:
            query['type'] = 'file'
        query.update(filter_args)

        amounts = dict((k, v) for k, v in range_args.items() if k in ('skip', 'limit'))

        key_depth = len([chunk for chunk in key.split('/') if chunk])
        paginate_in_db = key_depth == 1 or not amounts.get('skip', 0)

        # Keep the decision "who paginates" in an explicit flag: the find()
        # arguments may also carry ``sort``, so their emptiness says nothing
        # about whether skip/limit were delegated to the database.
        find_args = dict(amounts) if paginate_in_db else {}
        if 'sort' in range_args:
            find_args['sort'] = [(range_args['sort'], range_args.get('order', 1))]
        all_resources = self.db[self.name].find(query, **find_args)

        key_prefix = key + '/'
        for item in all_resources:
            item_key = item['key']
            if not item_key.startswith(key_prefix):
                continue

            if 'zdata' in item:
                item_data = self._unzip_resource_data(item['type'], item['data'], item['zdata'])
            else:
                item_data = item['data']
                if 'meta' in item_data and isinstance(item_data.get('meta'), list):
                    item_data['meta'] = dict(item_data['meta'])
            item_data['type'] = item['type']
            item_data['name'] = name_for_key(item_key)
            if item_data['type'] == 'file':
                for k in self.FILE_ROOT_FIELDS:
                    if k in item:
                        item_data[k] = item[k]
            result.append(Document(
                data=item_data,
                version=item.get('version'),
                key=item_key,
                type=item.get('type'),
            ))

        if not paginate_in_db:
            # The window was not applied by the database, cut it here.
            skip = amounts.get('skip')
            limit = amounts.get('limit')
            if limit is not None and skip is not None:
                limit = skip + limit
            result = result[skip:limit]
        return MResponse(value=result, version=str(version))

    # ===============================================================================

    def tree(self, uid, key, level, version=None):
        """
        Folder tree below ``key`` down to a given level, returned as a flat
        mapping (the folders are not arranged into a tree).

        All folders selected by ``query_for_find(uid=uid, type='dir')`` are
        fetched; for a key with more than one path component they are
        narrowed in Python to those whose key starts with ``key + '/'``.
        Folders nested deeper than ``level`` levels below ``key`` are skipped.
        Every returned folder gets ``numfolders``: the number of fetched
        folders whose ``parent`` equals its ``_id``.

        :param uid: owner of the resources
        :param key: path of the root folder
        :param level: number of nesting levels below ``key`` to include
        :param version: not used; the cached user version is reported instead
        :return: ``MResponse`` whose value maps a folder key to its ``Document``
        """
        uid = propper_uid(uid)
        self.assert_user(uid)
        key = self._propper_key(key)
        version = self.get_cached_version(uid)
        result = {}

        query = self.query_for_find(uid=uid, type='dir')
        args = {
            'fields': ('parent', 'key', 'data.public', 'data.mtime', 'version')
        }
        all_resources = self.db[self.name].find(query, **args)
        if len([chunk for chunk in key.split('/') if chunk]) > 1:
            key_prefix = key + '/'
            all_resources = [v for v in all_resources if v['key'].startswith(key_prefix)]
        else:
            all_resources = list(all_resources)

        # Count direct subfolders per parent in a single pass instead of
        # rescanning the whole folder list for every item.
        numfolders_by_parent = defaultdict(int)
        for resource in all_resources:
            numfolders_by_parent[resource.get('parent')] += 1

        grandparent_level = len(key.split('/'))
        max_grandparent_level = grandparent_level + level

        for item in all_resources:
            item_id = item['_id']
            item_key = item['key']

            chunks = item_key.split('/')
            this_level = len(chunks)

            if not (max_grandparent_level >= this_level > grandparent_level):
                continue

            if 'zdata' in item:
                item_data = self._unzip_resource_data(item['type'], item['data'], item['zdata'])
                item_data['type'] = 'dir'
            else:
                item_data = item['data']
                item_data['meta'] = {}
                item_data['type'] = 'dir'
                item_data['name'] = name_for_key(item_key)
                if 'public' in item_data:
                    item_data['meta']['public'] = item_data['public']

            # .get() so that the lookup does not add entries to the counter
            item_data['numfolders'] = numfolders_by_parent.get(item_id, 0)

            parent_key = '/'.join(chunks[:-1])
            result[item_key] = Document(
                data=item_data,
                version=item.get('version'),
                key=item_key,
                parent_key=parent_key,
                type=item.get('type'),
            )

        return MResponse(value=result, version=str(version))

    def data_for_save(self, uid, key, rawdata, old_version, new_version, rtype):
        """
        Build the document and the query for saving a resource.

        Starts from the parent implementation, removes the ``hid``, ``uid``,
        ``type`` and ``name`` entries from the document's ``data`` and, when
        ``compress_user_data`` is set, replaces ``data`` with the pair
        produced by ``_zip_resource_data`` (stored as ``zdata`` and ``data``).

        :return: ``(document, query)`` tuple
        """
        document, query = super(UserCollectionZipped, self).data_for_save(
            uid, key, rawdata, old_version, new_version, rtype)

        payload = document['data']
        for redundant_field in ('hid', 'uid', 'type', 'name'):
            payload.pop(redundant_field, '')

        if self.compress_user_data:
            zipped_part, plain_part = self._zip_resource_data(rtype, document['data'])
            document['zdata'] = zipped_part
            document['data'] = plain_part
        return document, query

    def _folder_content_python_sort(self, query, range_args):
        """
        Листинг директории.

        Сортировка и пагинация осуществляется в питоне
        """
        sort_field = range_args.get('sort')
        direction = range_args.get('order')
        folders = []
        files = []
        for each in self.db[self.name].find(query):
            if each['type'] == 'dir':
                folders.append(each)
            elif each['type'] == 'file':
                files.append(each)
        is_reverse = direction == -1
        try:
            folders.sort(key=lambda x: x[sort_field], reverse=is_reverse)
        except KeyError:
            folders.sort(key=lambda x: x['key'], reverse=is_reverse)
        try:
            files.sort(key=lambda x: x[sort_field], reverse=is_reverse)
        except KeyError:
            files.sort(key=lambda x: x['key'], reverse=is_reverse)
        listing_result = folders + files
        skip = int(range_args.get('skip', 0))
        limit = int(range_args.get('limit', len(listing_result)))
        listing_result = listing_result[skip:skip + limit]
        return listing_result

    @staticmethod
    def query_optimizer(query, sort):
        optimized_query = query.copy()
        optimized_sort = list(sort)

        # https://st.yandex-team.ru/CHEMODAN-32120
        # если в запросе есть media type, то фильтр/сортировка по type не имеет
        # смысла - у папок нет медиа типа
        mt_filter = query.get('data.mt')
        if isinstance(mt_filter, dict) and '$in' in mt_filter and query.get('type') == 'file':
            del optimized_query['type']
            optimized_sort = [i for i in optimized_sort if i[0] != 'type']

        return optimized_query, optimized_sort

    def folder_content(self, uid, key, version=None, filter_args={}, range_args={},
                       max_sample_size=None):
        """
        Directory listing (fetches keys and values).

        Selects the documents whose ``parent`` refers to ``key`` (plus
        ``filter_args``), sorted by ``type`` and then by the requested field,
        and converts each of them into a ``Document``.

        :param version: not used; the cached user version is reported instead
        :param filter_args: extra query conditions
        :param range_args: may contain ``sort``, ``order``, ``skip``, ``limit``
        :param max_sample_size: Upper bound for the value reported in
        `meta.total_results_count` and for the number of resources returned.
        Even if the database holds more matching results than this value,
        `meta.total_results_count` is set to it.
        Used to reduce the database load of counts when the exact value does not matter.
        :type max_sample_size: int | None
        :return: ``MResponse`` whose value is an ``OrderedDict`` mapping the
        item name (last path component of its key) to its ``Document``
        """
        uid = propper_uid(uid)
        key = self._propper_key(key)

        # The user is not checked: content cannot be requested unless
        # the folder was found earlier
        # self.assert_user(uid)
        # self.assert_key(uid, key)

        result = OrderedDict()
        version = self.get_cached_version(uid)
        if is_new_fs_spec_required(self.name):
            query = {'uid': uid, 'parent': key}
        else:
            query = {'uid': uid, 'parent': id_for_key(uid, key)}
        query.update(filter_args)

        sort_field = range_args.get('sort')
        direction = range_args.get('order')
        sort = [('type', 1), (sort_field, direction)]
        optimized_query, optimized_sort = self.query_optimizer(query, sort)

        amount_args = {k: v for k, v in range_args.iteritems() if k in ('skip', 'limit')}
        if max_sample_size is None:
            total_len = self.db[self.name].find(optimized_query).count()
        else:
            # cap the amount fetched from the database by means of the limit;
            # the mongo driver additionally requires the `with_limit_and_skip` key for that
            if 'limit' in amount_args:
                amount_args['limit'] = min(amount_args['limit'], max_sample_size)
            else:
                amount_args['limit'] = max_sample_size

            total_len = self.db[self.name].find(
                optimized_query, limit=max_sample_size
            ).count(with_limit_and_skip=True)

        if range_args and range_args.get('limit') == 0:
            # an explicit zero limit: nothing to fetch
            listing_result = []
        elif range_args:
            try:
                listing_result = list(self.db[self.name].find(
                    optimized_query,
                    sort=optimized_sort, **amount_args))
                # In production there were cases when mongo did not fail but
                # sorted only a part of the records (e.g. 4000 out of 30000).
                # In that case the last element was an error, and with
                # a further increased offset the response was an empty array.
                # Such cases are handled below.
                if (len(listing_result) > 0 and
                    u'$err' in listing_result[-1] and
                    u'code' in listing_result[-1] and
                    listing_result[-1][u'code'] == 17406):
                    # the last element is an error
                    raise pymongo.errors.OperationFailure('Error in result')
                skip = int(range_args.get('skip', 0))
                if len(listing_result) == 0 and total_len > skip:
                    # the response is empty although the elements actually exist
                    raise pymongo.errors.OperationFailure('Lost elements')
            except pymongo.errors.OperationFailure:
                # Mongo's buffer overflowed, try sorting in Python
                listing_result = self._folder_content_python_sort(optimized_query, range_args)

        else:
            # no range arguments: plain unsorted fetch
            listing_result = self.db[self.name].find(
                optimized_query, **amount_args)

        for item in listing_result:
            item_key = item['key']
            # last non-empty path component is used as the result key
            item_name = filter(None, item_key.split('/'))[-1]
            if 'zdata' in item:
                item_data = self._unzip_resource_data(
                    item['type'],
                    item.get('data', {}),
                    item.get('zdata', {})
                )
            else:
                item_data = item.get('data', {})
                if 'meta' in item_data and isinstance(item_data.get('meta'), (list, tuple)):
                    item_data['meta'] = dict(item_data['meta'])

            # copy the root-level fields present on the document into the data
            for tag in self.FILE_ROOT_FIELDS:
                if tag in item:
                    item_data[tag] = item[tag]
            item_data['type'] = item['type']
            item_data['name'] = name_for_key(item_key)

            result[item_name] = Document(
                data=item_data,
                version=item.get('version'),
                key=item_key,
                type=item['type'],
            )

        return MResponse(value=result, version=str(version), total_results_count=total_len)

    def find_by_hid_all(self, hid):
        """
        Yield every document stored with the given ``hid``.

        When a document carries ``zdata``, that field is removed and ``data``
        is replaced with the result of ``_unzip_resource_data``.

        :param hid: value converted to ``str`` and wrapped into ``Binary``
            for the lookup
        :return: generator of documents
        """
        spec = {'hid': Binary(str(hid))}
        for document in self.db[self.name].find(spec):
            if 'zdata' in document:
                packed = document.pop('zdata')
                document['data'] = self._unzip_resource_data(document['type'], document['data'], packed)
            yield document

    def find_by_field(self, uid, args, sargs):
        """
        Yield the documents found by the parent ``find_by_field``.

        When ``zdata`` can be popped from a document, ``data`` is replaced
        with the result of ``_unzip_resource_data`` (a missing ``data`` is
        treated as an empty dict). Documents from which ``zdata`` cannot be
        taken are yielded untouched.

        :return: generator of documents
        """
        found = super(UserCollectionZipped, self).find_by_field(uid, args, sargs)
        for document in found:
            try:
                packed = document.pop('zdata')
            except Exception:
                is_packed = False
            else:
                is_packed = True

            if is_packed:
                document['data'] = self._unzip_resource_data(
                    document['type'], document.pop('data', {}), packed)
            yield document

    def find_one_by_field(self, uid, args):
        """
        Return the document found by the parent ``find_one_by_field``.

        If the document has a non-``None`` ``zdata``, that field is removed
        and ``data`` is replaced with the result of ``_unzip_resource_data``.
        A falsy parent result is returned as is.
        """
        document = super(UserCollectionZipped, self).find_one_by_field(uid, args)
        if not document:
            return document

        packed = document.pop('zdata', None)
        if packed is None:
            return document

        document['data'] = self._unzip_resource_data(document['type'], document['data'], packed)
        return document

    def change_folder_fields(self, uid, key, data, version=None):
        """
        Set folder data fields and record the change in the changelog.

        Only the entries of ``data`` whose name is listed in
        ``FOLDER_DATA_FIELDS`` are written (as ``data.<name>``).

        :param version: version to record; generated when ``None``
        :return: the version that was recorded
        """
        update = dict(
            ('data.%s' % field, value)
            for field, value in data.iteritems()
            if field in self.FOLDER_DATA_FIELDS
        )
        if version is None:
            version = generate_version_number()

        # the document is addressed by path or by id, depending on the spec
        # required for this collection
        if is_new_fs_spec_required(self.name):
            locator = ('path', key)
        else:
            locator = ('_id', id_for_key(uid, key))
        spec = dict([locator, ('uid', uid)])

        self.db[self.name].update(spec, {'$set': update}, **self._fsync_safe_w())
        self.insert_version(uid, key, 'changed', version, rtype='dir')
        return version

    def move_file_id(self, _id, uid, version, zdata):
        """
        Move ``file_id`` out of the compressed payload into ``data.file_id``.

        ``file_id`` is taken from the ``mulca`` section of the decompressed
        ``zdata``; a ``mulca`` section left empty is removed. The document
        matching ``_id``, ``uid`` and ``version`` is then updated with the new
        ``data.file_id`` and the re-compressed ``zdata``.

        :return: the extracted ``file_id``
        """
        unpacked = decompress_data(zdata)
        mulca = unpacked['mulca']
        file_id = mulca.pop('file_id')
        if not mulca:
            del unpacked['mulca']

        spec = {'_id': _id, 'uid': uid, 'version': version}
        changes = {'data.file_id': file_id, 'zdata': compress_data(unpacked)}
        self.db[self.name].update(spec, {'$set': changes}, **self._fsync_safe_w())
        return file_id

    def find_snapshot_chunk(self, uid, file_id, last_ids, last_file_type, limit):
        return self.collection.find_snapshot_chunk(uid, file_id, last_ids, last_file_type, limit)

    def find_snapshot_initial_chunk(self, uid):
        return self.collection.find_snapshot_initial_chunk(uid)


class ZipByDefault(UserCollectionZipped):
    """Zipped user collection with ``compress_user_data`` switched on."""

    compress_user_data = True


class ZipBySettings(UserCollectionZipped):
    """Zipped user collection that keeps the inherited ``compress_user_data`` value."""


class TrashCollection(ZipBySettings):
    """Collection stored under the name ``trash``."""

    is_common = False
    is_sharded = True
    name = 'trash'

    def count(self, *args, **spec):
        """
        Count elements.

        :param args: when given, the first positional value is used as the
            ``uid`` condition (it replaces a ``uid`` passed in ``spec``)
        :param spec: Extended SON conditions.
        :return: number of matching documents
        """
        conditions = dict(spec)
        if args:
            conditions['uid'] = args[0]
        return self.db[self.name].find(conditions).count()


class HiddenDataCollection(ZipByDefault):
    """
    Collection stored under the name ``hidden_data``.

    Saved documents get a ``dtime`` timestamp; for resources without
    ``yarovaya_mark`` it may be shifted into the past (see ``data_for_save``).
    """

    name = 'hidden_data'
    is_sharded = True
    is_common = False

    # difference between the two cleaner delays, used below as a number of days
    days_in_past_for_unmarked_files = (HIDDEN_DATA_CLEANER_WORKER_CLEAN_DELAY
                                       - HIDDEN_DATA_CLEANER_WORKER_CLEAN_DELAY_FOR_UNMARKED_FILES)
    # the same difference expressed in whole seconds
    short_store_shift_seconds = int(timedelta(days=days_in_past_for_unmarked_files).total_seconds())

    def _make_folder(self, uid, key, data, new_version):
        """Insert the folder document; nothing else is done here."""
        collection = self.db[self.name]
        collection.insert(data, **self._fsync_safe_w())

    def data_for_save(self, uid, key, rawdata, old_version, new_version, rtype):
        """
        Build the document for saving and stamp it with ``dtime``.

        ``dtime`` is the current time. When the mark-dependent mode is enabled
        and the resource has no ``yarovaya_mark``, ``dtime`` is moved into the
        past: by ``short_store_shift_seconds`` for resources uploaded after
        the mark activation timestamp, otherwise by the time passed since that
        timestamp, but not more than ``short_store_shift_seconds``.

        :return: ``(document, query)`` tuple
        """
        data, query = UserCollectionZipped.data_for_save(self, uid, key, rawdata, old_version, new_version, rtype)
        data['dtime'] = int(time.time())

        if not HIDDEN_DATA_CLEANER_YAROVAYA_DTIME_DEPENDS_ON_MARK_ENABLED:
            return data, query
        if data['data'].get('yarovaya_mark', False):
            return data, query

        # TODO: after April 2019 only a single line should be left here:
        # data['dtime'] -= self.short_store_shift_seconds

        # 1 november
        yarovaya_mark_active_ts = 1541019600
        upload_ts = int(data['data'].get('utime', 0))
        if upload_ts > yarovaya_mark_active_ts:
            shift = self.short_store_shift_seconds
        else:
            shift = min(int(time.time() - yarovaya_mark_active_ts), self.short_store_shift_seconds)
        data['dtime'] -= shift
        return data, query

    def find_hardlink_by_field(self, field, value, **kw):
        """
        Parent lookup restricted to documents with a recent enough ``dtime``.

        The lower bound is now plus ``1 - hidden_clean_period`` days, where
        ``hidden_clean_period`` comes from the system settings.
        """
        clean_period = settings.system['system']['hidden_clean_period']
        bound_date = datetime.now() + timedelta(days=1 - clean_period)
        bound_ts = int(time.mktime(bound_date.timetuple()))
        kw['dtime'] = {'$gt': bound_ts}
        return super(HiddenDataCollection, self).find_hardlink_by_field(field, value, **kw)


class AttachDataCollection(ZipByDefault):
    """Collection stored under the name ``attach_data``."""

    is_common = False
    is_sharded = True
    name = 'attach_data'

    def get_version(self, uid):
        """Always report an empty string as the version, whatever the ``uid``."""
        empty_version = ''
        return empty_version


class NotesDataCollection(ZipByDefault):
    """Collection stored under the name ``notes_data``."""

    is_common = False
    is_sharded = True
    name = 'notes_data'

    def make_folder(self, uid, key, rawdata, old_version=None):
        """
        Create a folder; the notes area root additionally gets a ``file_id``.

        For ``key == NOTES_AREA_PATH`` a file id generated for that path is
        written into ``rawdata`` (an empty ``rawdata`` is replaced with a new
        dict) before the parent implementation is called.
        """
        if key == NOTES_AREA_PATH:
            area_file_id = Resource.generate_file_id(uid, NOTES_AREA_PATH)
            rawdata = rawdata or {}
            rawdata['file_id'] = area_file_id
        super(NotesDataCollection, self).make_folder(uid, key, rawdata, old_version)


class PhotounlimDataCollection(ZipByDefault):
    """Collection stored under the name ``photounlim_data``."""

    is_common = False
    is_sharded = True
    name = 'photounlim_data'

    def make_folder(self, uid, key, rawdata, old_version=None):
        """
        Create a folder; the photounlim area root additionally gets a ``file_id``.

        For ``key == PHOTOUNLIM_AREA_PATH`` a file id generated for
        ``/photounlim`` is written into ``rawdata`` (an empty ``rawdata`` is
        replaced with a new dict) before the parent implementation is called.
        """
        if key == PHOTOUNLIM_AREA_PATH:
            area_file_id = Resource.generate_file_id(uid, '/photounlim')
            rawdata = rawdata or {}
            rawdata['file_id'] = area_file_id
        super(PhotounlimDataCollection, self).make_folder(uid, key, rawdata, old_version)


class AdditionalDataCollection(ZipByDefault):
    """Collection stored under the name ``additional_data``."""

    is_common = False
    is_sharded = True
    name = 'additional_data'


class ClientDataCollection(ZipByDefault):
    """Collection stored under the name ``client_data``."""

    is_common = False
    is_sharded = True
    name = 'client_data'


class NarodDataCollection(ZipByDefault):
    """Collection stored under the name ``narod_data``."""

    is_common = False
    is_sharded = True
    name = 'narod_data'

    def get_version(self, uid):
        """Return the result of the module-level ``get_version`` for ``uid``."""
        user_version = get_version(uid)
        return user_version


class MiscDataCollection(ZipByDefault):
    """Collection stored under the name ``misc_data``."""

    is_common = False
    is_sharded = True
    name = 'misc_data'


class UserDataCollection(ZipBySettings):
    name = 'user_data'
    is_sharded = True
    is_common = False
    internal_meta_fields = settings.system['internal_meta_fields']
    meta_fields_in_changelog = set(['broken', ])
    gid = None

    def __init__(self):
        """
        Initialise the collection.

        With the ``tags`` feature toggle on, ``file_id`` is added to the file
        and folder data fields and excluded from ``FILE_MULCA_FIELDS``.
        """
        super(UserDataCollection, self).__init__()
        if not settings.feature_toggles['tags']:
            return

        self.FILE_DATA_FIELDS.add('file_id')
        self.FOLDER_DATA_FIELDS.add('file_id')
        self.FILE_MULCA_FIELDS = tuple(
            field for field in self.FILE_MULCA_FIELDS if field != 'file_id')

    def _put(self, uid, key, query, data, new_version, rawdata={}, **kwargs):
        try:
            res = self.db[self.name].update(query, data, upsert=True, **self._fsync_safe_w())
            if res['updatedExisting']:
                state = 'changed'
            else:
                state = 'new'
            self.insert_version(uid, key, state, new_version, data=data, rtype='file', **kwargs)
        except (pymongo.errors.OperationFailure, KeyError):
            raise errors.StorageError()

    def _make_folder(self, uid, key, data, new_version):
        self.assert_parent(uid, key)
        self.db[self.name].insert(data, **self._fsync_safe_w())
        self.insert_version(uid, key, 'new', new_version, rtype='dir', data=data)

    def remove_single(self, uid, key, old_version, new_version=None, data=None):
        """
        Remove a single resource and record a ``deleted`` changelog entry.

        The changelog entry uses ``new_version`` when it is truthy and a
        freshly generated version otherwise; its data is built by
        ``data_for_save`` from ``data``.

        :return: the result of ``_remove_single``
        """
        removal_result, removed_type = self._remove_single(uid, key, old_version, new_version)
        if new_version:
            changelog_version = new_version
        else:
            changelog_version = generate_version_number()
        changelog_data, _query = self.data_for_save(uid, key, data, old_version, new_version, 'file')
        self.insert_version(uid, key, 'deleted', changelog_version, rtype=removed_type, data=changelog_data)
        return removal_result

    def _remove(self, uid, key, old_version, new_version, rtype, data=None):
        """
        Remove a resource through the parent implementation and record a
        ``deleted`` entry in the changelog.

        :param new_version: version for the changelog entry; a new one is
            generated when it is falsy
        :param rtype: resource type recorded in the changelog entry
        :param data: resource data for the changelog entry (``None`` is
            treated as an empty dict)
        :return: the version recorded in the changelog
        """
        # ``super()`` returns an already bound method: passing ``self``
        # explicitly would shift every argument by one position.
        super(UserDataCollection, self)._remove(uid, key, old_version, new_version)
        ver = new_version or generate_version_number()
        if data is None:
            data = {}
        data, _ = self.data_for_save(uid, key, data, old_version, new_version, 'file')
        self.insert_version(uid, key, 'deleted', ver, rtype=rtype, data=data)
        return ver

    def remove_single_without_changelog(self, uid, key, old_version=None, new_version=None):
        result, rtype = self._remove_single(uid, key, old_version, new_version)
        return result

    def diff(self, uid, key, version, *args, **kwargs):
        groups = {}
        grouplinks = {}

        # =======================================================================
        def set_shared(element):
            result = {}
            try:
                link = grouplinks[element['key']]
            except KeyError:
                try:
                    groups[element['key']]
                except KeyError:
                    pass
                else:
                    result['rights'] = 660
                    result['shared'] = 'owner'
            else:
                result['rights'] = link['rights']
                result['shared'] = 'group'
            return result

        def process_one(uid, element):
            rtype = element['type']
            result = {
                'uid': uid,
                'key': element['key'],
                'type': rtype,
                'op': 'new',
            }
            # ===============================================================
            # process shared folders
            result.update(set_shared(element))
            # ===============================================================

            try:
                zdata = element['zdata']
            except KeyError:
                data = element['data']
                try:
                    data['meta'] = dict(element['data']['meta'])
                except KeyError:
                    data['meta'] = {}
            else:
                data = self._unzip_resource_data(rtype, element['data'], zdata)

            if rtype == 'file':
                etime = data['meta'].get('etime', 0)
                if etime is None:
                    etime = 0
                else:
                    etime = int(etime)
                if etime < 0:
                    etime = 0

                result.update({
                    'md5': data['meta']['md5'],
                    'sha256': data['meta']['sha256'],
                    'size': int(data['size']),
                    'drweb': data['meta'].get('drweb', 4),
                    'broken': data['meta'].get('broken', 0),
                    'etime': etime,
                    'mtime': int(data.get('mtime')),
                    'has_preview': True if (data['meta'].get('pmid') or data['meta'].get('previews')) else False,
                    'mimetype': data.get('mimetype') or filetypes.DEFAULT_MIME_TYPE,
                    'media_type': data.get('media_type'),
                    'is_live_photo': data['meta'].get('is_live_photo', False),
                })

                if not settings.feature_toggles['broken']:
                    result['broken'] = 0
            # https://jira.yandex-team.ru/browse/CHEMODAN-11741
            if not version:
                if SETPROP_SYMLINK_FIELDNAME in data['meta']:
                    result['external_setprop'] = 1

            result['public'] = data['meta'].get('public', 0)
            result['fid'] = data.get('meta', {}).get('file_id')
            result['visible'] = int(data.get('visible', 0) or 0)
            return result

        def unzip_element(element, sh=True):
            if 'zdata' in element:
                data = decompress_data(element.pop('zdata'))
                element.update(data)
            if sh:
                element.update(set_shared(element))
            return element

        # =======================================================================

        def _diff_with_version(uid, version):
            if isinstance(version, (str, unicode)):
                try:
                    version = int(version)
                except Exception:
                    raise errors.StorageVersionNotFound()

            current_version = get_version(uid)
            changelog = ChangelogCollection()
            if current_version == version:
                val = []
            else:
                force_snapshot_version = get_force_snapshot_version(uid)
                if force_snapshot_version and version < force_snapshot_version:
                    raise errors.StorageVersionNotFound()

                last_quick_move_version = get_last_quick_move_version(uid)
                allow_quick_move_deltas = kwargs.get('allow_quick_move_deltas', False)
                if (not allow_quick_move_deltas and last_quick_move_version is not None and
                    version <= last_quick_move_version):
                    raise errors.StorageVersionNotFound()

                version_found = False
                # Возвращаем из-за https://jira.yandex-team.ru/browse/CHEMODAN-4477
                if changelog.find_one({'uid': uid, 'version': version}):
                    version_found = True

                groups_by_owner = defaultdict(list)
                grouplinks_by_gid = dict((link['gid'], link) for link in self.db.group_links.find({'uid': uid}))
                if grouplinks_by_gid:
                    joined_groups_by_gid = dict(
                        (item['_id'], item) for item in self.db.groups.find({'_id': {'$in': grouplinks_by_gid.keys()}}))
                else:
                    joined_groups_by_gid = {}

                for gid, link in grouplinks_by_gid.iteritems():
                    try:
                        group = joined_groups_by_gid[gid]
                    except KeyError:
                        pass
                    else:
                        link['group'] = group
                        grouplinks[link['path']] = link
                        groups_by_owner[group['owner']].append(gid)
                        if not version_found:
                            spec = {'version': version, 'gid': gid, 'uid': group['owner']}
                            if changelog.find_one(spec):
                                version_found = True

                if not version_found:
                    raise errors.StorageVersionNotFound()

                groups.update((group_data['path'], group_data) for group_data in self.db.groups.find({'owner': uid}))

                # https://jira.yandex-team.ru/browse/CHEMODAN-8587
                # Оптимизируем отбой по количеству версий
                if not settings.feature_toggles['process_big_changelog']:
                    spec_uid_changelog = {'uid': uid, 'version': {'$gt': version}}
                    total_count = changelog.count(spec_uid_changelog)
                    changelog_length = settings.system['system']['changelog_length']
                    if groups_by_owner:
                        for owner, gids in groups_by_owner.iteritems():
                            spec = {
                                'uid': owner,
                                'gid': {'$in': gids},
                                'version': {'$gt': version}
                            }
                            total_count += changelog.count(spec)
                            if total_count > changelog_length:
                                break
                    if total_count > changelog_length:
                        raise errors.StorageVersionNotFound()

                element_keys_set = set()
                elements = []
                for each in changelog.find({'uid': uid, 'version': {'$gt': version}}, fields={'dtime': False}):
                    element = unzip_element(each)

                    # filter subpath changes (we can't do it in mongo since 'key' is zipped for files):
                    if is_subpath(element['key'], key, use_regex=True):
                        element_keys_set.add(each['key'])
                        elements.append(element)

                for owner, gids in groups_by_owner.iteritems():
                    # filter subpath changes
                    subpath_gids = [gid for gid in gids if
                                    (is_subpath(grouplinks_by_gid[gid]['path'], key, use_regex=True) or
                                     is_subpath(key, grouplinks_by_gid[gid]['path'], use_regex=True))]
                    if len(subpath_gids) > 0:
                        spec = {
                            'uid': owner,
                            'gid': {'$in': subpath_gids},
                            'version': {'$gt': version},
                        }
                        group_changes = []

                        for each in changelog.find(spec):
                            each = unzip_element(each, sh=False)
                            gid = each['gid']
                            link = grouplinks_by_gid[gid]
                            group = link['group']
                            link_path = link['path']
                            try:
                                new_path = change_path_parent(each['key'], each['group_path'], link_path)
                            except Exception:
                                raise errors.ServicesPathError
                            if is_subpath(new_path, key, use_regex=True):
                                each['uid'] = uid
                                each['owner_uid'] = group['owner']
                                each['key'] = new_path
                                each['group_path'] = link_path
                                each['group'] = gid
                                each.update(set_shared(each))
                                group_changes.append(each)
                                element_keys_set.add(new_path)
                        elements.extend(group_changes)

                # хак для НДА папки
                # сейчас есть баг в синхронизации с ПО: если удалить папку и создать папку с таким же именем,
                # ПО получит только дельту создания и не сможет засинкаться
                # когда удаляется НДА папка у яндексоида, мы её сразу восстанавливаем и ПО не может засинкаться
                # поэтому яндексоидам с дельтами удаления и создания НДА папки возвращаем 404
                # https://st.yandex-team.ru/CHEMODAN-36440
                # https://st.yandex-team.ru/DISCSW-11922
                from mpfs.core.yateam.logic import user_has_nda_rights
                if user_has_nda_rights(uid):
                    nda_deltas = [x for x in elements if x['key'] == YATEAM_DIR_PATH]
                    if nda_deltas:
                        nda_deleted = [x for x in nda_deltas if x['op'] == 'deleted']
                        nda_new = [x for x in nda_deltas if x['op'] == 'new']
                        if nda_deleted and nda_new:
                            raise errors.StorageVersionNotFound()

                should_skip_deltas_filter = allow_quick_move_deltas and is_quick_move_enabled(uid)
                val = _process_chanelog_items(elements, skip_filter=should_skip_deltas_filter)

                # этот код работает для diff-ов при принятии приглашения в ОП
                if grouplinks:
                    groups_trees = GroupsTrees(self.db.user_data)
                    for element_value in val:
                        try:
                            link = grouplinks[element_value['key']]
                        except KeyError:
                            continue
                        if not (element_value['type'] == 'dir' and element_value['op'] == 'new'):
                            continue

                        if DELTAS_SNAPSHOT_ON_GROUP_FOLDER_INVITE_ENABLED:
                            log.info('Skip group folder loading for uid `%s` and path `%s`; changelog delta: %s',
                                     uid, link['path'], element_value)
                            if not DELTAS_SNAPSHOT_ON_GROUP_FOLDER_INVITE_DRY_RUN:
                                raise errors.StorageVersionNotFound()

                        group = link['group']
                        link_elements = []
                        element_value['gid'] = link['gid']
                        element_value['group_path'] = link['path']
                        link_path = link['path']
                        group_path = group['path']
                        group_owner = group['owner']

                        for each in groups_trees.elements(group_owner, group_path):
                            each_key = each['key']
                            if each_key not in element_keys_set:
                                try:
                                    each['key'] = new_key = change_path_parent(each_key, group_path, link_path)
                                except Exception:
                                    raise errors.ServicesPathError
                                each['uid'] = uid
                                each['parent'] = parent_id_for_key(uid, new_key)
                                each['_id'] = id_for_key(uid, new_key)
                                doc_version = each.get('version', version)
                                each = process_one(uid, each)
                                each['gid'] = link['gid']
                                each['group_path'] = link_path
                                each['owner_uid'] = group_owner
                                # иначе в документе не будет версии
                                if doc_version:
                                    each['version'] = doc_version
                                link_elements.append(each)
                        val.extend(link_elements)

                for item in val:
                    # добавление resource_id
                    file_id = item.get('fid')
                    if file_id is not None:
                        resource_owner = item.get('owner_uid', item.get('uid'))
                        if resource_owner:
                            item['resource_id'] = address.ResourceId(resource_owner, file_id).serialize()
            return val

        def _diff_full(uid):
            """Build the full listing of ``uid``'s resources under the enclosing ``key``.

            Steps, in order:

            1. Optionally reject the request when the user owns more documents than
               ``FEATURE_TOGGLES_FULL_DIFF_FILE_LIMIT`` (only checked for ``key == "/disk"``).
            2. Read the user's own subtree from ``self.db.user_data``.
            3. For every group link of the user whose group document is found, re-root the
               group owner's elements under the link path and append them.
            4. Keep only the elements whose parent chain reaches the requested ``key`` and
               pass them through ``process_one``.
            5. For ``key == '/disk'`` compare the summed sizes with the stored counters, log
               both comparisons and, if the listing is smaller than the counters both before
               and after the read, schedule a recount.

            Side effects: updates the enclosing ``groups`` and ``grouplinks`` mappings.

            :param uid: user whose resources are listed
            :return: list of ``process_one`` results sorted by ``'key'``
            :raises errors.TooManyElementsForFullDiffError: the file limit is exceeded
            :raises errors.StorageSizeNotFound: the size counters could not be read
            :raises errors.ServicesPathError: a group element path could not be re-rooted
            :raises errors.StorageReturnedPartialDiff: sizes mismatch and the
                ``shoot_partial_diff`` mongo option is set
            """

            if FEATURE_TOGGLES_FULL_DIFF_FILE_LIMIT and key == "/disk":
                element_count = self.db.user_data.find({'uid': uid}).count()
                if element_count > FEATURE_TOGGLES_FULL_DIFF_FILE_LIMIT:
                    raise errors.TooManyElementsForFullDiffError()

            def _subtree(uid, subpath):
                """Return the user's documents at and below ``subpath``.

                For ``"/disk"`` every document of the user is returned with a single query;
                otherwise the tree is walked folder by folder starting from ``subpath``.
                """
                coll = self.db.user_data
                if subpath == "/disk":
                    return list(coll.find({'uid': uid}))
                else:
                    # walk subtree
                    result = []
                    # BaseDAO-backed collections are addressed by path, the others by hashed id
                    if isinstance(coll, BaseDAO):
                        query = {'path': subpath, 'uid': uid}
                    else:
                        query = {'_id': id_for_key(uid, subpath), 'uid': uid}
                    temp = list(coll.find(query))
                    # `temp` is a stack of elements that still have to be visited
                    while len(temp) > 0:
                        elem = temp.pop()
                        if elem['type'] == 'dir':
                            if isinstance(coll, BaseDAO):
                                query = {'parent': elem['key'], 'uid': uid}
                            else:
                                query = {'parent': id_for_key(uid, elem['key']), 'uid': uid}
                            sub = list(coll.find(query))
                            temp.extend(sub)
                        result.append(elem)
                    return result

            def _get_diskinfo_counters(uid):
                """Return ``(total_size, trash_size)`` read from ``disk_info``.

                Any failure while reading schedules a recount for the user and is reported
                as ``errors.StorageSizeNotFound``.
                """
                try:
                    total_size = self.db.disk_info.find_one({'uid': uid, '_id': hashed('%s:%s' % (uid, '/total_size'))},
                                                            fields=('data',))['data']
                    trash_size = self.db.disk_info.find_one({'uid': uid, '_id': hashed('%s:%s' % (uid, '/trash_size'))},
                                                            fields=('data',))['data']
                except Exception:
                    RecountDAO().add(uid)
                    raise errors.StorageSizeNotFound()
                else:
                    return total_size, trash_size

            # counters are sampled before and after the listing is read so that both can be
            # compared with the listing's summed size at the end
            total_size_start, trash_size_start = _get_diskinfo_counters(uid)

            elements = _subtree(uid, key)

            grouplinks_by_gid = dict((link['gid'], link) for link in self.db.group_links.find({'uid': uid}))
            if grouplinks_by_gid:
                joined_groups_by_gid = dict((item['_id'], item)
                                            for item in self.db.groups.find({'_id': {'$in': grouplinks_by_gid.keys()}}))
                groups_size_start = sum((v.get('size', 0) for k, v in joined_groups_by_gid.iteritems()))
            else:
                joined_groups_by_gid = {}
                groups_size_start = 0

            # groups owned by the user, keyed by path, are published through the enclosing `groups`
            groups.update((group_data['path'], group_data) for group_data in self.db.groups.find({'owner': uid}))
            groups_trees = GroupsTrees(self.db.user_data)

            for gid, link in grouplinks_by_gid.iteritems():
                try:
                    group = joined_groups_by_gid[gid]
                except KeyError:
                    # the link references a group that was not found: nothing to attach
                    pass
                else:
                    # NOTE(review): presumably true when the link lies inside the requested
                    # subtree or the requested path lies inside the link - confirm is_subpath
                    if is_subpath(link['path'], key, use_regex=True) or is_subpath(key, link['path'], use_regex=True):
                        link_elements = []
                        link['group'] = group
                        grouplinks[link['path']] = link
                        link_path = link['path']
                        group_path = group['path']
                        group_owner = group['owner']

                        link_element = self.db[self.name].find_one(self.query_for_show(uid, link_path))
                        # the folder is deleted on the user's side but the group_link still exists: skip it
                        # https://st.yandex-team.ru/CHEMODAN-39443
                        if not link_element:
                            continue

                        for each in groups_trees.elements(group_owner, group_path):
                            try:
                                each['key'] = new_key = change_path_parent(each['key'], group_path, link_path)
                            except Exception:
                                raise errors.ServicesPathError

                            if is_subpath(new_key, key, use_regex=True):
                                each['uid'] = uid
                                if parent_for_key(each['key']) == link_path:
                                    # to attach the owner's shared-folder branch to the participant, replace
                                    # the source element's parent id with the id of the participant's
                                    # shared-folder element
                                    each['parent'] = link_element['_id']
                                link_elements.append(each)
                        elements.extend(link_elements)

            # English rendering of the note below: "Build a parent map of the form
            # {'a': 'b', 'b': 'root', 'c': 'd'}" (folder id -> parent id; the parent of the
            # requested `key` is marked as 'root').
            '''
            Собираем дерево родителей вида
            {
                'a': 'b',
                'b': 'root',
                'c': 'd',
            }
            '''
            folder_tree = {}
            for each in elements:
                if (each['type'] == 'dir') and (each['key'] != '/'):
                    parent = each.get('parent')
                    folder_tree[each['_id']] = parent
                    if each['key'] == key:
                        folder_tree[parent] = 'root'

            # English rendering of the note below: "Parent sifter. Checks whether the element's
            # parent is recorded in the map of good folders. If it is not, searches upwards,
            # setting statuses for everything along the way."
            '''
            Просеиватель парентов
            Смотрит, записан ли в карту хороших родитель элемента
            Если в карте нет, то делает восходящий поиск вверх, попутно проставляя статусы всем по пути
            '''
            folder_map = {'root': True}

            def in_good_thread(x):
                """Tell whether folder id ``x`` is linked to ``'root'`` through ``folder_tree``.

                Answers found by walking up are memoized in ``folder_map``.
                """
                try:
                    return folder_map[x]
                except KeyError:
                    try:
                        parent = folder_tree[x]
                    except KeyError:
                        # unknown folder: it cannot belong to the requested subtree
                        folder_map[x] = False
                        return False

                    try:
                        return folder_map[parent]
                    except KeyError:
                        if parent in folder_tree:
                            res = in_good_thread(parent)
                            folder_map[x] = res
                            return res
                        else:
                            folder_map[x] = False
                            return False

            val = []
            summary_size = 0
            for item in elements:
                if item.get('key') == '/disk':
                    continue

                parent = item.get('parent')
                if parent and in_good_thread(parent):
                    val.append(process_one(uid, item))

                # the size is summed for every element, including those left out of `val`
                try:
                    size = item['data']['size']
                except KeyError:
                    pass
                else:
                    # https://jira.yandex-team.ru/browse/CHEMODAN-6231
                    # log the occupied sizes
                    summary_size += size

            val.sort(key=lambda x: x['key'])

            # TODO: get rid of hard-coded path
            if key == '/disk':
                total_size_end, trash_size_end = _get_diskinfo_counters(uid)
                if grouplinks_by_gid:
                    joined_groups_by_gid = dict(
                        (item['_id'], item) for item in self.db.groups.find({'_id': {'$in': grouplinks_by_gid.keys()}}))
                    groups_size_end = sum((v.get('size', 0) for k, v in joined_groups_by_gid.iteritems()))
                else:
                    groups_size_end = 0

                # percentage by which the listing's size falls short of the counters
                # (positive means the listing is smaller than the counters say)
                try:
                    percent_diff_start = 100.0 - (summary_size * 100.0) / (
                        groups_size_start + total_size_start - trash_size_start)
                except ZeroDivisionError:
                    percent_diff_start = 0.0

                log.info('%s | INDEX %s | START COUNTERS %s (DISK %s, TRASH %s, GROUPS %s) | DIFFERENCE %s (%.2f%%)' % (
                    uid,
                    summary_size,
                    groups_size_start + total_size_start - trash_size_start,
                    total_size_start,
                    trash_size_start,
                    groups_size_start,
                    summary_size - (groups_size_start + total_size_start - trash_size_start),
                    percent_diff_start
                ))

                try:
                    percent_diff_end = 100.0 - (summary_size * 100.0) / (
                        groups_size_end + total_size_end - trash_size_end)
                except ZeroDivisionError:
                    percent_diff_end = 0.0

                log.info('%s | INDEX %s | END COUNTERS %s (DISK %s, TRASH %s, GROUPS %s) | DIFFERENCE %s (%.2f%%)' % (
                    uid,
                    summary_size,
                    groups_size_end + total_size_end - trash_size_end,
                    total_size_end,
                    trash_size_end,
                    groups_size_end,
                    summary_size - (groups_size_end + total_size_end - trash_size_end),
                    percent_diff_end
                ))

                # the listing is smaller than the counters both before and after the read:
                # schedule a recount and, if configured, fail the request
                if percent_diff_start > 0 and percent_diff_end > 0:
                    RecountDAO().add(uid)

                    if settings.mongo['options']['shoot_partial_diff']:
                        raise errors.StorageReturnedPartialDiff()

            return val

        current_version = get_version(uid)
        if version:
            result = _diff_with_version(uid, version)
        else:
            result = _diff_full(uid)
        return MResponse(value=result, version=str(current_version))

    def insert_version(self, uid, key, op, new_version, *args, **kwargs):
        """Record a changelog version entry for the resource at ``key``.

        Recognised keyword arguments:

        * ``data`` - resource document; when it carries ``zdata`` it is unpacked with
          ``_unzip_resource_data``, otherwise its nested ``data`` dict is used when that
          dict contains ``meta``
        * ``rtype`` - resource type; falls back to ``data['type']``
        * ``group_version`` - forwarded to ``create_version`` (default ``True``)
        * ``external_setprop`` - coerced to ``int`` and stored when truthy
        * ``changes`` - mapping whose keys listed in ``meta_fields_in_changelog`` are copied

        Positional ``*args`` are accepted and ignored.
        """
        changelog = ChangelogCollection()
        resource = kwargs.get('data', {})
        group_version = kwargs.get('group_version', True)
        rtype = kwargs.get('rtype') or resource['type']
        external_setprop = int(kwargs.get('external_setprop', False))

        extra_fields = {}
        if resource:
            if 'zdata' in resource:
                resource = self._unzip_resource_data(rtype, resource['data'], resource['zdata'])
            elif 'meta' in resource['data']:
                resource = resource['data']
                # replace the nested meta with a plain dict copy
                resource['meta'] = dict(resource['meta'])

            if 'meta' in resource:
                extra_fields.update(self.get_folder_version_data(resource))
                if rtype == 'file':
                    extra_fields.update(self.get_file_version_data(resource))
                if external_setprop:
                    extra_fields['external_setprop'] = external_setprop

                changes = kwargs.get('changes', {})
                for field in changes:
                    if field in self.meta_fields_in_changelog:
                        extra_fields[field] = changes[field]

        changelog.create_version(uid, key, op, new_version, rtype, group_version=group_version, **extra_fields)

    @classmethod
    def get_folder_version_data(cls, data):
        """Collect the changelog fields shared by every resource type.

        :param data: resource dict; must contain ``'meta'``, may contain ``'visible'``
        :return: dict with ``visible`` (int, default 1), ``fid`` (``meta['file_id']`` or
            ``None``) and ``public`` (1 when ``meta['public']`` is truthy, else 0)
        """
        visible = int(data.get('visible', 1))
        meta = data['meta']
        file_id = meta.get('file_id')
        is_public = 1 if meta.get('public') else 0
        return {
            'visible': visible,
            'fid': file_id,
            'public': is_public,
        }

    @classmethod
    def get_file_version_data(cls, data):
        """Collect the file-specific changelog fields.

        :param data: file resource dict; ``data['meta']`` must contain ``'sha256'`` and
            ``'md5'``, and ``data['mtime']`` must be convertible with ``int``
        :return: dict with hashes, size, ``drweb`` (default 4), ``etime`` (``None`` and
            negative values become 0), ``mtime``, media/mime type and ``has_preview``
            (true when ``meta`` has a truthy ``pmid`` or ``previews``)
        """
        meta = data['meta']

        raw_etime = meta.get('etime', 0)
        # a missing, None or negative etime is stored as 0
        etime = 0 if raw_etime is None else max(int(raw_etime), 0)

        return {
            'sha256': meta['sha256'],
            'md5': meta['md5'],
            'size': data.get('size'),
            'drweb': meta.get('drweb', 4),
            'etime': etime,
            'mtime': int(data.get('mtime')),
            'media_type': data.get('media_type'),
            'mimetype': data.get('mimetype'),
            'has_preview': bool(meta.get('pmid') or meta.get('previews'))
        }

    def find_all(self, spec, **kwargs):
        """Run ``find`` as implemented after ``DiskCollection`` in the MRO.

        ``DiskCollection``'s own ``find`` (if it defines one) is bypassed; ``spec`` and
        the keyword arguments are forwarded unchanged.
        """
        parent_find = super(DiskCollection, self).find
        return parent_find(spec, **kwargs)


class DeletedPreviewMidsCollection(ShardedById):
    """Collection named ``deleted_preview_mids``.

    Declared with both ``is_sharded`` and ``is_common`` switched off; the meaning of the
    flags is defined by the base collection classes.
    """
    name = 'deleted_preview_mids'
    is_sharded = False
    is_common = False


class Groups(KeyValue):
    """Key-value collection ``groups``: keyed by ``_id``, with ``owner`` as the uid field."""
    name = 'groups'
    key_fields = ('_id',)
    doc_keys = ('_id', 'owner', 'v')
    is_sharded = False
    is_common = True
    uid_field = 'owner'

    def query_for_show(self, owner, gid):
        """Return the query spec selecting group ``gid`` owned by ``owner``."""
        spec = {'owner': owner}
        spec['_id'] = gid
        return spec


class GroupInvites(KeyValue):
    """Key-value collection ``group_invites``, keyed by ``_id``."""
    name = 'group_invites'
    key_fields = ('_id',)
    is_sharded = False
    is_common = True

    def iter_by_gid(self, gid, statuses=None, offset=0, limit=None):
        """Return a cursor over the invites of group ``gid`` ordered by ``_id``.

        :param statuses: optional list of statuses to keep; ``None`` or an empty list
            disables the filter
        :param offset: number of documents to skip
        :param limit: maximum number of documents; ``None`` is sent to ``find`` as ``0``
        """
        assert isinstance(statuses, (list, types.NoneType))
        query = {'gid': gid}
        if statuses:
            query['status'] = {'$in': statuses}
        if limit is None:
            limit = 0
        # sorting is required to guarantee correct pagination.
        ordering = [('_id', pymongo.ASCENDING)]
        return self.collection.find(query, skip=offset, limit=limit, sort=ordering)

    def count_by_gid(self, gid, statuses=None):
        """Return the number of invites of group ``gid``, optionally filtered by status."""
        cursor = self.iter_by_gid(gid, statuses=statuses)
        return cursor.count()


class GroupLinks(KeyValue):
    """Key-value collection ``group_links``, keyed by ``lid``."""
    name = 'group_links'
    key_fields = ('lid',)
    doc_keys = ('_id', 'uid', 'v')
    is_sharded = False
    is_common = True

    def iter_by_gid(self, gid, excluded_b2b_key=None, offset=0, limit=None):
        """Return a cursor over the links of group ``gid`` ordered by ``_id``.

        :param excluded_b2b_key: when not ``None``, links whose ``b2b_key`` equals this
            value are left out
        :param offset: number of documents to skip
        :param limit: maximum number of documents; ``None`` is sent to ``find`` as ``0``
        """
        query = {'gid': gid}
        if excluded_b2b_key is not None:
            query['b2b_key'] = {'$ne': excluded_b2b_key}
        if limit is None:
            limit = 0
        # sorting is required to guarantee correct pagination.
        ordering = [('_id', pymongo.ASCENDING)]
        return self.collection.find(query, skip=offset, limit=limit, sort=ordering)

    def count_by_gid(self, gid, excluded_b2b_key=None):
        """Return the number of links of group ``gid``, honouring ``excluded_b2b_key``."""
        cursor = self.iter_by_gid(gid, excluded_b2b_key=excluded_b2b_key)
        return cursor.count()


class ExifCollection(KeyValue):
    """Key-value collection named ``exif_tmp_collection``.

    Declared as common and not sharded; the meaning of the flags is defined by the base
    collection classes.
    """
    name = 'exif_tmp_collection'
    is_sharded = False
    is_common = True


class FilesystemLocksCollection(KeyValue):
    """Resource locks kept in the ``filesystem_locks`` collection.

    Every lock is one document identified by ``(uid, key)`` and stamped with ``dtime``.
    """
    name = 'filesystem_locks'
    is_sharded = True
    is_common = False
    key_fields = ('uid', 'key')
    TTL = 3600  # s

    def _get_search_spec(self, uid, key):
        """Return the spec identifying the lock document for ``uid`` and ``key``."""
        normalized_uid = propper_uid(uid)
        normalized_key = propper_key(key)
        return {
            '_id': id_for_key(normalized_uid, normalized_key),
            'uid': normalized_uid,
            'key': normalized_key
        }

    def set(self, uid, key, data=None, time_offset=0):
        """
        Create the lock for ``(uid, key)``.

        ``dtime`` is stored as the current UTC time moved ``time_offset`` seconds into
        the past; a non-empty ``data`` is stored under the ``data`` field.

        Raises :class:`~mpfs.common.errors.ResourceLockFailed` when the lock already
        exists or the insert reports a falsy result.

        :type uid: str
        :type key: str
        :type data: None | dict
        :type time_offset: int
        :rtype: str
        """
        payload = data or {}

        assert isinstance(payload, dict)
        assert isinstance(time_offset, int)
        assert time_offset < self.TTL

        lock_spec = self._get_search_spec(uid, key)
        existing_lock = self.db[self.name].find_one(lock_spec)
        if existing_lock:
            raise errors.ResourceLockFailed(uid, key)

        lock_doc = {'dtime': datetime.utcnow() - timedelta(seconds=time_offset)}
        lock_doc.update(lock_spec)
        if payload:
            lock_doc['data'] = payload

        inserted = self.db[self.name].insert(lock_doc, **self._fsync_safe_w())
        if inserted:
            return inserted

        raise errors.ResourceLockFailed(uid, key)

    def get(self, uid, key):
        """
        Return the lock document for ``(uid, key)`` as produced by ``find_one``.

        :type uid: str
        :type key: str
        :rtype: dict
        """
        lock_spec = self._get_search_spec(uid, key)
        return self.db[self.name].find_one(lock_spec)

    def release(self, uid, key):
        """Remove the lock for ``(uid, key)``.

        Raises :class:`~mpfs.common.errors.ResourceLocked` when the remove call reports
        a falsy result.
        """
        lock_spec = self._get_search_spec(uid, key)
        removed = self.db[self.name].remove(lock_spec, **self._fsync_safe_w())
        if removed:
            return
        raise errors.ResourceLocked(uid, key)

    def update(self, uid, key, data=None, time_offset=0):
        """Refresh ``dtime`` (and ``data`` when non-empty) of an existing lock.

        Does nothing when the lock does not exist. Raises
        :class:`~mpfs.common.errors.ResourceLockFailed` when the modification reports
        a falsy result.
        """
        payload = data or {}

        assert isinstance(payload, dict)
        assert isinstance(time_offset, int)
        assert time_offset < self.TTL

        lock_spec = self._get_search_spec(uid, key)
        existing_lock = self.db[self.name].find_one(lock_spec)
        if not existing_lock:
            return

        changes = {'dtime': datetime.utcnow() - timedelta(seconds=time_offset)}
        if payload:
            changes['data'] = payload

        modified = self.db[self.name].find_and_modify(lock_spec, {'$set': changes}, **self._fsync_safe_w())
        if not modified:
            raise errors.ResourceLockFailed(uid, key)

    def find_all(self, uid):
        """Return every lock of ``uid`` as ``{key: lock document without 'key'}``."""
        locks_by_key = {}
        for lock in self.db[self.name].find({'uid': propper_uid(uid)}):
            lock_key = lock.pop('key')
            locks_by_key[lock_key] = lock
        return locks_by_key


class GroupsTrees(object):
    """Lists the documents stored under a shared ("group") folder of its owner.

    Two strategies are used, chosen by the owner's total number of documents:

    * above ``settings.system['system']['full_index_parents_limit']`` the folder is
      walked recursively, one query per visited element;
    * otherwise all of the owner's documents are read once, kept in
      ``owners_full_indexes`` and filtered by path for this and later requests.

    Callers rewrite the yielded documents in place (``key``, ``uid``, ``parent``...), so
    documents that come from the cached index are yielded as shallow copies and the
    index itself is never handed out.
    """

    def __init__(self, collection):
        """
        :param collection: storage queried through ``find``/``find_one``; ``BaseDAO``
            instances are addressed by path, anything else by ``id_for_key`` ids
        """
        self.collection = collection
        # owner uid -> complete list of the owner's documents (small owners only)
        self.owners_full_indexes = {}
        # owner uid -> total number of the owner's documents
        self.owners_elements_counts = {}

    def elements(self, group_owner, group_path):
        """Return an iterable of the documents located below ``group_path``.

        :param group_owner: uid of the folder owner
        :param group_path: path of the shared folder in the owner's tree
        :return: ``[]`` when the folder has no direct children, otherwise a generator
            (the queries run lazily, while the generator is consumed)
        """
        if isinstance(self.collection, BaseDAO):
            parent_id = group_path
        else:
            parent_id = id_for_key(group_owner, group_path)
        if not self.collection.find_one({'uid': group_owner, 'parent': parent_id}):
            return []

        try:
            owner_index = self.owners_full_indexes[group_owner]
        except KeyError:
            pass
        else:
            return self._iter_index(owner_index, group_path)

        try:
            elements_count = self.owners_elements_counts[group_owner]
        except KeyError:
            elements_count = self.owners_elements_counts[group_owner] = self.collection.find(
                {'uid': group_owner}).count()

        if elements_count > settings.system['system']['full_index_parents_limit']:
            # descend into folders only when the toggle list is empty or names the owner;
            # otherwise children are also requested for files
            fetch_folder_children_only = (
                group_owner in FEATURE_TOGGLES_FULL_DIFF_FETCHING_FILE_CHILDREN_BUG_FIX_ENABLED_UIDS or
                not FEATURE_TOGGLES_FULL_DIFF_FETCHING_FILE_CHILDREN_BUG_FIX_ENABLED_UIDS
            )
            return self._walk_tree(group_owner, group_path, fetch_folder_children_only)
        return self._build_index(group_owner, group_path)

    @staticmethod
    def _subtree_matcher(parent_path):
        """Return a predicate matching keys that lie strictly below ``parent_path``."""
        return re.compile('^%s/.*$' % re.escape(parent_path)).match

    def _walk_tree(self, group_owner, parent_path, fetch_folder_children_only):
        """Yield the descendants of ``parent_path`` recursively, children before parent."""
        if isinstance(self.collection, BaseDAO):
            parent_id = parent_path
        else:
            parent_id = id_for_key(group_owner, parent_path)
        for element in self.collection.find({'uid': group_owner, 'parent': parent_id}):
            if not fetch_folder_children_only or element['type'] == 'dir':
                for each in self._walk_tree(group_owner, element['key'], fetch_folder_children_only):
                    yield each
            yield element

    def _build_index(self, group_owner, parent_path):
        """Read all documents of the owner, yielding those below ``parent_path``.

        The index is published only after the scan has finished, so a generator that is
        abandoned half-way never leaves a truncated index behind.
        """
        is_descendant = self._subtree_matcher(parent_path)
        index = []
        for each in self.collection.find({'uid': group_owner}):
            index.append(each)
            if is_descendant(each['key']):
                # shallow copy: the caller may rewrite top-level fields freely
                yield copy.copy(each)
        self.owners_full_indexes[group_owner] = index

    def _iter_index(self, owner_index, parent_path):
        """Yield copies of the cached documents located below ``parent_path``."""
        is_descendant = self._subtree_matcher(parent_path)
        for each in owner_index:
            if is_descendant(each['key']):
                yield copy.copy(each)


class TrashCleanerQueue(KeyValue):
    """Key-value collection named ``trash_cleaner_queue``, keyed by ``(uid, date)``.

    Declared as common and not sharded; the meaning of the flags is defined by the base
    collection classes.
    """
    name = 'trash_cleaner_queue'
    key_fields = ('uid', 'date')
    is_sharded = False
    is_common = True
