# -*- coding: utf-8 -*-
import collections
import hashlib
import itertools
import re
import uuid
from argparse import ArgumentParser
from datetime import timedelta, datetime
import time
from hashlib import sha256
from psycopg2.tz import FixedOffsetTimezone

import mpfs.engine.process
from mpfs.metastorage.postgres.queries import SQL_INCREMENT_VERSION_AND_SET_FORCE_SNAPSHOT_VERSION

# NOTE(review): the admin-script setup is executed before the Session import
# below -- presumably mpfs.dao.session needs the process to be configured
# first; confirm before moving this import to the top of the file.
mpfs.engine.process.setup_admin_script()
from mpfs.dao.session import Session


# All four queries take the bound parameters :uid and :path and add a computed
# `path` column (via code.fid_to_path) to every returned row.

# Folders whose parent is the folder located at :path.
SQL_GET_SUB_FOLDERS = '''
SELECT *, (SELECT path FROM code.fid_to_path(fid, uid)) as path
FROM disk.folders
WHERE uid = :uid and parent_fid = (SELECT fid from code.path_to_fid(:path, :uid))
'''
# Files whose parent is the folder located at :path.
SQL_GET_SUB_FILES = '''
SELECT *, (SELECT path FROM code.fid_to_path(fid, uid)) as path
FROM disk.files
WHERE uid = :uid and parent_fid = (SELECT fid from code.path_to_fid(:path, :uid))
'''
# The folder located exactly at :path.
SQL_GET_FOLDER = '''
SELECT *, (SELECT path FROM code.fid_to_path(fid, uid)) as path
FROM disk.folders
WHERE uid = :uid and fid = (SELECT fid from code.path_to_fid(:path, :uid))
'''
# The file located exactly at :path.
SQL_GET_FILE = '''
SELECT *, (SELECT path FROM code.fid_to_path(fid, uid)) as path
FROM disk.files
WHERE uid = :uid and fid = (SELECT fid from code.path_to_fid(:path, :uid))
'''

# One-line "%"-format template for describing a resource row.
# NOTE(review): not referenced anywhere in the visible part of this file.
DOC_TMPL = "%(uid)s | %(path)s | %(date_modified)s | %(date_hidden_data)s | %(path_before_remove)s"


class DBProxy(object):
    """Class-level facade over the per-user DB sessions used by this script.

    Every method is a class/static method. Sessions are created lazily with
    ``Session.create_from_uid`` and memoized in ``session_cache``.
    """

    # str(uid) -> session object; shared by all callers of the class.
    session_cache = {}

    @classmethod
    def force_snapshot(cls, uid):
        """Run SQL_INCREMENT_VERSION_AND_SET_FORCE_SNAPSHOT_VERSION for ``uid``."""
        return cls.get_session(uid).execute(SQL_INCREMENT_VERSION_AND_SET_FORCE_SNAPSHOT_VERSION, {'uid': uid})

    @classmethod
    def unset_delete(cls, uid):
        """Reset ``disk.user_index.deleted`` to NULL for ``uid``."""
        return cls.get_session(uid).execute('UPDATE disk.user_index SET deleted = NULL where uid = :uid', {'uid': uid})

    @classmethod
    def get_folder(cls, uid, path):
        """Return the first row of SQL_GET_FOLDER for ``path`` (``fetchone()`` result)."""
        return cls.get_session(uid).execute(SQL_GET_FOLDER, {'uid': uid, 'path': path}).fetchone()

    @classmethod
    def get_file(cls, uid, path):
        """Return the first row of SQL_GET_FILE for ``path`` (``fetchone()`` result)."""
        return cls.get_session(uid).execute(SQL_GET_FILE, {'uid': uid, 'path': path}).fetchone()

    @classmethod
    def get_user_index(cls, uid):
        """Return the first ``disk.user_index`` row for ``uid`` (``fetchone()`` result)."""
        return cls.get_session(uid).execute(
            r'SELECT * FROM disk.user_index WHERE uid = :uid',
            {'uid': uid}
        ).fetchone()

    @classmethod
    def get_sub_folders(cls, uid, folder_path):
        """Return the un-fetched ``execute()`` result of SQL_GET_SUB_FOLDERS."""
        return cls.get_session(uid).execute(SQL_GET_SUB_FOLDERS, {'uid': uid, 'path': folder_path})

    @classmethod
    def get_sub_files(cls, uid, folder_path):
        """Return the un-fetched ``execute()`` result of SQL_GET_SUB_FILES."""
        return cls.get_session(uid).execute(SQL_GET_SUB_FILES, {'uid': uid, 'path': folder_path})

    @classmethod
    def insert(cls, uid, table, row):
        """Insert the ``row`` mapping (column -> value) into ``table``."""
        insert_tmpl = cls.generate_insert_tmpl(table, row)
        return cls.get_session(uid).execute(insert_tmpl, row)

    @classmethod
    def get_session(cls, uid):
        """Return the memoized session for ``uid``, creating it on first use.

        The cache key is always ``str(uid)``. The membership test used to be
        done on the raw ``uid`` while the entry was stored under ``str(uid)``,
        so a non-string uid (e.g. an int) never hit the cache and a new
        session was created on every call.
        """
        cache_key = str(uid)
        if cache_key not in cls.session_cache:
            cls.session_cache[cache_key] = Session.create_from_uid(uid)
        return cls.session_cache[cache_key]

    @staticmethod
    def generate_insert_tmpl(table_name, row):
        """Build an ``INSERT`` statement with one ``:column`` bind per key of ``row``.

        Columns are emitted in sorted order. The table and column names are
        interpolated into the SQL text as-is (only the values are bound
        parameters), so they must come from trusted code.
        """
        columns = sorted(row.keys())
        return 'INSERT INTO %(table_name)s (%(columns)s) VALUES (%(values)s);' % {
            'table_name': table_name,
            'columns': ','.join(columns),
            'values': ','.join([":%s" % c for c in columns]),
        }

def iterate_over_tree(uid, path):
    """Yield ``(row, is_folder)`` for every resource found below ``path``.

    Folder paths still to be visited are kept in a LIFO list. For each visited
    folder its sub-folders are yielded first (and queued for a later visit),
    then its files. The starting folder itself is not yielded.
    """
    pending_paths = [path]
    while len(pending_paths) > 0:
        current_path = pending_paths.pop()

        for folder_row in DBProxy.get_sub_folders(uid, current_path):
            # Queue the sub-folder so that its own children are listed later.
            pending_paths.append(folder_row.path)
            yield folder_row, True

        for file_row in DBProxy.get_sub_files(uid, current_path):
            yield file_row, False


# Service suffix at the very end of a resource name: ":<9-10 digits>.<1-5 digits>"
# (e.g. "wallhaven-748282.jpg:1601479903.6"). Raw string, so the regex escapes
# are not interpreted as (invalid) string escapes.
SUFFIX_TMPL = re.compile(r':(\d{9,10}\.\d{1,5})$')


def remove_hidden_suffix(name):
    """Split the trailing service suffix off ``name``.

    Returns ``(clean_name, suffix_value)`` where ``suffix_value`` is the
    numeric part of the suffix as a float, or ``(name, None)`` when ``name``
    carries no such suffix. Only the last suffix is removed.
    """
    found = SUFFIX_TMPL.search(name)
    if found is None:
        return name, None
    # The pattern is anchored at the end, so there is exactly one match;
    # cutting it out by position avoids running the regex a second time.
    clean_name = name[:found.start()] + name[found.end():]
    return clean_name, float(found.group(1))


def resolve_files_suffix_conflict(file_rows):
    """Drop files that share a name (service suffix ignored) within one folder.

    For every ``(name without suffix, parent_fid)`` pair only the row with the
    most recent suffix value is kept. A row without a suffix ranks below any
    suffixed row. When two rows tie, the one seen first is kept.
    Example of a suffixed file name: wallhaven-748282.jpg:1601479903.6

    Returns a list of the surviving rows (order is not specified).
    """
    newest = {}
    for row in file_rows:
        name, ts_suffix = remove_hidden_suffix(row.name)
        key = (name, row.parent_fid)
        # Rank only on the timestamp. Comparing (ts, row) tuples falls through
        # to comparing the row objects themselves whenever the timestamps tie
        # (or are both absent), which gives an arbitrary winner.
        rank = ts_suffix if ts_suffix is not None else float('-inf')
        current = newest.get(key)
        if current is None or rank > current[0]:
            newest[key] = (rank, row)
    return [kept_row for _, kept_row in newest.values()]


def get_now_dt_msk():
    """Return the current moment as a timezone-aware datetime at UTC+03:00.

    ``datetime`` objects are immutable and ``replace()`` returns a new object,
    so calling it and discarding the result (as was done before) left the
    returned value naive. Asking ``datetime.now`` for the time directly in the
    fixed +180 minute zone yields an aware value that is correct regardless of
    the local timezone of the host.
    """
    return datetime.now(FixedOffsetTimezone(180))


def pp_dict(data):
    for k, v in sorted(dict(data).items()):
        print "\t", k, type(v), v


def restore_root_folder(row):
    """Choose the root area a hidden resource is restored to.

    Returns one of ``'/attach'``, ``'/trash'`` or ``'/disk'``. The rules are
    checked in this order:

    1. ``short_url`` starts with ``https://yadi.sk/mail`` -> ``/attach``
    2. ``path`` is under ``/hidden/yaruarchive`` or ``/hidden/YaFotki`` -> ``/attach``
    3. the resource lies directly in ``/hidden`` and its name contains
       ``_<10 digits>.<1-2 digits>:`` -> ``/attach``
    4. ``path_before_remove`` is set -> ``/trash``
    5. otherwise -> ``/disk``

    ``row`` must expose ``short_url``, ``path``, ``name`` and
    ``path_before_remove`` attributes.
    """
    if row.short_url and row.short_url.startswith('https://yadi.sk/mail'):
        return '/attach'
    if row.path.startswith(('/hidden/yaruarchive', '/hidden/YaFotki')):
        return '/attach'
    # Exactly two slashes means "/hidden/<name>", i.e. the parent is /hidden.
    is_parent_hidden = row.path.count('/') == 2
    # Raw string: "\d" and "\." are regex escapes, not string escapes.
    if is_parent_hidden and re.search(r'_\d{10}\.\d{1,2}:', row.name):
        return '/attach'
    if row.path_before_remove:
        return '/trash'
    return '/disk'


def get_parent(path):
    """Return everything before the last ``/`` of ``path``.

    A ``path`` without any ``/`` is returned unchanged.
    """
    last_slash = path.rfind('/')
    if last_slash < 0:
        return path
    return path[:last_slash]


def get_name(path):
    """Return the part of ``path`` after its last ``/``.

    Raises ``IndexError`` when ``path`` contains no ``/`` at all.
    """
    pieces = path.rsplit('/', 1)
    return pieces[1]


def change_area(path, new_root):
    """Replace the first component of an absolute ``path`` with ``new_root``.

    E.g. ``('/hidden/a/b', '/disk')`` -> ``'/disk/a/b'``. ``path`` must split
    into exactly three pieces on its first two slashes (empty prefix, old
    root, remainder); otherwise the unpacking raises ``ValueError``.
    """
    pieces = path.split('/', 2)
    _, _, remainder = pieces
    return "%s/%s" % (new_root, remainder)


def change_name(path, name):
    """Return ``path`` with its last component replaced by ``name``."""
    parent = path.rsplit('/', 1)[0]
    return "%s/%s" % (parent, name)


def change_row_fields(row, uid, parent_fid):
    """Build the insertable copy of a hidden ``row`` for the restore.

    The copy differs from the source row as follows:
    1. fresh ``fid`` and ``id`` (file_id) are generated;
    2. ``uid`` and ``parent_fid`` are set to the given values;
    3. ``date_created`` / ``date_modified`` are filled in when empty;
    4. ``date_hidden_data`` is cleared;
    5. the service suffix is stripped from ``name``;
    6. the computed ``path`` column is dropped.

    Returns a plain dict; ``row`` itself is not modified.
    """
    restored = dict(row)
    restored.update({
        'uid': uid,
        'parent_fid': parent_fid,
        'fid': uuid.uuid4(),
        'id': '\\x' + sha256(uuid.uuid4().hex).hexdigest(),
        'date_hidden_data': None,
        'name': remove_hidden_suffix(row.name)[0],
    })

    fallback_dt = get_now_dt_msk()
    for date_field in ('date_created', 'date_modified'):
        if not restored[date_field]:
            restored[date_field] = fallback_dt

    # `path` is computed by the SELECT and is not inserted back.
    del restored['path']
    return restored


def generate_new_folder_dict(uid, name, parent_fid):
    """Return the column dict for a brand-new, empty folder.

    The folder gets freshly generated ``fid`` / ``id`` values, a ``version``
    derived from the current time in microseconds, and the same timestamp for
    all three date columns.
    """
    created_at = get_now_dt_msk()
    folder_fid = uuid.uuid4()
    folder_id = '\\x' + sha256(uuid.uuid4().hex).hexdigest()
    version = int(time.time() * 1000000)

    folder = dict(uid=uid, fid=folder_fid, parent_fid=parent_fid, id=folder_id, name=name, version=version)
    folder.update(visible=True, public=False, blocked=False, published=False)
    folder.update(files_count=0, folders_count=0)
    folder.update(dict.fromkeys(('date_uploaded', 'date_created', 'date_modified'), created_at))
    return folder


def ensure_folders(uid, path):
    """Make sure the folder at ``path`` exists, creating missing ancestors.

    Recurses towards the root until an existing folder is found, then inserts
    the missing folders top-down. Returns ``(uid, path, fid)`` of the folder
    at ``path``. Raises ``ValueError`` for an empty path or ``'/'``, i.e. also
    when no existing ancestor is found before the root is reached.
    """
    if path == '/' or not path:
        raise ValueError()

    existing = DBProxy.get_folder(uid, path)
    if existing:
        return uid, path, existing['fid']

    name = get_name(path)
    parent_path = get_parent(path)
    parent_uid, resolved_parent_path, parent_fid = ensure_folders(uid, parent_path)
    assert parent_path == resolved_parent_path
    assert parent_uid == uid

    created = generate_new_folder_dict(uid, name, parent_fid)
    DBProxy.insert(uid, 'disk.folders', created)
    return uid, path, created['fid']


def generate_link_data(row, target_path):
    r'''Build the ``disk.link_data`` column dict for a restored public resource.

    ``row`` must provide ``symlink`` (formatted as ``<uid>:<symlink id>``),
    ``uid`` and ``id``; ``target_path`` is the path the resource was restored
    to. Raises ``ValueError`` if ``symlink`` contains no ``:``.

    Sample of an existing link_data record:
    id                    | 4eb084e0-a2ad-8982-6448-49dc354edd51
    uid                   | 638286708
    path                  | /3405b7fc796549dd8b8e07c7c1a9144d
    version               | 1602150623628812
    user_ip               | 2a02:6b8:b081:416::1:0
    public_uid            | 638286708
    target                | 638286708:/disk/Mountains.jpg
    type                  | dir
    parent                |
    date_created          | 2020-10-08 12:50:23.628649+03
    date_modified         | 2020-10-08 12:50:23.628649+03
    date_deleted          |
    file_id               | \x453151b18b263d65f1a2379ee1db5d816d3b58cb2251673a88e88e337cb6b1e1
    resource_id_uid       | 638286708
    '''
    symlink_uid, symlink_id = row['symlink'].split(':', 1)
    stamp = get_now_dt_msk()
    link_path = '/%s' % symlink_id
    owner_uid = row['uid']

    # The record id is derived deterministically from "<symlink uid>:<path>".
    link_id = uuid.UUID(hashlib.md5('%s:%s' % (symlink_uid, link_path)).hexdigest())
    version = int(time.time() * 1000000)

    link = {'id': link_id, 'uid': symlink_uid, 'path': link_path, 'version': version}
    link['user_ip'] = '127.0.0.1'
    link['public_uid'] = owner_uid
    link['target'] = '%s:%s' % (owner_uid, target_path)
    # NOTE(review): the type is always 'dir', for files too -- this matches
    # the sample record above; confirm it is intended.
    link['type'] = 'dir'
    link['date_created'] = stamp
    link['date_modified'] = stamp
    link['file_id'] = row['id']
    link['resource_id_uid'] = owner_uid
    return link


def recover_from_hidden(uid):
    """Restore the resources of a deleted user from /hidden.

    Steps, as implemented below:
    1. Return early (printing a message) if ``user_index.deleted`` is not set.
    2. Collect every folder and file row found under ``/hidden``.
    3. Keep the rows whose ``version`` timestamp lies within 7200 seconds of
       ``user_index.deleted``; among files with the same suffix-less name in
       one folder keep a single row (see resolve_files_suffix_conflict).
    4. Inside one transaction insert a re-keyed copy of every kept row into
       /disk, /attach or /trash (see restore_root_folder), creating missing
       parent folders, and insert a ``disk.link_data`` row for resources that
       have ``public_hash``, ``symlink`` and ``short_url``. Resources whose
       destination path already exists are skipped.
    5. Still inside the transaction call ``DBProxy.unset_delete`` and
       ``DBProxy.force_snapshot``.

    Only inserts are issued for resources; the rows under /hidden are not
    modified by this function. Progress and a summary of counters are printed
    to stdout.
    """
    folder_rows = []
    file_rows = []
    # NOTE(review): fetchone() presumably yields None when the user has no
    # user_index row, in which case `.deleted` below would raise -- confirm.
    user_index = DBProxy.get_user_index(uid)

    msg_prefix = 'UID:%s' % uid
    if not user_index.deleted:
        print '%s user_index.deleted not set' % msg_prefix
        return

    # collect every file and folder under /hidden
    for row, is_folder in iterate_over_tree(uid, '/hidden'):
        assert row.path.startswith('/hidden/')
        if is_folder:
            folder_rows.append(row)
        else:
            file_rows.append(row)

    # keep only the resources that were deleted within +/- N hours of the user's deletion date (user_index.deleted)
    # the resource deletion time is taken from `version`, because date_hidden_data may have been shifted by half a
    # year (Yarovaya-law handling), and folders have no mtime and ctime
    user_deleted_ts = time.mktime(user_index.deleted.timetuple())
    # `version` is treated as microseconds since the epoch (cf. int(time.time() * 1000000) used for new rows in
    # this file); the window is 7200 seconds on either side.
    deletion_filter = lambda r: abs(r.version / 1000000 - user_deleted_ts) < 7200

    file_rows = [r for r in file_rows if deletion_filter(r)]
    file_rows = resolve_files_suffix_conflict(file_rows)
    folder_rows = [r for r in folder_rows if deletion_filter(r)]
    # sorting by path puts a parent folder before the folders nested in it
    folder_rows.sort(key=lambda r: r.path)

    # file_rows and folder_rows now hold the raw DB rows that have to be restored
    RECOVER_UID = uid  # for debugging the data can be restored to a different user

    # destination folder path -> fid, so a parent is resolved only once
    path_fid_map = {}
    folder_gen = ((True, f) for f in folder_rows)
    file_gen = ((False, f) for f in file_rows)

    counter = collections.Counter()
    # move the files and folders within a single transaction
    with DBProxy.get_session(RECOVER_UID).begin():
        # folders are processed before files
        for is_folder, row in itertools.chain(folder_gen, file_gen):
            if is_folder:
                check_exists_func = DBProxy.get_folder
                table = 'disk.folders'
            else:
                check_exists_func = DBProxy.get_file
                table = 'disk.files'

            # destination = same relative path, new root area, name without the service suffix
            new_name = remove_hidden_suffix(row.name)[0]
            restore_to_area = restore_root_folder(row)
            dst_path = change_area(row.path, restore_to_area)
            dst_path = change_name(dst_path, new_name)
            # dst_path = '/disk/recover_test/%s%s' % (uid, dst_path)  # TODO REMOVE

            if check_exists_func(RECOVER_UID, dst_path) is not None:
                print '%s Already exists %s' % (msg_prefix, dst_path.encode('utf-8'))
                counter['already_exists'] += 1
                continue

            # resolve parent_fid (creating the missing parent folders if needed)
            dst_parent_path = get_parent(dst_path)
            if dst_parent_path in path_fid_map:
                parent_fid = path_fid_map[dst_parent_path]
            else:
                _, _, parent_fid = ensure_folders(RECOVER_UID, dst_parent_path)
                path_fid_map[dst_parent_path] = parent_fid

            print "%s Restore resource: %s:%s -> %s:%s" % (msg_prefix, uid, row.path.encode('utf-8'), RECOVER_UID, dst_path.encode('utf-8'))
            counter['restored_resources'] += 1
            insert_dict = change_row_fields(row, RECOVER_UID, parent_fid)
            DBProxy.insert(RECOVER_UID, table, insert_dict)
            # if the resource is public, restore its link_data as well
            if insert_dict['public_hash'] and insert_dict['symlink'] and insert_dict['short_url']:
                link_data_row = generate_link_data(insert_dict, dst_path)
                DBProxy.insert(RECOVER_UID, 'disk.link_data', link_data_row)
                print '%s Restore public_url: %s' % (msg_prefix, insert_dict['short_url'])
                counter['restored_link_data'] += 1


            # remember the new folder's fid for the resources restored into it
            if is_folder:
                path_fid_map[dst_path] = insert_dict['fid']

        DBProxy.unset_delete(RECOVER_UID)
        DBProxy.force_snapshot(RECOVER_UID)

    # totals of the rows that passed the filters (restored or skipped)
    counter['files'] = len(file_rows)
    counter['folders'] = len(folder_rows)
    print "%s %s" % (msg_prefix, sorted(counter.items()))


if __name__ == '__main__':
    # Command line: a single positional argument, the uid to restore.
    cli = ArgumentParser()
    cli.add_argument('uid')
    cli_args = cli.parse_args()
    recover_from_hidden(cli_args.uid)
