# -*- coding: utf-8 -*-

"""
Скрипт восстановления файлов по логам удаления в корзину.

Принимает на вход uid в виде аргумента (этот uid будет подставляться в insert-ы) и логи в Stdin в формате tskv.
Необходимые поля в логах - timestamp_raw, message.

Пример входных данных - https://yql.yandex-team.ru/Operations/XIjo22im9ax0-Uf89JHe-m0KGI691EJOO87gst16cmQ=
timestamp_raw нужен для разрешения конфликтов (несколько вставок по одному пути)
Файлы и папки восстанавливаются в папку RESTORED в корне диска

Usage example:
cat /home/trash_append_10833005.tskv | python restore_files_by_requests_log.py 10833005  > inserts_10833005.sql
"""

import datetime as dt
import os
import re
import sys
import uuid

from collections import deque
from hashlib import sha256
from itertools import izip

import sqlparse


# Separator used by Path to split a normalized path into its components.
PATH_DELIMETER = '/'

# Matches the "message=" field of a query log record: the connection parameters,
# the double-quoted SQL text, then the number of rows and the duration.
# Captured groups: pg_user, pg_host, pg_rs (leading part of the host name),
# query_sql, resp_rows, resp_duration. The password is matched but not captured.
# Because of the trailing "$" only lines that end with "<rows> <duration>" match.
get_sql = re.compile(r'message=dbname=diskdb user=(?P<pg_user>\w+) password=\w+ host=(?P<pg_host>(?P<pg_rs>\w+\d+)[^ ]+) port=\d+ connect_timeout=\d+ "(?P<query_sql>.+)" (?P<resp_rows>\d+) (?P<resp_duration>\d+\.\d+)$')
# Extracts the value of the "timestamp_raw" field (group "record_timestamp");
# the fractional part after the comma is optional.
get_record_timestamp_re = re.compile(r'timestamp_raw=(?P<record_timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(,\d+)?)\t?')  # timestamp_raw=2019-02-23 11:53:15,538
# Detects INSERT statements into disk.files, disk.folders or disk.storage_files;
# group 1 is the table name without the schema.
insert_into = re.compile(r'INSERT INTO disk\.(files|folders|storage_files) ')

def parse_pg_query_log_line(line):
    """Extract the named fields of a query log record.

    :param str line: a single log line
    :return: dict with the named groups of ``get_sql`` or None if the line does not match
    """
    match = get_sql.search(line)
    return match.groupdict() if match else None


class Path(object):
    """Slash-separated path that can be compared, hashed and used as a dict key.

    Comparison and hashing use the normalized form of the path
    (``os.path.normpath``), while ``str()`` returns the path exactly as given.
    """

    def __init__(self, raw_path):
        """
        :param str raw_path: path as it appears in the source data
        """
        self._raw_path = raw_path
        self._path = os.path.normpath(self._raw_path)
        # Non-empty components of the normalized path; kept as a list because
        # len() is taken of it.
        self._path_partitions = [part for part in self._path.split(PATH_DELIMETER) if part]
        self._parent_path, self._node_name = os.path.split(self._path)

    @property
    def path(self):
        """Normalized path string."""
        return self._path

    @property
    def parent_path(self):
        """Parent location as a new instance of the same class."""
        return self.__class__(self._parent_path)

    @property
    def path_partitions(self):
        """List of the non-empty components of the normalized path."""
        return self._path_partitions

    @property
    def node_name(self):
        """Last component of the normalized path."""
        return self._node_name

    def __eq__(self, other):
        if not isinstance(other, Path):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on foreign types.
            return NotImplemented
        return self.path == other.path

    def __ne__(self, other):
        # Python 2 does not derive "!=" from "==", so it has to be defined
        # explicitly to keep both operators consistent.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        return hash(self.path)

    def __len__(self):
        """Number of path components (the depth of the path)."""
        return len(self._path_partitions)

    def __str__(self):
        return self._raw_path

    def __repr__(self):
        return str(self)

    def is_subpath_of(self, other):
        """
        :param Path other:
        :return: True if this path is located strictly below the other path, False otherwise
        """
        # A strict descendant is always deeper than its ancestor. This also
        # rejects equal paths and paths of the same depth that differ only
        # outside of their components (e.g. "a/b" and "/a/b").
        if len(self) <= len(other):
            return False
        return all(
            mine == theirs
            for mine, theirs in izip(self.path_partitions, other.path_partitions)
        )

    def is_parent_for(self, other):
        """
        :param Path other:
        :return: True if the other path is located strictly below this path, False otherwise
        """
        return other.is_subpath_of(self)


class PathNode(object):
    """Node of the restored tree: one file or folder record.

    ``data`` is an indexable record; index 0 holds the table name
    (``'folders'`` marks a folder) and index 5 holds the record timestamp.
    Every node gets a freshly generated uuid.
    """

    def __init__(self, path, data):
        """
        :param path: location of the node; must provide ``is_parent_for`` and be hashable
        :param data: record the node was built from
        """
        self._uuid = uuid.uuid4()
        self._parent = None
        self._childs = {}
        self._path = path
        self._data = data

    @property
    def path(self):
        """Location of the node as passed to the constructor."""
        return self._path

    @property
    def data(self):
        """Record the node was built from."""
        return self._data

    @property
    def timestamp(self):
        """Timestamp of the record (element 5 of ``data``)."""
        return self._data[5]

    @property
    def uuid(self):
        """Uuid generated for this node at construction time."""
        return self._uuid

    @property
    def parent(self):
        """Parent node.

        :raises RuntimeError: if the node has not been attached to a parent yet
        """
        if self._parent is not None:
            return self._parent
        raise RuntimeError(u'Parent for %s is not defined' % self)

    @property
    def parent_uuid(self):
        """Uuid of the parent node."""
        return self.parent.uuid

    @property
    def childs(self):
        """Iterator over the attached child nodes."""
        return self._childs.itervalues()

    @property
    def is_folder(self):
        """True if the record belongs to the ``folders`` table."""
        return 'folders' == self._data[0]

    def __str__(self):
        return u'%s[%s]' % (self.__class__.__name__, self._path)

    def add_child(self, node, node_ts=None):
        """Attach ``node`` to this folder.

        If a child with the same path is already attached and ``node_ts`` is
        given and is smaller than the timestamp of that child, the attached
        child is kept and returned. Otherwise ``node`` takes the place for its
        path.

        :param PathNode node: node to attach
        :param node_ts: timestamp used to resolve a conflict with an attached child
        :return PathNode: the node that is attached for this path after the call
        :raises TypeError: if this node is not a folder
        :raises RuntimeError: if the path of this node is not a parent of the path of ``node``
        """
        if not self.is_folder:
            raise TypeError(u'Impossible add child for non folder node %s' % self)

        child_path = node.path
        if not self.path.is_parent_for(child_path):
            raise RuntimeError(u'%s is not parent for %s' % (self, node))

        if node_ts is not None:
            attached = self._childs.get(child_path)
            if attached is not None and node_ts < attached.timestamp:
                return attached  # the attached node is newer, keep it

        node._parent = self
        self._childs[child_path] = node
        return node


class RootNode(PathNode):
    """Top node of the tree.

    It has no parent node object; only the uuid of its parent is known and
    is supplied explicitly.
    """

    def __init__(self, path, data, parent_uuid):
        """
        :param path: location of the root node
        :param data: record the root node was built from
        :param parent_uuid: uuid reported as the parent of the root node
        """
        self._parent_uuid = parent_uuid
        super(RootNode, self).__init__(path, data)

    @property
    def parent_uuid(self):
        """Uuid given to the constructor."""
        return self._parent_uuid

    @property
    def parent(self):
        """Always raises: the root node has no parent node object.

        :raises RuntimeError: on every access
        """
        raise RuntimeError(u'Parent for %s is undefined' % self.__class__.__name__)


def tree_construction(root_node, queue):
    """Attach every queued record to the tree growing from ``root_node``.

    Records are taken from the left end of ``queue``. A record whose parent
    path is already present in the tree is attached to that parent. A record
    located below the root whose parent has not been seen yet is put back to
    the right end of the queue and retried later.

    :param RootNode root_node: root of the tree, modified in place
    :param deque queue: records to attach (7-element tuples with the timestamp
        at index 5 and the original path at index 6); emptied by this function
    :return: None
    :raises RuntimeError: if a record is not located below the root path, or if
        the remaining records cannot be attached because their parents never appear
    """
    path_to_node_map = {root_node.path: root_node}
    steps_without_progress = 0
    while queue:
        node_data = queue.popleft()
        _, _, _, _, _, timestamp, raw_origin_path = node_data

        path = Path(raw_origin_path)
        parent_node = path_to_node_map.get(path.parent_path)
        if parent_node is not None:
            # add_child returns the already attached node when it is newer
            # than this record, so the map always refers to the winner.
            # NOTE(review): when this record replaces an attached folder node,
            # children attached to the replaced node are no longer reachable
            # from the root - confirm this is the intended conflict resolution.
            path_to_node_map[path] = parent_node.add_child(PathNode(path, node_data), timestamp)
            steps_without_progress = 0
        elif path.is_subpath_of(root_node.path):
            # Parent is not in the tree yet; retry after the other records.
            queue.append(node_data)
            steps_without_progress += 1
        else:
            raise RuntimeError(u'%s is not parent for %s' % (root_node.path, path))

        # Every record left in the queue has been retried without attaching
        # anything: their parents are missing from the input.
        if steps_without_progress > len(queue):
            raise RuntimeError(u'Infinite loop detected. %s items left unprocessed' % len(queue))


def print_hierarchy(node, _shift=0):
    print '    ' * _shift, "-> %s" % node.path
    for child in node.childs:
        print_hierarchy(child, _shift=_shift + 1)


def uuid_to_pg_uuid(uuid):
    """Render a uuid as a quoted SQL literal with a ``::uuid`` cast.

    Example of the result: 'b97fd1e9-4bae-5fb2-a065-965432870ce5'::uuid

    :param uuid: value to render
    :return str: SQL literal
    """
    template = "'%s'::uuid"
    return template % uuid


def uuid_to_pg_file_id(uuid):
    """Derive a file id SQL literal from a uuid.

    The id is the SHA-256 hex digest of the 32-character hex form of the uuid,
    rendered as a quoted literal with a backslash-x prefix.

    :param uuid: object exposing a ``hex`` string, e.g. ``uuid.UUID``
    :return str: SQL literal
    """
    # Example of the result:
    # '\xa9eb54e179b9f76be929b9484f66b9547fb193dba133695eeeefac151ae82f76'
    # The hex form is pure ASCII; encoding it explicitly gives the hash
    # function bytes on every Python version without changing the digest.
    digest = sha256(uuid.hex.encode('ascii')).hexdigest()
    return "'\\x%s'" % digest


def node_to_query(uid, node):
    """Build the INSERT statement that restores a single node.

    The column/value pairs parsed from the log (element 3 of ``node.data``)
    are reused, with the following columns overridden: ``uid``, ``fid``,
    ``parent_fid``, ``id``, ``short_url``, ``public_hash``, ``symlink``,
    ``public``, ``published``, ``date_removed`` and ``path_before_remove``.
    Values are inserted into the statement as they are, i.e. they must
    already be SQL literals.

    :param uid: uid to put into the ``uid`` column
    :param PathNode node: node to restore; its record is not modified
    :return str: query (without a trailing semicolon)
    """
    # Work on a copy so that the parsed record stored in the node keeps
    # the values read from the log.
    kv_map = dict(node.data[3])
    kv_map['uid'] = str(uid)
    # new fid
    kv_map['fid'] = uuid_to_pg_uuid(node.uuid)
    # point to the (new) fid of the parent node
    kv_map['parent_fid'] = uuid_to_pg_uuid(node.parent_uuid)
    # generate a new file_id
    kv_map['id'] = uuid_to_pg_file_id(node.uuid)
    kv_map['short_url'] = 'NULL'
    kv_map['public_hash'] = 'NULL'
    kv_map['symlink'] = 'NULL'
    kv_map['public'] = 'false'
    kv_map['published'] = 'false'

    kv_map['date_removed'] = 'NULL'
    kv_map['path_before_remove'] = 'NULL'

    # Take the items once so that columns and values are in the same order.
    kv_items = kv_map.items()
    fields = ','.join([column for column, _ in kv_items])
    values = ','.join([value for _, value in kv_items])

    return 'INSERT INTO disk.%s (%s) VALUES (%s)' % (
        node.data[0].encode('utf-8'),
        fields.encode('utf-8'),
        values.encode('utf-8')
    )


def generate_insert_sql(uid, root_node):
    """Lazily produce INSERT statements for the whole tree.

    A node is always yielded before any of its children.

    :param str uid: uid of the user the inserts are generated for
    :param PathNode root_node: node to start from
    :return: generator of query strings
    """
    pending = [root_node]

    while pending:
        current = pending.pop()
        yield node_to_query(uid, current)
        pending.extend(current.childs)


def query_create_target_folder_for_restore(uid, folder_uuid):
    """Build the INSERT statement that creates the ``RESTORED`` folder.

    The parent of the folder is selected by the statement itself: it is the
    folder of the given uid whose path is ``/disk``. All three dates are set
    to the current local time.

    :param uid: uid of the owner of the folder
    :param folder_uuid: uuid used to derive the ``fid`` and ``id`` of the folder
    :return str: query on a single line
    """
    params = {
        'uid': uid,
        'id': uuid_to_pg_file_id(folder_uuid),
        'current_iso_datetime': dt.datetime.now().isoformat(),
        'fid': uuid_to_pg_uuid(folder_uuid)
    }

    template = '''
        INSERT INTO disk.folders (
            uid,
            visible,
            id,
            blocked,
            version,
            fid,
            public,
            name,
            parent_fid,
            date_modified,
            date_uploaded,
            date_created,
            published
        ) VALUES (
            %(uid)s,
            true,
            %(id)s,
            false,
            1550758270929101,
            %(fid)s,
            false,
            'RESTORED',
            (
                select 
                    df.fid
                from (
                    select 
                        fid, 
                        (SELECT path FROM code.fid_to_path(fid, uid)) as path 
                    from disk.folders 
                    where uid = %(uid)s and name = 'disk'
                ) as df
                where path = '/disk'
            ),
            '%(current_iso_datetime)s'::timestamp,
            '%(current_iso_datetime)s'::timestamp,
            '%(current_iso_datetime)s'::timestamp,
            false
        );
    '''
    rendered = template % params
    # The output format is one statement per line, so collapse the template.
    stripped_rows = [row.strip() for row in rendered.splitlines()]
    return ' '.join(stripped_rows)


if __name__ == '__main__':

    storage_files_inserts = []
    data = deque()
    for line in list(sys.stdin):
        line = line.strip()
        result = parse_pg_query_log_line(line)
        if not result:
            continue
        query = result['query_sql']
        result = insert_into.search(query)
        if not result:
            continue
        table_name = result.group(1)

        timestamp = get_record_timestamp_re.search(line).group(1)

        if table_name == 'storage_files':
            storage_files_inserts.append(query.replace('\\\\', '\\'))
        else:
            stmt = sqlparse.parse(query)[0]
            meaningful_tokens = filter(lambda t: not(t.is_whitespace), stmt.tokens)
            table_definition = meaningful_tokens[3]
            values_list = list(meaningful_tokens[5][1])
            keys = table_definition.value.split(" ", 1)[-1].strip('()').split(',')
            values = [rv.value.replace('\\\\', '\\') for rv in values_list if rv.value not in ',()']
            kv_map = dict(zip(keys, values))

            name = kv_map['name'].strip('\'"')
            origin_path = kv_map['path_before_remove'][1:-1]  # remove quotes

            data.append((table_name, kv_map['parent_fid'], name, kv_map, query, timestamp, origin_path))


    root_folder_data = data.popleft()
    table_name, _, _, _, _, _, raw_origin_path = root_folder_data
    assert table_name == 'folders'

    restore_folder_uuid = uuid.uuid4()

    root_node = RootNode(Path(raw_origin_path), root_folder_data, parent_uuid=restore_folder_uuid)
    tree_construction(root_node, data)


    assert len(sys.argv) == 2
    uid = sys.argv[1]

    print query_create_target_folder_for_restore(uid, restore_folder_uuid)

    for query in storage_files_inserts:
        print query + ';'

    for query in generate_insert_sql(uid, root_node):
        print query + ';'

    #print_hierarchy(root_node)
