# -*- coding: utf-8 -*-
"""

MPFS
CORE

Сервис Поиска

"""

import socket
import traceback
import urllib2
import urllib
from lxml import etree

import mpfs.engine.process
import mpfs.common.errors as errors

from mpfs.engine.http import client
from mpfs.common.util import from_json
from mpfs.common.util.mappers import EasyMapper
from mpfs.core.address import Address
from mpfs.core.services.common_service import StorageService, Service
from mpfs.core.services.mail_service import SimpleFilter, GapFilter
from mpfs.metastorage.mongo.collections.filesystem import is_reindexed_for_quick_move

# Service log named "search"; assigned as the `log` attribute of the service classes in this module.
service_log = mpfs.engine.process.get_service_log("search")
# Error log; used in this module to record tracebacks swallowed while parsing search responses.
error_log = mpfs.engine.process.get_error_log()


class SearchService(StorageService):
    """Client for the XML search backend.

    Builds a request URL from ``self.base_url``, fetches it and converts the
    ``<folder>`` / ``<file>`` elements of the response into listing dicts.
    """

    name = 'search'
    api_error = errors.SearchError
    log = service_log

    def __init__(self, *args, **kwargs):
        StorageService.__init__(self, *args, **kwargs)

    def listing(self, resource, request, sort='', force=False):
        """Run a search and return the found folders and files.

        :param resource: resource whose ``address`` supplies the storage name
            and uid substituted into ``self.base_url``.
        :param request: query string substituted into ``self.base_url`` as is.
        :param sort: accepted for interface compatibility; not used here.
        :param force: accepted for interface compatibility; not used here.
        :return: list of dicts, folders first, then files. Empty when the
            backend returned nothing or reported a zero ``count``.
        """
        result = []
        address = resource.address

        def fin(head, tag):
            """Convert every ``tag`` child of ``head`` that has an id into a dict."""
            res = []
            elements = head.findall(tag)
            try:
                for element in elements:
                    if not element.get('id'):
                        continue
                    element_dict = self.xml2dict(element)
                    element_dict['id'] = '/' + '/'.join(filter(None, address.storage_name.split('/'))) + '/' + element_dict.get('id', '')

                    if address.storage_name == 'mail':
                        # Mail ids are rebuilt from the full path as
                        # "/mail<parent path>:<last path component>".
                        id_split = element_dict['meta']['fullpath'].split('/')
                        element_dict['id'] = '/mail' + '/'.join(id_split[:-1]) + ':' + id_split[-1]

                    if element_dict.get('name') is None:
                        element_dict['name'] = element_dict['meta']['title']
                    if tag == 'file':
                        element_dict['source'] = self.name
                    element_dict['path'] = element_dict['meta']['fullpath']
                    if element_dict['meta'].get('preview'):
                        element_dict['meta']['sizes'] = [{'url': element_dict['meta'].get('preview'), 'name': 'S'}]
                    element_dict['mtime'] = element_dict['ctime']
                    res.append(element_dict)
            except Exception:
                # Best effort: a malformed element stops processing of this tag;
                # whatever was converted before it is still returned.
                error_log.info(traceback.format_exc())
            return res

        #request = "attachname:*%s*+OR+attachname_keyword:*%s*" % (request, request)

        _url = self.base_url % (address.storage_name, address.uid, request)
        response = self.open_url(_url, cookie={self.cookie_name: self.cookie_data})

        if response:
            parsed = etree.fromstring(response)
            if parsed.tag == 'resources' and 'count' in parsed.attrib and int(parsed.attrib['count']):
                for t in ('folder', 'file'):
                    result.extend(fin(parsed, t))
        return result



class TimeFilter(GapFilter):
    """Filter bound to the ``mtime`` request field.

    NOTE(review): presumably a range filter, judging by the ``GapFilter``
    base class - confirm against ``mail_service``.
    """

    req_field = 'mtime'


class MediaTypeFilter(SimpleFilter):
    """Filter bound to the ``mediatype`` request field."""

    req_field = 'mediatype'


class VisibleFilter(SimpleFilter):
    """Filter bound to the ``visible`` request field."""

    req_field = 'visible'


class ExtensionFilter(SimpleFilter):
    """Filter bound to the ``extension`` request field."""

    req_field = 'extension'

    def __init__(self, val):
        # Stores the raw value only.
        # NOTE(review): the base-class initializer is not called here -
        # confirm that skipping SimpleFilter.__init__ is intentional.
        self.val = val

class SearchResultItem(object):
    """Interface of a single item returned by the search engine."""

    # Where the query string was found.
    scope = None

    # Resource in which the query string was found.
    resource = None

    def __init__(self, scope=None, resource=None):
        self.scope = scope
        self.resource = resource

    def __repr__(self):
        # Render every instance attribute as "name=<repr of value>".
        rendered_attrs = []
        for attr_name, attr_value in self.__dict__.iteritems():
            rendered_attrs.append('%s=%s' % (attr_name, repr(attr_value)))
        class_name = self.__class__.__name__
        return u'%s(%s)' % (class_name, ', '.join(rendered_attrs))


class SearchResults(list):
    """List of search hits with a loader for search backend responses.

    ``query`` holds the ``request`` value of the last loaded response.
    The item type is validated only on item assignment (``results[i] = x``).
    """
    item_types = (SearchResultItem,)
    query = None

    def __setitem__(self, key, value):
        """Assign ``value`` at ``key``.

        :raises TypeError: if ``value`` is not an instance of ``item_types``.
        """
        if not isinstance(value, self.item_types):
            # Join class names: joining the class objects themselves would
            # raise before this message could be built.
            raise TypeError('%s should be instance of one of the following classes: %s.' % (
                self.__class__.__name__, ', '.join(cls.__name__ for cls in self.item_types)))
        super(SearchResults, self).__setitem__(key, value)

    def __repr__(self):
        return u'%s(%s)' % (self.__class__.__name__, super(SearchResults, self).__repr__())

    def load_searchresult(self, uid, result_dict, count_lost_results):
        """Fill the list from a parsed search response.

        Walks ``response.results[].groups[].documents[].properties`` of
        ``result_dict``, builds one address per distinct document, loads the
        resources in a batch and appends one entry per loaded resource.

        :param uid: owner uid used to build addresses and load resources.
        :param result_dict: parsed search response (dict).
        :param count_lost_results: when true, resources are loaded with
            ``get_resources_with_dummies`` using ``None`` dummies.
        """
        uniq_addresses = set()
        addresses = []
        id_scope_map = {}
        self.query = result_dict.get('request')
        results = result_dict.get('response', {}).get('results', [])
        for item in results:
            for group in item.get('groups', []):
                for document in group.get('documents', []):
                    props = document.get('properties')
                    if not props:
                        continue
                    scope = props.get('scope')
                    res_id = props.get('key')
                    # ugly, but what else we can do when getting resources in batch?
                    id_scope_map[res_id] = scope
                    res_type = props.get('type')
                    res_address = Address.Make(uid, res_id, type=res_type)

                    if res_address.id not in uniq_addresses:
                        addresses.append(res_address)
                        # add(), not update(): update() with a string would
                        # insert its individual characters into the set.
                        uniq_addresses.add(res_address.id)
        if addresses:
            # Imported at call time rather than at module import.
            from mpfs.core.factory import get_resources, get_resources_with_dummies
            if count_lost_results:
                dummies = [None] * len(addresses)
                resources = get_resources_with_dummies(uid, addresses, dummies)
            else:
                resources = get_resources(uid, addresses)
            for res in resources:
                if not res:
                    # Falsy entries are appended as they are.
                    self.append(res)
                else:
                    # don't know what for this needed, so copied from
                    # https://github.yandex-team.ru/MPFS/MPFS/blob/d3fd852390205c569624a2333faff2719a10fc70/common/lib/mpfs/core/services/search_service.py#L197
                    if res.type == 'file':
                        res.print_to_listing_log()
                    self.append(SearchResultItem(scope=id_scope_map.get(res.id), resource=res))


class DiskSearch(SearchService):
    """Client for the Disk search backend (JSON responses)."""

    name = 'search_disk'

    def __init__(self, *args, **kwargs):
        SearchService.__init__(self, *args, **kwargs)
        self.available_filters = {
            'mtime'     : TimeFilter,
            'mediatype' : MediaTypeFilter,
            'visible'   : VisibleFilter,
            'extension' : ExtensionFilter,
        }

        self.available_bounds = {
            'amount' : 'numdoc',
            'sort'   : 'how',
            'offset' : 'p',
            'order'  : 'order',
        }

        self.available_sort_fields = {
            'name'  : 'name',
            'mtime' : 'mtime'
        }

    def open_url(self, url):
        """Fetch ``url`` and return the response body.

        :return: response body, or ``None`` when the backend answers 404.
        :raises errors.RequestsLimitExceeded429: on HTTP 429.
        :raises errors.SearchError: (``self.api_error``) on any other failure.
        """
        try:
            response = client.open_url(url, self.log, timeout=self.timeout, rais=True, api_err=None)
        except urllib2.HTTPError as e:
            if e.code == 404:  # Not Found
                return None
            if e.code == 429:  # Too Many Requests
                raise errors.RequestsLimitExceeded429()
            raise self.api_error(e)
        except Exception as e:
            raise self.api_error(e)
        self.log.debug(response)
        return response

    def search(self, resource, query, force=False, count_lost_results=False, search_folders=None):
        """
        Get list of search results.

        :param resource: MPFS resource where to search (usually folder).
        :param query: Query string; it is UTF-8 encoded and URL-quoted here.
        :param force: when true, ``force=1`` is added to the request.
        :param count_lost_results: passed through to
            ``SearchResults.load_searchresult``.
        :param search_folders: optional iterable of folders, each appended to
            the URL as an ``aux_folder`` parameter (not URL-quoted here).
        :return: SearchResults
        """
        super(DiskSearch, self).folder_content(resource)
        address = resource.address

        qs = [urllib2.quote(query.encode('utf-8'))]

        # Spaces in the path are escaped with a backslash.
        path = address.path.replace(' ', '\\ ')
        # Hack that allows searching in /photounlim (strictly speaking, key
        # should not be passed at all when aux_folders are passed).
        # https://st.yandex-team.ru/CHEMODAN-42746
        if path != '/disk':
            qs.append('key=%s/*' % path)

        # Each requested filter renders itself via get(); empty renderings
        # are skipped and "field:value" is rewritten to "field=value".
        for filter_name, filter_value in self.request_processing_fields['filters'].iteritems():
            rendered = self.available_filters[filter_name](filter_value).get()
            if rendered:
                qs.append(rendered.replace(':', '='))

        bounds = self.request_processing_fields['bounds']
        if not bounds.get('order', 1):
            qs.append('asc=1')

        page, numdoc = bounds.get('p', 0), bounds.get('numdoc')

        if page and numdoc:
            # 'p' arrives as an item offset; the backend expects a page number.
            qs.append('p=%s' % (page // numdoc))
        if numdoc:
            qs.append('numdoc=%s' % numdoc)
        if force:
            qs.append('force=1')

        if is_reindexed_for_quick_move(resource.uid):
            qs.append('fast-moved')

        text = '&'.join(qs)
        _url = self.base_url % (address.uid, text, 'id,key')

        if search_folders:
            for search_folder in search_folders:
                _url += '&aux_folder=%s' % search_folder

        response = self.open_url(_url)

        ret = SearchResults()
        if response:
            parsed = from_json(response)
            ret.load_searchresult(address.uid, parsed, count_lost_results)
        return ret

    def geo_search(self, uid, start_date, end_date, latitude=None, longitude=None, distance=None,
                   count_lost_results=False):
        """Search within a time range, optionally limited to a geo circle.

        ``latitude``, ``longitude`` and ``distance`` must be given all
        together or not at all.

        :raises Exception: if only some of the three geo parameters are given.
        :return: SearchResults
        """
        params = 'uid=%s&time_start=%s&time_end=%s' % (uid, start_date, end_date)

        geo_args = (latitude, longitude, distance)
        if all(arg is not None for arg in geo_args):
            params += '&latitude=%s&longitude=%s&max_distance=%s' % geo_args
        elif any(arg is not None for arg in geo_args):
            raise Exception('Latiude, longitude, and distance should be specified all or no one')

        _url = self.host + self.geo_search_path % params

        response = self.open_url(_url)

        results = SearchResults()
        if response:
            parsed = from_json(response)
            results.load_searchresult(uid, parsed, count_lost_results)
        return results

    def listing(self, resource, request, force=False):
        """Search and return the result in listing form.

        :return: dict with ``search_result`` (list of resources) and
            ``new_request`` (query reported by the backend), or an empty dict
            when nothing was found.
        """
        sr = self.search(resource, request, force=force)
        if sr:
            return {
                'search_result': [r.resource for r in sr],
                'new_request': sr.query,
            }
        return {}

    def get_all_documents_for_user(self, uid):
        """
        Special handle for debugging and tests.

        Returns the ids of all documents of the user.
        """
        response = self.open_url(self.admin_url % uid)
        if not response:
            return []
        parsed = from_json(response)
        return [item['id'] for item in parsed['hitsArray']]

    def _clusterizator_request(self, method, uid, params):
        """Call clusterizator ``method`` with ``params`` and return parsed JSON.

        Appends the ``fast-moved`` flag when the user is reindexed for quick move.
        """
        url = self.clusterizator_url % (method, urllib.urlencode(params))
        if is_reindexed_for_quick_move(uid):
            url += '&fast-moved'

        response = self.open_url(url)
        return from_json(response)

    def list_periods_counts(self, uid, version):
        """
        Get the list of periods with resource counts for Photoslice.
        """
        return self._clusterizator_request('list-periods', uid, {
            'uid': uid,
            'version': version,
        })

    def list_periods_ids(self, uid, version):
        """
        Get the list of periods with resource ids for Photoslice.
        """
        return self._clusterizator_request('list-resources-by-periods', uid, {
            'uid': uid,
            'version': version,
        })

    def list_resources(self, uid, version, amount, offset):
        """
        Get the list of resources for Photoslice.
        """
        return self._clusterizator_request('list-resources', uid, {
            'uid': uid,
            'version': version,
            'offset': offset,
            'amount': amount,
        })

    def delete(self, uid):
        """Delete the search data of user `uid`.

        Used in tests only. Does nothing when ``self.delete_url`` is not set.
        """
        if not self.delete_url:
            return

        url = self.delete_url % {'uid': uid}
        self.open_url(url)


class SearchDB(Service):
    """
    Class for using search as a database for MPFS.

    Because of architectural problems in MPFS we:
        * cannot obtain some data - it is absent from the MPFS DB (e.g. image dimensions)
        * pay a very high price to generate some data because of shared folders (e.g. full tree)

    Some of these problems are patched over with search.
    If you want to use this class, you should:
        * estimate the RPS beforehand and consult the search team.
        * remember that search is a cache and it drifts away from reality
    """

    name = 'search_disk'
    api_error = errors.SearchError
    log = service_log

    # Translator of field names from MPFS ones to search ones and back.
    #
    # The full list of search fields can be found here, [field.*]:
    # https://git.yandex-team.ru/gitweb/saas/personal.git?a=blob;f=rpm/lucene-config/src/lucene.conf-disk;#l65
    mpfs_search_mapper = EasyMapper({
        'file_id': 'id',
        'path': 'key',
    })

    def _do_request(self, page, params, alternate_timeout=None, is_fast_moved=False):
        """Request ``page`` of the search host with ``params``; return parsed JSON.

        :param page: URL path appended to ``self.host``.
        :param params: dict of query parameters; sequences are expanded into
            repeated parameters. The caller's dict is not modified.
        :param alternate_timeout: passed to ``open_url`` as ``timeout``.
        :param is_fast_moved: when true, the ``fast-moved`` flag is appended.
        """
        if 'key' in params and isinstance(params['key'], unicode):
            # urlencode needs a byte string for non-ASCII keys; encode a copy.
            params = params.copy()
            params['key'] = params['key'].encode('utf-8')
        encoded_params = urllib.urlencode(params, doseq=True)
        url = "%s%s?%s" % (self.host, page, encoded_params)
        if is_fast_moved:
            url += '&fast-moved'
        return from_json(self.open_url(url, timeout=alternate_timeout))

    def _do_listing_request(self, page, params, alternate_timeout=None, is_fast_moved=False):
        """
        Make a request to a search handle and convert the answer to MPFS fields.

        Use only for search handles that return a structure like
            {
                "hitsCount": <INT>,
                "hitsArray": [
                    {
                        <search_field1>: <search_value1>,
                        ...
                    },
                    ...
                ]
            }

        :return: dict with ``total`` (hitsCount) and ``items`` (renamed hits).
        """
        response = self._do_request(page, params, alternate_timeout=alternate_timeout, is_fast_moved=is_fast_moved)
        # rename the fields returned by search to their MPFS names
        items = [self.mpfs_search_mapper.unmap_dict_keys(hit) for hit in response['hitsArray']]

        return {
            'total': response['hitsCount'],
            'items': items
        }

    def resources_info_by_file_ids(self, uid, file_ids, sort_field=None, asc_order=True, fields=('path', 'file_id')):
        """
        Handle for getting information about a batch of resources by file_id.

        https://wiki.yandex-team.ru/ps/documentation/yadisk-search-proxy/#ruchkapoluchenijainformaciiofajjlax
        """
        search_fields = tuple(self.mpfs_search_mapper.map_iterable(fields))
        if sort_field:
            sort_field = self.mpfs_search_mapper.map(sort_field)
        params = {
            'uid': uid,
            'id': file_ids,
            'get': ','.join(search_fields)
        }
        # Optional parameters are sent only when they are truthy.
        for name, value in (('sort', sort_field),
                            ('asc', asc_order)):
            if value:
                params[name] = value

        is_fast_moved = is_reindexed_for_quick_move(uid)

        return self._do_listing_request('/get', params, is_fast_moved=is_fast_moved)

    def find_indexed_files(self, uid, path, fields=('path', 'file_id'), alternate_timeout=None):
        """
        Get information about a file from the index. May return more than one
        result because the index is not always in sync with the database.

        :return: list of items whose ``path`` equals ``path`` exactly.
        """
        search_fields = tuple(self.mpfs_search_mapper.map_iterable(fields))
        params = {
            'uid': uid,
            'key': path,
            'get': ','.join(search_fields),
            'type': 'file'
        }

        is_fast_moved = is_reindexed_for_quick_move(uid)

        result = self._do_listing_request('/listing', params, alternate_timeout=alternate_timeout,
                                          is_fast_moved=is_fast_moved)
        # search matches by prefix, while we need exact path matches only
        return [it for it in result['items'] if it['path'] == path]

    def get_indexed_files(self, uid, file_ids):
        """
        Check whether the user's resources with the given file_ids have been
        indexed in Photoslice.
        """
        params = {
            'uid': uid,
            'id': file_ids,
            'get': 'id',
            'apply-photoslice-filter': 'true',
        }

        is_fast_moved = is_reindexed_for_quick_move(uid)

        return self._do_listing_request('/get', params, is_fast_moved=is_fast_moved)

    def list_subresources(self, uid, path,
                          mimetypes=None, mediatypes=None, resource_types=None,
                          offset=0, amount=10, sort_field=None, asc_order=True,
                          fields=('path', 'type', 'ctime', 'name')):
        """
        Handle for listing all files in a folder and all of its subfolders.

        https://old.wiki.yandex-team.ru/ps/documentation/yadisk-search-proxy/#ruchkalistingavsexfajjlovvpapkeivsexejopodpapkax
        mediatypes must be passed as numeric values: [1, 3, 4]
        """
        # otherwise it would grab too much
        if not path.endswith('/'):
            path = "%s/" % path
        search_fields = tuple(self.mpfs_search_mapper.map_iterable(fields))
        if sort_field:
            sort_field = self.mpfs_search_mapper.map(sort_field)
        params = {
            'uid': uid,
            'key': path,
            'offset': offset,
            'length': amount,
            'get': ','.join(search_fields),
        }
        # Optional parameters are sent only when they are truthy.
        for name, value in (('mimetype', mimetypes),
                            ('mediatype', mediatypes),
                            ('type', resource_types),
                            ('sort', sort_field),
                            ('asc', asc_order)):
            if value:
                params[name] = value

        is_fast_moved = is_reindexed_for_quick_move(uid)

        return self._do_listing_request('/listing', params, is_fast_moved=is_fast_moved)

    def list_subresources_iterator(self, uid, path, offset=0, amount=None, batch_size=1000, **kwargs):
        """
        Iterator over all resources of a folder and its subfolders.

        Wrapper around `self.list_subresources`.
        Queries search in batches of `batch_size` and yields the requested
        number of documents (`amount`, starting at `offset`).
        The other parameters are identical to `self.list_subresources`.

        Being a generator, it validates `batch_size` only on first iteration.

        :raises ValueError: if `batch_size` is not a positive integer.
        """
        if not isinstance(batch_size, (int, long)) or batch_size < 1:
            raise ValueError('Bad "batch_size"')
        if amount == 0:
            # Plain return ends a generator; raising StopIteration inside one
            # is turned into RuntimeError by newer interpreters (PEP 479).
            return

        iteration_num = 0
        yielded_res_num = 0
        while True:
            cur_offset = offset + batch_size * iteration_num
            resp = self.list_subresources(uid, path, offset=cur_offset, amount=batch_size, **kwargs)

            total_resources = resp['total']
            for item in resp['items']:
                yield item
                yielded_res_num += 1
                if amount and yielded_res_num == amount:
                    return

            iteration_num += 1
            # NOTE(review): the stop condition does not take `offset` into
            # account - confirm what `total` means when offset > 0.
            if total_resources <= iteration_num * batch_size:
                return

    def folder_size(self, uid, path, resource_id):
        """
        Handle for calculating the space occupied by a folder.

        https://wiki.yandex-team.ru/ps/documentation/yadisk-search-proxy/#ruchkapodschjotazanimaegomestapapkojj

        For users reindexed for quick move the folder is addressed by the
        serialized ``resource_id``; otherwise by its path.

        :return: dict with ``path``, ``size`` and ``files_count``; zeros when
            the response has no entry for the folder.
        """
        # otherwise it would grab too much
        key = path if path.endswith('/') else "%s/" % path

        is_fast_moved = is_reindexed_for_quick_move(uid)

        # NOTE(review): resource_id must not be None for fast-moved users,
        # since it is serialized before the request is made.
        if is_fast_moved:
            lookup_key = resource_id.serialize()
            params = {
                'uid': uid,
                'resource_id': lookup_key
            }
        else:
            lookup_key = key
            params = {
                'uid': uid,
                'key': key
            }

        response = self._do_request('/folder-size', params, is_fast_moved=is_fast_moved)

        if response and lookup_key in response:
            stats = response[lookup_key]
            return {
                'path': path,
                'size': stats['size'],
                'files_count': stats['count']
            }
        return {
            'path': path,
            'size': 0,
            'files_count': 0
        }

    def get_sizes_per_mediatype(self, uid, path):
        """Return the disk space occupied by the resources of the ``path``
        subtree, grouped by media type; the request asks for sorting by size.

        :rtype: dict
        """
        if not path.endswith('/'):
            path = "%s/" % path

        is_fast_moved = is_reindexed_for_quick_move(uid)

        response = self._do_listing_request(
            '/listing', {'uid': uid, 'key': path,
                         'type': 'file',
                         'get': 'mediatype,total_size',
                         'group': 'mediatype', 'merge_func': 'count',
                         'aggregate': 'sum(size)+total_size',
                         'sort': 'total_size'},
            is_fast_moved=is_fast_moved
        )
        items = []
        for item in response['items']:
            # Drop the grouping counter and expose the aggregate as integer "size".
            item.pop('merged_docs_count', None)
            item['mediatype'] = int(item['mediatype'])
            item['size'] = int(item.pop('total_size'))
            items.append(item)
        return {'items': items, 'total': len(items)}


# Module-level DiskSearch instance, created when this module is imported.
disk_search = DiskSearch()
