import os
from logging import getLogger

from django.utils.dateparse import parse_datetime
from ids.services.at.repositories.utils import is_club_id
from ids.exceptions import BackendError, AuthError

from intranet.search.core.snippets.at import PostSnippet
from intranet.search.core.sources.at.utils import (
    repair_link,
    replace_ya,
    get_marks,
    UIDs,
    get_id,
    get_post_number,
    parse_long_id
)
from intranet.search.core.sources.utils import (
    get_persons,
    get_text_content,
    get_page_clicked_marked_texts,
    get_page_links,
    date_as_factor,
    normalize
)
from intranet.search.core.swarm.indexer import Indexer
from intranet.search.core.utils import get_ids_repository
from intranet.search.core.sources.at.at import TRASH_CLUBS
from intranet.search.core.utils.http import cut_sheme

from intranet.search.abovemeta.utils import string_to_bool
from intranet.search.core.tvm import tvm2_client

logger = getLogger(__name__)


def get_post_repo():
    """Return the repository that ``get_ids_repository`` builds for ('at', 'post').

    A service ticket for 'at' is requested from ``tvm2_client`` on every call
    and passed along with ``user_ticket_needed=True`` and ``timeout=60``.
    """
    ticket = tvm2_client.get_service_ticket('at')
    return get_ids_repository(
        'at',
        'post',
        service_ticket=ticket,
        user_ticket_needed=True,
        timeout=60,
    )


def get_club_repo():
    """Return the repository that ``get_ids_repository`` builds for ('at', 'club').

    A service ticket for 'at' is requested from ``tvm2_client`` on every call
    and passed along with ``user_ticket_needed=True`` and ``timeout=60``.
    """
    ticket = tvm2_client.get_service_ticket('at')
    return get_ids_repository(
        'at',
        'club',
        service_ticket=ticket,
        user_ticket_needed=True,
        timeout=60,
    )


def get_comment_repo():
    """Return the repository that ``get_ids_repository`` builds for ('at', 'comment').

    A service ticket for 'at' is requested from ``tvm2_client`` on every call
    and passed along with ``user_ticket_needed=True`` and ``timeout=60``.
    """
    ticket = tvm2_client.get_service_ticket('at')
    return get_ids_repository(
        'at',
        'comment',
        service_ticket=ticket,
        user_ticket_needed=True,
        timeout=60,
    )


class Source(Indexer):
    """Indexer of At ("Etushka") posts.

    Steps scheduled through ``self.next`` in this class:

    * ``setup`` schedules ``load`` (cache mode) or one ``walk`` per person
      and per real club owned by that person;
    * ``walk`` and ``push`` schedule ``fetch`` for every indexable post;
    * ``fetch`` collects comments and link/query data, then schedules
      ``create``;
    * ``create`` builds the document and schedules ``content`` and ``store``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Lookup of user/club records by uid, backed by the indexer cache storage.
        self.uids = UIDs(self.cache_storage)

    def do_setup(self, **kwargs):
        """Schedule the first steps of an indexing run.

        With the ``from_cache`` option only the ``load`` step is scheduled.
        Otherwise a ``walk`` step is scheduled for every person returned by
        ``get_persons`` (optionally restricted to the logins in the ``keys``
        option) and for every non-fake club that person owns.
        """
        if self.options['from_cache']:
            self.next('load')
            return

        person_filters = {'_sort': 'id', '_fields': 'uid'}
        if self.options['keys']:
            person_filters['login'] = ','.join(self.options['keys'])

        # Built only after the cache shortcut above, so that cache mode does
        # not request a service ticket for a repository it never uses.
        club_repo = get_club_repo()

        for user in get_persons(person_filters):
            self.next('walk', uid=int(user['uid']))

            for club in club_repo.search({'owner': user['uid']}):
                # At has "fake" clubs: for example, a user's own page is a
                # club where the user is the owner. Such clubs have
                # community_type == None and must not be indexed.
                if club['community_type'] is not None:
                    self.next('walk', uid=get_id(club['id']))

    def need_index(self, post):
        """Return True when ``post`` is public and has a content or a title."""
        if post.get('access') != 'public':
            return False
        return bool(post.get('content') or post.get('title'))

    def do_walk(self, uid, **kwargs):
        """Walk over all posts of a club or a user.

        Schedules ``fetch`` for every post accepted by ``need_index``; other
        posts are only logged.
        """
        post_repo = get_post_repo()
        for post in post_repo.search({'uid': uid}):
            if self.need_index(post):
                self.next('fetch', uid=uid, post=post)
            else:
                logger.info('Private or empty post %s, skip', post.get('id'))

    def do_push(self, data, delete=False, **kwargs):
        """Re-index the single post identified by ``data['id']``.

        ``data['id']`` is split by ``parse_long_id`` into the owner uid and
        the post number. Backend and auth errors are logged and the push is
        dropped. ``delete`` is accepted but not used by this method.
        """
        uid, post_number = parse_long_id(data['id'])
        post_repo = get_post_repo()
        try:
            post_data = list(post_repo.search({'uid': uid, 'post_no': post_number}))
        except (BackendError, AuthError):
            logger.warning('Cannot get data about post: %s', data)
            return

        for post in post_data:
            if self.need_index(post):
                self.next('fetch', uid=uid, post=post)

    def do_fetch(self, uid, post, **kwargs):
        """Collect comments and auxiliary data for ``post``, then schedule ``create``.

        Adds the ``is_trash``, ``page_links`` and ``search_queries`` keys to
        ``post`` in place.
        """
        post_number = get_post_number(post['id'])
        comment_repo = get_comment_repo()

        if post.get('comments_disabled'):
            comments = []
        else:
            comments = list(comment_repo.search({'uid': uid, 'post_no': post_number}))

        # The URL is the same for both lookups below, so resolve it once.
        doc_url = self.get_doc_url(post)
        post['is_trash'] = uid in TRASH_CLUBS
        post['page_links'] = get_page_links([doc_url])
        post['search_queries'] = get_page_clicked_marked_texts([doc_url])

        self.next('create', uid=uid, post=post, comments=comments)

    def get_doc_url(self, post):
        """Return the document URL: the post's ``alternate`` link passed through ``repair_link``."""
        return repair_link(post['links']['alternate'])

    def get_doc_updated(self, post):
        """Return the parsed ``edited`` timestamp, falling back to ``published``."""
        return parse_datetime(post['edited'] or post['published'])

    def do_create(self, uid, post, comments, **kwargs):
        """Build the search document for ``post`` and schedule ``content`` and ``store``.

        Enriches ``post`` in place with ``text_content``, ``marks``,
        ``comments_count``, ``post_type``, ``tags``, ``author`` and ``club``
        before the body, snippet, factors and attributes are emitted.
        """
        doc = self.create_document(
            doc_url=self.get_doc_url(post),
            updated=self.get_doc_updated(post)
        )

        # need_index() lets title-only posts through, so the content may be
        # empty or absent; treat that as an empty text instead of failing.
        raw_content = post.get('content') or ''
        post['text_content'] = get_text_content(replace_ya(raw_content, self.uids))
        post['marks'] = get_marks(post)
        post['comments_count'] = len(comments)
        post['post_type'] = ''
        post['tags'] = []
        for cat in post['categories']:
            # The category with the post-types scheme carries the post type;
            # every other category is treated as a tag.
            if cat['scheme'] == 'urn:ya.ru:posttypes':
                post['post_type'] = cat['term']
            else:
                post['tags'].append(cat['term'])

        if is_club_id(uid):
            # Club post: uid refers to the club, the author comes from the post.
            post['club'] = self.uids[uid]
            post['author'] = self.uids[get_id(post['author']['id'])]
        else:
            # Personal post: uid refers to the author and there is no club.
            post['author'] = self.uids[uid]
            post['club'] = {}

        doc.emit_body(self.create_body(post, comments))
        doc.emit_snippet(self.create_snippet(post), 'ru')

        self.emit_factors(doc, post)
        self.emit_attrs(doc, post)
        self.next('content', url=doc.url, raw_data=[post, comments], updated_ts=doc.updated_ts)
        self.next('store', document=doc)

    def create_body(self, post, comments):
        """Return the document body dict.

        The body holds the post text, one string per comment (author name,
        author login and comment text joined by spaces) and a ``z_ns_hidden``
        section with owner, title, tags, link data and search queries.
        """
        comments_data = [
            ' '.join([
                comment['author']['name'],
                comment['author']['login'],
                get_text_content(replace_ya(comment['content'], self.uids)),
            ])
            for comment in comments
        ]

        page_links = post.get('page_links', {})
        body = {
            'content': post['text_content'],
            'comments': comments_data,
            'z_ns_hidden': {
                'owner': post['author']['name'],
                'title': post['title'],
                'keywords': post['tags'],
                'links_urls': page_links.get('urls', ''),
                'links_texts': page_links.get('texts', ''),
            },
        }
        body['z_ns_hidden'].update(post.get('search_queries', {}))
        return body

    def create_snippet(self, post):
        """Return a ``PostSnippet`` for ``post``.

        The description and content are the first 300 characters of the post
        text; the title falls back to the URL passed through ``cut_sheme``.
        """
        short_content = post['text_content'][:300]
        url = self.get_doc_url(post)
        snippet = {
            'url': url,
            'title': post['title'] or cut_sheme(url),
            'description': short_content,
            'updated': self.get_doc_updated(post),
            'tags': post['tags'],
            'entry_type': post['post_type'],
            'type': 'post',
            'content': short_content,
            'author': post['author'],
            'comments_count': post['comments_count'],
            'likes': post['marks']['likes'],
            'dislikes': post['marks']['dislikes'],
            'club': post['club'] or None,
        }
        return PostSnippet(snippet)

    def emit_factors(self, doc, post):
        """Emit the meta factors of ``post`` (dates, trash flag, comments, marks) on ``doc``."""
        doc.emit_meta_factor('createdAt', date_as_factor(parse_datetime(post['published'])))
        doc.emit_meta_factor('modifiedAt', date_as_factor(doc.updated))
        doc.emit_meta_factor('isTrashTitle', int(post['is_trash']))
        doc.emit_meta_factor('commentsCount', normalize(post['comments_count'], 50))

        # We have no data about real page views, but there were certainly no
        # fewer of them than likes and dislikes together.
        doc.emit_meta_factor('pageViews', post['marks']['total'])
        doc.emit_meta_factor('likesCount', post['marks']['likes'])
        doc.emit_meta_factor('dislikesCount', post['marks']['dislikes'])

    def emit_attrs(self, doc, post):
        """Emit suggest, facet, group and search attributes of ``post`` on ``doc``.

        Suggest attributes are emitted only when the
        ``ISEARCH_ENABLE_AT_SUGGEST`` environment variable is truthy
        according to ``string_to_bool``.
        """
        # Read the flag once; it is needed for both the post and the club.
        suggest_enabled = string_to_bool(os.getenv('ISEARCH_ENABLE_AT_SUGGEST'))

        if suggest_enabled:
            doc.emit_suggest_attr(post['title'])

        doc.emit_facet_attr(
            name='post_author',
            value=post['author']['login'],
            label=post['author']['name'],
        )

        club = post['club']
        if club:
            doc.emit_facet_attr(
                name='club',
                value=str(get_id(club['id'])),
                label=club['name'],
            )
            if suggest_enabled:
                doc.emit_suggest_attr(club['name'])
                doc.emit_suggest_attr(club['name_eng'])
                url = club['links']['www']
                if url:
                    # Suggest the last path segment of the club URL.
                    doc.emit_suggest_attr(url.strip().strip('/').rsplit('/', 1)[-1])

        post_updated = doc.updated.strftime('%Y-%m')
        doc.emit_facet_attr('post_updated', post_updated, post_updated)

        doc.emit_group_attr('thread_id_grp', post['id'], label=post['title'], url=doc.url)
        doc.emit_search_attr('s_type', 'post')
        doc.emit_search_attr('public', '1')

    def _parse_cache_row(self, row):
        """Unpack ``row['raw']`` into the ``uid``/``post``/``comments`` keyword dict."""
        uid, post, comments = row['raw']
        return {'uid': int(uid), 'post': post, 'comments': comments}
