"""
Module contains helpers for comments
"""
import logging
import threading
from collections import deque

import lxml.html
from django.db import transaction

from wiki.pages.models import Comment
from wiki.utils.base_command import print_percents, spawner
from wiki.utils.models import get_chunked
from wiki.utils.timer import track_time

# Module-level lock held by strip_chunks_from_html while it pulls the next
# chunk with next() from the chunks generator it was given.
# NOTE(review): presumably needed because several worker threads consume the
# same generator object -- confirm against `spawner`.
generator_lock = threading.RLock()


def strip_body_from_html(comment_body):
    """
    Reduce a comment body to its plain text, dropping the HTML markup.

    @type comment_body: unicode
    @return: text-only comment body with leading/trailing whitespace removed
    """
    # "\x01" characters are removed up front: with them in place the parser
    # returns a blank string instead of the comment text in some cases, e.g.
    # u"\x01linklhttp://www.youtube.com/watch?v=pH-a9j0UXfM ==\x01linkr"
    #
    # The cleaned body is parsed as an HTML fragment wrapped in a <div>, so
    # the result is always a single root element.
    wrapper = lxml.html.fragment_fromstring(
        comment_body.replace('\x01', ''),
        create_parent='div',
    )

    # collect the text of the wrapper and everything nested inside it
    plain_text = wrapper.text_content()
    return plain_text.strip()


def strip_chunks_from_html(chunks, count, errors, logger):
    """
    Strip HTML markup from the bodies of comments, chunk by chunk.

    Chunks are taken one at a time from ``chunks`` while holding
    ``generator_lock``. Every comment id in a chunk is then processed in its
    own transaction: the comment is re-read, its body is replaced with the
    result of ``strip_body_from_html`` and the comment is saved.

    Comments that no longer exist are skipped silently. Any other failure is
    logged and remembered, and processing continues with the next comment.

    @param chunks: generator of chunks; iterating a chunk yields comment ids
    @param count: count structure, needed for printing percents count
    @param errors: errors deque; repr() of every failure is appended to it
                   once all chunks are exhausted
    @param logger: script logger
    """
    loaded = 0
    local_errors = []

    while True:
        # advance the generator only while holding the lock; the lock is
        # released on every exit path, including the `break` below
        with generator_lock:
            try:
                chunk = next(chunks)
            except StopIteration:
                break

        for comment_id in chunk:
            try:
                # we need to wrap entire operation in one transaction
                with transaction.atomic():
                    # get current state of a comment
                    comment = Comment.objects.get(id=comment_id)
                    comment.body = strip_body_from_html(comment.body)
                    comment.save()

            except Comment.DoesNotExist:
                # if this comment was already deleted -- go to next comment
                continue

            except Exception as exc:
                # Report `comment_id`, not `comment.id`: when the lookup itself
                # fails, `comment` is either unbound or still points at the
                # previously processed comment.
                error_repr = repr(exc)
                logger.error('General error while processing comment %s: %s', comment_id, error_repr)
                local_errors.append(error_repr)

            else:
                logger.info('Comment id = %s processed', comment_id)

                loaded += 1
                # progress output only; the helper's return value becomes the
                # new local counter
                loaded = print_percents(loaded, count, logger=logger)

    # publish the locally collected errors to the shared errors deque
    errors.extend(local_errors)


@track_time
def strip_comments_from_html(comments, threads=0, logger=None):
    """
    Strip HTML markup from the bodies of all given comments.

    The queryset is split into chunks which are handed, together with the
    shared bookkeeping structures, to ``spawner`` with
    ``strip_chunks_from_html`` as the target. When the run finishes, the
    number of collected errors (if any) is logged.

    @type comments: QuerySet
    @param threads: number of threads to run (passed to spawner as maxthreads)
    @param logger: script logger; the module logger is used when not given
    """
    # fall back to the module logger if no custom logger was specified
    logger = logger or logging.getLogger(__name__)

    # break big comments queryset in chunks
    chunks = get_chunked(comments)

    # count structure -- needed only for pretty output with percents
    total = comments.count()
    count = {'all': total, 'current': 0}

    # every worker appends its errors here
    errors = deque()

    logger.info('{0} comments to process by {1} threads'.format(total, threads))

    # do logic
    spawner(
        target=strip_chunks_from_html,
        args=(chunks, count, errors, logger),
        maxthreads=threads,
    )

    if not errors:
        return

    logger.error('Command execution done with %d errors', len(errors))
