# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import datetime
from collections import Counter
from itertools import ifilterfalse, islice

from sins2_clickhouse.sql.dicts import STOPWORDS


def word_count(records):
    """Count non-stopword tokens across all records.

    Each record is split on whitespace (``split()`` with no arguments) and
    every token for which ``isstopword`` returns true is skipped.

    :param records: iterable of strings.
    :return: a ``Counter`` mapping each kept token to its number of
        occurrences over all records.
    """
    totals = Counter()
    for text in records:
        # Counter.update() with an iterable adds one per element yielded.
        totals.update(
            token for token in text.split() if not isstopword(token)
        )
    return totals


def unique_everseen(iterable, key=None):
    """Yield unique elements lazily, in order of first appearance.

    Every element ever seen is remembered, so duplicates are dropped even
    when they are not adjacent. When ``key`` is given, uniqueness is decided
    by ``key(element)`` but the original element is what gets yielded.
    Elements (or their key values) are stored in a set and must therefore
    be hashable.

    Examples::

        unique_everseen('AAAABBBCCDAABBB') --> A B C D
        unique_everseen('ABBCcAD', str.lower) --> A B C D
    """
    observed = set()
    for item in iterable:
        # Without a key function the element itself is the identity marker.
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item


def maybe_unicode_to_utf8(maybe_unicode):
    """Encode a ``unicode`` value as UTF-8; return anything else unchanged.

    :param maybe_unicode: any object.
    :return: the UTF-8 encoded byte string when the argument is a
        ``unicode`` instance, otherwise the argument itself.
    """
    if not isinstance(maybe_unicode, unicode):
        # Byte strings and non-string values are passed through untouched.
        return maybe_unicode
    return maybe_unicode.encode('utf-8')


def isstopword(word):
    """Return whether ``word`` is a member of ``STOPWORDS``.

    NOTE(review): the original note here read "they are in unicode" --
    presumably the STOPWORDS entries are unicode strings, so ``word`` should
    be unicode as well for the lookup to match; confirm against STOPWORDS.
    """
    is_member = word in STOPWORDS
    return is_member


def asdate(yyyymmdd):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Despite the parameter name, the expected format is dash-separated
    (``'%Y-%m-%d'``), e.g. ``'2020-01-31'``.

    :param yyyymmdd: date string in ``%Y-%m-%d`` format.
    :return: the corresponding ``datetime.date``.
    :raises ValueError: if the string does not match the format.
    """
    parsed = datetime.datetime.strptime(yyyymmdd, '%Y-%m-%d')
    # strptime yields a datetime; only the calendar date part is returned.
    return parsed.date()


def nth(iterable, n, default=None):
    """Return the item at zero-based position ``n``, or ``default``.

    The iterable is advanced past the first ``n`` items with ``islice``;
    ``default`` is returned when it has ``n`` or fewer items.

    :param iterable: any iterable; an iterator is consumed up to and
        including the returned item.
    :param n: zero-based index of the wanted item.
    :param default: value returned when the iterable is too short.
    """
    remainder = islice(iterable, n, None)
    for item in remainder:
        # The first item left after skipping n is the one requested.
        return item
    return default
