# -*- coding: utf-8 -*-
import requests

from django.conf import settings

# Unicode characters that unify_minuses() rewrites to the ASCII "-".
# Source: http://en.wikipedia.org/wiki/Dash
HYPHENS = [
    u'\u2010',  # HYPHEN
    u'\u00AD',  # SOFT HYPHEN
    u'\u2011',  # NON-BREAKING HYPHEN
    u'\u2043',  # HYPHEN BULLET
]
DASHES = [
    u'\u2012',  # FIGURE DASH
    u'\u2013',  # EN DASH
    u'\u2014',  # EM DASH
    u'\u2015',  # HORIZONTAL BAR
]


def unify_minuses(text):
    """Return *text* with every hyphen/dash variant replaced by "-".

    Each character listed in HYPHENS, then each one listed in DASHES, is
    substituted with the plain ASCII hyphen-minus. Characters that do not
    occur in *text* are skipped without calling ``replace``.
    """
    for variants in (HYPHENS, DASHES):
        for variant in variants:
            if variant not in text:
                continue
            text = text.replace(variant, u"-")

    return text


def check_dirty_lang(text):
    """Query the dirty-language service about *text*.

    The text is cut by ``blocks_split`` into pieces of at most 500
    characters, and each piece is sent in its own GET request to
    ``<settings.DIRTY_LANG_URL>/wizard`` with ``lr=213`` and
    ``action=markup``.

    Returns the ``Class`` value from the first response whose ``DirtyLang``
    entry provides one. Returns ``None`` when no block produces a class,
    when *text* is empty, or when ``settings.DIRTY_LANG_URL`` is falsy.

    Exceptions raised by ``requests`` or by JSON decoding are not caught.
    """
    if not settings.DIRTY_LANG_URL:
        return None

    url = '%s/wizard' % settings.DIRTY_LANG_URL

    for block in blocks_split(text, 500):
        # Pass the text through ``params`` so requests percent-encodes it;
        # interpolating it into the URL let '&', '#' or '%' in the text
        # corrupt the query string.
        # NOTE(review): no timeout is passed, so a stalled service blocks
        # the caller indefinitely -- consider adding one.
        response = requests.get(
            url,
            params=[('text', block), ('lr', '213'), ('action', 'markup')],
        )

        dirty_lang = response.json().get("DirtyLang")
        if not dirty_lang:
            continue

        dirty_class = dirty_lang.get('Class')
        if dirty_class is not None:
            # Stop at the first block the service assigns a class to.
            # Previously the function returned unconditionally after the
            # first block, so later blocks were never checked.
            return dirty_class

    return None


def blocks_split(text, block_max_len):
    """Split *text* into consecutive blocks of at most *block_max_len* chars.

    A block that is followed by more text is cut just after the last space
    inside its ``block_max_len``-character window, so the space stays at the
    end of that block. If the window contains no space at all, the block is
    cut at exactly ``block_max_len`` characters. Concatenating the returned
    blocks always reproduces *text*.

    Returns a list of strings; an empty *text* gives an empty list.

    Raises ValueError if *text* is non-empty and *block_max_len* is less
    than 1, since no progress could be made.
    """
    total = len(text)
    if total and block_max_len < 1:
        raise ValueError("block_max_len must be a positive integer")

    text_blocks = []
    start = 0

    # Only split while more than one block's worth of text remains; the
    # final remainder is emitted as-is, even if it does not end with a space.
    while total - start > block_max_len:
        window_end = start + block_max_len
        space_at = text.rfind(" ", start, window_end)
        if space_at == -1:
            # No space to break on: hard-split instead of shrinking the
            # block to "" and looping forever.
            end = window_end
        else:
            end = space_at + 1
        text_blocks.append(text[start:end])
        start = end

    if start < total:
        text_blocks.append(text[start:])

    return text_blocks
