#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

import argparse
import collections
import difflib
import fnmatch
import json
import os
import re
import sys

import polib


# START EX utils.py

def get_diff(x, y):
    """
    Return only the changed lines of a ``difflib.ndiff`` comparison.

    Lines that ndiff marks as present only in ``x`` ('-') or only in ``y``
    ('+') are kept, in ndiff order; common lines and '?' hint lines are
    dropped.
    """
    changed = []
    for line in difflib.ndiff(x, y):
        marker = line[0]
        if marker == '-' or marker == '+':
            changed.append(line)
    return changed


def parse_args(argv=None):
    """
    Parse command line arguments.

    ``argv`` is an optional list of argument strings; when it is None
    (the default) argparse reads ``sys.argv[1:]``.

    Return abs path to translations, format (po/json), deserialized
    exceptions (or None when ``--exceptions`` is not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--exceptions',
        help='path to json with exceptions',
    )
    parser.add_argument(
        '--format',
        help='translations format',
        choices=['json', 'po'],
        required=True,
    )
    parser.add_argument(
        'translations_path',
        help='path to files with translations',
    )

    args = parser.parse_args(argv)

    translations_path = os.path.abspath(args.translations_path)

    exceptions = None
    if args.exceptions:
        # close the file deterministically instead of leaving the handle
        # to the garbage collector
        with open(args.exceptions, 'rb') as exceptions_file:
            exceptions = json.load(exceptions_file)

    return translations_path, args.format, exceptions


def process_res(errors):
    """Print results and exit with related status code."""
    if errors:
        print json.dumps(errors, indent=2, ensure_ascii=False).encode('utf-8')
    else:
        print 'Ok'

# END EX utils.py

# START EX VALIDATE EMAILS

# Matches one whole ``[% ... %]`` tag.  The match is non-greedy, so adjacent
# tags are found separately; DOTALL lets a single tag span several lines.
TAGS_RE = re.compile(
    r'(\[%.*?%\])',
    re.DOTALL | re.MULTILINE | re.UNICODE,
)


def normalize_tag(t):
    """
    Reduce a tag to a form in which only its structure matters.

    Spaces, newlines and literal backslash-n sequences are removed, a
    comment (text after ``#``) is replaced with a fixed marker, and the
    contents of single- and double-quoted strings are replaced with a
    fixed marker.  Two tags that differ only in those parts normalize to
    the same string.
    """
    # don't count whitespaces and newlines and even \n sequences
    t = t.replace('\n', '').replace(r'\n', '').replace(' ', '')
    # looks like comments; the replacement must be a raw string so that
    # \1 is a backreference to the captured terminator and not the
    # control character U+0001
    t = re.sub(r'#.*([-~%$])', r'#<cuted-comment>\1', t)
    # cut text inside single quotes (an escaped quote does not end it)
    t = re.sub(r"'.*?(?<!\\)'", "'<cuted-string>'", t)
    # cut text inside double quotes (an escaped quote does not end it)
    t = re.sub(r'".*?(?<!\\)"', '"<cuted-string>"', t)

    return t


def get_tags(text):
    """Return the sorted list of normalized ``[% ... %]`` tags in text."""
    return sorted(normalize_tag(raw_tag) for raw_tag in TAGS_RE.findall(text))


def main_emails(path, exceptions):
    """
    Validate tags in the translations stored in ``emails.json``.

    ``path`` is the directory containing ``emails.json``.  ``exceptions``
    is None or a dict mapping a language to a list of dicts whose
    ``translation`` value is a translation string to skip.

    For every key the 'ru' translation must equal the key once its real
    newlines are written as literal backslash-n sequences.  Every other
    translation must contain the same normalized tags as the key.
    Mismatches are collected per language and passed to process_res().
    """
    path = os.path.join(path, 'emails.json')

    # convert exceptions dict of lists of dicts to dict of sets
    exception_sets = {}
    if exceptions:
        for lang, exc_dicts in exceptions.iteritems():
            exception_sets[lang] = {i['translation'] for i in exc_dicts}

    # close the file as soon as it is parsed
    with open(path, 'rb') as emails_file:
        data = json.load(emails_file)

    # In the scheme below we are interested in the following keys:
    # <string to translate>, <lang>, <translation string>
    # {
    #     "keysets": {
    #         <keyset name>: {  # 1
    #             "keys": {
    #                 <string to translate>: {  # 2
    #                     "translations": {
    #                         <lang>: {  # 3
    #                             "form": <translation string>
    #                         }
    #                     }
    #                 }
    #             }
    #         }
    #     }
    # }

    errors = collections.defaultdict(list)

    for keyset in data['keysets'].itervalues():  # <-- 1
        for key, key_info in keyset['keys'].iteritems():  # <-- 2
            # tags are taken from the key as stored, before it may be
            # swapped for the 'ru' form below
            tags = get_tags(key)
            translations = key_info['translations']

            # strings in translation are with normal newlines
            translated_key = translations['ru']['form']
            if translated_key.replace('\n', r'\n') != key:
                errors['ru'].append({
                    'origin': key,
                    'translation': translated_key,
                })
            else:
                # report the origin with real newlines from here on
                key = translated_key

            for lang, translation_info in translations.iteritems():  # <-- 3
                if translation_info.get('status') == 'requires_translation':
                    continue

                translation = translation_info['form']
                if lang == 'ru':
                    # 'ru' was already compared with the key above
                    continue
                if translation in exception_sets.get(lang, frozenset()):
                    continue

                translation_tags = get_tags(translation)
                if tags != translation_tags:
                    # only lang/translation pair is necessary to
                    # store exceptions info
                    errors[lang].append({
                        'origin': key,
                        'translation': translation,
                        'normalized_tags_diff': get_diff(
                            tags, translation_tags),
                    })

    process_res(errors)


# END EX VALIDATE EMAILS

# START EX VALIDATE PO

# When true, validate_po() skips entries whose msgstr is empty instead of
# comparing their substitution variables with the msgid.
IGNORE_MISSING = True


def parse_sub_var(s):
    """
    Return the sorted substitution variables found in ``s``.

    Positional ``%s`` / ``%d`` are returned as is.  For named ``%{...}``
    variables everything from ``#`` up to the closing brace is replaced
    with a fixed marker, so only the part before ``#`` is compared.
    """
    placeholders = re.findall(r'%[sd]', s)

    for named in re.findall(r'%{.*?}', s):
        placeholders.append(re.sub(r'#.+}', '#<cutted-string>}', named))

    placeholders.sort()
    return placeholders


def validate_po(po_f_name, exceptions):
    """
    Return list of errors found in the po file ``po_f_name``.

    An entry is an error when the substitution variables of its msgid and
    msgstr differ.  Entries whose msgstr is in ``exceptions`` are skipped,
    and so are entries with an empty msgstr while IGNORE_MISSING is set.
    """
    errors = []

    for entry in polib.pofile(po_f_name):
        translated = entry.msgstr
        if (IGNORE_MISSING and not translated) or translated in exceptions:
            continue

        expected_vars = parse_sub_var(entry.msgid)
        actual_vars = parse_sub_var(translated)
        diff = get_diff(expected_vars, actual_vars)
        if not diff:
            continue

        errors.append({
            'msgid': entry.msgid,
            'msgstr': translated,
            'normalized_tags_diff': diff,
        })

    return errors


def main_po(path, exceptions):
    """
    Validate every ``*.po`` file found under ``path`` (recursively).

    ``exceptions`` is None or a dict mapping a po file name to a list of
    dicts whose ``msgstr`` value is a translation to skip.  Errors are
    grouped by po file name and passed to process_res().
    """
    # convert exceptions dict of lists of dicts to dict of sets
    exception_sets = {}
    if exceptions:
        for po_name, exc_dicts in exceptions.iteritems():
            exception_sets[po_name] = {i['msgstr'] for i in exc_dicts}

    errors = {}
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, '*.po'):
            po_errors = validate_po(
                os.path.join(root, filename),
                exception_sets.get(filename, frozenset()),
            )
            if po_errors:
                # the same file name may occur in several directories;
                # accumulate instead of overwriting earlier results
                errors.setdefault(filename, []).extend(po_errors)

    process_res(errors)


# END EX VALIDATE PO


def main():
    """
    Script entry point.

    Parses the command line and runs the validator that matches the
    requested format; an unknown format is reported on stderr and the
    process exits with status 1.
    """
    path, format_, exceptions = parse_args()

    validators = {
        'json': main_emails,
        'po': main_po,
    }
    validate = validators.get(format_)
    if validate is None:
        print >> sys.stderr, 'Unsupported format %s' % format_
        sys.exit(1)

    validate(path, exceptions)


# Run the validation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
