# -*- coding: utf-8 -*-

import urllib2
import logging
import re
import itertools


def debug_msg(msg):
    """Log *msg* at INFO level via the root logger, prefixed with ``DEBUG: ``."""
    line = "DEBUG: {}".format(msg)
    logging.info(line)


def convert(s):
    """Return *s* encoded as UTF-8 when it is ``unicode``; any other value is returned as is."""
    return s.encode('utf8') if isinstance(s, unicode) else s


class CheckerException(Exception):
    """Soft check failure.

    ``run_with_error_keeper`` records it as a custom error but still reports
    the checked value as valid, so it does not become a schema error.
    """


class CheckerStrictException(Exception):
    """Strict check failure.

    ``run_with_error_keeper`` records it as a custom error and reports the
    checked value as invalid, so it also surfaces as a schema error.
    """


class ValidatorController(object):
    """Accumulates the outcome of a validation run.

    Holds two error lists (custom checker errors and schema errors) and the
    set of values that were already checked.
    """

    def __init__(self):
        # Values already passed through a wrapped checker.
        self.seen_links = set()
        # Errors reported by the schema validator.
        self.schema_errors = []
        # Errors raised by custom format checkers.
        self.custom_errors = []

    def add_schema_error(self, error):
        """Append *error* to the schema error list."""
        self.schema_errors.append(error)

    def add_custom_error(self, error):
        """Append *error* to the custom error list."""
        self.custom_errors.append(error)


def run_with_error_keeper(vdc):
    """Build a decorator that routes checker exceptions into *vdc*.

    The decorated checker is called at most once per distinct value: a value
    already present in ``vdc.seen_links`` is reported as valid without calling
    the checker again. ``CheckerException`` is stored as a custom error and
    the value is reported valid; ``CheckerStrictException`` is stored as a
    custom error and the value is reported invalid. Any other result or
    exception of the checker passes through unchanged.
    """
    def error_keeper_decorator(func):
        def wrapped(value):
            if value not in vdc.seen_links:
                vdc.seen_links.add(value)
                try:
                    return func(value)
                except CheckerException as soft_error:
                    # Common URI problems are not treated as schema errors.
                    vdc.add_custom_error(soft_error)
                    return True
                except CheckerStrictException as strict_error:
                    # Format problems are treated as schema errors as well.
                    vdc.add_custom_error(strict_error)
                    return False
            # Value was checked before: do not check or report it twice.
            return True
        return wrapped
    return error_keeper_decorator


def uri_validator(value):
    """Check that *value* looks like an HTTP(S) URL and can be opened.

    Protocol-relative values (``//host/path``) are opened as ``http:``.

    Returns:
        True when the value is ``None``, is not a string, or the URL was
        opened and answered with HTTP 200, 301 or 302.

    Raises:
        CheckerStrictException: the value is an empty string or does not
            start with ``http`` or ``//``.
        CheckerException: opening the URL failed or the HTTP code is not one
            of the accepted ones.
    """
    debug_msg("URI validator called")
    value = convert(value)
    debug_msg("Value: {}".format(value))

    # Consider empty values as correct url.
    # To treat a missing value as an error, add the field to the "required"
    # list and constrain its type in the schema.
    if value is None:
        return True

    # Format checks apply to strings only; a value of another type is left
    # to the schema's "type" keyword instead of crashing on string methods.
    if not isinstance(value, str):
        return True

    if value == '':
        raise CheckerStrictException("Some url is empty")

    if not value.startswith('http'):
        if value.startswith('//'):
            value = 'http:' + value
        else:
            raise CheckerStrictException("Unknown format of url: '{}'".format(value))

    user_agent = 'Mozilla/5.0 (Linux; Android 6.0; F3311 Build/37.0.A.2.108; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/61.0.3163.98 Mobile Safari/537.36 YandexSearch/7.06'
    try:
        request = urllib2.Request(value, headers={'User-Agent': user_agent})
        res = urllib2.urlopen(request, timeout=5)
    except Exception as e:
        # Any failure to open the URL is reported as a soft checker error.
        raise CheckerException("Url '{}' open error: {}".format(value, e))

    # Only the status code is needed; always release the connection.
    try:
        code = res.getcode()
    finally:
        res.close()
    debug_msg("Code: {}".format(code))

    redirect_codes = [301, 302]
    good_codes = [200] + redirect_codes

    if code not in good_codes:
        raise CheckerException("Bad HTTP code for url {}: {}".format(value, code))

    return True


def make_validation(data, schema):
    """Validate *data* against *schema* and collect all problems.

    The schema is checked with a Draft 4 validator whose ``uri`` format is
    handled by ``uri_validator`` wrapped with ``run_with_error_keeper``.

    Returns:
        ValidatorController: holds the schema errors (UTF-8 byte strings of
        the form ``<schema path>: <message>``, ordered by schema path) and
        the custom errors collected by the URI checker.
    """
    import jsonschema

    vdc = ValidatorController()
    uri_validator_decorated = run_with_error_keeper(vdc)(uri_validator)

    custom_checker = jsonschema.FormatChecker()
    custom_checker.checks('uri')(uri_validator_decorated)

    validator = jsonschema.Draft4Validator(schema, format_checker=custom_checker)

    def schema_path_of(error):
        return u'/'.join(unicode(part) for part in error.schema_path)

    # Error objects have no natural ordering, so sort on an explicit key to
    # get a reproducible report.
    for error in sorted(validator.iter_errors(data), key=schema_path_of):
        # Build the line as unicode and encode it once: formatting a unicode
        # path into a byte-string template fails on non-ASCII schema keys.
        line = u"{}: {}".format(schema_path_of(error), error.message)
        vdc.add_schema_error(convert(line))

    return vdc


def create_schema_tree(data, sch, path, lastkey):
    """Infer a JSON-schema-like description of *data* and store it in *sch*.

    *sch* is modified in place. Objects get ``properties`` plus a
    ``__property_count`` map counting how often each property was seen;
    arrays get ``items`` as a one-element list holding the merged schema of
    all elements; scalars get a ``type`` (merged with an already present
    type via ``merge_types``). A string containing ``//`` written into a
    schema without a type is also marked with ``format: uri``.

    Args:
        data: value to describe (dict, list, string, bool, number or None).
        sch: dict receiving the schema for *data*.
        path: slash-separated location of *data*, used in log messages.
        lastkey: last component of *path*; not used by this function.

    Returns:
        1 if schema information for *data* is present in *sch*, 0 otherwise
        (empty object or array without previous information, or an
        unsupported value type).
    """
    # TODO: warning if field exists and of another type
    if isinstance(data, dict):
        if sch and isinstance(sch, dict) and "type" in sch and sch["type"] != "object":
            logging.warning("CREATING SCHEMA: try to create 'object' type for path {} of another type: {}".format(path, sch["type"]))

        for (k, v) in data.iteritems():
            child = {}
            child_path = path + "/" + k
            if create_schema_tree(v, child, child_path, k):
                properties = sch.setdefault("properties", {})
                properties[k] = merge_schema_trees(properties.get(k, {}), child, child_path, k)

                counts = sch.setdefault("__property_count", {})
                counts[k] = counts.get(k, 0) + 1
        if "properties" in sch:
            sch["type"] = "object"
            return 1
        return 0

    if isinstance(data, list):
        # sch is a dict here, exactly as in the object branch above.
        if sch and isinstance(sch, dict) and "type" in sch and sch["type"] != "array":
            logging.warning("CREATING SCHEMA: try to create 'array' type for path {} of another type: {}".format(path, sch["type"]))

        item_path = path + "/*"
        merged = {}
        described = 0
        for v in data:
            item = {}
            r = create_schema_tree(v, item, item_path, "*")
            if r:
                merged = merge_schema_trees(merged, item, item_path, "*", False)
                described += r
        if described:
            # Turn per-property counters into 0/1 flags relative to the
            # number of elements of this array.
            reduce_property_count(merged, len(data))
            if "items" not in sch:
                sch["items"] = [merged]
            else:
                previous = sch["items"]
                if isinstance(previous, list):
                    previous = previous[0] if previous else None
                # Keep "items" a one-element list so that the next merge can
                # index it again.
                sch["items"] = [merge_schema_trees(previous, merged, item_path, "*", True)]
        if "items" in sch:
            sch["type"] = "array"
            return 1
        return 0

    if isinstance(data, (str, unicode)):
        typename = "string"
    elif isinstance(data, bool):
        # check for boolean must be before check for int!
        typename = "boolean"
    elif isinstance(data, (int, float, long, complex)):
        typename = "number"
    elif data is None:
        typename = "null"
    else:
        return 0

    if "type" in sch:
        merge_types(sch, typename, path)
        return 1

    sch["type"] = typename
    if typename == "string" and "//" in data:
        sch["format"] = "uri"
    return 1


def merge_types(sch, typename, path):
    """Add *typename* to the ``type`` entry already present in *sch*.

    *sch* is modified in place: a list of types gets *typename* appended if
    missing; a single different type name becomes a two-element list; a
    ``type`` value of any other kind is replaced by *typename*.

    Args:
        sch: schema dict that already has a ``type`` key.
        typename: type name to merge in.
        path: location of the node, used in log messages.
    """
    current = sch["type"]
    if isinstance(current, list):
        if typename not in current:
            current.append(typename)
    elif isinstance(current, (str, unicode)):
        if current != typename:
            logging.warning("MERGE TYPES for path {}, {} + {}".format(path, sch.get("type"), typename))
            sch["type"] = [current, typename]
    else:
        logging.warning("CREATING SCHEMA: field 'type' had strange type: {}, now it is {}".format(type(current), typename))
        sch["type"] = typename


def merge_schema_trees(a, b, path, lastkey, isarray=False):
    """Merge two schema nodes and return the merged node.

    Rules, by node kind:
      * ``None`` on either side yields the other side.
      * dict + dict: keys are merged recursively into a shallow copy of *a*.
        Under the ``__property_count`` key the values are counters: they are
        added, or with *isarray* the minimum is taken for keys present in *b*.
      * list + list: elements are merged pairwise, the longer tail is kept.
      * list + string: the string is added to a copy of the list if missing.
      * string + string: equal strings stay as is, different ones become a
        two-element list.

    Args:
        a, b: nodes to merge.
        path: location of the nodes, used in log and error messages.
        lastkey: key under which these nodes are stored in their parent.
        isarray: use minimum instead of sum for property counters.

    Raises:
        Exception: the two nodes have kinds that cannot be merged.
    """
    if a is None:
        return b

    if b is None:
        return a

    if isinstance(a, dict):
        if not isinstance(b, dict):
            raise Exception("Different types by path {}".format(path))

        merged = dict(a)
        if lastkey == "__property_count":
            if isarray:
                merged.update({k: min(a.get(k, 0), b[k]) for k in b})
            else:
                merged.update({k: a.get(k, 0) + b[k] for k in b})
        else:
            merged.update({k: merge_schema_trees(a.get(k), b[k], path + "/" + k, k, isarray) for k in b})
        return merged

    if isinstance(a, list):
        if isinstance(b, list):
            # NOTE(review): isarray is not forwarded to list elements, so
            # counters nested below a list are always summed - confirm that
            # this is intended.
            return [merge_schema_trees(x, y, path + "/*", "*") for x, y in itertools.izip_longest(a, b)]
        if isinstance(b, (str, unicode)):
            if b in a:
                return a
            logging.warning("MERGE SCHEMA: appending string to array. Path {}, array {}, appending {}".format(path, a, b))
            # list.append() returns None; build and return the extended list.
            return a + [b]
        raise Exception("Different types by path {}".format(path))

    if isinstance(a, (str, unicode)) and isinstance(b, (str, unicode)):
        if a == b:
            return a
        logging.warning("MERGE SCHEMA: Path {}: join two strings ({} + {}) into array.".format(path, a, b))
        return [a, b]

    raise Exception("Strange type of nodes by path {}: {}, {}".format(path, a, b))


def reduce_property_count(sch, limit):
    """Turn ``__property_count`` counters into 0/1 flags throughout *sch*.

    Walks dicts and lists recursively, in place. Every counter below *limit*
    becomes 0, every other counter becomes 1. The values stored under the
    ``required`` and ``__property_count`` keys are not descended into.
    """
    if isinstance(sch, list):
        for element in sch:
            reduce_property_count(element, limit)
        return

    if not isinstance(sch, dict):
        return

    if "__property_count" in sch:
        counters = sch["__property_count"]
        for name in counters:
            counters[name] = 0 if counters[name] < limit else 1

    for key, child in sch.items():
        if key != "required" and key != "__property_count":
            reduce_property_count(child, limit)


def make_required(sch, limit):
    """Convert ``__property_count`` counters into ``required`` lists, in place.

    Walks dicts and lists recursively. In every dict with a
    ``__property_count`` map, each property whose counter equals *limit* is
    moved into that dict's ``required`` list and removed from the map; the
    map itself is removed once it is empty. ``required`` is only written
    when at least one property qualified. The values stored under the
    ``required`` and ``__property_count`` keys are not descended into.
    """
    if isinstance(sch, list):
        for element in sch:
            make_required(element, limit)
        return

    if not isinstance(sch, dict):
        return

    if "__property_count" in sch:
        counters = sch["__property_count"]
        # Select first, delete afterwards: removing entries while iterating
        # the dict itself is only safe when keys() returns a copy.
        required = [name for name in list(counters) if counters[name] == limit]
        for name in required:
            del counters[name]
        if not counters:
            del sch["__property_count"]
        if required:
            sch["required"] = required

    for key in list(sch):
        if key == "required" or key == "__property_count":
            continue
        make_required(sch[key], limit)
