#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
import requests
import urllib
import urlparse
import time
import base64
import hashlib
import json
import itertools
from multiprocessing import Pool
from argparse import ArgumentParser


def json_find(path, JSON):
    """Return the value reached in ``JSON`` by following a dotted key path.

    ``path`` is split on ``'.'`` and every piece is used, in order, as a
    subscript on the value obtained so far (``'a.b'`` -> ``JSON['a']['b']``).
    Whatever exception the subscript raises for a missing key is propagated
    to the caller unchanged.
    """
    node = JSON
    keys = path.split('.')
    for name in keys:
        node = node[name]
    return node


def iri2uri(iri, encoding='utf8'):
    "Takes a Unicode string that can contain an IRI and emits a URI."
    scheme, authority, path, query, frag = urlparse.urlsplit(iri)
    scheme = scheme.encode(encoding)
    if ":" in authority:
        host, port = authority.split(":", 1)
        authority = host.encode('idna') + ":%s" % port
    else:
        authority = authority.encode('idna')
    path = urllib.quote(
        path.encode(encoding),
        safe="/;%[]=:$&()+,!?*@'~"
    )
    query = urllib.quote(
        query.encode(encoding),
        safe="/;%[]=:$&()+,!?*@'~"
    )
    frag = urllib.quote(
        frag.encode(encoding),
        safe="/;%[]=:$&()+,!?*@'~"
    )
    return urlparse.urlunsplit((scheme, authority, path, query, frag))


def upload_url(image_url_and_params):
    image_dict, params = image_url_and_params
    image_url = json_find(params.url_field, image_dict)
    image_url = iri2uri(image_url)
    try_count = params.try_count
    cur_try = 0

    while cur_try < try_count:
        cur_try += 1
        try:
            namespace = params.namespace
            image_name = base64.urlsafe_b64encode(hashlib.md5(image_url).digest()[0:9])
            url_params = urllib.urlencode({'url': image_url})
            avatars_url = 'http://avatars-int.mds.yandex.net:13000/put-{namespace}/{image_name}?'.format(
                namespace=namespace,
                image_name=image_name
            ) + url_params

            res = requests.get(avatars_url)

            if res.status_code != 200 and res.status_code != 403 and res.status_code != 400:
                print res.content
                raise Exception('Can\'t upload image')

            response = json.loads(res.content)
            if res.status_code == 400 and response['status'] != 'error':
                print res.content
                raise Exception('Can\'t upload image 400 code')
            return response
        except Exception as ex:
            print 'Bad try put: ', image_url, '. Exception: ', str(ex)

        time.sleep(10)

    print 'Can\'t upload file', image_url
    return None


def get_url_info(image_url_and_params):
    image_dict, params = image_url_and_params
    image_url = json_find(params.url_field, image_dict)
    try_count = params.try_count
    cur_try = 0

    while cur_try < try_count:
        cur_try += 1
        try:
            matches = re.search('(mdst?).yandex.net/get-(.*?)/(.*?)/([^/]+)', image_url)
            if not matches:
                break
            env, namespace, group_id, image_name = matches.group(1), matches.group(2), matches.group(3), matches.group(4)

            avatars_url = 'http://avatars-int.{env}.yandex.net:13000/getimageinfo-{namespace}/{group_id}/{image_name}?'.format(
                env=env,
                namespace=namespace,
                group_id=group_id,
                image_name=image_name
            )

            res = requests.get(avatars_url)

            if res.status_code != 200 and res.status_code != 403 and res.status_code != 400:
                print res.content
                raise Exception('Can\'t upload image')

            response = json.loads(res.content)
            if res.status_code == 400 and response['status'] != 'error':
                print res.content
                raise Exception('Can\'t upload image 400 code')

            return response
        except Exception as ex:
            print 'Bad try put: ', image_url, '. Exception: ', str(ex)

        time.sleep(10)

    print 'Can\'t get file info', image_url
    return None


def main():
    parser = ArgumentParser(description='')
    parser.add_argument('--input_path', dest='input_path', required=True)
    parser.add_argument('--namespace', dest='namespace', required=True)
    parser.add_argument('--url_field', required=True)
    parser.add_argument('--element', dest='element', required=True)
    parser.add_argument('--try_count', dest='try_count', type=int, required=True)
    parser.add_argument('--pool_size', dest='pool_size', type=int, required=True)
    parser.add_argument('--result', dest='result', required=True)
    parser.add_argument('--getimageinfo', dest='getimageinfo', action='store_true')
    parser.add_argument('--errors', required=True)
    params = parser.parse_args()

    with open(params.input_path) as input_file:
        image_urls = json.load(input_file)

    pool = Pool(params.pool_size)
    upload_result = []

    avatars_fn = get_url_info if params.getimageinfo else upload_url

    try:
        # for idx, item in enumerate(image_urls):
        #    upload_result.append(avatars_fn((item, params)))
        upload_result = pool.map(avatars_fn, itertools.izip(image_urls, itertools.repeat(params)))
    except KeyboardInterrupt:
        print 'Caught KeyboardInterrupt, terminating workers'
        pool.terminate()
        pool.join()

    result = []
    errors = []

    for idx, image_dict in enumerate(image_urls):
        avatars_url = ''
        uploaded_info = upload_result[idx]

        if params.getimageinfo:
            # в режиме getimageinfo считаем, что avatars_url подали на вход
            avatars_url = json_find(params.url_field, image_dict)
        elif params.element != '' and uploaded_info is not None:
            if 'sizes' in uploaded_info:
                avatars_url = 'http://avatars.mds.yandex.net{path}'.format(
                    path=uploaded_info['sizes'][params.element]['path']
                )
            elif 'attrs' in uploaded_info and 'sizes' in uploaded_info['attrs']:
                avatars_url = 'http://avatars.mds.yandex.net{path}'.format(
                    path=uploaded_info['attrs']['sizes'][params.element]['path']
                )

        if avatars_url:
            result.append({
                'input_dict': image_dict,
                'info': uploaded_info,
                'avatars_url': avatars_url
            })
        else:
            errors.append({
                'input_dict': image_dict,
            })

    with open(params.result, 'w') as f_out:
        json.dump(result, f_out, indent=4)

    with open(params.errors, 'w') as f_out:
        json.dump(errors, f_out, indent=4)


# Run the command-line tool only when this file is executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
