import json
import logging
import re
import requests
import urllib

from sandbox.projects.common import decorators

# Top-level domain returned by split_query_line when a line has no tld field.
DEFAULT_TLD = "ru"

# CGI parameters sent with every search request; _get_json_response
# concatenates the per-query CGI string directly after this one.
DEFAULT_CGI_PARAMS = "&".join([
    "json_dump=searchdata.images",
    "nocache=da",
    "no-tests=da",
    "srcask=IMAGES",
    "numdoc=120",
])

# Default value of the shard_index filter in get_image_urls.
DEFAULT_SHARD_INDEX = "000"

# Passed as `timeout` to every requests call in this module.
DEFAULT_TIMEOUT = 60

# The first group captures whatever is enclosed in "similarnn{...}" in the
# text of the cbirdaemon response.
SIGNATURE_RE = re.compile(r'similarnn{(.+)}')

# Serialized with json.dumps and sent in the "RequestInfo" header by
# get_image_signature.
CBIRDAEMON_REQUEST = {
    "ImageFeatures": [{"Name": "FeatV9"}],
    "ImageCropFeatures": [{"Name": "FeatCropV9"}],
    "Image2TextFeatures": [{"Name": "I2TVer11"}],
    "CbirFeatures": {"Dscr": False, "Quant": True},
    "ImageInfo": {},
    "ImageClassification": {"Name": "Classification9"},
    "AutoClassification": {"Name": "AutoVer9"},
    "Barcode": {},
}


def split_query_line(line):
    """Split a tab-separated query line into its four components.

    The line is stripped of trailing newlines and split on tabs. Fields are
    taken positionally: text, region, CGI parameters, tld. Missing region
    or CGI parameters become "", a missing tld becomes DEFAULT_TLD, and any
    fields after the fourth are ignored.

    Returns a tuple (query_text, query_region, query_cgi_params, query_tld).
    """
    columns = line.rstrip('\n').split('\t')
    count = len(columns)

    text = columns[0]
    region = "" if count < 2 else columns[1]
    cgi_params = "" if count < 3 else columns[2]
    if count < 4:
        tld = DEFAULT_TLD
    else:
        tld = columns[3]
    return text, region, cgi_params, tld


def join_query_line(query_text, query_region, query_cgi_params, query_tld):
    """Build a tab-separated query line from its four components.

    The fields are joined in the order text, region, CGI parameters, tld;
    no trailing newline is appended.
    """
    fields = [query_text, query_region, query_cgi_params, query_tld]
    return "\t".join(fields)


def get_image_urls(url, query_text, query_cgi_params, shard_index=DEFAULT_SHARD_INDEX):
    """Return the list of image URLs found for a search query.

    Args:
        url: base search URL handed to _get_json_response.
        query_text: text of the query.
        query_cgi_params: extra CGI parameters for this query.
        shard_index: only documents whose shard component equals this value
            are used; pass None to accept documents from every shard.

    Returns:
        A list of "img_href" values. This is best-effort: if the request or
        the parsing of the response fails, the problem is logged and an
        empty list is returned.
    """
    try:
        response = _get_json_response(url, query_text, query_cgi_params)
        # _extract_image_urls is a generator function: nothing in it runs
        # until it is iterated. Materialize it here so that a malformed
        # response fails inside this try block instead of later in the
        # caller, and so that both paths return a list.
        return list(_extract_image_urls(response, shard_index))
    except Exception as e:
        logging.info("Problem during processing [{},{},{}] : {}".format(url, query_text, query_cgi_params, e))
        return []


def get_image_thumbs(url, query_text, query_cgi_params):
    """Return the list of thumbnail ids found for a search query.

    Args:
        url: base search URL handed to _get_json_response.
        query_text: text of the query.
        query_cgi_params: extra CGI parameters for this query.

    Returns:
        A list of "global_img_id" values. This is best-effort: if the
        request or the parsing of the response fails, the problem is logged
        and an empty list is returned.
    """
    try:
        response = _get_json_response(url, query_text, query_cgi_params)
        # _extract_image_thumbs is a generator function: nothing in it runs
        # until it is iterated. Materialize it here so that a malformed
        # response fails inside this try block instead of later in the
        # caller, and so that both paths return a list.
        return list(_extract_image_thumbs(response))
    except Exception as e:
        logging.info("Problem during processing [{},{},{}] : {}".format(url, query_text, query_cgi_params, e))
        return []


@decorators.retries(max_tries=3, delay=1)
def get_image_signature(cbirdaemon_url, image_url):
    """Fetch an image and extract its signature from the cbirdaemon reply.

    The image is downloaded from image_url and uploaded as a multipart file
    to cbirdaemon_url together with the CBIRDAEMON_REQUEST description.
    Both HTTP calls skip certificate verification and call
    raise_for_status(), so a non-success status raises.

    Returns the first group of SIGNATURE_RE found in the reply text, or
    None when the pattern does not match.

    NOTE(review): the retries decorator presumably re-invokes this function
    when it raises -- confirm against sandbox.projects.common.decorators.
    """
    download = requests.get(image_url, verify=False, timeout=DEFAULT_TIMEOUT)
    download.raise_for_status()

    upload = {'upfile': ('somefilename', download.content)}
    request_headers = {
        "Sigrequest": "configurable_v2",
        "RequestInfo": json.dumps(CBIRDAEMON_REQUEST),
    }
    reply = requests.post(
        cbirdaemon_url,
        verify=False,
        files=upload,
        headers=request_headers,
        timeout=DEFAULT_TIMEOUT,
    )
    reply.raise_for_status()

    found = SIGNATURE_RE.search(reply.text)
    return found.group(1) if found else None


def _extract_image_urls(response, shard_index):
    """Lazily yield "img_href" of every preview duplicate in the response.

    For each document under "searchdata.images", the third component from
    the end of its dash-separated "shard" string is compared with
    shard_index; documents that do not match are skipped. A shard_index of
    None disables the filter, but the shard string is still parsed.
    """
    for document in response["searchdata.images"]:
        shard_parts = document["shard"].split('-')
        document_shard = shard_parts[-3]
        if shard_index is not None and document_shard != shard_index:
            continue
        for duplicate in document["preview_dups"]:
            yield duplicate["img_href"]


def _extract_image_thumbs(response):
    """Lazily yield "global_img_id" of every preview duplicate.

    Walks all documents under "searchdata.images" in order and, within each
    document, all entries of "preview_dups".
    """
    thumb_ids = (
        duplicate["global_img_id"]
        for document in response["searchdata.images"]
        for duplicate in document["preview_dups"]
    )
    for thumb_id in thumb_ids:
        yield thumb_id


@decorators.retries(max_tries=3, delay=1)
def _get_json_response(url, query_text, query_cgi_params):
    """Run one search request and return the decoded JSON body.

    The request URL is url, followed by DEFAULT_CGI_PARAMS with
    query_cgi_params concatenated directly after it, followed by
    "&text=" and the fully percent-encoded query_text. The request skips
    certificate verification and raises on a non-success HTTP status.

    NOTE(review): no separator is inserted here, so url presumably ends
    with "?" or "&" and query_cgi_params presumably starts with "&" when
    non-empty -- confirm against callers.
    """
    cgi_string = DEFAULT_CGI_PARAMS + query_cgi_params
    # safe='' makes "/" percent-encoded as well.
    encoded_text = urllib.quote(query_text, safe='')
    request_url = "{}{}&text={}".format(url, cgi_string, encoded_text)

    reply = requests.get(request_url, verify=False, timeout=DEFAULT_TIMEOUT)
    reply.raise_for_status()
    return reply.json(encoding="utf-8")
