# coding: utf-8
"""
Sample image URLs for CBIR tests from IMAGES_RATED_URLS.
"""
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.common.types.misc import OSFamily
from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers
import requests
import random
import imghdr

# Relative path of the output file: registered as the output resource path in
# on_enqueue() and written (one URL per line) in on_execute().
_OUTPUT_RESOURCE_FILENAME = "image_urls"
# Task-context key under which the id of the created output resource is stored.
_CTX_OUTPUT_RESOURCE_ID = "output_resource_id"
# Timeout passed to requests.get() when probing a candidate URL.
_REQUESTS_GET_TIMEOUT = 1.0  # seconds


class InputResource(ResourceSelector):
    name = "input_resource_id"
    description = "Input resource"
    resource_type = resource_types.IMAGES_RATED_URLS


# Task parameter giving how many validated image URLs the task must emit.
# on_execute() reads it from the task context under ``name`` and converts it
# with int() before sampling.
class NumberOfSamples(SandboxIntegerParameter):
    name = "number_of_samples"
    description = 'Number of URLs to sample'
    default_value = 1000
    required = True


class ImagesSampleCbirPlan(SandboxTask):
    """Sample reachable image URLs from an IMAGES_RATED_URLS resource.

    The task picks random lines from the input resource, keeps those that
    download successfully and contain a JPEG, GIF or PNG image, and writes the
    requested number of such URLs (one per line) to an IMAGES_CBIR_PLAN
    resource.
    """
    type = 'IMAGES_SAMPLE_CBIR_PLAN'

    input_parameters = (InputResource, NumberOfSamples)

    # Image formats (as named by ``imghdr.what``) that are accepted into the sample.
    _SUPPORTED_IMAGE_FORMATS = ("jpeg", "gif", "png")

    def on_enqueue(self):
        """Create the output resource and store its id in the task context."""
        SandboxTask.on_enqueue(self)
        resource = self.create_resource(
            self.descr,
            _OUTPUT_RESOURCE_FILENAME,
            resource_types.IMAGES_CBIR_PLAN,
            arch=OSFamily.ANY
        )
        self.ctx[_CTX_OUTPUT_RESOURCE_ID] = resource.id

    def on_execute(self):
        """Sample the URLs and write them, one per line, to the output file."""
        self.ensure_input_resource()
        input_resource_path = self.sync_resource(self.ctx[InputResource.name])
        sample_size = int(self.ctx[NumberOfSamples.name])
        with open(_OUTPUT_RESOURCE_FILENAME, 'w') as output_file:
            for url in self.sample_image_urls(input_resource_path, sample_size):
                # Explicit write() produces the same bytes as ``print >> file``
                # and is valid under both Python 2 and Python 3.
                output_file.write(url + "\n")

    def ensure_input_resource(self):
        """Make sure the task context holds an input resource id.

        When no input resource was selected explicitly, the resource returned
        by ``apihelpers.get_last_resource_with_attribute`` for the input
        resource type is used.

        Raises:
            SandboxTaskFailureError: if no such resource can be found.
        """
        if self.ctx.get(InputResource.name):
            return

        resource = apihelpers.get_last_resource_with_attribute(InputResource.resource_type)

        if not resource:
            raise SandboxTaskFailureError('Unable to find input resource')

        self.ctx[InputResource.name] = resource.id

    def sample_image_urls(self, input_path, count):
        """Yield ``count`` randomly chosen URLs that point to supported images.

        A line is chosen by seeking to a uniformly random byte offset and
        taking the first complete line that follows it, so a line is picked
        with probability proportional to the length of the line preceding it.
        Sampling is done with replacement: the same URL may be yielded twice.

        Lines that are blank or contain more than one whitespace-separated
        token are skipped, as are URLs that cannot be downloaded or whose
        content is not a JPEG, GIF or PNG image.

        NOTE: the loop runs until ``count`` URLs have been validated; it does
        not terminate if the input contains no reachable image URL at all.

        Args:
            input_path: path to a text file with one URL per line.
            count: number of URLs to yield.

        Raises:
            SandboxTaskFailureError: if URLs are requested from an empty file.
        """
        with open(input_path) as f:
            # Jump to the end of the file (whence=2) to measure its size.
            f.seek(0, 2)
            file_size = f.tell()
            if count > 0 and file_size == 0:
                # Without this guard the loop below would spin forever.
                raise SandboxTaskFailureError('Input resource is empty, nothing to sample')

            lines_sampled = 0
            while lines_sampled < count:
                # randrange() excludes file_size itself, so the offset always
                # points at an existing byte.
                f.seek(random.randrange(file_size))
                # Discard the (probably partial) line the offset landed in.
                f.readline()
                # The line of interest.
                raw_line = f.readline()
                if not raw_line:
                    # The offset was inside the last line: wrap around to the
                    # first line, which could never be selected otherwise.
                    f.seek(0)
                    raw_line = f.readline()

                candidate = raw_line.strip()
                if not candidate or len(candidate.split()) > 1:
                    continue

                # The yield is deliberately outside any try/except so that
                # GeneratorExit raised on close() is never swallowed.
                if self._is_supported_image(candidate):
                    yield candidate
                    lines_sampled += 1

    def _is_supported_image(self, url):
        """Return True if ``url`` downloads successfully as a supported image."""
        try:
            # NOTE(review): verify=False disables TLS certificate checks; kept
            # as in the original since only image bytes are inspected.
            response = requests.get(url, verify=False, timeout=_REQUESTS_GET_TIMEOUT)
            response.raise_for_status()
            content = response.content
        except Exception:
            # Best effort: any failure to fetch a candidate just means the URL
            # is not usable. Unlike a bare ``except:``, this does not intercept
            # KeyboardInterrupt, SystemExit or GeneratorExit.
            return False
        return imghdr.what(None, content) in self._SUPPORTED_IMAGE_FORMATS


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up — confirm.
__Task__ = ImagesSampleCbirPlan
