# -*- coding: utf-8 -*-

import urllib
import urllib2
import httplib
import logging

# Public API: the names exported by a star-import of this module.
__all__ = ['URLgen', 'Fetcher']


class URLgen(object):
    """
        Iterable that joins every URL head with each URL tail.

        Iterating yields one list per non-empty tail; the list holds
        ``head + tail`` for every head in ``urls``, in the order of ``urls``.
        Every call to ``iter()`` starts a fresh pass (when tails is a file
        path, the file is reopened each time).
    """

    def __init__(self, urls, tails):
        """
            urls  -- list of URL heads (prefixes); it is traversed once per
                     tail, so it must be re-iterable
            tails -- list/tuple of URL tails, OR the path of a text file
                     holding one tail per line

            The arguments are only stored here; they are checked lazily,
            when iteration starts.
        """
        self.urls = urls
        self.tails = tails

    def __iter__(self):
        """Return a new generator over the combined URL lists."""
        return URLgen.__urlgen(self.urls, self.tails)

    @staticmethod
    def __combine(urls, lines):
        """
            Yield ``[head + tail for head in urls]`` for each usable line.

            Leading/trailing CR and LF characters are stripped from every
            line; lines that end up empty are skipped.
        """
        for line in lines:
            tail = line.strip('\r\n')
            if not tail:
                continue
            yield [head + tail for head in urls]

    @staticmethod
    def __urlgen(urls, tails):
        """
            Generator behind ``__iter__``.

            Raises IOError (after logging it) when the tails file cannot be
            opened or read, and TypeError when ``tails`` is neither a
            list/tuple nor a string path.
        """
        # isinstance (not an exact type() match) so that list/tuple
        # subclasses are accepted as well.
        if isinstance(tails, (list, tuple)):
            for group in URLgen.__combine(urls, tails):
                yield group
        elif isinstance(tails, basestring):
            try:
                with open(tails) as tailsfile:
                    for group in URLgen.__combine(urls, tailsfile):
                        yield group
            except IOError:
                logging.error('couldn\'t open or read from tails file')
                raise
        else:
            raise TypeError('tails parameter must be file or list, got {0}'.format(type(tails)))


class Fetcher(object):
    """
        Callable that downloads the raw contents behind a URL.

        quote -- when true, the query text after ``text=`` is percent-quoted
                 before the request is made (see ``quote_url``)
        safe  -- when true, a failed download returns '' instead of raising
                 IOError
    """

    def __init__(self, quote=True, safe=False):
        self.quote = quote
        self.safe = safe

    def __call__(self, url, n=3):
        """
            Fetch ``url``, making at most ``n`` attempts.

            Returns the response body.  When every attempt fails, returns ''
            in safe mode and raises IOError otherwise.
        """
        if self.quote:
            url = self.quote_url(url)
        try:
            return self.__getter(url, n)
        except IOError:
            if self.safe:
                return ''
            raise

    @staticmethod
    def quote_url(url):
        """
            Percent-quote the text part of a search url.

            Everything after the first ``text=`` is treated as the query
            text, so the url is expected to end with ``text=[query text]``.
            A url without ``text=`` is returned unchanged.
        """
        marker = 'text='
        start = url.find(marker)
        if start < 0:
            # Nothing to quote.  Slicing from find()'s -1 would treat the
            # last character as the query and mangle the whole url.
            return url
        split = start + len(marker)
        query = url[split:]
        # Only unicode text needs encoding; a byte string is passed through
        # as-is (encoding it would first decode it as ASCII and fail on any
        # non-ASCII byte).
        if not isinstance(query, str):
            query = query.encode('utf-8')
        return url[:split] + urllib.quote(query)

    def __getter(self, url, n):
        """
            Try to read ``url`` up to ``n`` times (30 second timeout each).

            Returns the body of the first successful read; raises IOError
            once all attempts have failed.
        """
        for _ in range(n):  # n tries, then proceed
            # Lazy logging args: formatting happens inside the logging
            # module, so a non-ASCII url cannot raise from here.
            logging.info('Trying to read %s', url)
            try:
                response = urllib2.urlopen(url, timeout=30)
                try:
                    res = response.read()
                finally:
                    # Release the connection even when read() fails.
                    response.close()
            except httplib.IncompleteRead:
                logging.error('INCOMPLETE READ')
            except urllib2.URLError:
                logging.error('URL ACCESS ERROR')
            except httplib.HTTPException as e:
                logging.error('HTTP EXCEPTION %s', e)
            except StandardError as e:
                logging.info('some connection error: %s', e)
            else:
                logging.info('READ SUCCESSFULLY')
                return res
        logging.error('Retry limit reached. Skipping...')
        raise IOError('Couldn\'t access url')
