#!/usr/bin/env python
# encoding: utf-8

from gevent import monkey
monkey.patch_all()

from argparse import ArgumentParser
import json
import gevent
from gevent.subprocess import Popen, PIPE
import requests
import datetime
from collections import defaultdict
import random
import six
import sys
import os
import urlparse


# Number of trailing log lines requested from `tail -n` each time the log file is (re)opened.
READ_DEPTH = 20


def log(msg):
    """Print ``msg`` prefixed with the current local time (HH:MM:SS) and flush stdout."""
    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    print("%s: %s" % (stamp, msg))
    # Flush right away so the line is visible even when stdout is piped.
    sys.stdout.flush()


class Replicator(object):
    """Replays offercache requests, and optionally follow-up redirects, on a replica.

    A configurable percentage of the requests passed to ``replicate`` is
    re-issued against ``offercache_url``.  For responses with status 200 a
    random partner link from the response may additionally be replayed
    against ``redir_url``, either immediately or after a random delay.
    HTTP sessions are taken from a fixed-size pool; when the pool is empty
    the request is dropped and counted via ``stat_owner``.

    NOTE(review): all HTTP requests are issued without a timeout, so a hung
    replica keeps its session out of the pool indefinitely - consider
    passing ``timeout=`` to ``session.get``.
    """
    # Marker that starts the path+query part of a partner link.
    REDIR_REQ_START = '/redir?'

    def __init__(self, offercache_url, redir_url, offercache_percent, session_count, stat_owner, redir_percent,
                 delayed_redir_percent, min_redir_delay_sec, max_redir_delay_sec):
        """Store the configuration and create ``session_count`` pooled sessions.

        :param offercache_url: base URL the replayed request is appended to
        :param redir_url: base URL the extracted redirect request is appended to
        :param offercache_percent: share of incoming requests to replay (may exceed 100)
        :param session_count: size of the HTTP session pool
        :param stat_owner: object whose ``stat`` attribute receives the counters
        :param redir_percent: chance (0-100) of replaying a redirect immediately
        :param delayed_redir_percent: chance (0-100) of replaying a redirect later
        :param min_redir_delay_sec: lower bound (inclusive) of the delay
        :param max_redir_delay_sec: upper bound (exclusive) of the delay
        """
        self.offercache_url = offercache_url
        self.redir_url = redir_url
        self.offercache_percent = offercache_percent
        self.redir_percent = redir_percent
        self.delayed_redir_percent = delayed_redir_percent
        self.min_redir_delay_sec = min_redir_delay_sec
        self.max_redir_delay_sec = max_redir_delay_sec
        self.stat_owner = stat_owner
        self.budget = 0
        self.free_sessions = [requests.session() for _ in range(session_count)]

    def replicate(self, req):
        """Replay ``req`` according to the configured percentage.

        Every call adds ``offercache_percent`` to the budget and each full
        100 units pays for one replay, so 50 replays every second request
        and 250 replays each request two or three times.  If no session is
        free the replay is dropped (the budget is still spent) and counted.
        """
        self.budget += self.offercache_percent
        while self.budget >= 100:
            self.budget -= 100
            if self.free_sessions:
                session = self.free_sessions.pop()
                gevent.spawn(self.replicate_glet, req, session)
                # Yield so the spawned greenlet gets a chance to start.
                gevent.sleep(0.001)
            else:
                self.stat_owner.stat.on_no_free_session()

    def replicate_glet(self, req, session):
        """Greenlet body: issue ``req`` on the replica and maybe replay a redirect.

        Any exception is recorded through ``on_fail``; otherwise ``on_ok`` is
        recorded.  The session always goes back to the pool.
        """
        url = self.offercache_url + req
        try:
            rsp = session.get(url)
            if rsp.status_code == 200:
                self._replay_redirects(rsp.text, session)
            self.stat_owner.stat.on_ok()
        except Exception as e:
            self.stat_owner.stat.on_fail(e)
        finally:
            # `finally` also covers BaseException (e.g. a killed greenlet),
            # which would otherwise shrink the pool permanently.
            self.free_sessions.append(session)

    def _replay_redirects(self, rsp_text, session):
        """Roll the dice for the immediate and the delayed redirect replay."""
        want_now = random.randrange(0, 100) < self.redir_percent
        want_later = random.randrange(0, 100) < self.delayed_redir_percent
        if not (want_now or want_later):
            return
        # Parse the response once, even when both replays were selected.
        urls = self._collect_redirect_urls(rsp_text)
        if not urls:
            return
        if want_now:
            session.get(random.choice(urls), allow_redirects=False)
        if want_later:
            gevent.spawn_later(self._pick_redir_delay(), self._delayed_redirect, random.choice(urls))

    def _pick_redir_delay(self):
        """Return a random delay in [min, max); ``min`` when the range is empty."""
        low, high = self.min_redir_delay_sec, self.max_redir_delay_sec
        if high <= low:
            # random.randrange raises ValueError on an empty range.
            return low
        return random.randrange(low, high)

    def _collect_redirect_urls(self, rsp):
        """Return replica redirect URLs for every usable partner link in ``rsp``.

        ``rsp`` is the JSON text of an offercache response.  Links are read
        from ``Hotels[*].Prices[*].PartnerLink``; links that do not contain
        ``REDIR_REQ_START`` cannot be replayed and are skipped.
        """
        hotels = json.loads(rsp).get('Hotels')
        if not hotels:
            return []
        urls = []
        for hotel in hotels.values():
            for price in hotel.get('Prices') or ():
                link = price.get('PartnerLink')
                if not link:
                    continue
                req_index = link.find(self.REDIR_REQ_START)
                if req_index < 0:
                    continue
                urls.append(self.redir_url + link[req_index:])
        return urls

    def _extract_redirect_url(self, rsp):
        """Return one random replica redirect URL from ``rsp``, or None if there is none."""
        urls = self._collect_redirect_urls(rsp)
        if not urls:
            return None
        return random.choice(urls)

    def _delayed_redirect(self, redir_url):
        """Greenlet body: replay ``redir_url`` using a pooled session.

        Drops the request (and counts it) when no session is free.  Request
        errors are recorded through ``on_fail`` instead of escaping the
        greenlet.
        """
        if not self.free_sessions:
            self.stat_owner.stat.on_no_free_session()
            return
        session = self.free_sessions.pop()
        try:
            session.get(redir_url, allow_redirects=False)
        except Exception as e:
            self.stat_owner.stat.on_fail(e)
        finally:
            self.free_sessions.append(session)


class Fail(object):
    """Record for one failure cause: occurrence count and last message text."""

    def __init__(self):
        self.cnt = 0   # how many times this cause was seen
        self.txt = ''  # text of the most recent occurrence

    def __repr__(self):
        summary = (self.cnt, self.txt)
        return "%s times, last: '%s'" % summary


class Stat(object):
    """Counters collected between two statistics reports."""

    def __init__(self):
        self.no_free_session_cnt = 0
        self.ok_cnt = 0
        # Exception class -> Fail record (count and last message)
        self.failed = defaultdict(Fail)

    def on_ok(self):
        """Count one successful request."""
        self.ok_cnt = self.ok_cnt + 1

    def on_fail(self, e):
        """Count exception ``e`` under its class and remember its text."""
        record = self.failed[e.__class__]
        record.cnt = record.cnt + 1
        record.txt = str(e)

    def on_no_free_session(self):
        """Count one request dropped because the session pool was empty."""
        self.no_free_session_cnt = self.no_free_session_cnt + 1


class StatOwner(object):
    """Holds the current ``Stat`` and periodically logs and resets it."""

    def __init__(self):
        """Create an empty ``Stat`` and spawn the reporting greenlet."""
        self.stat = Stat()
        self.printer = gevent.spawn(self.print_loop)

    def print_loop(self):
        """Every 10 seconds swap in a fresh ``Stat`` and log the previous one. Never returns."""
        while True:
            gevent.sleep(10)
            stat = self.stat
            self.stat = Stat()
            log(self._format_report(stat))

    @staticmethod
    def _format_report(stat):
        """Build the report line for ``stat``.

        "Failed" is the total number of recorded failures (not the number of
        distinct causes), so it is comparable with the OK counter; causes
        are listed most frequent first.
        """
        failed = sorted(stat.failed.items(), key=lambda p: p[1].cnt, reverse=True)
        total_failed = sum(fail.cnt for _, fail in failed)
        return "OK: %s, NoFreeSession: %s, Failed: %s, causes: %s" % (
            stat.ok_cnt, stat.no_free_session_cnt, total_failed, failed)


class LineParseException(Exception):
    """Raised when a log line does not have the expected structure."""


class LineIter(object):
    """Cursor over a single log line that extracts delimited tokens left to right."""

    def __init__(self, line):
        self.line = line
        self.pos = 0

    def current(self, msg='something'):
        """Return the character under the cursor.

        :param msg: what the caller was looking for; used in the error text
        :raises LineParseException: if the cursor is at the end of the line
            (previously this surfaced as a bare IndexError)
        """
        if self.pos >= len(self.line):
            raise LineParseException("Unexpected EOL while looking for " + msg)
        return self.line[self.pos]

    def advance(self, msg='something'):
        """Move the cursor one character forward.

        Landing exactly on the end of the line is allowed (that is where the
        cursor ends up after consuming a trailing terminator); moving past it
        raises LineParseException.
        """
        self.pos += 1
        if self.pos > len(self.line):
            raise LineParseException("Unexpected EOL while looking for " + msg)

    def extract(self, start_char, end_char=None):
        """Return the next token and move the cursor past its terminator.

        Leading spaces are skipped.  If ``start_char`` is not a space the
        token must begin with it (the delimiter itself is not returned).
        The token runs up to, and excludes, ``end_char``, which defaults to
        ``start_char``.

        :raises LineParseException: on a missing opening delimiter or when
            the line ends before the token is complete
        """
        if end_char is None:
            end_char = start_char
        while self.current('non-space') == ' ':
            self.advance('non-space')
        if start_char != ' ':
            found = self.current("'%s'" % start_char)
            if found != start_char:
                raise LineParseException("Expected '%s' got '%s' at pos %s" % (start_char, found, self.pos))
            self.advance('contents')
        start_pos = self.pos
        terminator = "'%s'" % end_char
        while self.current(terminator) != end_char:
            self.advance(terminator)
        res = self.line[start_pos:self.pos]
        self.advance('future')
        return res

    def tail(self):
        """Return the not yet consumed remainder of the line."""
        return self.line[self.pos:]


def read_log(filename, args, kwargs):
    """Follow ``filename`` with ``tail -F`` forever and feed each line to ``process_line``.

    ``args`` / ``kwargs`` are passed through to ``process_line`` after the
    line itself.  When ``tail`` exits, when more than 100 consecutive lines
    fail to process, or on any other error, the ``tail`` process is stopped
    and a new one is started after a short pause.  Never returns.
    """
    while True:
        error_count = 0
        p = None
        try:
            log("Opening log file '%s'" % filename)
            tail_args = ['tail', '-F', filename, '-n', str(READ_DEPTH)]
            # tail's stderr is discarded rather than piped: nobody reads the
            # pipe, so tail would block once the pipe buffer filled up.
            with open(os.devnull, 'wb') as devnull:
                p = Popen(tail_args, stdout=PIPE, stderr=devnull, shell=False, close_fds=True)
            while True:
                line = p.stdout.readline()
                if not line:
                    # Empty read means EOF: tail has exited.
                    raise Exception("tail exited, restart")
                try:
                    line = line.decode('utf-8')
                    process_line(line, *args, **kwargs)
                    error_count = 0
                except Exception as e:
                    print("Failed to process log line: %s" % str(e))
                    print("Line: %s" % line)
                    error_count += 1
                    if error_count > 100:
                        raise Exception("Too much errors, restart")
        except Exception as e:
            print("Error during log monitoring: %s" % str(e))
        finally:
            if p is not None:
                _stop_tail(p)
        # Avoid a hot loop when tail keeps failing immediately.
        gevent.sleep(1)


def _stop_tail(proc):
    """Kill and reap the ``tail`` process and close its stdout pipe."""
    try:
        if proc.poll() is None:
            proc.kill()
        proc.wait()
    except OSError:
        # The process is already gone; nothing left to clean up.
        pass
    if proc.stdout is not None:
        proc.stdout.close()


def start_log_mon(filename, *args, **kwargs):
    """Spawn a greenlet running ``read_log`` for ``filename`` and return it.

    Extra positional and keyword arguments are handed to ``read_log`` as a
    tuple and a dict respectively.
    """
    monitor = gevent.spawn(read_log, filename, args, kwargs)
    return monitor


def process_line(line, replicators):
    """Parse one log line and hand a served ``/read`` request to every replicator.

    Only ``INFO:`` lines whose job id starts with ``IdS_`` or ``IdH_``, whose
    action is ``Served`` and whose quoted request starts with ``GET /read?``
    are replayed.  Requests with ``GeoClientId=travel.portal`` are skipped.
    """
    fields = LineIter(line)
    if fields.extract(' ') != 'INFO:':
        return
    # Skip date, time, time offset and file:line.
    for _ in range(4):
        fields.extract(' ')
    job_id = fields.extract(' ')  # Id_xxx:
    if not job_id.startswith(("IdS_", "IdH_")):
        return
    if fields.extract(' ') != 'Served':
        return
    quoted = fields.extract("'")
    if not quoted.startswith('GET /read?'):
        return
    # Drop the leading "GET " so only the path and query remain.
    req = quoted[4:]
    parsed = urlparse.urlparse('http://localhost' + req)
    params = urlparse.parse_qs(parsed.query)
    geoclient_id = params.get('GeoClientId')
    is_portal = geoclient_id is not None and geoclient_id[0] == 'travel.portal'
    if is_portal:
        # Do not replicate, because portal is replicated via API request replicator
        return
    for replicator in replicators:
        replicator.replicate(req)


def main():
    """Parse the command line, build one Replicator per replica pair and follow the log.

    ``--offercache-replica-url`` and ``--redir-replica-url`` may be repeated;
    the n-th offercache URL is paired with the n-th redir URL.  Blocks until
    the log monitoring greenlet finishes.
    """
    parser = ArgumentParser()
    parser.add_argument('--log-file', required=True)
    # Both URL lists are required: without them there is nothing to replicate
    # to, and the length check below would fail on None.
    parser.add_argument('--offercache-replica-url', action='append', required=True)
    parser.add_argument('--redir-replica-url', action='append', required=True)
    parser.add_argument('--offercache-replica-percent', required=True, type=int)
    parser.add_argument('--redir-replica-percent', required=True, type=int)
    parser.add_argument('--delayed-redir-replica-percent', default=0, type=int)
    parser.add_argument('--session-count', type=int, default=100)
    parser.add_argument('--min-redir-delay-sec', type=int, default=600)
    parser.add_argument('--max-redir-delay-sec', type=int, default=3600)

    args = parser.parse_args()

    if len(args.offercache_replica_url) != len(args.redir_replica_url):
        parser.error("Please specify as many redir replicas as offercache ones")
    if args.min_redir_delay_sec > args.max_redir_delay_sec:
        parser.error("--min-redir-delay-sec must not be greater than --max-redir-delay-sec")
    print(args.offercache_replica_url)
    print(args.redir_replica_url)

    # Disable buffering
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    replicators = []
    for offercache_url, redir_url in zip(args.offercache_replica_url, args.redir_replica_url):
        # Each replicator gets its own StatOwner, i.e. its own periodic report.
        r = Replicator(
            offercache_url,
            redir_url,
            args.offercache_replica_percent,
            args.session_count,
            StatOwner(),
            args.redir_replica_percent,
            args.delayed_redir_replica_percent,
            args.min_redir_delay_sec,
            args.max_redir_delay_sec,
        )
        replicators.append(r)
    glet = start_log_mon(args.log_file, replicators)
    glet.join()


# Run the replicator only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
