#!/usr/bin/env python
from __future__ import division

import sys, os
import threading
from six.moves.queue import Queue
from subprocess import check_output
from collections import defaultdict, Counter
import requests
import json
import subprocess
import mmap

def backwards(filename):
    """
    Yield the lines of ``filename`` from last to first, without the
    trailing newline characters.

    The file is memory-mapped and scanned from the end, so it is never
    loaded into memory as a whole.  The first item yielded is whatever
    follows the final newline, i.e. an empty string when the file ends
    with a newline.  An empty file yields nothing.

    Lines are yielded as native ``str``: on Python 3 the raw bytes are
    decoded as UTF-8 (undecodable bytes are replaced).
    """
    # Binary mode: mmap operates on raw bytes regardless of the file mode.
    with open(filename, 'rb') as f:
        # mmap refuses to map a zero-length file, so bail out early.
        if os.fstat(f.fileno()).st_size == 0:
            return
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            end = m.size()
            while end > 0:
                # rfind returns -1 when there is no newline left; the slice
                # below then covers the first line and the loop terminates.
                begin = m.rfind(b'\n', 0, end)
                line = m[begin + 1:end]
                if not isinstance(line, str):
                    line = line.decode('utf-8', 'replace')
                yield line
                end = begin
        finally:
            # Runs on exhaustion and also when the consumer abandons the
            # generator early, so the mapping is never leaked.
            m.close()

class Request(object):
    """A request to replay: the tag it was labelled with and its URL."""

    def __init__(self, tag, url):
        self.tag, self.url = tag, url

def done(queue):
    """
    Post the end-of-stream marker (``None``) on ``queue`` and immediately
    account for it with ``task_done()``, so the marker itself does not add
    to the queue's count of unfinished tasks.
    """
    end_marker = None
    queue.put(end_marker)
    queue.task_done()

class Worker(object):
    """
    Base class for queue consumers running in their own thread.

    Subclasses implement ``process(item, *args)``.  A ``None`` item is the
    end-of-stream marker: the worker re-posts it via ``done()`` (so other
    workers reading the same queue see it too) and exits.
    """
    trace = False

    def run(self, *args):
        """Start ``target(*args)`` in a new thread; ``args[0]`` is the queue."""
        threading.Thread(target=self.target, args=args).start()

    def target(self, queue, *args):
        """
        Consume ``queue`` until the ``None`` marker arrives, passing every
        item to ``self.process(item, *args)``.

        An exception raised by ``process`` is printed to stderr and the
        worker carries on with the next item.  ``task_done()`` is called for
        every item either way; otherwise a single failing item would leave
        the queue's unfinished-task count above zero and ``queue.join()``
        in the producer would block forever.
        """
        import traceback

        while True:
            t = queue.get()
            if t is None:
                return done(queue)
            try:
                self.process(t, *args)
            except Exception:
                traceback.print_exc()
            finally:
                queue.task_done()

class Fetcher(Worker):
    """
    Worker that replays one request against a host and reports per-request
    result counts on a second queue.
    """

    def __init__(self, test_mode):
        """
        ``test_mode`` selects the value of the ``test-candidates-mode``
        query parameter (1 when truthy, 0 otherwise) added to every request.
        """
        # Name this class, not the base: super(Worker, ...) would skip
        # Worker in the MRO.
        super(Fetcher, self).__init__()
        self.addparams = {'test-candidates-mode': 1 if test_mode else 0}

    def process(self, rq, host, count_queue):
        """
        GET ``http://<host><rq.url>``, parse the JSON reply and put a
        ``Counter`` with the keys ``total``, ``empty`` and ``errors`` on
        ``count_queue``.

        A reply with no ``client-results`` counts as one total and one
        empty result.  Otherwise every non-falsy entry counts towards
        ``total``, towards ``empty`` when it has no ``docs`` key and towards
        ``errors`` when it has an ``error`` key.  Falsy entries are ignored.
        """
        reply = requests.get('http://' + host + rq.url, params=self.addparams).json()
        client_results = reply.get('client-results', [])
        counter = Counter()
        if not client_results:
            counter['total'] += 1
            counter['empty'] += 1
        for c in client_results:
            if not c:
                continue
            counter['total'] += 1
            if 'docs' not in c:
                counter['empty'] += 1
            # `in` instead of dict.has_key(): has_key is gone in Python 3,
            # and this matches the 'docs' test just above.
            if 'error' in c:
                counter['errors'] += 1
        count_queue.put(counter)

def parse_atomsearch(ammo_file, task_queue, limit):
    """
    Read up to ``limit`` GET requests from ``ammo_file`` and put them on
    ``task_queue`` as ``Request`` objects, then post the end-of-stream
    marker with ``done()``.

    Each record is a header line ``<size> <tag>`` followed by ``size``
    characters of raw request text.  Only records whose first line starts
    with ``GET `` are queued (and counted against ``limit``); the URL is the
    text between ``GET `` and the next space.

    Raises ``ValueError`` when a non-blank header line does not consist of
    exactly two fields or the size is not an integer.
    """
    while limit > 0:
        header_line = ammo_file.readline()
        if not header_line:
            break  # end of file
        if not header_line.strip():
            # Blank line between records (presumably a separator that is
            # not counted in <size>); skip it instead of failing to unpack.
            continue
        rq_size, tag = header_line.split()
        rq = ammo_file.read(int(rq_size)).split('\n')
        if rq[0][:4] != 'GET ':
            continue
        url = rq[0][4:].partition(' ')[0]
        task_queue.put(Request(tag, url))
        limit -= 1
    done(task_queue)

class Tally(Worker):
    """
    Worker that sums up the counters it receives.

    ``counter`` holds the running sum, ``total`` the number of items seen.
    """

    def __init__(self):
        self.total = 0
        self.counter = Counter()

    def process(self, t):
        """Count one more item and add ``t`` to the running sum."""
        self.total = self.total + 1
        self.counter += t

def trie_deployed_on(trie_name, timestamp, host):
    """
    Tell whether the updater log of ``host`` shows ``trie_name`` deployed
    with a version close to ``timestamp``.

    The log is first synced with rsync into a per-host directory under
    /tmp/atom-logs and then read from the newest line backwards.  Lines are
    tab-separated and start with an integer timestamp.  The newest
    three-field line whose second field is 'updated, version:' and whose
    third field is '<version> @ <trie_name>' decides the result: True when
    the version differs from ``timestamp`` by less than 5.  Reading stops
    with False once a line is more than 100 older than ``timestamp``, or
    when the log is exhausted.
    """
    log_name = 'current-rerankd_updater-rerank-7300'
    local_dir = os.path.join('/tmp/atom-logs', host + '/')
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)

    remote_log = '{host}::logs/rerankd/{log}'.format(host=host, log=log_name)
    rsync_cmd = ['rsync', remote_log, local_dir, '--append-verify']
    subprocess.check_output(rsync_cmd, stderr=subprocess.STDOUT)

    for raw_line in backwards(os.path.join(local_dir, log_name)):
        columns = raw_line.strip().split('\t')
        stamp = columns[0]
        if not stamp:
            continue  # blank line
        if int(stamp) < timestamp - 100:
            # Already past the window of interest; nothing older can match.
            return False
        if len(columns) != 3 or columns[1] != 'updated, version:':
            continue
        version, _, deployed_name = columns[2].partition(' @ ')
        if deployed_name == trie_name:
            return abs(int(version) - timestamp) < 5
    return False

# Hosts checked by trie_deployed() and queried (on port 7348) by
# get_empty_rate().
hosts = ['ws38-317.search.yandex.net']

def trie_deployed(trie_name, timestamp):
    """
    Return True when ``trie_deployed_on`` confirms the trie on every host
    in ``hosts``; stop at the first host that does not.
    """
    for host in hosts:
        if not trie_deployed_on(trie_name, timestamp, host):
            return False
    return True

def get_empty_rate(f, test_mode, limit):
    """
    Replay up to ``limit`` GET requests from the ammo file ``f`` against
    every host in ``hosts`` (port 7348, five fetcher threads per host) and
    return the share of empty results: summed ``empty`` over summed
    ``total``.  When nothing was counted the divisor defaults to 1.

    ``test_mode`` is handed to each ``Fetcher``.
    """
    task_queue = Queue()
    count_queue = Queue()

    # Consumers of the request queue; each reports onto count_queue.
    for host in hosts:
        endpoint = host + ':7348'
        for _ in range(5):
            fetcher = Fetcher(test_mode)
            fetcher.run(task_queue, endpoint, count_queue)

    # Single consumer that sums up what the fetchers report.
    tally = Tally()
    tally.run(count_queue)

    parse_atomsearch(f, task_queue, limit)
    task_queue.join()

    # All requests are processed, so every counter is already queued;
    # now the tally can be told to stop.
    done(count_queue)
    count_queue.join()

    totals = tally.counter
    return totals['empty'] / totals.get('total', 1)

if __name__ == '__main__':
    # Usage: <script> AMMO_FILE LIMIT
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the ammo file once the run is over.
    with open(sys.argv[1]) as ammo_file:
        print(get_empty_rate(ammo_file, True, int(sys.argv[2])))
