#!/usr/bin/python

import argparse
import hashlib
import itertools
import re
import requests
import sys
import time

# Command line: an optional positional host (default 'localhost') that is
# asked for the cluster layout, and an optional --table-re regular expression
# that restricts which tables are inspected.
parser = argparse.ArgumentParser()
parser.add_argument('host', nargs='?', default='localhost')
parser.add_argument('--table-re')
args = parser.parse_args(sys.argv[1:])

def q(host, query):
    """Run *query* over HTTP against ``host`` (port 8123) and return its rows.

    The request is sent as user ``readonly`` and the response body is read
    line by line as tab-separated text.

    Returns:
        * normally a list with one entry per response line: a plain string
          when the line has a single column, otherwise a list of column
          strings;
        * for a query that starts with ``SELECT MIN|MAX|COUNT`` and has no
          ``GROUP BY``: only the first row, or, when there are no rows, a
          list of ``None`` with one entry per comma-separated select
          expression;
        * for ``SHOW CREATE TABLE``: only the first row.

    Raises:
        requests.HTTPError: the server answered with an error status.
        IndexError: a ``SHOW CREATE TABLE`` query returned no rows.
    """
    response = requests.get(
        'http://{}:8123'.format(host),
        params={'user': 'readonly', 'query': query},
    )
    # Fail loudly on an HTTP error status instead of parsing the error body
    # as if it were data rows.
    response.raise_for_status()

    result = []
    for line in response.iter_lines():
        # iter_lines() yields bytes on Python 3; on Python 2 bytes is str and
        # this branch is never taken.
        if not isinstance(line, str):
            line = line.decode('utf-8')
        columns = line.split('\t')
        if len(columns) == 1:
            result.append(columns[0])
        else:
            result.append(columns)

    is_aggregate = re.search(r'^SELECT\s+(MIN|MAX|COUNT)', query, flags=re.I)
    is_grouped = re.search(r'\sGROUP\s+BY\s', query, flags=re.I)
    if is_aggregate and not is_grouped:
        if result:
            result = result[0]
        else:
            # No rows at all: hand back one None per select expression so the
            # caller can still unpack the result.
            m = re.match(r'SELECT\s+(.*)\s+FROM', query, flags=re.I)
            select_exprs = re.split(r'\s*,\s*', m.group(1)) if m else []
            result = [None] * len(select_exprs)

    if re.match(r'SHOW\s+CREATE\s+TABLE\s', query, flags=re.I):
        result = result[0]
    return result
# Build both directions of the host <-> shard mapping for the 'logs' cluster,
# as reported by system.clusters on the host given on the command line.
shard_for_host = {}
hosts_for_shard = {}
cluster_query = "SELECT host_name, shard_num FROM system.clusters WHERE host_name != 'localhost' AND cluster = 'logs'"
for host, shard in q(args.host, cluster_query):
    # NOTE(review): hosts whose name contains '05' or '06' have their shard
    # number shifted by 4 -- presumably to keep them apart from other hosts
    # that report the same shard numbers; confirm against the cluster layout.
    if any(marker in host for marker in ('05', '06')):
        shard = str(int(shard) + 4)
    shard_for_host[host] = shard
    if shard not in hosts_for_shard:
        hosts_for_shard[shard] = []
    hosts_for_shard[shard].append(host)
# Base names of the tables to inspect.
tables = [
    'balance_mergetree',
    'bsexport_data_mergetree',
    'bsexport_prices_mergetree',
    'dbshards_ids_mergetree',
    'mediaplan_mergetree',
    'messages_mergetree',
    'moderate_mergetree',
    'ppclog_api_mergetree',
    'ppclog_cmd_mergetree',
    'ppclog_price_mergetree',
]
# Every base table is also checked under its "_v2" and "_v3" names.  List
# comprehensions (rather than map) guarantee real lists, so the concatenation
# below works regardless of what map() returns.
tables_v2 = [s.replace('_mergetree', '_v2_mergetree') for s in tables]
tables_v3 = [s.replace('_mergetree', '_v3_mergetree') for s in tables]
tables += tables_v2 + tables_v3
# Optionally keep only the tables whose name matches --table-re.
if args.table_re:
    table_filter = re.compile(args.table_re)
    tables = [name for name in tables if table_filter.search(name)]
# Query a single host per shard: the first one recorded for that shard.
hosts = sorted(replicas[0] for replicas in hosts_for_shard.values())
# For every table, print one tab-separated line per (partition, host) pair
# that has matching rows:
#   table, partition, shard, total row count, comma-separated per-day counts.
for t in tables:
    # Partitions of this table present on each queried host.
    partitions_on_host = {}
    for h in hosts:
        partitions_on_host[h] = set(q(h, "SELECT DISTINCT(partition) FROM system.parts WHERE table = '{}'".format(t)))
    partitions = sorted(set(itertools.chain.from_iterable(partitions_on_host.values())))
    if not partitions:
        # Nothing to report (also covers an empty host list).
        continue
    col_type = dict(q(hosts[0], "SELECT name, type FROM system.columns WHERE table = '{}'".format(t)))

    # Filter that drops rows coming from dev/test sources.  It depends only on
    # the table, so it is built once here rather than once per host.
    if t.startswith('dbshards_ids'):
        prod_cond = "source NOT LIKE 'file:%dev%' AND (source NOT LIKE 'file:%test%' OR source LIKE 'file:limtest%') AND source NOT LIKE 'file:ws6-029-30140%' AND source NOT LIKE 'file:sas1-9069-7120%'"
    elif 'source' in col_type:
        prod_cond = "source NOT LIKE 'file:%dev%' AND source NOT LIKE 'file:%test%' AND source NOT LIKE 'file:ws6-029-30140%' AND source NOT LIKE 'file:sas1-9069-7120%'"
    elif t.startswith('messages'):
        # Tables without a 'source' column: messages tables are filtered on
        # the 'host' column instead.
        prod_cond = "host NOT LIKE '%dev%' AND host NOT LIKE '%test%' AND host NOT LIKE 'ws6-029-30140%' AND host NOT LIKE 'sas1-9069-7120%'"
    else:
        # No column to filter on: count every row.
        prod_cond = '1'

    for p in partitions:
        # The partition name is read as a year (first four characters)
        # followed by a month number.
        year = int(p[:4])
        month = int(p[4:])
        log_date_cond = "toYear(log_date) = {} AND toMonth(log_date) = {}".format(year, month)
        count_query = "SELECT toDayOfMonth(log_date), COUNT(*) FROM {} WHERE {} AND {} GROUP BY toDayOfMonth(log_date)".format(t, log_date_cond, prod_cond)
        for h in hosts:
            if p not in partitions_on_host[h]:
                continue
            # Index 0 holds day 1 of the month, index 30 holds day 31.
            cnt_by_day = [0] * 31
            for row in q(h, count_query):
                cnt_by_day[int(row[0]) - 1] = int(row[1])
            cnt = sum(cnt_by_day)
            if cnt > 0:
                print('\t'.join([t, p, shard_for_host[h], str(cnt), ','.join([str(n) for n in cnt_by_day])]))
