#!/usr/bin/python3

import json
import os
import re
import subprocess
import sys
import yt.wrapper


def dbs_sql(env, sql):
    """Run ``sql`` through the ``dbs-sql`` command-line tool and return its output lines.

    The tool is invoked as ``dbs-sql <env>:ppc:all -q <sql> -B``; its standard
    output is decoded as UTF-8, trailing whitespace is stripped, and the text
    is split on newlines.

    Returns:
        A list of output lines. Empty output yields ``['']`` rather than an
        empty list, so callers must be prepared to skip blank lines.

    Raises:
        subprocess.CalledProcessError: if ``dbs-sql`` exits with a non-zero status.
    """
    command = ['dbs-sql', env + ':ppc:all', '-q', sql, '-B']
    raw_output = subprocess.check_output(command)
    text = raw_output.decode('utf-8')
    return text.rstrip().split('\n')


def get_ids_for_client_id(env, table, id_type, client_id):
    """Return the integer values of column ``id_type`` in ``table`` for one client.

    Runs ``SELECT <id_type> FROM <table> WHERE ClientID = <client_id>`` via
    ``dbs_sql`` and converts every purely numeric output line to ``int``.
    Any other line (for example a non-numeric header or ``NULL`` line the
    tool may print) is skipped.

    Args:
        env: environment name passed through to ``dbs_sql``.
        table: name of the table to query.
        id_type: name of the column to select.
        client_id: value compared against the ``ClientID`` column.

    Returns:
        A list of ints, in the order the tool printed them.
    """
    sql = 'SELECT {} FROM {} WHERE ClientID = {}'.format(id_type, table, client_id)
    ids = []
    for line in dbs_sql(env, sql):
        value = line.strip()
        # fullmatch, not match: a prefix match would let a line such as
        # '12abc' through to int(), which raises ValueError.
        if re.fullmatch(r'[0-9]+', value):
            ids.append(int(value))
    return ids


def get_ids_for_cids(env, table, id_type, cids):
    """Return the integer values of column ``id_type`` in ``table`` for the given cids.

    Runs ``SELECT <id_type> FROM <table> WHERE cid IN (<cids>)`` via
    ``dbs_sql`` and converts every purely numeric output line to ``int``.
    Any other line (for example a non-numeric header or ``NULL`` line the
    tool may print) is skipped.

    Args:
        env: environment name passed through to ``dbs_sql``.
        table: name of the table to query.
        id_type: name of the column to select.
        cids: values for the ``cid IN (...)`` filter.

    Returns:
        A list of ints, in the order the tool printed them. When ``cids`` is
        empty, returns ``[]`` without running a query (an empty ``IN ()``
        list would not be valid SQL).
    """
    if not cids:
        return []
    cid_list = ','.join(str(cid) for cid in cids)
    sql = 'SELECT {} FROM {} WHERE cid IN ({})'.format(id_type, table, cid_list)
    ids = []
    for line in dbs_sql(env, sql):
        value = line.strip()
        # fullmatch, not match: a prefix match would let a line such as
        # '12abc' through to int(), which raises ValueError.
        if re.fullmatch(r'[0-9]+', value):
            ids.append(int(value))
    return ids


def get_ids_for_pids(env, table, id_type, pids):
    """Return the integer values of column ``id_type`` in ``table`` for the given pids.

    Runs ``SELECT <id_type> FROM <table> WHERE pid IN (<pids>)`` via
    ``dbs_sql`` and converts every purely numeric output line to ``int``.
    Any other line (for example a non-numeric header or ``NULL`` line the
    tool may print) is skipped.

    Args:
        env: environment name passed through to ``dbs_sql``.
        table: name of the table to query.
        id_type: name of the column to select.
        pids: values for the ``pid IN (...)`` filter.

    Returns:
        A list of ints, in the order the tool printed them. When ``pids`` is
        empty, returns ``[]`` without running a query (an empty ``IN ()``
        list would not be valid SQL).
    """
    if not pids:
        return []
    pid_list = ','.join(str(pid) for pid in pids)
    sql = 'SELECT {} FROM {} WHERE pid IN ({})'.format(id_type, table, pid_list)
    ids = []
    for line in dbs_sql(env, sql):
        value = line.strip()
        # fullmatch, not match: a prefix match would let a line such as
        # '12abc' through to int(), which raises ValueError.
        if re.fullmatch(r'[0-9]+', value):
            ids.append(int(value))
    return ids


def _build_where(keys):
    """Build the ``where`` clause restricting a table to the given key values.

    ``keys`` maps a column name to the list of values allowed for that column,
    or is ``None``/empty when the table has no filter configured. The clause
    always starts with ``1 = 1``; every column adds ``AND <column> IN (...)``,
    and a column with no values adds ``AND 1 = 0`` so the query matches nothing.
    """
    clauses = ['1 = 1']
    for column, values in (keys or {}).items():
        if values:
            clauses.append('{} IN ({})'.format(column, ','.join(str(value) for value in values)))
        else:
            clauses.append('1 = 0')
    return ' AND '.join(clauses)


def _sort_rows(rows, order):
    """Sort ``rows`` (a list of dict-like rows) in place by the columns named in ``order``.

    ``order`` is a comma-separated list of column names; rows are compared by
    the tuple of those columns' values, in the listed order.
    """
    columns = order.split(',')
    rows.sort(key=lambda row: tuple(row[column] for column in columns))


def main():
    """Dump the rows belonging to one client from the YT tables as JSON.

    Command line: ``<env> <ClientID>``. The ids owned by the client are looked
    up through the ``get_ids_for_*`` helpers; then every table listed under the
    environment's YT root is selected with a filter on those ids, sorted by the
    column(s) configured in ``order_by``, and the result (table name -> list of
    rows) is printed as indented JSON with sorted keys.

    Exits with status 0 after printing usage for ``-h``/``--help``, with
    status 1 for any other wrong argument count, and with an error message for
    an unknown environment or a non-integer ClientID.
    """
    usage = '''
Usage: {0} <env> <ClientID>
Example: {0} devtest 36504228
    '''.format(os.path.basename(sys.argv[0]))
    if len(sys.argv) != 3:
        print(usage)
        asked_for_help = len(sys.argv) == 2 and sys.argv[1] in ['-h', '--help']
        sys.exit(0 if asked_for_help else 1)

    env = sys.argv[1]
    try:
        client_id = int(sys.argv[2])
    except ValueError:
        sys.exit("ClientID must be an integer, got '{}'".format(sys.argv[2]))

    # Environment name -> (YT root directory of the tables, YT proxy).
    environments = {
        'devtest': ('//home/direct/test/mysql-sync/devtest/current/combined', 'zeno'),
        'dev7': ('//home/direct/test/mysql-sync/dev7/current/combined', 'zeno'),
        'test': ('//home/direct/test/mysql-sync/testing/current/combined', 'zeno'),
        'ts': ('//home/direct/test/mysql-sync/testing/current/combined', 'zeno'),
        'prod': ('//home/direct/mysql-sync/current/combined', 'seneca-sas'),
    }
    if env not in environments:
        sys.exit("unknown environment '{}'".format(env))
    root, proxy = environments[env]
    yt.wrapper.config.set_proxy(proxy)

    cids = get_ids_for_client_id(env, 'campaigns', 'cid', client_id)
    bids = get_ids_for_cids(env, 'banners', 'bid', cids)
    bids_ids = (
        get_ids_for_cids(env, 'bids', 'id', cids)
        + get_ids_for_cids(env, 'bids_arc', 'id', cids)
        + get_ids_for_cids(env, 'bids_base', 'bid_id', cids)
    )
    ret_ids = get_ids_for_cids(env, 'bids_retargeting', 'ret_id', cids)
    ret_cond_ids = get_ids_for_cids(env, 'bids_retargeting', 'ret_cond_id', cids)
    pids = get_ids_for_cids(env, 'phrases', 'pid', cids)
    creative_ids = get_ids_for_client_id(env, 'perf_creatives', 'creative_id', client_id)
    tag_ids = get_ids_for_pids(env, 'tag_group', 'tag_id', pids)

    # Table name -> {column: allowed values}; all columns of a table are ANDed.
    keys_for_table = {
        'banners': {'bid': bids, 'cid': cids, 'pid': pids},
        'banners.index': {'bid': bids},
        'bids': {'id': bids_ids, 'cid': cids, 'pid': pids},
        'bids.index': {'id': bids_ids},
        'bids_retargeting': {'ret_id': ret_ids, 'ret_cond_id': ret_cond_ids, 'cid': cids, 'pid': pids},
        'bids_retargeting.index': {'ret_id': ret_ids},
        'campaigns': {'cid': cids},
        'perf_creatives': {'ClientID': [client_id], 'creative_id': creative_ids},
        'perf_creatives.index': {'ClientID': [client_id], 'creative_id': creative_ids},
        'phrases': {'pid': pids, 'cid': cids},
        'phrases.index': {'pid': pids},
        'recommendations_online': {'client_id': [client_id]},
        'recommendations_status': {'client_id': [client_id]},
        'retargeting_conditions': {'ret_cond_id': ret_cond_ids, 'ClientID': [client_id]},
        'retargeting_conditions.index': {'ClientID': [client_id]},
        'tag_group': {'pid': pids, 'tag_id': tag_ids},
        'tag_group.index': {'pid': pids, 'tag_id': tag_ids},
    }
    # Table name -> comma-separated columns that define the output row order.
    order_by = {
        'banners': 'bid',
        'banners.index': 'bid',
        'bids': 'id',
        'bids.index': 'id',
        'bids_retargeting': 'ret_id',
        'bids_retargeting.index': 'ret_id',
        'campaigns': 'cid',
        'perf_creatives': 'creative_id',
        'perf_creatives.index': 'creative_id',
        'phrases': 'pid',
        'phrases.index': 'pid',
        'recommendations_online': 'client_id,type,cid,pid,bid,user_key_1,user_key_2,user_key_3,timestamp',
        'recommendations_status': 'client_id,type,cid,pid,bid,user_key_1,user_key_2,user_key_3,timestamp',
        'retargeting_conditions': 'ret_cond_id',
        'retargeting_conditions.index': 'ret_cond_id',
        'tag_group': 'tag_id',
        'tag_group.index': 'tag_id',
    }

    listing = subprocess.check_output(['yt', '--proxy', proxy, 'list', root]).decode('utf-8')
    # Drop empty names: an empty listing would otherwise produce one table named ''.
    tables = [name for name in listing.rstrip().split('\n') if name]

    result = {}
    for table in tables:
        path = '/'.join([root, table])
        # NOTE(review): a table with no entry in keys_for_table gets the bare
        # '1 = 1' filter, i.e. it is selected in full - confirm this is intended.
        where = _build_where(keys_for_table.get(table))
        query = '* from [{}] where {}'.format(path, where)
        rows = list(yt.wrapper.select_rows(query))
        if table in order_by:
            # The configured order may name several columns; indexing a row
            # with the whole comma-separated string would raise KeyError.
            _sort_rows(rows, order_by[table])
        result[table] = rows
    print(json.dumps(result, sort_keys=True, indent=4))

# Run the export only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
