#!/usr/bin/python
# -*- coding: utf-8 -*-

import json
import urllib
import subprocess
from pprint import pprint
from argparse import ArgumentParser


def parse_args():
    """Parse the command line of the script.

    All options are required. ``--l2_formulas`` and ``--l3_formulas`` take one
    or more values (stored as lists); every other option takes a single value.

    :return: the argparse namespace with the attributes named by ``dest`` below.
    """
    parser = ArgumentParser(description='')

    # options that accept a list of formula JSON files
    multi_value_options = (
        ('--l2_formulas', 'l2_formulas_files'),
        ('--l3_formulas', 'l3_formulas_files'),
    )
    # options that accept exactly one value
    single_value_options = (
        ('--ranking_models', 'ranking_models_archive'),
        ('--l2_archive_path', 'l2_archive_path'),
        ('--l3_archive_path', 'l3_archive_path'),
        ('--l2_archive_url', 'l2_archive_url'),
        ('--l3_archive_url', 'l3_archive_url'),
        ('--formulas_info', 'formulas_info_path'),
        ('--meta_info', 'meta_info_path'),
    )

    for flag, target in multi_value_options:
        parser.add_argument(flag, nargs='+', dest=target, required=True)
    for flag, target in single_value_options:
        parser.add_argument(flag, dest=target, required=True)

    return parser.parse_args()


def get_info(formula_id):
    """Fetch the description of a formula from the FML REST API.

    :param formula_id: formula identifier; it is concatenated to the API URL
        as-is, so it has to be a string.
    :return: the response body decoded from JSON.
    """
    response = urllib.urlopen('https://fml.yandex-team.ru/rest/api/formula/fml/' + formula_id)
    try:
        return json.loads(response.read())
    finally:
        # release the connection even when reading or JSON decoding fails
        response.close()


def get_pool_id(formula_path):
    """Return the pool id reported by ``./mx_ops info`` for a formula file.

    The output of ``./mx_ops info`` is filtered through ``grep 'pool-id'`` and
    every digit of what grep prints is concatenated into the result.

    :param formula_path: path of the formula file, inserted into the shell
        command without quoting.
    :return: string made only of the digits found in the grep output.
    :raises subprocess.CalledProcessError: when the pipeline exits non-zero.
    """
    command = "./mx_ops info {formula_path} | grep 'pool-id'".format(formula_path=formula_path)
    grep_output = subprocess.check_output(command, shell=True)
    # keep only digits for pool_id
    digits = [symbol for symbol in grep_output if symbol.isdigit()]
    return ''.join(digits)

def download_formula(formula_id, formula_path):
    """Download the ``matrixnet.info`` file of a formula from FML.

    :param formula_id: formula identifier substituted into the download URL.
    :param formula_path: local path the downloaded file is saved to.
    """
    url_template = 'https://fml.yandex-team.ru/download/computed/formula?id={formula_id}&file=matrixnet.info'
    urllib.urlretrieve(url_template.format(formula_id=formula_id), formula_path)


def formulas_join(t1, t2, column):
    """Inner-join two lists of dicts on the value stored under ``column``.

    :param t1: left list of dicts.
    :param t2: right list of dicts.
    :param column: key whose values are compared with ``==``.
    :return: list of ``(t1_entry, t2_entry)`` tuples for every matching pair,
        ordered by position in ``t1`` and then by position in ``t2``.
    """
    return [
        (left, right)
        for left in t1
        for right in t2
        if right[column] == left[column]
    ]


def get_formulas_data(l2_formulas_files, l3_formulas_files, l2_archive_path, l3_archive_path):
    """Download the listed L2 and L3 formulas, describe them and archive them.

    For each formula JSON file the formula id is read from its ``'id'`` key,
    the formula description is requested with ``get_info`` and the formula is
    downloaded to ``./models/<type>/prod/ExpFormula<id>.info``. After all
    formulas of one type are processed, ``./archiver`` is run over
    ``./models/<type>/prod/*.info`` to produce that type's archive; its exit
    status is not checked. Finally the exit status of ``svn st models`` is
    printed.

    :param l2_formulas_files: paths of JSON files describing L2 formulas.
    :param l3_formulas_files: paths of JSON files describing L3 formulas.
    :param l2_archive_path: output path passed to ``./archiver`` for L2.
    :param l3_archive_path: output path passed to ``./archiver`` for L3.
    :return: tuple ``(l2, l3)`` of lists of dicts with the keys ``type``,
        ``id``, ``pool``, ``fml_url``, ``dateTime``, ``owner``, ``description``.
    """
    # Explicit (type, json files, archive path) triples: the arguments and the
    # result lists are no longer looked up by constructed name via locals().
    per_type = (
        ('l2', l2_formulas_files, l2_archive_path),
        ('l3', l3_formulas_files, l3_archive_path),
    )
    collected = {}

    for formula_type, formula_files, archive_path in per_type:
        entries = []
        collected[formula_type] = entries

        for formula_json_path in formula_files:
            with open(formula_json_path) as f:
                data = json.load(f)

            formula_id = data['id']
            info = get_info(formula_id)

            formula_path = './models/{formula_type}/prod/ExpFormula{formula_id}.info'.format(
                formula_type=formula_type,
                formula_id=formula_id
            )
            download_formula(formula_id, formula_path)

            entries.append({
                'type': formula_type,
                'id': formula_id,
                'pool': get_pool_id(formula_path),
                'fml_url': 'https://fml.yandex-team.ru/formula/' + formula_id,
                'dateTime': info['dateTime'],
                'owner': info['owner'],
                'description': info['description'],
            })

        # create archives with exp formulas; the shell is needed to expand *.info
        subprocess.call('./archiver -q -r -p -o {out} ./models/{formula_type}/prod/*.info'.format(
            out=archive_path,
            formula_type=formula_type
        ), shell=True)

    # parenthesised single argument prints the same under the print statement
    print(subprocess.call('svn st models', shell=True))

    return collected['l2'], collected['l3']


def get_stats(all_formulas):
    """Render a plain-text table describing joined L2/L3 formula pairs.

    :param all_formulas: sequence of ``(l2, l3)`` pairs of formula dicts;
        ``'description'``, ``'id'`` and ``'pool'`` are read from the L2 dict
        and ``'id'`` from the L3 dict.
    :return: a header line followed by one line per pair, each ending with a
        newline. For an empty sequence only the header line is returned.
    """
    # The first column is as wide as the longest L2 description. The floor of 1
    # covers two failure cases: max() of an empty list raises ValueError, and a
    # width of 0 yields the format spec '0', which is taken as the zero-padding
    # flag and is rejected for string values by Python 2.
    padding = max([len(pair[0]['description']) for pair in all_formulas] + [1])

    lines = [
        '{:{padding}}  :   L2   /  L3   / pool / serpset / cgi-params\n'.format('formula', padding=padding)
    ]
    for item in all_formulas:
        l2, l3 = item[0], item[1]
        cgi = (
            '&pron=exp_imagesformula_full.fast.common_{l2}&pron=exp_imagesformula_full.fast.common.noboost_{l2}'.format(l2=l2['id'])
            + '&pron=exp_imagesformula_full.common_{l3}&pron=exp_imagesformula_full.common.noboost_{l3}'.format(l3=l3['id'])
        )
        lines.append('{:{padding}}  : {l2}, {l3}, {pool}, {serp}, {cgi}\n'.format(
            l2['description'],
            l2=l2['id'],
            l3=l3['id'],
            pool=l2['pool'],
            serp='--------',
            cgi=cgi,
            padding=padding
        ))
    return ''.join(lines)


def main():
    """Entry point: unpack the models archive, collect formula data, write reports.

    The ranking models archive is extracted into ``./models``, L2 and L3
    formula data are collected and joined on their ``'pool'`` value, the
    text table from ``get_stats`` is written to ``formulas_info_path`` and a
    JSON document with the joined formulas and both archive URLs is written to
    ``meta_info_path``.
    """
    args = parse_args()
    # parenthesised single argument prints the same under the print statement
    print(args)

    # extract archive with svn folder ranking_models
    # argv lists (no shell) keep an archive path containing spaces or shell
    # metacharacters as one argument instead of letting the shell split it
    subprocess.call(['mkdir', '-p', 'models'])
    subprocess.call(['tar', 'xf', args.ranking_models_archive, '-C', 'models'])

    l2, l3 = get_formulas_data(args.l2_formulas_files, args.l3_formulas_files, args.l2_archive_path, args.l3_archive_path)

    # pair every L2 formula with every L3 formula that has the same pool
    all_formulas = formulas_join(l2, l3, 'pool')

    with open(args.formulas_info_path, 'w') as f:
        f.write(get_stats(all_formulas))

    with open(args.meta_info_path, 'w') as f:
        f.write(json.dumps({
            'all_formulas': all_formulas,
            'l2_archive_url': args.l2_archive_url,
            'l3_archive_url': args.l3_archive_url,
        }, indent=4, sort_keys=True))


# Run the script only when the file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

