#!/usr/bin/python
# encoding: utf-8

import re
from datetime import date, timedelta

# Names exported by ``from <module> import *``.
__all__ = ('get_date_str', 'make_ammo_and_uuid_files', 'get_output_file_names',)

# Endpoint path prefixes of the requests that are turned into ammo.
PATHS = (
    '/api/v1/grouped_apps',
    '/api/v2/recommend',
    '/api/v2/app_info'
)

# Template of one raw HTTP GET request. {misc_headers} is filled with the
# result of parse_misc_headers(); the trailing "\r\n" after it provides the
# blank line that terminates the header section.
AMMO_BODY = (
    "GET {query} HTTP/1.1\r\n"
    "User-Agent: com.yandex.launcher/1.7.4.qa.5002215 (LYF LS-4505; Android 6.0.1)\r\n"
    "Host: launcher.regression.phone.yandex.net\r\n"
    "X-YaUuid: {uuid}\r\n"
    "{misc_headers}\r\n"
)

# One ammo record: a "<body length> <tag>" line followed by the raw request.
# NOTE(review): looks like the Yandex.Tank request-style ammo format - confirm.
AMMO_BLOCK = "{body_length} {tag}\r\n{body}"

# Captures (group "request") the PATHS entry at the start of a request that
# continues with "/"; make_ammo() uses the captured prefix as the ammo tag.
re_handler = re.compile(r'^(?P<request>%s)/.*' % '|'.join('(?:%s)' % path for path in PATHS), re.I)
# Matches a request that starts with one of PATHS followed by "/"
# (case-insensitive); used to filter access-log rows.
re_request = re.compile('|'.join('(%s/)' % path for path in PATHS), re.I)
# 32 hexadecimal digits. The pattern has no end anchor and is applied with
# .match(), so only the first 32 characters of the value are checked.
re_yauuid = re.compile('[0-9a-fA-F]{32}', re.I)


def parse_misc_headers(headers):
    """Convert a packed header string into CRLF-terminated HTTP header lines.

    ``headers`` is a ``|``-separated list of ``name=value`` items, or ``'-'``
    when there are no extra headers.

    Returns an empty string when there is nothing to emit (``'-'``, an empty
    string, ``None``, or only empty items). Otherwise returns every item as a
    ``name: value`` line, each terminated by ``\\r\\n``.
    """
    if not headers or headers == '-':
        return ''

    # Only the first "=" separates the name from the value; any further "="
    # belongs to the value (e.g. base64 padding) and must stay untouched.
    # Empty items are skipped because an empty line would end the HTTP header
    # section prematurely.
    lines = [item.replace('=', ': ', 1) for item in headers.split('|') if item]
    if not lines:
        return ''
    return "\r\n".join(lines) + "\r\n"


def make_ammo(uuid, request, yamisc):
    """Build one ammo record for a logged request.

    The raw HTTP request is rendered from AMMO_BODY using ``request`` as the
    query, ``uuid`` as the X-YaUuid header and the headers parsed from
    ``yamisc``. The record is that request prefixed with a header line holding
    its length and a tag, where the tag is the endpoint prefix captured by
    ``re_handler``.

    ``request`` must match ``re_handler``; otherwise the lookup of the tag
    fails with AttributeError.
    """
    extra_headers = parse_misc_headers(yamisc)
    http_request = AMMO_BODY.format(query=request, uuid=uuid, misc_headers=extra_headers)

    # The tag is the endpoint prefix, i.e. the named group of the handler regex.
    endpoint_tag = re_handler.match(request).group('request')

    return AMMO_BLOCK.format(
        body_length=len(http_request),
        tag=endpoint_tag,
        body=http_request,
    )


def get_date_str(days_ago):
    """Return the ISO (YYYY-MM-DD) date that lies ``days_ago`` days before today."""
    offset = timedelta(days=days_ago)
    target_day = date.today() - offset
    return target_day.isoformat()


def get_output_file_names(date_str):
    """Return the ammo and uuid output file names derived from ``date_str``.

    The result is a dict with the keys ``ammo_filename`` and ``uuid_filename``.
    """
    return dict(
        ammo_filename='%s.ammo.txt' % date_str,
        uuid_filename='%s.uuid.txt' % date_str,
    )


def make_ammo_and_uuid_files(**kwargs):
    """Build ammo from the access log of a given day and dump it to local files.

    Required keyword arguments (a missing or falsy value - ``None``, ``0``,
    ``''`` - is rejected):

    * ``ammo_filename``  - local file that receives all ammo records;
    * ``uuid_filename``  - local file that receives the distinct uuids,
      one per line;
    * ``yt_token``       - token passed to the YT cluster client;
    * ``days_ago``       - which day's log table to read, counted back from
      today (see get_date_str);
    * ``max_ammo_count`` - value passed to the stream's ``random`` step
      (presumably the sample size - confirm against the nile documentation).

    Rows are kept only if the request matches ``re_request`` and the uuid
    matches ``re_yauuid``. The resulting rows are put into a table under
    ``//home/advisor/rtuaev/nivrana/ammo/`` and then read back to write the
    two local files.

    Returns a dict with ``ammo_filename``, ``uuid_filename``, ``ammo_count``
    (total row count reported for the read records) and ``uuid_count``
    (number of distinct uuids written).

    Raises AssertionError when a required parameter is missing.
    """
    import os
    print('os.getcwd() => %s' % os.getcwd())

    # check parameters
    # An explicit raise is used instead of an ``assert`` statement because
    # asserts are removed when Python runs with -O, which would let missing
    # parameters through. AssertionError and its message are kept so callers
    # observe the same exception as before.
    for parameter in ('ammo_filename', 'uuid_filename', 'yt_token', 'days_ago', 'max_ammo_count'):
        if not kwargs.get(parameter):
            raise AssertionError('required "%s" parameter' % parameter)

    # Imported lazily so that the module can be imported (and the arguments
    # validated) without nile being importable.
    from nile.api.v1 import (
        clusters,
        extractors as ne,
        filters as nf,
    )

    ammo_filename = kwargs['ammo_filename']
    uuid_filename = kwargs['uuid_filename']

    date_str = get_date_str(kwargs['days_ago'])
    src_filepath = os.path.join('//statbox/advisor-access-log/', date_str)
    dst_filepath = os.path.join('//home/advisor/rtuaev/nivrana/ammo/', date_str)

    cluster = clusters.yt.Hahn(token=kwargs['yt_token'])
    job = cluster.job()

    # '2018-04-21'
    stream = job.table(
        src_filepath
    ).filter(
        nf.custom(
            lambda request, uuid: re_request.match(request) and re_yauuid.match(uuid),
            'request',
            'upstream_http_x_yauuid'
        )
    ).random(
        kwargs['max_ammo_count']
    ).project(
        uuid='upstream_http_x_yauuid',
        request='request',
        yamisc='upstream_http_x_yamisc'
    ).project(
        'uuid',
        ammo=ne.custom(make_ammo, 'uuid', 'request', 'yamisc')
    ).put(
        dst_filepath
    )
    job.run()

    # DEBUG: to work with an already built table, skip the job above and use
    # stream = job.table(dst_filepath)

    records = stream.read()
    ammo_count = records.total_row_count

    uuid_set = set()
    info = {
        'ammo_filename': ammo_filename,
        'uuid_filename': uuid_filename,
        'ammo_count': ammo_count,
    }
    # NOTE(review): both files are opened in binary mode and written with the
    # row values as-is; this works with Python 2 str values - confirm the
    # target interpreter before running under Python 3.
    with open(ammo_filename, 'wb') as f:
        for row in records:
            uuid_set.add(row.uuid)
            f.write(row.ammo)

    info['uuid_count'] = len(uuid_set)
    with open(uuid_filename, 'wb') as f:
        f.write('\n'.join(uuid_set))

    return info


if __name__ == '__main__':

    import json
    import os
    import sys
    from datetime import datetime
    from pprint import pprint

    # The keyword arguments for make_ammo_and_uuid_files() are passed as a
    # JSON object in the SCRIPT_ARGS environment variable.
    raw_args = os.environ.get('SCRIPT_ARGS')
    if not raw_args:
        print('No SCRIPT_ARGS found among environment variables')
        sys.exit(1)

    time_started = datetime.now()
    print('Started: %s' % time_started.isoformat(' '))

    call_kwargs = json.loads(raw_args)
    result = make_ammo_and_uuid_files(**call_kwargs)

    time_finished = datetime.now()
    print('Finished: %s' % time_finished)
    print('Elapsed: %s' % (time_finished - time_started))

    # Show the summary returned by the run, then report success.
    pprint(result)
    sys.exit(0)
