-- Query template: every placeholder below has to be substituted with a real value
-- before the script is executed (NOTE(review): presumably done by the launching job - confirm).
PRAGMA yt.Pool = ?;

-- Suffix appended to both table prefixes below to pick the tables to read.
$date = ?;
-- Table that receives the final result (rewritten with TRUNCATE by the INSERT at the end of the script).
$result_path = ?;
-- Table with currency, rate and date columns, read by the ruble converter.
$currency_rates_table_path = ?;
$binlog_table_prefix = ?;
$ppclog_api_table_prefix = ?;
-- Root passed to the LIKE() table function to locate the campaigns, users and banners tables.
$mysql_sync_path = ?;
-- Input tables are addressed as prefix + date.
$binlog_table = $binlog_table_prefix || $date;
$ppclog_api_table = $ppclog_api_table_prefix || $date;

/*-------- Converter to rubles using the latest available exchange rate ---------*/
-- Fixed multiplier applied to amounts whose currency is YND_FIXED.
$ynd_fixed_rate = 30;
-- List of (currency, rate) tuples, one per currency; the rate is taken from the row
-- with the greatest `date` for that currency.
$rates_list =
    SELECT AGGREGATE_LIST(AsTuple(currency, rate))
    FROM (
        SELECT currency
            , UnWrap(MAX_BY(CAST(rate AS Double), `date`)) AS rate
        FROM $currency_rates_table_path
        GROUP BY UnWrap(currency) AS currency
    );

-- Builds a ($sum, $source_currency) -> rubles converter from a list of (currency, rate) tuples.
-- RUB is returned unchanged, YND_FIXED is multiplied by $ynd_fixed_rate,
-- a currency missing from the rates yields NULL, anything else is multiplied by its rate.
$converter_to_rub_from_rates = ($rates) -> {
    $dict = ToDict($rates);
    RETURN ($sum, $source_currency) -> {
        RETURN CASE
            WHEN $source_currency = 'RUB' THEN $sum
            WHEN $source_currency = 'YND_FIXED' THEN $sum * $ynd_fixed_rate
            WHEN NOT DictContains($dict, $source_currency) THEN NULL
            ELSE $sum * $dict[$source_currency]
            END;
    };
};
-- Converter bound to the rates loaded above; used for the money_sum_rub columns.
$cur_convert = $converter_to_rub_from_rates($rates_list);
/* ------------------------------------------------------------------------- */


$script = @@
import cyson
import json


def get_value(v):
    """Convert one typed binlog column value into a plain string.

    ``v`` is either ``None`` or a container keyed by a bytes type tag
    (``b'integer'``, ``b'string'``, ``b'unsigned_bigint'``,
    ``b'floating_point'``, ``b'fixed_point'`` or ``b'null'``).

    Returns ``None`` for a missing value or a ``b'null'`` tag, the UTF-8
    decoded payload for ``b'string'`` and ``b'fixed_point'``, and ``str()`` of
    the payload for the numeric tags. A container with none of the known tags
    is returned as ``str(v)``.
    """
    if v is None:
        return None
    elif b'null' in v:
        return None
    elif b'integer' in v:
        return str(v[b'integer'])
    elif b'string' in v:
        return v[b'string'].decode('utf-8')
    elif b'unsigned_bigint' in v:
        return str(v[b'unsigned_bigint'])
    elif b'floating_point' in v:
        return str(v[b'floating_point'])
    elif b'fixed_point' in v:
        # Fixed point numbers arrive as bytes; keep the textual form as is.
        return v[b'fixed_point'].decode('utf-8')
    else:
        # Unknown tag: fall back to the textual form of the whole container.
        return str(v)


def cols(data):
    """Turn a list of column records into a ``{column name: value}`` dict.

    Each record carries the column name as bytes under ``b'name'`` and a typed
    value under ``b'value'`` which is converted with ``get_value``. A missing
    or empty ``data`` gives an empty dict.
    """
    by_name = {}
    for column in data or ():
        by_name[column[b'name'].decode('utf-8')] = get_value(column[b'value'])
    return by_name


def pk_by_name(row, name):
    """Return the value of the primary key field called ``name`` (bytes).

    The first field of ``row[b'primary_key']`` with a matching ``b'name'`` is
    converted with ``get_value``; ``None`` is returned when no field matches.
    """
    matching_values = (
        get_value(key_field[b'value'])
        for key_field in row[b'primary_key']
        if key_field[b'name'] == name
    )
    return next(matching_values, None)


def pk(row):
    """Return all primary key values of ``row`` joined with commas.

    Every field of ``row[b'primary_key']`` is converted with ``get_value`` and
    the results are concatenated in their original order.
    """
    key_parts = (get_value(key_field[b'value']) for key_field in row[b'primary_key'])
    return ','.join(key_parts)


# Coarse source type for the services known by name; everything else is 'internal'.
service_type_by_name = {
    'direct.api5': 'api',
    'direct.json-api': 'api',
    'direct.soap-api': 'api',

    'direct.web': 'web',
}


# Application ids whose API requests are reported as 'api_commander'.
commander_application_ids = [
    'c906d379328e4541a6d06e4894f2a03b',
    'c135345132e9449dab6416f3cc34ffab',
]


def is_api_commander_source(application_id):
    """Tell whether ``application_id`` (bytes or None) is one of the commander ids."""
    if not application_id:
        decoded_id = ''
    else:
        decoded_id = application_id.decode('utf-8')
    return decoded_id in commander_application_ids


def get_service_type(service, method, application_id, operator_uid):
    """Classify the origin of a change.

    ``service``, ``method`` and ``application_id`` are bytes or None,
    ``operator_uid`` is an integer or None.

    Returns one of 'new_web', 'old_web', 'api', 'api_commander', 'internal':
    web requests are split by the 'grid.' method prefix, API requests by the
    application id, and two non-web sources (the ppcCampQueue script and
    intapi calls that carry an operator uid) are counted as 'old_web'.
    """
    service_name = service.decode('utf-8') if service else ''
    method_name = method.decode('utf-8') if method else ''

    base_type = service_type_by_name.get(service_name, 'internal')

    if base_type == 'web':
        return 'new_web' if method_name.startswith('grid.') else 'old_web'
    if base_type == 'api':
        return 'api_commander' if is_api_commander_source(application_id) else base_type
    if service_name == 'direct.script' and method_name == 'ppcCampQueue':
        return 'old_web'
    if service_name == 'direct.intapi' and operator_uid is not None:
        return 'old_web'
    return base_type


def extract_events(tbl, operation, rows):
    """Collect the events produced by every row of one binlog record.

    ``tbl`` and ``operation`` are bytes or None, ``rows`` is a list of row
    change records or None. The table name selects the row handler; rows of
    a table without a handler produce no events. Returns a flat list of
    event dicts in row order.
    """
    table_name = tbl.decode('utf-8') if tbl else ''
    operation_name = operation.decode('utf-8') if operation else ''
    if not rows:
        return []

    row_handlers = {
        'campaigns': process_campaigns_row,
        'bids': process_bids_row,
        'autobudget_alerts': process_autobudget_alerts_row,
        'clients': process_clients,
        'users': process_users_row,
        'campaign_permalinks': process_campaign_permalinks_row,
        'banner_permalinks': process_banner_permalinks_row,
        'banner_turbolandings': process_banner_turbolandings_row,
        'camp_metrika_counters': process_camp_metrika_counters_row,
    }
    handle_row = row_handlers.get(table_name)
    if handle_row is None:
        return []

    events = []
    for changed_row in rows:
        events.extend(handle_row(operation_name, changed_row))
    return events


def process_campaigns_row(operation, row):
    """Derive campaign events from one changed row of the ``campaigns`` table.

    ``operation`` is the decoded operation name ('INSERT', 'UPDATE', ...),
    ``row`` is a row change record with ``b'primary_key'`` and optional
    ``b'before'`` / ``b'after'`` column lists.

    Returns a list of dicts with the keys 'cid' and 'event' and, for money
    related events, 'money_value' (a string). For campaigns of type 'dynamic'
    or 'performance' the day budget, autobudget and autobudget sum events are
    emitted twice: once under the plain name and once prefixed with the type.

    Rows with incomplete data are tolerated instead of failing the whole UDF:
    an inserted row without a type gets only the generic 'camp_create', a
    strategy_data value that is not a JSON object with a numeric 'sum' gives
    no autobudget sum event, and a sum change without a previous value is
    skipped because the delta cannot be computed.
    """
    cid = pk(row)
    before = cols(row.get(b'before'))
    after = cols(row.get(b'after'))
    camp_type = after.get('type')
    is_insert = operation == 'INSERT'
    # Only these campaign types get the extra "<type>_<event>" copy.
    has_typed_copy = camp_type in ('dynamic', 'performance')

    ret = []

    def emit(event, money_value=None, per_type=False):
        # Append the event and, when requested and applicable, its per-type copy.
        names = [event]
        if per_type and has_typed_copy:
            names.append(camp_type + '_' + event)
        for name in names:
            record = {'cid': cid, 'event': name}
            if money_value is not None:
                record['money_value'] = money_value
            ret.append(record)

    def uses_autobudget(columns):
        # True when the strategy name in the given column image starts with 'autobudget'.
        return (columns.get('strategy_name') or '').startswith('autobudget')

    if is_insert:
        if camp_type == 'wallet':
            emit('wallet_create')
        elif camp_type == 'billing_aggregate':
            emit('billing_aggregate_create')
        else:
            emit('camp_create')
            # The per-type creation event exists for every known type, not only
            # dynamic/performance; it is skipped when the type is unknown.
            if camp_type is not None:
                emit(camp_type + '_camp_create')

    if after.get('statusModerate') == 'Ready' and (is_insert or before.get('statusModerate') == 'New'):
        emit('camp_moderate_ready')

    day_budget = after.get('day_budget')
    if day_budget and (is_insert or day_budget != before.get('day_budget')):
        emit('camp_day_budget', day_budget, per_type=True)

    if uses_autobudget(after) and (is_insert or not uses_autobudget(before)):
        emit('camp_enabled_autobudget', per_type=True)
    # Note: an inserted campaign without an autobudget strategy is reported as disabled.
    if not uses_autobudget(after) and (is_insert or uses_autobudget(before)):
        emit('camp_disabled_autobudget', per_type=True)

    if 'DontShow' in after and (is_insert or (after.get('DontShow') or '') != (before.get('DontShow') or '')):
        emit('camp_set_dont_show')

    strategy_data = after.get('strategy_data')
    if strategy_data and (is_insert or strategy_data != before.get('strategy_data')):
        try:
            old_sum = json.loads(before.get('strategy_data') or '{}').get('sum')
            new_sum = json.loads(strategy_data).get('sum')
            sum_was_set = bool(new_sum) and float(new_sum) > 0 and new_sum != old_sum
        except (ValueError, TypeError, AttributeError):
            # Broken JSON, a JSON value that is not an object, or a non-numeric sum.
            sum_was_set = False
        if sum_was_set:
            emit('camp_set_autobudget_sum', str(new_sum), per_type=True)

    if after.get('shows') and operation == 'UPDATE' and after.get('shows') != before.get('shows'):
        emit('shows_update')

    new_total = after.get('sum')
    old_total = before.get('sum')
    if new_total and operation == 'UPDATE' and new_total != old_total:
        # Without the previous value there is no delta to report.
        if old_total is not None:
            emit('sum_change', str(float(new_total) - float(old_total)))

    return ret


def process_bids_row(operation, row):
    """Report a 'bids_price' event for an inserted bid or a changed price.

    The campaign id is read from the row columns (after image first, then
    before image). On UPDATE the event is emitted only when the new price is
    non-empty and differs from the old one. Returns a list with zero or one
    event dict; 'money_value' carries the new price.
    """
    old_columns = cols(row.get(b'before'))
    new_columns = cols(row.get(b'after'))
    new_price = new_columns.get('price')

    if operation != 'INSERT':
        price_changed = bool(new_price) and old_columns.get('price') != new_price
        if not price_changed:
            return []

    campaign_id = new_columns.get('cid') or old_columns.get('cid')
    return [{'cid': campaign_id, 'event': 'bids_price', 'money_value': new_price}]


def process_autobudget_alerts_row(operation, row):
    """Report an 'autobudget_alert' event when a non-empty ``problems`` value changes.

    ``operation`` is accepted for a uniform handler signature and is not
    consulted. The campaign id is read from the row columns (after image
    first, then before image). Returns a list with zero or one event dict.
    """
    old_columns = cols(row.get(b'before'))
    new_columns = cols(row.get(b'after'))
    campaign_id = new_columns.get('cid') or old_columns.get('cid')

    new_problems = new_columns.get('problems')
    if not new_problems or old_columns.get('problems') == new_problems:
        return []
    return [{'cid': campaign_id, 'event': 'autobudget_alert'}]


def process_clients(operation, row):
    """Report a 'client_create' event for every inserted ``clients`` row.

    The client id is the primary key of the row. Any other operation
    produces no events.
    """
    client_id = pk(row)
    if operation != 'INSERT':
        return []
    return [{'event': 'client_create', 'client_id': client_id}]


def process_users_row(operation, row):
    """Collect the events of one changed ``users`` row.

    The uid is the primary key of the row. Currently only the verified phone
    events are produced. Returns a list of event dicts.
    """
    uid = pk(row)
    old_columns = cols(row.get(b'before'))
    new_columns = cols(row.get(b'after'))

    return list(process_verified_phone_events(uid, operation, old_columns, new_columns))


def process_verified_phone_events(uid, operation, before, after):
    """Yield the verified phone event of one user row change, if any.

    ``before`` and ``after`` are column dicts. An event is produced only when
    ``verified_phone_id`` is set after the change:
    'verified_phone_insert' for an INSERT, 'verified_phone_fill' for an UPDATE
    that sets a previously empty value and 'verified_phone_change' for an
    UPDATE that replaces one value with another.
    """
    old_phone = before.get('verified_phone_id')
    new_phone = after.get('verified_phone_id')

    # Every event requires a phone to be present after the change.
    if new_phone is None:
        return

    if operation == 'INSERT':
        event = 'verified_phone_insert'
    elif operation != 'UPDATE':
        return
    elif old_phone is None:
        event = 'verified_phone_fill'
    elif old_phone != new_phone:
        event = 'verified_phone_change'
    else:
        return

    yield {'uid': uid, 'event': event}


def process_campaign_permalinks_row(operation, row):
    """Yield the default organization event of one ``campaign_permalinks`` row change.

    The campaign id is the primary key of the row. INSERT and DELETE always
    produce an event; UPDATE does so only when ``permalink_id`` changed.
    """
    cid = pk(row)
    old_columns = cols(row.get(b'before'))
    new_columns = cols(row.get(b'after'))

    if operation == 'UPDATE':
        if old_columns.get('permalink_id') == new_columns.get('permalink_id'):
            return
        event = 'default_organization_update'
    elif operation == 'INSERT':
        event = 'default_organization_add'
    elif operation == 'DELETE':
        event = 'default_organization_delete'
    else:
        return

    yield {'cid': cid, 'event': event}


def process_banner_permalinks_row(operation, row):
    """Yield the banner organization event of one ``banner_permalinks`` row change.

    The banner id is the ``bid`` field of the primary key. Only INSERT and
    DELETE produce an event.
    """
    bid = pk_by_name(row, b'bid')

    event_by_operation = {
        'INSERT': 'banner_organization_add',
        'DELETE': 'banner_organization_delete',
    }
    event = event_by_operation.get(operation)
    if event is not None:
        yield {'bid': bid, 'event': event}


def process_banner_turbolandings_row(operation, row):
    """Yield the turbolanding event of one ``banner_turbolandings`` row change.

    The banner id is the ``bid`` field of the primary key. INSERT and DELETE
    always produce an event; UPDATE does so only when ``tl_id`` changed.
    """
    bid = pk_by_name(row, b'bid')
    old_columns = cols(row.get(b'before'))
    new_columns = cols(row.get(b'after'))

    if operation == 'UPDATE':
        if old_columns.get('tl_id') == new_columns.get('tl_id'):
            return
        event = 'banner_turbolanding_update'
    elif operation == 'INSERT':
        event = 'banner_turbolanding_add'
    elif operation == 'DELETE':
        event = 'banner_turbolanding_delete'
    else:
        return

    yield {'bid': bid, 'event': event}


def process_camp_metrika_counters_row(operation, row):
    """Yield the metrika counters event of one ``camp_metrika_counters`` row change.

    The campaign id is the primary key of the row. INSERT, UPDATE and DELETE
    each map to their own event; any other operation produces nothing.
    """
    cid = pk(row)

    event_by_operation = {
        'INSERT': 'camp_metrika_counters_add',
        'UPDATE': 'camp_metrika_counters_update',
        'DELETE': 'camp_metrika_counters_delete',
    }
    event = event_by_operation.get(operation)
    if event is not None:
        yield {'cid': cid, 'event': event}


# Register cyson.loads / cyson.dumps as the Yson converters for extract_events.
# NOTE(review): presumably this makes YQL pass the Yson `rows` argument as parsed
# Python objects instead of raw bytes - confirm against the YQL Python UDF docs.
extract_events._yql_convert_yson = (cyson.loads, cyson.dumps)
@@;

-- Python UDFs defined in $script, declared with their YQL signatures.
-- extract_events(table, operation, rows) returns a list of event dicts for one binlog record.
$extract_events = Python3::extract_events(
    Callable<(Optional<String>, Optional<String>, Optional<Yson>)->List<Dict<Utf8, Optional<String>>>>,
    $script
);
-- get_service_type(service, method, application_id, operator_uid) returns the source label of a change.
$get_service_type = Python3::get_service_type(
    Callable<(Optional<String>, Optional<String>, Optional<String>, Optional<Int64>)->String>,
    $script);
-- is_api_commander_source(application_id) is true for the application ids listed in $script.
$is_api_commander_source = Python3::is_api_commander_source(Callable<(Optional<String>)->Bool>, $script);

-- API requests of the day that were made by one of the commander applications.
$ppclog_api_commander = (
    SELECT
        reqid,
        application_id
    FROM $ppclog_api_table
    WHERE $is_api_commander_source(application_id)
);

-- One output row per event extracted from the binlog of the day.
-- The inner query runs the Python extractor over each binlog record of the tracked tables,
-- FLATTEN LIST BY then unfolds the returned list, and the outer query casts the
-- string fields of every event dict to typed columns.
$events = (
    SELECT table,
        iso_eventtime, reqid, cmd, service, operator_uid,
        event,
        cast(event['event'] as String) as event_type,
        cast(event['cid'] as Int64) as cid,
        cast(event['money_value'] as Double) as money_value,
        cast(event['client_id'] as Int64) as ClientID,
        cast(event['uid'] as Int64) as uid,
        cast(event['bid'] as Int64) as bid,
        gt_id as gt_id
    FROM (
        SELECT table,
            iso_eventtime, trance_info_req_id as reqid, trance_info_method as cmd, trance_info_service as service,
            trance_info_operator_uid as operator_uid,
            gtid_transaction_id as gt_id,
            -- a missing or empty operation is treated as INSERT
            $extract_events(table, IF(operation is null or operation == '', 'INSERT', operation), rows) as events
        FROM $binlog_table
        WHERE table in (
            'campaigns',
            'bids',
            'autobudget_alerts',
            'clients',
            'users',
            'campaign_permalinks',
            'banner_permalinks',
            'banner_turbolandings',
            'camp_metrika_counters'
        )
        -- resharding can produce spurious events, so rows written by the ppcReSharder method are filtered out
        -- NOTE(review): if trance_info_method can be NULL, this comparison and the NOT LIKE below
        -- also drop such rows - confirm that this is intended
        AND trance_info_method != 'ppcReSharder'
        -- do not count organizations that were linked by the UpdateBannerPermalinks job
        AND (
            table != 'banner_permalinks'
            OR trance_info_method NOT LIKE '%UpdateBannerPermalinks%'
        )
    )
    FLATTEN LIST BY events as event
);

-- Add the `source` column: the service type of the request that made the change.
-- The application id is known only for requests found among the commander API requests.
$events = (
    SELECT
        e.*,
        $get_service_type(e.service, e.cmd, p.application_id, e.operator_uid) AS source
    FROM $events AS e
    LEFT JOIN ANY $ppclog_api_commander AS p
        ON e.reqid = p.reqid
);

-- Campaign-related events that are aggregated over the whole day.
-- NOTE(review): `_` inside the LIKE patterns below is itself a single-character wildcard,
-- so the patterns are slightly wider than the literal prefixes and suffixes they spell.
$grouped_events_with_cid = (
    SELECT
        cid,
        event_type,
        source,
        COUNT(*) AS events_count,
        SUM(money_value) AS money_value,
        MIN(iso_eventtime) AS first_event_time
    FROM $events
    WHERE
        event_type IN (
            'wallet_create',
            'billing_aggregate_create',
            'camp_create',
            'camp_moderate_ready',
            'camp_day_budget',
            'camp_enabled_autobudget',
            'camp_disabled_autobudget',
            'camp_set_dont_show',
            'camp_set_autobudget_sum',
            'shows_update',
            'bids_price',
            'autobudget_alert'
        )
        OR event_type LIKE '%_camp_create'
        OR event_type LIKE '%_camp_day_budget'
        OR event_type LIKE '%_camp_enabled_autobudget'
        OR event_type LIKE '%_camp_disabled_autobudget'
        OR event_type LIKE '%_camp_set_autobudget_sum'
        OR event_type LIKE 'default_organization_%'
        OR event_type LIKE 'camp_metrika_counters_%'
    GROUP BY
        cid,
        event_type,
        source
);

-- Daily campaign aggregates enriched with the client id and with the money sum
-- converted to rubles from the campaign currency; campaigns with statusEmpty other than 'No' are dropped.
$grouped_events_with_cid_result = (
    SELECT
        c.ClientID AS ClientID,
        c.cid AS cid,
        e.event_type AS event_type,
        e.source AS source,
        e.events_count AS events_count,
        e.first_event_time AS first_event_time,
        $cur_convert(e.money_value, c.currency) AS money_sum_rub
    FROM $grouped_events_with_cid AS e
    INNER JOIN LIKE($mysql_sync_path, `ppc:%`, `straight/campaigns`) AS c
        ON c.cid = e.cid
    WHERE c.statusEmpty = 'No'
);

-- Campaign-related events that must not be aggregated over the whole day:
-- they are grouped per transaction id (gt_id) instead.
$grouped_events_with_cid_and_id = (
    SELECT
        cid,
        event_type,
        source,
        COUNT(*) AS events_count,
        SUM(money_value) AS money_value,
        MIN(iso_eventtime) AS first_event_time
    FROM $events
    WHERE event_type = 'sum_change'
    GROUP BY
        cid,
        gt_id,
        event_type,
        source
);

-- Per-transaction campaign aggregates enriched with the client id and with the money sum
-- converted to rubles from the campaign currency; campaigns with statusEmpty other than 'No' are dropped.
$grouped_events_with_cid_and_id_result = (
    SELECT
        c.ClientID AS ClientID,
        c.cid AS cid,
        e.event_type AS event_type,
        e.source AS source,
        e.events_count AS events_count,
        e.first_event_time AS first_event_time,
        $cur_convert(e.money_value, c.currency) AS money_sum_rub
    FROM $grouped_events_with_cid_and_id AS e
    INNER JOIN LIKE($mysql_sync_path, `ppc:%`, `straight/campaigns`) AS c
        ON c.cid = e.cid
    WHERE c.statusEmpty = 'No'
);

-- Events that belong directly to a client; they carry no campaign id and no money sum.
$grouped_events_with_client_id_result = (
    SELECT
        ClientID,
        NULL AS cid,
        event_type,
        source,
        COUNT(*) AS events_count,
        MIN(iso_eventtime) AS first_event_time,
        NULL AS money_sum_rub
    FROM $events
    WHERE event_type = 'client_create'
    GROUP BY
        ClientID,
        event_type,
        source
);

-- Events that belong to a user; the client id is resolved through the users table.
$grouped_events_with_uid_result = (
    SELECT
        u.ClientID AS ClientID,
        NULL AS cid,
        e.event_type AS event_type,
        e.source AS source,
        COUNT(*) AS events_count,
        MIN(e.iso_eventtime) AS first_event_time,
        NULL AS money_sum_rub
    FROM $events AS e
    INNER JOIN LIKE($mysql_sync_path, `ppc:%`, `straight/users`) AS u
        ON u.uid = e.uid
    WHERE e.event_type LIKE 'verified_phone_%'
    GROUP BY
        u.ClientID,
        e.event_type,
        e.source
);

-- Events that belong to a banner; the client id is resolved through
-- the banner's campaign (banners -> campaigns).
$grouped_events_with_bid_result = (
    SELECT
        c.ClientID AS ClientID,
        NULL AS cid,
        e.event_type AS event_type,
        e.source AS source,
        COUNT(*) AS events_count,
        MIN(e.iso_eventtime) AS first_event_time,
        NULL AS money_sum_rub
    FROM $events AS e
    INNER JOIN LIKE($mysql_sync_path, `ppc:%`, `straight/banners`) AS b
        ON e.bid = b.bid
    INNER JOIN LIKE($mysql_sync_path, `ppc:%`, `straight/campaigns`) AS c
        ON c.cid = b.cid
    WHERE e.bid IS NOT NULL
    GROUP BY
        c.ClientID,
        e.event_type,
        e.source
);


-- Rewrite the result table with the union of all five aggregates.
INSERT INTO $result_path WITH TRUNCATE
SELECT *
FROM (
    SELECT * FROM $grouped_events_with_cid_result
    UNION ALL
    SELECT * FROM $grouped_events_with_cid_and_id_result
    UNION ALL
    SELECT * FROM $grouped_events_with_client_id_result
    UNION ALL
    SELECT * FROM $grouped_events_with_uid_result
    UNION ALL
    SELECT * FROM $grouped_events_with_bid_result
)
ORDER BY
    ClientID,
    cid,
    event_type,
    source;
