#!/usr/bin/env python
import sys
import argparse
import logging

from datetime import datetime, timedelta
from yql.api.v1.client import YqlClient

from util import get_dates

####################################################################################################

# Log line layout: timestamp, source line number, level name, message.
log_format = '[%(asctime)s] %(lineno)d: %(levelname)-2s: %(message)s'
# Configure the root logger at module load; records at INFO level and above are emitted.
logging.basicConfig(format=log_format, level=logging.INFO)

####################################################################################################

# Documentation about suggest records in redir-log: https://wiki.yandex-team.ru/data/logs/redir/suggest/

# YQL script template. The @@YT_POOL@@, @@MR_SERVER@@, @@INPUT_TABLE@@ and
# @@OUTPUT_TABLE@@ placeholders are substituted by get_yql_script().
# The script loads Python UDFs from the attached file aliased "util.py",
# selects rows of the input table whose dict 'cid' equals '2873', projects
# the listed fields (some passed through UDFs or cast to Int64) and writes
# the result, ordered by timestamp, into the output table WITH TRUNCATE.
yql_script_template = '''
PRAGMA yt.Pool = '@@YT_POOL@@';
PRAGMA yt.DataSizePerJob = '4294967296'; -- 4 GB
PRAGMA yt.UseTmpfs = '1073741824'; -- 1 GB
PRAGMA inferscheme;

USE @@MR_SERVER@@;

$util = FileContent("util.py");

$Unquote = Python::Unquote("(String?)->String?", $util);
$GetService = Python::GetService("(String?)->String?", $util);
$GetActionType = Python::GetActionType("(String?)->String?", $util);
$GetSuggestionPos = Python::GetSuggestionPos("(String?)->Int64?", $util);
$GetSubmitType = Python::GetSubmitType("(String?)->String?", $util);
$GetLr = Python::GetLr("(String?)->Int64?", $util);
$GetSuggestTypes = Python::GetSuggestTypes("(String?)->List<String>?", $util);
$GetSymbolsCount = Python::GetSymbolsCount("(String?)->Int64?", $util);
$GetQueryLength = Python::GetQueryLength("(String?)->Int64?", $util);
$GetUserActionsCount = Python::GetUserActionsCount("(String?)->Int64?", $util);
$GetIp = Python::GetIp("(String?)->String?", $util);

INSERT INTO [@@OUTPUT_TABLE@@] WITH TRUNCATE

SELECT
    redir.dict{'path'} as path,
    $GetService(redir.dict{'path'}) as service,
    $GetActionType(redir.dict{'path'}) as action_type,
    $GetSuggestionPos(redir.dict{'path'}) as suggestion_pos,
    $GetSubmitType(redir.dict{'path'}) as submit_type,

    $Unquote(redir.dict{'text'}) as text,
    $Unquote(redir.dict{'user_input'}) as user_input,
    $Unquote(redir.dict{'prev_query'}) as prev_query,

    CAST(redir.dict{'since_first_change'} as Int64) as since_first_change,
    CAST(redir.dict{'since_last_change'} as Int64) as since_last_change,
    CAST(redir.dict{'total_input_time'} as Int64) as total_input_time,

    CAST(redir.dict{'pos'} as Int64) as pos,

    $GetSuggestTypes(redir.dict{'log'}) as log,

    CAST(redir.dict{'region'} as Int64) as region,
    --Geo::RegionByIp(redir.dict{'ip'}).id as region_by_ip,
    --Geo::FindCountry(Geo::RegionByIp(redir.dict{'ip'}).id) as country_by_ip,
    Geo::RegionByIp($GetIp(redir.dict{'ip'})).id as region_by_ip,
    Geo::FindCountry(Geo::RegionByIp($GetIp(redir.dict{'ip'})).id) as country_by_ip,
    $GetLr(redir.dict{'HTTP_REFERER'}) as lr,

    redir.dict{'ratio'} as ratio,
    $GetSymbolsCount(redir.dict{'ratio'}) as symbols_count,
    $GetQueryLength(redir.dict{'ratio'}) as query_length,
    $GetUserActionsCount(redir.dict{'ratio'}) as user_actions_count,

    redir.dict{'session'} as session,
    redir.dict{'ip'} as ip,
    redir.dict{'yandexuid'} as yandexuid,
    redir.dict{'url'} as url,
    redir.dict{'exprt'} as exprt,
    redir.dict{'timestamp'} as timestamp,
    redir.dict{'iso_eventtime'} as iso_eventtime,
    redir.dict{'timings'} as timings,
    redir.dict{'times'} as times,
    redir.dict{'render_times'} as render_times,
    redir.dict{'tpah_log'} as tpah_log,
    redir.dict{'suggest_reqid'} as suggest_reqid,
    redir.dict{'HTTP_REFERER'} as referer

FROM [@@INPUT_TABLE@@] as redir
where redir.dict{'cid'} = '2873'
ORDER BY timestamp;
'''

####################################################################################################

def get_yql_script(input_table, output_table, mr_server, yt_pool):
    """Return the YQL script text with all template placeholders filled in.

    Args:
        input_table: value substituted for ``@@INPUT_TABLE@@``.
        output_table: value substituted for ``@@OUTPUT_TABLE@@``.
        mr_server: value substituted for ``@@MR_SERVER@@``.
        yt_pool: value substituted for ``@@YT_POOL@@``.

    Returns:
        The contents of ``yql_script_template`` after the substitutions.
    """
    # Substitutions are applied sequentially, in exactly this order.
    substitutions = (
        ("@@INPUT_TABLE@@", input_table),
        ("@@OUTPUT_TABLE@@", output_table),
        ("@@MR_SERVER@@", mr_server),
        ("@@YT_POOL@@", yt_pool),
    )
    script = yql_script_template
    for placeholder, value in substitutions:
        script = script.replace(placeholder, value)
    return script

def build_preparates(dates, mr_server, yt_pool, util_file):
    """Run the preparate-building YQL query once per date.

    For every date the daily redir-log table is used as input, the file
    ``util_file`` is attached to the request under the alias ``util.py``
    and the query is executed. Dates whose query does not report success
    are collected and logged at the end.

    Args:
        dates: iterable of dates; each is formatted into the table paths.
        mr_server: cluster name substituted into the script.
        yt_pool: YT pool name substituted into the script.
        util_file: local path of the file attached as ``util.py``.

    Returns:
        True when no date failed, False otherwise.
    """
    yql = YqlClient()
    failures = []
    for day in dates:
        logging.info('Starting calculation for date: {}...'.format(day))
        source_path = '//logs/redir-log/1d/{}'.format(day)
        # NOTE(review): results currently go to what looks like a personal test
        # directory; the previously used destination was
        # //home/suggest-dev/suggest_logs/redir-log_preparates/raw/<date> -- confirm
        # which one is intended.
        target_path = '//home/suggest-dev/galamaj/test/{}'.format(day)
        script_text = get_yql_script(source_path, target_path, mr_server, yt_pool)
        query = yql.query(script_text)
        query.attach_file(util_file, alias='util.py')
        query.run()
        if not query.get_results().is_success:
            logging.error('Failed: {}'.format(day))
            failures.append(day)
            continue
        logging.info('Success: {}'.format(day))

    if failures:
        logging.error('Failed dates: {}'.format(', '.join(failures)))
    return not failures

def main(args):
    """Resolve the dates to process and build preparates for each of them.

    Args:
        args: parsed command-line namespace providing ``timestamp``,
            ``from_date``, ``to_date``, ``mr_server``, ``yt_pool`` and
            ``util_file``.

    Exits the process with an error message when at least one date failed.
    """
    selected_dates = get_dates(args.timestamp, args.from_date, args.to_date)
    logging.info('Selected {} dates'.format(len(selected_dates)))
    if build_preparates(selected_dates, args.mr_server, args.yt_pool, args.util_file):
        return
    sys.exit('Some preparates were not built')

####################################################################################################

def parse_args(argv=None):
    """Parse command-line arguments of the preparates builder.

    Args:
        argv: optional list of argument strings to parse. When None
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``util_file``, ``timestamp``, ``from_date``,
        ``to_date``, ``mr_server`` and ``yt_pool`` attributes.
    """
    parser = argparse.ArgumentParser(add_help=True, description='Suggest redir-log preparates builder')
    # nargs='?' makes the positional optional; without it argparse requires
    # the argument and the declared default is never used.
    parser.add_argument('util_file', nargs='?', default='util.py', help='path to util.py')
    parser.add_argument('--timestamp', help='date timestamp for calculation')
    parser.add_argument('--from_date', help='from date for calculation (format: YYYY-MM-DD)')
    parser.add_argument('--to_date', help='to date for calculation (format: YYYY-MM-DD)')
    parser.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    parser.add_argument('--yt_pool', default='robot-suggestor-dev', help='YT pool')
    return parser.parse_args(argv)

# Run only when executed as a script, so that importing this module does not
# parse the importer's command line or start any YQL queries.
if __name__ == '__main__':
    main(parse_args())

####################################################################################################
