#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import os
import urlparse
import argparse
import yt.wrapper as yt
from datetime import date, datetime, timedelta
from yql.api.v1.client import YqlClient
from yql.client.operation import YqlOperationShareIdRequest

######################################################################################
# Python 2 only: re-import sys with reload() and then switch the process-wide
# default string encoding to UTF-8. The order matters: setdefaultencoding is
# called on the freshly reloaded module.
import sys
reload(sys)
sys.setdefaultencoding('utf8')
######################################################################################


# YQL query template executed once per date; every '@@date' placeholder is
# replaced with a 'YYYY-MM-DD' string before the query is submitted.
#
# What the query does:
#   1. Reads up to 1000 rows of the daily suggest-response-log table whose
#      `response` matches one of the two LIKE patterns.
#   2. Runs the attached Python UDF `parse_response` (file alias
#      'parse_response.py') on `response` and takes items 0, 1 and 2 of its
#      result as `prefix`, `response` and `ending` (empty string when missing).
#   3. Writes all parsed rows to .../infix/all_shows/<date> (truncating).
#   4. Groups the non-empty (prefix, response, ending) triples, counts them,
#      orders them by a Random() value and writes at most 2000 rows with
#      length(ending) > 6 and ending != response to
#      .../infix/random_for_toloka/<date> (truncating).
#
# `$filter` and `$i` are only referenced by the commented-out WHERE clause.
yql_script = '''
PRAGMA yt.Pool = 'robot-suggestor-dev';
PRAGMA yt.DataSizePerJob = '4294967296'; -- 4 GB
PRAGMA yt.InferSchema;

USE hahn;

$filter = Re2::Grep('[а-я]+');
$i = Re2::Grep('і');

$parse_response = FileContent("parse_response.py");
$parse_response = Python::parse_response("(String?)->List<String?>?", $parse_response);


--$tmp = (
--select reqid, response, _logfeller_timestamp
--from [//home/logfeller/logs/suggest-response-log/1d/@@date]
--);

$tmp = (
select reqid, timestamp, response, _logfeller_timestamp
from [//home/logfeller/logs/suggest-response-log/1d/@@date]
where response like '%{\"src\":\"In\"}"%' or response like '%sgtype:In%'
limit 1000
);

$parsed_log = (
select
reqid,
timestamp,
_logfeller_timestamp,
COALESCE($parse_response(response){0}, '') as prefix,
COALESCE($parse_response(response){1}, '') as response,
COALESCE($parse_response(response){2}, '') as ending
from $tmp
);

INSERT INTO [//home/suggest-dev/galamaj/infix/all_shows/@@date] with truncate
select *
from $parsed_log;

$randomize_index = (
select prefix, response, ending, cast(count(*) as double) as total_shows, Random(prefix) as rand_i
from $parsed_log
where prefix is not Null and response is not Null and prefix != '' and response != ''
group by prefix, response, ending
);

insert into [//home/suggest-dev/galamaj/infix/random_for_toloka/@@date] with truncate
select *
from $randomize_index
--where $filter(prefix) and not $i(prefix) and length(ending) > 6 and ending != response
where length(ending) > 6 and ending != response
order by rand_i
limit 2000;
'''

def get_yql_operation_url(yql_operation):
    """Return a web UI link for the given YQL operation.

    A share-id request is issued for the operation. When it succeeds, the
    link is built from the request's ``json`` value; otherwise the link is
    built from the operation id itself (a non-sharable link).
    """
    operations_url = 'https://yql.yandex-team.ru/Operations/'

    request = YqlOperationShareIdRequest(yql_operation.operation_id)
    request.run()

    # Prefer the shared id; fall back to the raw operation id.
    link_id = request.json if request.is_ok else yql_operation.operation_id
    return operations_url + link_id


def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``timestamp``, ``from_date``,
        ``to_date``, ``mr_server``, ``yql_token``, ``parse_response`` and
        ``output``.
    """
    parser = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')
    parser.add_argument('--timestamp', help='date timestamp for calculation')
    parser.add_argument('--from_date', help='from date for calculation (format: YYYY-MM-DD)')
    parser.add_argument('--to_date', help='to date for calculation (format: YYYY-MM-DD)')
    parser.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    parser.add_argument('--yql_token', help='YQL token')
    # nargs='?' makes the positional optional; without it argparse still
    # requires the argument and the default below is never applied.
    parser.add_argument('parse_response', nargs='?', default='parse_response.py',
                        help='path to parse_response.py')
    # The value is used as the path of the result file, not as a date.
    parser.add_argument('--output', default='for_toloka', help='output file path')

    return parser.parse_args(argv)


def get_dates(timestamp, from_date, to_date):
    """Build the list of dates (``'YYYY-MM-DD'`` strings) to process.

    Args:
        timestamp: optional string with a Unix timestamp. Only its first 10
            characters are used (presumably so that millisecond timestamps
            are accepted as well). It is converted with the local timezone.
        from_date: optional first day of a range, ``'YYYY-MM-DD'``.
        to_date: optional last day of a range (inclusive), ``'YYYY-MM-DD'``.
            Defaults to today when only ``from_date`` is given. It is ignored
            when ``from_date`` is missing.

    Returns:
        List of date strings: the timestamp's day (if given) followed by
        every day of the range (if given). When no argument is set at all,
        the list holds yesterday only.
    """
    date_format = '%Y-%m-%d'
    dates = []

    if timestamp:
        day = datetime.fromtimestamp(int(timestamp[:10]))
        dates.append(day.strftime(date_format))

    if from_date and not to_date:
        to_date = datetime.now().strftime(date_format)

    # Fall back to yesterday only when nothing was requested. An explicit
    # timestamp must not silently pull in yesterday as an extra date.
    if not timestamp and not from_date and not to_date:
        from_date = (datetime.now() - timedelta(days=1)).strftime(date_format)
        to_date = from_date

    if from_date and to_date:
        # Parse the upper bound once instead of on every loop iteration.
        last_day = datetime.strptime(to_date, date_format)
        current_day = datetime.strptime(from_date, date_format)
        while current_day <= last_day:
            dates.append(current_day.strftime(date_format))
            current_day += timedelta(days=1)

    return dates


def main():
    """Run the YQL script for every requested date and dump the results.

    For each date the query template is submitted with the
    ``parse_response`` file attached. After the operation succeeds, the
    ``prefix`` and ``ending`` columns of the resulting
    ``random_for_toloka/<date>`` table are written to the output file as
    tab-separated lines.

    Raises:
        RuntimeError: when no YQL token is available or a YQL operation
            does not finish successfully.
    """
    args = parse_args()
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    # A parenthesized single expression prints the same under Python 2.
    print(dates)

    # The environment variable keeps priority; --yql_token is the fallback
    # instead of being silently ignored.
    token = os.environ.get('YQL_TOKEN') or args.yql_token
    if not token:
        raise RuntimeError('YQL token is not set: export YQL_TOKEN or pass --yql_token')
    yql_client = YqlClient(token=token)

    # YT client settings do not depend on the date, so set them once.
    yt.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    yt.config["proxy"]["url"] = "hahn.yt.yandex.net"

    for index, day in enumerate(dates):
        yql_operation = yql_client.query(yql_script.replace('@@date', day))
        yql_operation.attach_file(args.parse_response, alias='parse_response.py')
        yql_operation.run()
        yql_operation.wait_progress()
        if not yql_operation.is_success:
            raise RuntimeError('YQL operation failed: ' + get_yql_operation_url(yql_operation))

        # Truncate the output for the first date only and append afterwards;
        # reopening with 'w' for every date kept just the last date's rows.
        mode = 'w' if index == 0 else 'a'
        table_path = '//home/suggest-dev/galamaj/infix/random_for_toloka/{}'.format(day)
        with open(args.output, mode) as result:
            for row in yt.read_table(table_path):
                result.write('{}\t{}\n'.format(row['prefix'], row['ending']))


if __name__ == '__main__':
    main()



