import sys
import argparse
import datetime
import logging
from yql.api.v1.client import YqlClient

# Layout of every log line: "[LEVEL timestamp]: message".
LOG_FORMAT = '[%(levelname)s %(asctime)s]: %(message)s'
# Use %H (24-hour clock) rather than %I: a 12-hour value with no AM/PM
# marker makes morning and afternoon entries indistinguishable in the log.
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt=LOG_DATE_FORMAT,
)


# YQL query template; main() fills it in with str.format(). Placeholders:
#   {cluster}            - cluster name for the USE statement
#   {pathin}             - first argument of RANGE(...), the input location
#   {min}, {max}         - second and third arguments of RANGE(...)
#   {apikey}             - comma-separated values for the APIKey IN (...) filter
#   {token_extractor}    - first two arguments of the IF(...) that yields pushToken
#   {pathout}            - table written by INSERT INTO ... WITH TRUNCATE
#   {push_token_exists}  - optional WHERE clause appended to the final SELECT
#
# Because the text goes through str.format(), it must not contain literal
# braces other than these placeholders.
#
# Query outline:
#   1. $script / $reducer: an embedded Python reducer. For the rows of one
#      installId it keeps, per field, the non-empty value carried by the row
#      with the greatest timestamp, plus the greatest timestamp seen overall.
#      A field value is only taken from rows whose timestamp is > 0, since the
#      per-field counters start at 0 and the comparison is strict.
#      NOTE(review): the script uses `0L` long literals, so it presumably runs
#      under a Python 2 UDF runtime - confirm before porting.
#   2. $events_table: selects and renames columns from the input range, keeping
#      rows with a matching APIKey and non-NULL UUID and StartTimestamp.
#   3. $all_regs: REDUCE of $events_table on installId using $reducer.
#   4. INSERT: writes the reduced rows to {pathout}, adding zoneId and countryId
#      computed from regionId via Geo::RegionById / Geo::RoundRegionById.
build_users_table_yql = """
$script = @@
class Record(object):
    def __init__(self, iid):
        self.hardwareId = None
        self.installId = iid
        self.deviceId = None
        self.appId = None
        self.appVersion = None
        self.deviceName = None
        self.regionId = None
        self.pushToken = None
        self.platform = None
        self.timestamp = 0L

class Counters(object):
    def __init__(self):
        self.hardwareId = 0L
        self.deviceId = 0L
        self.appId = 0L
        self.appVersion = 0L
        self.deviceName = 0L
        self.regionId = 0L
        self.pushToken = 0L
        self.platform = 0L

def reducer(key, val):
    tms = Counters()

    result = Record(key)

    for r in val:
        tm = r.timestamp

        if tm > tms.hardwareId and r.hardwareId:
            result.hardwareId = r.hardwareId
            tms.hardwareId = tm

        if tm > tms.deviceId and r.deviceId:
            result.deviceId = r.deviceId
            tms.deviceId = tm

        if tm > tms.appId and r.appId:
            result.appId = r.appId
            tms.appId = tm

        if tm > tms.appVersion and r.appVersion:
            result.appVersion = r.appVersion
            tms.appVersion = tm

        if tm > tms.deviceName and r.deviceName:
            result.deviceName = r.deviceName
            tms.deviceName = tm

        if tm > tms.regionId and r.regionId:
            result.regionId = r.regionId
            tms.regionId = tm

        if tm > tms.pushToken and r.pushToken:
            result.pushToken = r.pushToken
            tms.pushToken = tm

        if tm > tms.platform and r.platform:
            result.platform = r.platform
            tms.platform = tm

        if tm > result.timestamp:
            result.timestamp = tm

    return result
@@;

$reducer = Python::reducer(@@
    (String?,List<Struct<hardwareId:String?,deviceId:String?, appId:String?, appVersion:String?, deviceName:String?, timestamp:Uint64, regionId:Int32?, pushToken:String?, platform:String?>>)->
    Struct<hardwareId:String?,installId:String?,deviceId:String?, appId:String?, appVersion:String?, deviceName:String?, regionId:Int32?, pushToken:String?, platform:String?, timestamp:Uint64>@@,
    $script);

USE {cluster};

$events_table = (
    SELECT
        AndroidID AS hardwareId,
        UUID AS installId,
        DeviceID AS deviceId,
        AppID AS appId,
        AppVersionName AS appVersion,
        CASE
            WHEN Manufacturer IS NULL THEN Model
            WHEN Model IS NULL THEN Manufacturer
            WHEN String::HasPrefix(Model, (Manufacturer ?? '')) THEN Model
            ELSE (Manufacturer ?? '') || ' ' || (Model ?? '')
        END AS deviceName,
        CAST(StartTimestamp AS UInt64) ?? 0 AS timestamp,
        CAST(RegionID AS Int32) AS regionId,
        IF({token_extractor}, CAST(NULL AS String)) AS pushToken,
        String::ToLower(AppPlatform) AS platform
    FROM RANGE(
        [{pathin}],
        [{min}],
        [{max}]
    )
    WHERE APIKey IN ({apikey})
    AND UUID IS NOT NULL
    AND StartTimestamp IS NOT NULL
);

$all_regs = (
    REDUCE $events_table
    ON installId
    USING $reducer((hardwareId AS hardwareId, deviceId AS deviceId, appId AS appId, appVersion AS appVersion, deviceName AS deviceName, timestamp AS timestamp, regionId AS regionId, pushToken AS pushToken, platform AS platform))
);


INSERT INTO [{pathout}] WITH TRUNCATE
SELECT
        hardwareId,
        installId,
        deviceId,
        pushToken,
        appId,
        appVersion,
        deviceName,
        regionId,
        platform,
        Geo::RegionById(regionId).timezone_name AS zoneId,
        Geo::RoundRegionById(regionId, 'country').id AS countryId,
        timestamp
FROM $all_regs {push_token_exists};
"""


def main():
    """Assemble the users-table YQL query from CLI options and execute it.

    Parses the command line, substitutes the options into the
    ``build_users_table_yql`` template, logs the resulting query text and
    runs it through ``YqlClient``.

    Returns:
        0 when the query results report success; 1 otherwise, after the
        reported errors have been written to the log.
    """
    # Both ends of the date range default to yesterday's date.
    default_day = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    cli = argparse.ArgumentParser()
    cli.add_argument('--token', help='YT token')
    cli.add_argument('--cluster', help='YT cluster', default='hahn')
    cli.add_argument('--input', default='//statbox/metrika-mobile-log')
    cli.add_argument('--output', default='//home/search-functionality/putrin/sup/users')
    cli.add_argument('--dfrom', default=default_day)
    cli.add_argument('--dto', default=default_day)
    cli.add_argument('--apikeys', nargs='+', type=int, default=[10318, 10321, 42984, 42989, 86151])
    cli.add_argument('--with_tokens_only', action='store_true')
    cli.add_argument('--browser_tokens', action='store_true')

    options = cli.parse_args()
    yql_client = YqlClient(token=options.token)

    # The extractor supplies the first two arguments of the IF(...) that
    # produces pushToken in the template.
    if options.browser_tokens:
        extractor = "EventType = 'EVENT_PUSH_TOKEN', EventValue"
    else:
        # str.format() does not re-expand braces inside substituted values,
        # so the trailing {0} reaches the query text unchanged.
        extractor = (
            "EventName = 'push_token' OR EventName = 'PUSH_TOKEN_EVENT', "
            "Json::GetField(EventValue, 'token'){0}"
        )

    # Optionally keep only rows that ended up with a push token.
    if options.with_tokens_only:
        token_filter = "WHERE pushToken IS NOT NULL"
    else:
        token_filter = ""

    # API keys are parsed as ints but rendered as quoted literals in the query.
    quoted_keys = ["'%s'" % key for key in options.apikeys]

    query_text = build_users_table_yql.format(
        cluster=options.cluster,
        pathin=options.input,
        min=options.dfrom,
        max=options.dto,
        apikey=','.join(quoted_keys),
        token_extractor=extractor,
        pathout=options.output,
        push_token_exists=token_filter,
    )
    logging.info(query_text)

    query = yql_client.query(query_text)
    query.run()
    if query.get_results().is_success:
        return 0

    logging.error('\n'.join(str(problem) for problem in query.get_results().errors))
    return 1


# Run only when executed as a script; main()'s return value (0 on success,
# 1 when the query reported errors) becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
