from __future__ import print_function
import os
from yt.wrapper import YtClient
from ads.libs.yql import run_yql_query, get_yql_client


def run_yql(source, target, cluster, context_type, sim_distance, score=None):
    """Build the broad-match tables under ``target`` from the ``source`` table.

    A single YQL query is rendered and executed. It reads ``key``, ``subkey``
    and ``value`` from ``source`` (``key`` becomes BannerID, ``subkey`` becomes
    OrderID, ``value`` is split on tabs into phrases, and every phrase is split
    on ":" into positional fields). It then overwrites (``WITH TRUNCATE``) four
    tables below ``target``:

    * ``BroadMatchTable`` - one row per parsed phrase;
    * ``BroadPhraseTable`` - one row per distinct BroadPhraseID;
    * ``BroadMinuswordTable`` - written with the expected columns but no rows
      (the SELECT is filtered by ``WHERE 1 = 0``);
    * ``ProcessedBannersTable`` - one row per ``source`` row.

    The timestamp written to the tables is the ``modification_time`` attribute
    of ``source``, truncated to whole seconds inside the query.

    Args:
        source: path of the input YT table.
        target: path prefix (node) under which the four tables are written.
        cluster: used both as the YT proxy and as the ``db`` of the YQL client.
        context_type: substituted verbatim as ``$context_type`` and written to
            the ContextType column.
        sim_distance: substituted verbatim as ``$sim_distance``; it overrides
            the per-phrase value (field 3 of a phrase is ignored) and is also
            used in the query title.
        score: optional fallback for phrases whose field 8 does not cast to
            DOUBLE. ``None`` is rendered as ``NULL``, so ``$def_score`` applies.

    Raises:
        Exception: if ``source`` does not exist.
    """
    # Destination tables, all siblings under ``target``.
    broad_match_table = target + '/BroadMatchTable'
    broad_minusword_table = target + '/BroadMinuswordTable'
    broad_phrase_table = target + '/BroadPhraseTable'
    processed_banners_table = target + '/ProcessedBannersTable'

    # The YT token is taken from the environment; a missing variable yields None.
    yt = YtClient(proxy=cluster, token=os.environ.get('YT_TOKEN'))
    source_exists = yt.exists(source)
    if not source_exists:
        raise Exception('Table {} does not exist'.format(source))
    source_modified_at = yt.get_attribute(source, 'modification_time')

    # ``score`` is pasted into a ``??`` chain, so "no score" must be SQL NULL.
    if score is not None:
        score_literal = score
    else:
        score_literal = 'NULL'

    placeholders = {
        'source': source,
        'source_mod_time': source_modified_at,
        'bm_target': broad_match_table,
        'bmw_target': broad_minusword_table,
        'bp_target': broad_phrase_table,
        'p_target': processed_banners_table,
        'sim_distance': sim_distance,
        'context_type': context_type,
        'score': score_literal,
    }

    # NOTE: the text below is the query sent to YQL; it is a str.format
    # template, so it must not contain literal curly braces.
    # NOTE(review): ``$banner2update_time`` is defined in the query but is not
    # referenced by any of the INSERT statements.
    query_template = '''
PRAGMA yt.Pool = "broadmatching";
PRAGMA yt.ForceInferSchema = '100';

$sim_distance = {sim_distance};
-- Defaults!
--

$context_type = {context_type};
$def_ctr = 0.;
$def_pctr = 0.;
$def_score = 0.;
$def_spec_place = 1;
$def_no_psearch = 0;
$norm_type = 1;

-- Convert string to BS id
--

$bs_id_script = @@
import md5
import struct

def bs_id(str=""):
    a = struct.unpack("!4I", md5.new(str).digest())
    return ((a[1] ^ a[3]) << 32) | (a[0] ^ a[2])
@@
;

$bs_id_callable = Python2::bs_id(
    Callable<(String?)->Uint64>,
    $bs_id_script
)
;


$external_table = (
    SELECT
        CAST(key AS UINT64) AS BannerID,
        CAST(subkey AS UINT64) AS OrderID,
        String::SplitToList(value, "\t") AS Phrase
    FROM `{source}`
);

$external_data = (
SELECT
    BannerID,
    OrderID,
    PhraseParts[0] as Phrase,
    $bs_id_callable(PhraseParts[0]) AS BroadPhraseID,
    CAST(CAST(PhraseParts[1] AS DOUBLE) * 1000000 AS INT32) as APCRatio,
    CAST(PhraseParts[2] AS UINT64) as OrigPhraseID,
    -- Ignore origianl SimDistance
    -- CAST(PhraseParts[3] AS INT32) ?? $sim_distance AS SimDistance,
    CAST($sim_distance AS INT32) AS SimDistance,
    CAST(PhraseParts[4] AS INT32) ?? $def_spec_place AS SpecPlace,
    CAST((CAST(PhraseParts[5] AS DOUBLE) ?? $def_ctr) * 1000000 AS INT32) AS CTR,
    CAST(PhraseParts[6] AS INT32) ?? $def_no_psearch AS NoPSearch,
    CAST((CAST(PhraseParts[7] AS DOUBLE) ?? $def_pctr) * 1000000 AS INT32) AS PCTR,
    CAST((CAST(PhraseParts[8] AS DOUBLE) ?? {score} ?? $def_score) * 1000000 AS UINT64) AS Score,
    $context_type as ContextType
FROM
(
    SELECT
        BannerID,
        OrderID,
        String::SplitToList(Phrase, ":") as PhraseParts
    FROM $external_table
    FLATTEN BY Phrase
)
);

$update_time = (
SELECT
    CAST(
        CAST(
            CAST("{source_mod_time}" AS Timestamp)  -- With microseconds
            AS Datetime                             -- Get rid of microseconds
        )
        AS INT64
    ) AS `Timestamp`
);

$banner2update_time = (
    SELECT ET.BannerID AS BannerID, UT.`Timestamp` AS `Timestamp`
    FROM $update_time AS UT
    CROSS JOIN $external_table AS ET
);

INSERT INTO `{bm_target}`
    WITH TRUNCATE
SELECT
    ED.CTR AS CTR,
    ED.Score AS Score,
    ED.PCTR AS PCTR,
    ED.OrigPhraseID AS PhraseID,
    ED.APCRatio AS APCRatio,
    UT.`Timestamp` AS `TimeStamp`,
    ED.SimDistance AS SimDistance,
    ED.ContextType AS ContextType,
    ED.BroadPhraseID AS BroadPhraseID,
    (IF(ED.SpecPlace == 1, 1, 0) + IF(ED.NoPSearch == 1, 2, 0)) AS Options,
    ED.BannerID AS BannerID,
    ED.OrderID AS OrderID
FROM $external_data AS ED
CROSS JOIN $update_time AS UT
;

INSERT INTO `{bp_target}`
    WITH TRUNCATE
SELECT
    SOME(ED.Phrase) AS Data,
    CAST($norm_type AS UINT64) AS NormType,
    ED.BroadPhraseID AS BroadPhraseID,
    SOME(UT.`Timestamp`) AS `TimeStamp`
FROM $external_data AS ED
CROSS JOIN $update_time AS UT
GROUP BY ED.BroadPhraseID
;

-- Fake build minuswords
INSERT INTO `{bmw_target}`
    WITH TRUNCATE
SELECT
    CAST(0 AS INT64) AS BannerID,
    CAST("" AS STRING) AS Data,
    CAST(0 AS INT64) AS `TimeStamp`,
    CAST(0 AS UINT64) AS BaseNo,
    CAST(0 AS UINT64) AS OrderID
FROM (
    SELECT 1
)
WHERE 1 = 0
;

INSERT INTO `{p_target}`
    WITH TRUNCATE
SELECT
    ET.BannerID AS BannerID,
    UT.`Timestamp` AS `TimeStamp`,
    CAST($sim_distance AS INT32) AS SimDistance
FROM $external_table AS ET
CROSS JOIN $update_time AS UT
;

'''
    query = query_template.format(**placeholders)

    yql_client = get_yql_client(db=cluster)
    query_title = "dynamic_speedup SD={}".format(sim_distance)
    run_yql_query(query, client=yql_client, title=query_title)


def need_run(source, target, cluster):
    """Tell whether the tables under ``target`` are older than ``source``.

    Only ``<target>/BroadMatchTable`` is inspected. When that table does not
    exist the answer is ``True``; otherwise both ``modification_time``
    attributes are printed and compared.

    Args:
        source: path of the input YT table.
        target: path prefix (node) that holds ``BroadMatchTable``.
        cluster: YT proxy to connect to.

    Returns:
        ``True`` if ``BroadMatchTable`` is missing, else the result of
        ``source modification_time > target modification_time``.

    Raises:
        Exception: if ``source`` does not exist.
    """
    # The YT token is taken from the environment; a missing variable yields None.
    token = os.environ.get('YT_TOKEN')
    yt = YtClient(proxy=cluster, token=token)

    source_present = yt.exists(source)
    if not source_present:
        raise Exception('Table {} does not exist'.format(source))
    source_stamp = yt.get_attribute(source, 'modification_time')

    broad_match_table = '{}/BroadMatchTable'.format(target)
    if yt.exists(broad_match_table):
        target_stamp = yt.get_attribute(broad_match_table, 'modification_time')

        report = (
            ("Source mod time:", source_stamp),
            ("Target mod time:", target_stamp),
        )
        for label, stamp in report:
            print(label, stamp)

        # The attribute values are compared exactly as YT returned them.
        # NOTE(review): presumably these are ISO-8601 strings, for which
        # lexicographic order equals chronological order - confirm.
        return source_stamp > target_stamp

    # Nothing has been built yet, so a run is required.
    return True
