-- Run every statement of this script against the HAHN cluster.
USE HAHN;
-- NOTE(review): presumably enables schema inference for schemaless YT tables -
-- confirm against the YQL pragma documentation.
pragma yt.InferSchema;

-- Drop the order_delivery table addressed by the [[WEEK_AGO_DATE]] placeholder.
-- [[ENV_TYPE]] / [[WEEK_AGO_DATE]] look like template placeholders that are
-- presumably substituted before the script is submitted - TODO confirm.
-- NOTE(review): verify how this statement behaves when the table does not exist.
DROP TABLE `//home/market/[[ENV_TYPE]]/ugc/order_delivery/[[WEEK_AGO_DATE]]`;

$check_tablename = Python::check_tablename(Callable<(String)->Bool>, @@
from datetime import date, timedelta

# Age, in days, of the daily table that has to be picked up.
TABLE_AGE_DAYS = 4

def check_tablename(table_name):
    """Tell whether `table_name` is the date TABLE_AGE_DAYS days ago (YYYY-MM-DD).

    `table_name` is a UTF-8 encoded byte string; the current date is taken
    from date.today().
    """
    decoded_name = table_name.decode('utf-8')
    wanted_name = (date.today() - timedelta(days=TABLE_AGE_DAYS)).strftime('%Y-%m-%d')
    return decoded_name == wanted_name
@@);

$check_offer_tablename = Python::check_offer_tablename(Callable<(String)->Bool>, @@
from datetime import date, timedelta

# Name of the table accepted so far; empty until the first match.
SELECTED_TABLE = ""

def check_offer_tablename(table_name):
    """Accept exactly one offers table whose name starts with the date 4 days ago.

    Table names are expected to look like '<YYYYMMDD>_<suffix>'. The first name
    whose date part matches is remembered in SELECTED_TABLE; afterwards only
    that very name is accepted.
    """
    global SELECTED_TABLE
    name = table_name.decode('utf-8')
    if name == SELECTED_TABLE:
        return True
    if SELECTED_TABLE:
        # Another table has already been chosen.
        return False
    wanted_prefix = (date.today() - timedelta(days=4)).strftime('%Y%m%d')
    if name.split("_")[0] != wanted_prefix:
        return False
    SELECTED_TABLE = name
    return True
@@);

$calc_delivery_days = Python::calc_delivery_days(Callable<(String?, List<Int32>, Bool?)->Uint32>, @@
import json

# Value returned when no usable delivery estimate is available.
DAYS_IF_UNKNOWN = 7
# Estimates above this number of days are treated as unknown.
MAX_DAYS = 60

def calc_delivery_days(row, region_road, downloadable):
    """Return the delivery time in days for a purchase.

    row          -- JSON object mapping region id to max delivery days, or None/empty
    region_road  -- list of integer region ids to look up in `row`
    downloadable -- truthy when the offer is downloadable

    Returns DAYS_IF_UNKNOWN when `row` is empty, when none of the regions is
    present in `row`, or when the best value exceeds MAX_DAYS; returns 0 for a
    downloadable offer; otherwise the largest value found among the regions.
    """
    if not row:
        return DAYS_IF_UNKNOWN
    if downloadable:
        return 0

    max_days_by_region = json.loads(row)

    delivery_days = -1
    for region in region_road:
        # Keys of a decoded JSON object are always strings, while region ids
        # arrive as integers, so the id has to be converted before the lookup.
        days = max_days_by_region.get(str(region))
        if days is not None:
            delivery_days = max(delivery_days, days)

    if delivery_days < 0 or delivery_days > MAX_DAYS:
        return DAYS_IF_UNKNOWN
    return delivery_days
@@);

$calc_max_delivery_by_region = Python::calc_max_delivery_by_region(Callable<(String?)->String>, @@
import json

def calc_max_delivery_by_region(row):
    """Collapse a JSON list of regional delivery entries into {region: max days}.

    Each entry is read through its "RegionId" and "DeliveryOptions" keys, and
    each option through "max_days". Entries without a region id are skipped, and
    only maxima strictly between 0 and 60 are kept. Returns the mapping as a
    JSON string, or "" when the input is empty or nothing qualifies.
    """
    if not row:
        return ""

    days_by_region = {}
    for entry in json.loads(row):
        region_id = entry.get("RegionId", None)
        if not region_id:
            continue
        longest = -1
        for delivery_option in entry.get("DeliveryOptions", []):
            longest = max(longest, delivery_option.get("max_days", -1))
        if 0 < longest < 60:
            days_by_region[region_id] = max(longest, days_by_region.get(region_id, -1))

    return json.dumps(days_by_region) if days_by_region else ""
@@);

$merge_delivery_day_dicts = Python::merge_delivery_day_dicts(Callable<(String?, String?)->String>, @@
import json

def merge_delivery_day_dicts(r1, r2):
    """Merge two JSON objects {region: days}, keeping the larger value per region.

    Either argument may be None or empty, in which case the other one is
    returned unchanged. When both are missing the result is "" rather than
    None, because the declared return type is a non-optional String.
    """
    if not r1:
        return r2 or ""
    if not r2:
        return r1

    merged = json.loads(r1)
    for region, days in json.loads(r2).items():
        merged[region] = max(days, merged.get(region, -1))

    return json.dumps(merged)
@@);

$merge_delivery_days = @@
import json

# Aggregation callbacks: the state is a dict {region: days} holding, for every
# region, the largest value seen so far; items are JSON-encoded dicts.

def _keep_max(target, source):
    """Fold `source` into `target`, keeping the larger value per key."""
    for region, days in source.items():
        target[region] = max(days, target.get(region, -1))
    return target

def create(item):
    """Build the initial state from the first item (empty item -> empty state)."""
    return json.loads(item) if item else {}

def add(state, item):
    """Fold one more item into the state; empty items are ignored."""
    if item:
        _keep_max(state, json.loads(item))
    return state

def merge(state_a, state_b):
    """Fold state_a into state_b and return state_b."""
    return _keep_max(state_b, state_a)

def get_result(state):
    """Final value of the aggregation: the state as JSON."""
    return json.dumps(state)

def serialize(state):
    """Encode the state as JSON."""
    return json.dumps(state)

def deserialize(serialized):
    """Restore a state produced by serialize()."""
    return json.loads(serialized)
@@;

-- Python3 callables bound to the functions defined in the $merge_delivery_days
-- script. They are passed, in this order, to UDAF(...) where $shop_offer_info is
-- built. The aggregation state travels between them as a Resource<Python3>;
-- items and results are plain strings.
$create = Python3::create(Callable<(String)->Resource<Python3>>, $merge_delivery_days);
$add = Python3::add(Callable<(Resource<Python3>,String)->Resource<Python3>>, $merge_delivery_days);
$merge = Python3::merge(Callable<(Resource<Python3>,Resource<Python3>)->Resource<Python3>>, $merge_delivery_days);
$get_result = Python3::get_result(Callable<(Resource<Python3>)->String>, $merge_delivery_days);
$serialize = Python3::serialize(Callable<(Resource<Python3>)->String>, $merge_delivery_days);
$deserialize = Python3::deserialize(Callable<(String)->Resource<Python3>>, $merge_delivery_days);

$calc_market_begin = Python::calc_market_begin(Callable<(Uint64, Int32)->String>, @@
import datetime
import time

def calc_market_begin(timestamp, delivery_days):
    """Return the poll start as a string of Unix seconds.

    timestamp     -- purchase time in milliseconds since the epoch
    delivery_days -- number of days added to the purchase time

    The days are added in local wall-clock time and the result is converted
    back with time.mktime.
    """
    # Whole seconds; floor division keeps the value an integer.
    purchase_time = datetime.datetime.fromtimestamp(int(timestamp) // 1000)
    begin_time = purchase_time + datetime.timedelta(days=delivery_days)
    # time.mktime is the documented way to turn a local naive datetime into
    # epoch seconds; the "%s" strftime directive is platform-specific.
    return str(int(time.mktime(begin_time.timetuple())))
@@);

$calc_market_end = Python::calc_market_end(Callable<(Uint64, Int32, Uint32, Uint32)->String>, @@
import datetime
import time
from dateutil.relativedelta import relativedelta

def calc_market_end(timestamp, delivery_days, threshold_months, threshold_days):
    """Return the poll end as a string of Unix seconds.

    timestamp        -- purchase time in milliseconds since the epoch
    delivery_days    -- number of days added to the purchase time
    threshold_months -- calendar months added on top of the delivery days
    threshold_days   -- days added on top of the delivery days

    All offsets are applied in local wall-clock time and the result is
    converted back with time.mktime.
    """
    # Whole seconds; floor division keeps the value an integer.
    purchase_time = datetime.datetime.fromtimestamp(int(timestamp) // 1000)
    end_time = (purchase_time
                + datetime.timedelta(days=delivery_days)
                + relativedelta(months=threshold_months, days=threshold_days))
    # time.mktime is the documented way to turn a local naive datetime into
    # epoch seconds; the "%s" strftime directive is platform-specific.
    return str(int(time.mktime(end_time.timetuple())))
@@);

-- Purchases from the buyers table accepted by $check_tablename, enriched with
-- model and category attributes. Missing ids and strings are replaced by 0 / "".
$purchases = (
    SELECT
        COALESCE(p.shop_id, 0) AS shop_id,
        COALESCE(p.yandexuid, "") AS yandexuid,
        COALESCE(p.puid, "") AS passportuid,
        -- `timestamp` and `date` are reserved names, so a field with such a name
        -- has to be wrapped in backticks when it is referenced.
        COALESCE(DateTime::ToMilliseconds(CAST(p.`date` AS date)), 0) AS `timestamp`,
        COALESCE(p.domain, "") AS shop_domain,
        COALESCE(p.model_id, 0) AS model_id,
        models.title AS model_title,
        COALESCE(models.category_id, 0) AS category_id,
        categories.name AS category_name,
        COALESCE(p.region_id, 0) AS region_id,
        p.clickout_source AS clickout_source,
        YQL::Concat("http:", models.picture) AS model_picture_url
    FROM FILTER(`//home/antifraud/export/market/buyers`, $check_tablename) AS p
    LEFT JOIN `//home/market/production/indexer/stratocaster/in/models/recent` AS models
        ON COALESCE(p.model_id, 0) = models.id
    LEFT JOIN `//home/market/production/indexer/stratocaster/in/categories/recent` AS categories
        ON COALESCE(models.category_id, 0) = categories.hyper_id
);

-- For every delivery bucket whose program is not 6: the bucket id together
-- with the JSON produced by $calc_max_delivery_by_region from its
-- regional_delivery column.
$bucket_max_delivery_days = (
    SELECT
        mbi_bucket_id,
        $calc_max_delivery_by_region(regional_delivery) AS delivery
    FROM `//home/market/production/indexer/stratocaster/out/delivery/buckets/recent`
    WHERE program != 6
);

-- Offers of the table accepted by $check_offer_tablename, aggregated per
-- (shop_id, model_id) and then expanded by FLATTEN BY into one row per element
-- of the mbi_bucket_id list. Offers with model_id = -1 are left out.
-- NOTE(review): groups whose bucket list is empty presumably produce no rows
-- after FLATTEN BY - confirm that this is intended.
$shop_bucket_info_flat = SELECT * FROM (
SELECT
    shop_id as shop_id,
    -- the shortest shop name seen in the group
    MIN_BY(shop_name, LENGTH(shop_name)) as shop_name,
    -- bucket id list taken from one arbitrary offer of the group (SOME);
    -- blue offers and offers without ids contribute the empty list "[]"
    Yson::ConvertToList(Yson::ParseJson(SOME(COALESCE(IF(NOT is_blue_offer, mbi_delivery_bucket_ids, "[]"), "[]")))) as mbi_bucket_id,
    model_id as model_id,
    -- true only when every offer of the group is downloadable (a missing flag counts as false)
    min(COALESCE(downloadable, false)) as downloadable,
    -- per-region maximum computed from the delivery_options of one arbitrary offer of the group
    $calc_max_delivery_by_region(SOME(COALESCE(delivery_options, "[]"))) as local_delivery
FROM FILTER(`//home/market/production/indexer/stratocaster/offers`, $check_offer_tablename)
WHERE model_id != -1 GROUP BY shop_id, model_id)
FLATTEN BY mbi_bucket_id;

-- Delivery information per (shop_id, model_id); the result columns are
-- shop_id, model_id, shop_name, downloadable and delivery.
-- First branch: rows of $shop_bucket_info_flat joined to their delivery buckets.
-- The bucket JSONs of a group are combined by the UDAF built from
-- $create .. $deserialize, and the outcome is merged with the offer's own
-- local_delivery by $merge_delivery_day_dicts.
-- Second branch: one row per shop with model_id = -1 and an empty "{}" delivery.
$shop_offer_info = (
SELECT s.shop_id as shop_id, s.model_id as model_id, SOME(s.shop_name) as shop_name, SOME(s.downloadable) as downloadable, $merge_delivery_day_dicts(UDAF(m.delivery,
    $create,
    $add,
    $merge,
    $get_result,
    $serialize,
    $deserialize), SOME(local_delivery)) as delivery
    -- inner join: flattened rows without a matching bucket are dropped
    FROM $shop_bucket_info_flat as s JOIN $bucket_max_delivery_days as m ON m.mbi_bucket_id = Yson::ConvertToUint64(s.mbi_bucket_id) GROUP BY s.shop_id, s.model_id
UNION ALL
SELECT
    shop_id as shop_id,
    -- the shortest shop name seen for the shop
    MIN_BY(shop_name, LENGTH(shop_name)) as shop_name,
    "{}" as delivery,
    -1 as model_id,
    -- true only when every offer of the shop is downloadable
    min(COALESCE(downloadable, false)) as downloadable
FROM FILTER(`//home/market/production/indexer/stratocaster/offers`, $check_offer_tablename)
GROUP BY shop_id
);

-- Unique (shop_id, shopname) pairs from the shops dictionary.
$shops_names_info = (
    SELECT
        shop_id,
        shopname
    FROM `//home/market/production/mstat/dictionaries/shops/latest`
    GROUP BY
        shop_id,
        shopname
);

-- Purchases enriched with the shop name from the shops dictionary and with the
-- delivery information of the matching (shop_id, model_id) row of
-- $shop_offer_info. Both joins are LEFT joins, so every purchase is kept.
$interview_data = (
SELECT
    p.yandexuid as yandexuid,
    p.passportuid as passportuid,
    p.`timestamp` as `timestamp`,
    p.shop_domain as shop_domain,
    p.model_id as model_id,
    p.model_title as model_title,
    p.category_id as category_id,
    p.category_name as category_name,
    p.region_id as region_id,
    p.clickout_source as clickout_source,
    p.model_picture_url as model_picture_url,
    p.shop_id as shop_id,
    shop_info.shopname as shop_name,
    -- $shop_offer_info exposes the merged per-region JSON under the name
    -- `delivery`; the `regional_delivery` name is kept for the output column.
    soi.delivery as regional_delivery,
    soi.downloadable as downloadable,
    -- delivery days looked up over the region ids returned by Geo::GetParents
    -- for the purchase region
    $calc_delivery_days(soi.delivery, Geo::GetParents(CAST(p.region_id AS Int32)), soi.downloadable) as delivery_days
FROM $purchases as p
LEFT JOIN $shop_offer_info as soi
    ON p.shop_id = soi.shop_id and p.model_id = soi.model_id
LEFT JOIN $shops_names_info as shop_info
    ON p.shop_id = shop_info.shop_id
);



$make_json = Python::make_json(Callable<(String, Uint64, Int64?, String?, String?, Int64, String?, Int64, String?, String?, String, String, String)->String>, @@
import json
import random

def _as_unicode(raw):
    """Decode a UTF-8 byte string; None becomes the empty string."""
    # https://borman.at.yandex-team.ru/747
    return "" if raw is None else raw.decode('utf-8')

def make_json(type, unixtime, shop_id, shop_name, shop_host, model_id, model_title, category_id, category_name, model_picture_url, begin_date, end_date, market_end_date):
    """Serialize one poll request to JSON.

    `unixtime` is in milliseconds and is written as whole seconds. A missing
    shop id and a negative model id are written as 0, missing strings as "".
    The transaction id is a fresh random 128-bit number, so two calls never
    produce the same document.
    """
    transaction = {
        "Date": int(int(unixtime) / 1000),
        "Type": "cpc-order",
        "Status": 4,
        "Id": str(random.getrandbits(128))
    }
    shop = {
        "Id": int(shop_id) if shop_id is not None else 0,
        "Url": _as_unicode(shop_host),
        "Name": _as_unicode(shop_name)
    }
    model = {
        "Id": max(model_id, 0),
        "Name": _as_unicode(model_title),
        "Category": {
            "Id": int(category_id),
            "Name": _as_unicode(category_name)
        },
        "PictureUrl": _as_unicode(model_picture_url),
    }
    return json.dumps({
        "Type": _as_unicode(type),
        "Weight": 100,
        "BeginDate": int(begin_date),
        "EndDate": int(end_date),
        # the market window starts together with the regular one
        "MarketBeginDate": int(begin_date),
        "MarketEndDate": int(market_end_date),
        "Transaction": transaction,
        "Shop": shop,
        "Model": model
    })
@@);


$active_shop_checker = Python::get_active_shops_checker(Callable<()->Callable<(Int64)->Bool>>, @@
import datetime

# Export of shop switch-offs; the FROM_TIME column holds the moment the shop
# was turned off. A shop that is absent from the file has never been turned off.
PRODUCTION_URL = 'http://s3.mds.yandex.net/market-mbi-prod/mbi-premoderation/v-shop-cur-active-period/current_v-shop-cur-active-period.csv'
TESTING_URL = 'http://s3.mdst.yandex.net/market-mbi-test/mbi-premoderation/v-shop-cur-active-period/current_v-shop-cur-active-period.csv'

SHOP_ID_COLUMN = '"ID|NUMBER"'
FROM_TIME_COLUMN = '"FROM_TIME|TIMESTAMP"'


def get_environment_type():
    """Read the environment name from /etc/yandex/environment.type."""
    with open('/etc/yandex/environment.type', 'r') as f:
        return f.read().strip()


def check_turned_off_time(shop_turned_off_time, limit_days=30):
    """Tell whether the shop was turned off more than `limit_days` days ago.

    `shop_turned_off_time` looks like '2018-04-03 18:12:21.0'; only the date
    part is used.
    """
    yyyy_mm_dd = shop_turned_off_time.split(' ')[0]
    year, month, day = [int(x) for x in yyyy_mm_dd.split('-')]
    from_date = datetime.date(year=year, month=month, day=day)
    return datetime.date.today() - from_date > datetime.timedelta(days=limit_days)


def get_value(fields, index):
    """Return field `index` without its surrounding double quotes (every value is quoted)."""
    return fields[index][1:-1]


def collect_long_turned_off_shops(lines):
    """Parse the CSV export and return ids of shops turned off long ago.

    The first non-empty line is the header and must contain the
    SHOP_ID_COLUMN and FROM_TIME_COLUMN names; ValueError is raised otherwise.
    """
    long_turned_off_shops = set()
    index_shop_id = None
    index_from_time = None
    for line in lines:
        # Drop the line terminator so that the last column matches the header
        # name and is unquoted correctly.
        line = line.rstrip('\r\n')
        if not line:
            continue
        fields = line.split(',')
        # Compare with None: a column index of 0 is a valid position.
        if index_from_time is None:
            index_shop_id = fields.index(SHOP_ID_COLUMN)
            index_from_time = fields.index(FROM_TIME_COLUMN)
            continue
        if check_turned_off_time(get_value(fields, index_from_time)):
            long_turned_off_shops.add(int(get_value(fields, index_shop_id)))
    return long_turned_off_shops


def get_active_shops_checker():
    """Download the switch-off export and return a predicate shop_id -> is active.

    A shop counts as inactive when the export says that it was turned off more
    than 30 days ago.
    """
    import urllib
    from contextlib import closing

    url = PRODUCTION_URL if get_environment_type() == 'production' else TESTING_URL
    # closing() releases the connection even when parsing fails.
    with closing(urllib.urlopen(url)) as response:
        long_turned_off_shops = collect_long_turned_off_shops(response.readlines())

    def is_active_shop(shop_id):
        return shop_id not in long_turned_off_shops

    return is_active_shop
@@);

-- $check_active_shop = $active_shop_checker();

-- Poll candidates built from $interview_data: a "shop" row for every purchase
-- with a known shop id, domain and name, and a "model" row for every purchase
-- with a known model id and title. Both kinds share the same time window
-- columns; row_index is prefixed with "s" or "m" according to the kind.
-- exclude BLUE_MARKET (shop_id=431782) from shop interview
$external_purchases = (
    SELECT
        "shop" AS poll_type,
        shop_id,
        shop_name,
        shop_domain,
        model_id,
        model_title,
        category_id,
        category_name,
        model_picture_url,
        passportuid,
        yandexuid,
        `timestamp`,
        delivery_days,
        clickout_source,
        $calc_market_begin(`timestamp`, delivery_days) AS begin_time,
        $calc_market_end(`timestamp`, delivery_days, 0, 7) AS end_time,
        $calc_market_end(`timestamp`, delivery_days, 3, 0) AS market_end_time,
        YQL::Concat("s", YQL::ToString(TableRecordIndex())) AS row_index
    FROM $interview_data
    WHERE shop_id IS NOT NULL
        AND shop_id != 0
        AND shop_id != 431782
        AND shop_domain IS NOT NULL
        AND shop_domain != ""
        AND shop_name IS NOT NULL
        AND shop_name != ""

    UNION ALL

    SELECT
        "model" AS poll_type,
        shop_id,
        shop_name,
        shop_domain,
        model_id,
        model_title,
        category_id,
        category_name,
        model_picture_url,
        passportuid,
        yandexuid,
        `timestamp`,
        delivery_days,
        clickout_source,
        $calc_market_begin(`timestamp`, delivery_days) AS begin_time,
        $calc_market_end(`timestamp`, delivery_days, 0, 7) AS end_time,
        $calc_market_end(`timestamp`, delivery_days, 3, 0) AS market_end_time,
        YQL::Concat("m", YQL::ToString(TableRecordIndex())) AS row_index
    FROM $interview_data
    WHERE model_id > 0
        AND model_title IS NOT NULL
        AND model_title != ""
);

-- Store all poll candidates, without the helper row_index column, in the
-- order_delivery table for [[TARGET_DATE]].
INSERT INTO `//home/market/[[ENV_TYPE]]/ugc/order_delivery/[[TARGET_DATE]]`
SELECT * WITHOUT row_index FROM $external_purchases
WHERE poll_type != ""; -- there should be no such rows; a safeguard against an incorrect change
-- of the filtering used when external_purchases is built

--There may be records that differ only in the value of clickout_source.
--That field is not written to poll_requests, so such records would show up there as duplicates.
--Therefore only one of them should be kept: group by every parameter except clickout_source,
--collecting the row indexes, and pick one arbitrary row from each group.
--NOTE: this deduplication is currently disabled - the query below is commented out.
--$unique_requests_indexes = (
--SELECT SOME(row_index) as value FROM $external_purchases 
--WHERE clickout_source != "" AND poll_type != ""
--GROUP BY poll_type, shop_id, model_id, timestamp, yandexuid, passportuid
--);

-- Poll requests in the (Key, Subkey, Value, JsonValue) layout of the
-- poll_requests table. The key addresses the passport user when passportuid
-- is known and the visitor (yandexuid) otherwise. Only candidates with a
-- non-empty clickout_source are taken.
$new_requests = (
    SELECT
        CASE
            WHEN p.passportuid = "" THEN YQL::Concat("/visitor/", p.yandexuid)
            ELSE YQL::Concat("/user/", p.passportuid)
        END AS Key,
        "" AS Subkey,
        "" AS Value,
        $make_json(
            p.poll_type,
            p.`timestamp`,
            p.shop_id,
            p.shop_name,
            p.shop_domain,
            p.model_id,
            p.model_title,
            p.category_id,
            p.category_name,
            p.model_picture_url,
            p.begin_time,
            p.end_time,
            p.market_end_time
        ) AS JsonValue
    FROM $external_purchases AS p
    WHERE p.clickout_source != ""
    -- Disabled: the INNER JOIN below would filter out every row that is not
    -- listed in unique_requests_indexes.
    --INNER JOIN $unique_requests_indexes as unique_index ON p.row_index == unique_index.value
);

-- Rewrite the poll_requests table for [[TARGET_DATE]] as its current rows
-- followed by the newly generated requests.
INSERT INTO `//home/market/[[ENV_TYPE]]/ugc/poll_requests/[[TARGET_DATE]]` WITH TRUNCATE
SELECT
    Key,
    Subkey,
    Value,
    JsonValue
FROM `//home/market/[[ENV_TYPE]]/ugc/poll_requests/[[TARGET_DATE]]`
UNION ALL
SELECT
    Key,
    Subkey,
    Value,
    JsonValue
FROM $new_requests

