#!/usr/bin/python
# -*- coding: utf-8 -*-

# Runs YQL queries on the HAHN cluster (HAHN only: canvas exports are not available on Arnold yet) to compute various statistics
# for BannerStorage creatives (DIRECT-117030) and sends the query results to graphite.
#
# The script is run from cron on 'ppcdev3':
#* 10 * * * /usr/bin/python /home/aliho/test/bannerstorage_creative_stat_yql_to_graphite.py >> /home/aliho/test/bannerstorage_creative_stat_yql_to_graphite.log

import os
import datetime
import json
import socket
import logging
import subprocess as sp

from yql.api.v1.client import YqlClient

from functools import partial
from datetime import datetime
from calendar import timegm

# Metric-path prefix prepended to every line sent to graphite.
PREFIX = 'junk.aliho.bannerstorage'
# Path of the file the YQL token is read from at start-up.
YQL_TOKEN = '/etc/direct-tokens/yql_robot-direct-yt-test'

# Host and port the metric lines are sent to over TCP.
# The trailing comments keep alternative values (presumably a remote
# graphite endpoint -- TODO confirm).
graphite_host = 'localhost' # 'ppcdev2.yandex.ru'
graphite_port = 42000       # 7089

def netcat(hostname, port, content):
    """Send *content* to ``hostname:port`` over TCP and log the peer's reply.

    Opens a new TCP connection, writes the whole payload, half-closes the
    connection for writing and then reads until the peer closes its side,
    logging every received chunk. The socket is always closed, even when
    connecting, sending or receiving fails.

    Args:
        hostname: Host name or IP address to connect to.
        port: TCP port to connect to.
        content: Payload to send. Unicode text is encoded as UTF-8; byte
            strings are sent unchanged.

    Raises:
        socket.error: If connecting, sending or receiving fails.
    """
    # Encode text explicitly instead of relying on an implicit ASCII
    # conversion inside sendall(); byte strings pass through untouched.
    if isinstance(content, type(u'')):
        payload = content.encode('utf-8')
    else:
        payload = content

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((hostname, port))
        s.sendall(payload)
        logging.info("Sent: %s", content)
        # Signal end-of-data to the peer while still allowing it to reply.
        s.shutdown(socket.SHUT_WR)
        while True:
            data = s.recv(1024)
            # recv() returns an empty (byte) string once the peer has closed;
            # a truthiness test works for both str and bytes results.
            if not data:
                break
            logging.info("Received: %s", repr(data))
    finally:
        s.close()

# Clusters the query is executed on; each name is also embedded in the
# metric path of the lines sent to graphite.
dbs = ['hahn']

# YQL text executed once per cluster (with syntax_version=1).
#
# It selects creatives of type 'bannerstorage' from the perf_creatives tables,
# joins them with per-creative show/click/cost aggregates taken from the
# chevent cooked log of the previous UTC day and with the campaigns tables,
# and then emits a UNION ALL of single-column (`val`) rows. Every row is a
# ready-made "<metric name> <value>" string starting with 'direct.creatives.',
# so the Python side only has to add its own prefix and a timestamp.
#
# The literal below is sent to YQL as-is; comments inside it belong to the
# query text.
QUERY = u'''
$date_format = DateTime::Format("%Y-%m-%d");
$cost2rub = ($cost) -> { return 30. * $cost / 1e6 / 1.18; };
$isNUll = ($i) -> { return case when $i is null then 0 else $i end };
$isNUllStr = ($s) -> { RETURN CASE WHEN $s is null THEN "unknown" ELSE $s END };
$period = DateTime::IntervalFromDays(1);
$yesturday = $date_format(CurrentUtcDatetime() - $period);
$replace_before_translit = Re2::Replace("([^\\wА-Яа-я])");
$replace = Re2::Replace("(\\s|')");
$normalize_metric_name = ($str) -> {return $replace(Unicode::Translit(cast($replace_before_translit($str, " ") as utf8), 'Russian'), "_")};

$cooked_chevent = "cooked_logs/bs-chevent-cooked-log/1d/" || $yesturday;
$tirs = "//home/comdep-analytics/public/client_tiers/fact/latest";

-- creatives created from bannerstorage
$bannerstorage_creatives = (
    select
        ClientID as ClientID,
        creative_id as creative_id,
        template_id as template_id,
        duration as duration,
        ClientID as creative_client_id,
        statusModerate as statusModerate
    from LIKE(`//home/direct/mysql-sync/current`, `ppc:%`, `straight/perf_creatives`)
    where creative_type = 'bannerstorage'
);
$bannerstorage_creatives_stats = (
    select
        cs.orderid as OrderIds,
        cs.creativeid as CreativeIds,
        cf.creative_client_id as creative_client_id,
        camp.ClientID as campaign_client_id,
        cf.template_id as templateId,
        cf.duration as duration,
        cs.bannerid as BannerIds,
        cs.countertype as counterType,
        sum(cs.Shows) as Shows,
        sum(cs.Clicks) as Clicks,
        Math::Round(sum(Cost), -2) as Cost
    from (
        select
            creativeid,
            bannerid,
            orderid,
            countertype,
            count_if(countertype == 1) as Shows,
            count_if(countertype == 2) as Clicks,
            $cost2rub(sum(eventcost)) as Cost -- rub without NDS
        FROM (
            select
                creativeid,
                bannerid,
                orderid,
                eventtime,
                eventcost,
                countertype
            from $cooked_chevent
            where fraudbits == 0
            --and placeid == 542  -- want all types of campaigns
            --and contexttype == 2
        ) AS e
        group by
            creativeid,
            bannerid,
            orderid,
            countertype
    ) as cs
    join $bannerstorage_creatives as cf on cf.creative_id = cs.creativeid
    join LIKE(`//home/direct/mysql-sync/current`, `ppc:%`, `straight/campaigns`) as camp on camp.OrderID = cs.orderid
    group by
        cs.orderid as OrderIds,
        cs.creativeid as CreativeIds,
        cf.creative_client_id as creative_client_id,
        camp.ClientID as campaign_client_id,
        cf.template_id as templateId,
        cf.duration as duration,
        cs.bannerid as BannerIds,
        cs.countertype as counterType
);

$prefix = 'direct.creatives.';

select $prefix || 'ClientID ' || cast(count(cnt) as string) as val from (select DISTINCT ClientID as cnt from $bannerstorage_creatives)
union all
select $prefix || 'campaign_client_id ' || cast(count(cnt) as string) as val from (select DISTINCT campaign_client_id as cnt from $bannerstorage_creatives_stats)
union all
select $prefix || 'OrderIds ' || cast(count($isNUll(OrderIds)) as string) as val from (select DISTINCT OrderIds from $bannerstorage_creatives_stats)
union all
select $prefix || 'CreativeIds ' || cast(count($isNUll(CreativeIds)) as string) as val from (select DISTINCT CreativeIds from $bannerstorage_creatives_stats)
union all
select $prefix || 'shows ' || cast($isNUll(Shows) as string) as val from (select sum(Shows) as Shows from $bannerstorage_creatives_stats)
union all
select $prefix || 'clicks ' || cast($isNUll(Clicks) as string) as val from (select sum(Clicks) as Clicks from $bannerstorage_creatives_stats)
union all
select $prefix || 'cost ' || cast($isNUll(Cost) as string) as val from (select sum(Cost) as Cost from $bannerstorage_creatives_stats)
union all
select $prefix || 'creatives.status.' || cast(statusModerate as string) || " " || cast($isNUll(cnt) as string) as val from (
    SELECT
        statusModerate,
        count(*) as cnt
    from $bannerstorage_creatives
    group by statusModerate
)
union all
select $prefix || 'countertype.' || cast(counterType as string) || " " || cast($isNUll(Cost) as string) as val from (
    select
        counterType,
        sum(Cost) as Cost
    from $bannerstorage_creatives_stats
    group by counterType
)
union all
select $prefix || 'templateId.' || cast(templateId as string) || " " || cast($isNUll(Cost) as string) as val from (
    select
        templateId,
        sum(Cost) as Cost
    from $bannerstorage_creatives_stats
    group by templateId
    order by Cost desc
    limit 10
)
union all
select $prefix || 'creatives_by_client.' || cast(creative_cnt as string) || " " || cast($isNUll(cnt) as string) as val from (
    select
        creative_cnt,
        count(*) as cnt
    from (
        select
            campaign_client_id,
            count(CreativeIds) as creative_cnt
        from (
            select
                DISTINCT
                    campaign_client_id,
                    CreativeIds
            from $bannerstorage_creatives_stats
        )
        group by campaign_client_id
    )
    group by creative_cnt
    order by cnt desc
    limit 10
)
union all
select $prefix || 'industry.' || cast(industry as string) || " " || cast(Cost as string) as val
from (
    select
        $normalize_metric_name(industry) as industry, -- tier from client than created campaign
        sum(Cost) as Cost
    from (
        select
            $isNUllStr(t.curr_counterparty_industry.industry) as industry,
            r.Cost as Cost
        from $bannerstorage_creatives_stats as r
        left join $tirs as t on t.client_id = r.campaign_client_id
    ) as r
    group by industry
    order by Cost desc
    limit 10
)
union all
select $prefix || 'tir.' || cast(campaign_tier as string) || " " || cast(Cost as string) as val
from (
    select
        $normalize_metric_name(campaign_tier) as campaign_tier, -- tier from client than created campaign
        sum(Cost) as Cost
    from (
        select
            $isNUllStr(t.curr_counterparty_crm_tier.name) as campaign_tier,
            r.Cost as Cost
        from $bannerstorage_creatives_stats as r
        left join $tirs as t on t.client_id = r.campaign_client_id
    ) as r
    group by campaign_tier
    order by Cost desc
    limit 10
)
union all
select $prefix || 'clients.' || cast(creative_client_id as string) || " " || cast($isNUll(Cost) as string) as val from (
    select
        creative_client_id,
        sum(Cost) as Cost
    from $bannerstorage_creatives_stats
    group by creative_client_id
    order by Cost desc
    limit 10
)
union all
select $prefix || 'duration.' || cast($isNUll(duration) as string) || " " || cast($isNUll(cnt) as string) as val from (
    select
        duration,
        count(*) as cnt
    from $bannerstorage_creatives_stats
    group by duration
    order by cnt desc
    limit 10
);
'''

def _read_token(path):
    """Return the contents of the token file at *path* without surrounding whitespace."""
    # The context manager closes the file even if reading fails; strip()
    # removes a trailing newline, if the file has one.
    with open(path) as token_file:
        return token_file.read().strip()


def main():
    """Run QUERY on every cluster in ``dbs`` and forward the results to graphite.

    Every result row is expected to carry a complete "<metric> <value>"
    string in its first cell. The row is sent as
    "<PREFIX>.<db>.<metric> <value> <timestamp>" through ``netcat``, one
    connection per row. A failure on a single row is logged with its
    traceback and does not stop the remaining rows from being processed.
    """
    log_format = u'[%(asctime)s pid: %(process)d. %(threadName)s] %(filename)s:%(lineno)d %(levelname)s: %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)

    yql_token = _read_token(YQL_TOKEN)
    nc = partial(netcat, graphite_host, graphite_port)
    # One Unix timestamp shared by all metrics of this run. timegm() on the
    # UTC time tuple is portable, unlike the platform-specific "%s" strftime
    # directive.
    tm = timegm(datetime.utcnow().utctimetuple())

    for db in dbs:
        client = YqlClient(token=yql_token, db=db)
        request = client.query(QUERY, syntax_version=1)
        response = request.run()
        for table in response.get_results():
            table.fetch_full_data()
            for row in table.rows:
                try:
                    cells = [u"{}".format(cell) for cell in row]
                    # The YQL query builds the whole "dotted.name value" string itself.
                    key_val = cells[0]
                    line = u"{}.{}.{} {}".format(PREFIX, db, key_val, tm)
                    # NOTE(review): no trailing newline is appended, as in the
                    # original code; confirm the receiver accepts
                    # unterminated lines.
                    nc(line.encode("utf-8"))
                except Exception:
                    # Best effort per row: record the traceback and move on.
                    logging.exception("Failed to send row %r from %s", row, db)
                    continue


if __name__ == '__main__':
    main()

