-- Target cluster; the alternative cluster is kept below as a commented-out toggle.
use hahn;
--use arnold;

-- YT execution settings.
pragma yt.Pool = 'goods_quality';
pragma yt.PoolTrees = 'physical';
pragma yt.DefaultMaxJobFails = '1';
pragma yt.InferSchema = '3';
--pragma yt.OperationSpec = "{reducer={cpu_limit=0.05}}";

-- Query-language settings.
pragma yson.DisableStrict;
pragma SimpleColumns;

-- Date window of the run; both bounds are passed to RANGE and also form the
-- name of the output table.
$date_start = '2022-06-27';
$date_end   = '2022-07-03';

/* ================================================================= */

-- Directory that holds the per-date input tables.
$source = '//home/goods_quality/charts/category_coverage/prime/cache';

-- The result is written to <basketdir><date_start>_<date_end>.
$basketdir = '//home/goods_quality/topilskiy-ak/catfilter/collect_gzt/basket/';
$output_table_name = $date_start || '_' || $date_end;
$output = $basketdir || $output_table_name;

/* ================================================================= */

-- Returns the lowercased search query for a row.
--
-- $query: query text stored with the row; used as-is when it is a non-empty string.
-- $url:   request URL; its `text` CGI parameter is the fallback when $query is
--         NULL or empty.
--
-- Result: a string lowercased with Unicode rules. It is '' when neither source
-- provides a query. The cast to utf8 yields NULL for text that is not valid
-- UTF-8, so the result is NULL in that case.
$get_clean_query = ($query, $url) -> {
    -- Extract the raw parameter first and percent-decode only its value.
    -- Decoding the whole URL up front would turn an encoded '&', '#' or '='
    -- inside the query text (e.g. text=h%26m) into a real delimiter and
    -- truncate the extracted value.
    $cgi_text = Url::Decode(Url::GetCGIParam($url, 'text')) ?? '';
    -- A NULL $query makes the comparison NULL, which selects the fallback branch.
    $query_or_text = if($query != '', $query, $cgi_text);
    return cast(Unicode::ToLower(cast($query_or_text as utf8)) as string);
};

/* ================================================================= */

-- All rows of the per-date inputs: RANGE selects the entries under $source
-- whose names lie between $date_start and $date_end and reads the
-- 'user_session' table inside each of them.
$input = select * from range($source, $date_start, $date_end, 'user_session');

-- Same rows with `query` replaced by the value produced by $get_clean_query
-- (lowercased query, falling back to the URL's `text` parameter).
-- Every other column is passed through as-is.
$input_cleaned = (
select
  $get_clean_query(query, url) as query,
  -- Exclude the raw `query` so it does not clash with the cleaned one above.
  t.* without query,
from $input as t
);

-- Shared conversion for the per-row signals that are aggregated below.
$as_double = ($value) -> {
    return cast($value as double);
};

-- One row per cleaned query: number of occurrences, whether any occurrence had
-- a non-NULL GoodsCategoryFilterWorkedRuleMarker, and the maximum of each
-- signal column over the query's rows. The target table is overwritten and
-- ordered by descending occurrence count.
insert into $output with truncate
select
    query,
    count(*) as cnt,
    max(GoodsCategoryFilterWorkedRuleMarker is not null) as is_covered_prod,
    max($as_double(wizdetection_ecom_classifier_prob)) as wizdetection_ecom_classifier_prob,
    max($as_double(wizdetection_cehac_ecom_classifier_prob)) as wizdetection_cehac_ecom_classifier_prob,
    max($as_double(wizdetection_fashion_ecom_classifier_prob)) as wizdetection_fashion_ecom_classifier_prob,
    max($as_double(wizdetection_home_ecom_classifier_prob)) as wizdetection_home_ecom_classifier_prob,
    max($as_double(wizdetection_diy_ecom_classifier_prob)) as wizdetection_diy_ecom_classifier_prob,
    max($as_double(wizdetection_pharma_ecom_classifier_prob)) as wizdetection_pharma_ecom_classifier_prob,
    max($as_double(query_about_one_product)) as query_about_one_product,
    max($as_double(query_about_many_products)) as query_about_many_products
from $input_cleaned
group by query
order by cnt desc
;
