USE hahn;

PRAGMA AnsiInForEmptyOrNullableItemsCollections;

-- Run parameters; the "branch" entry selects the output root below.
DECLARE $param_dict AS Dict<String, String>;

-- Output root: the production tree when branch is "prod",
-- otherwise a per-branch subdirectory of the dev tree.
$base_dir = IF(
    $param_dict["branch"] == "prod",
    "//home/vipplanners/sow",
    "//home/vipplanners/sow_dev" || '/' || $param_dict["branch"]
);

-- Input tables
$spark_all            = 'home/comdep-analytics/zedlaa/spark/all_2';
$McK_main_industries  = '//home/vipplanners/sow/dict/mck/McK_main_industries';
$McK_okved_industries = '//home/vipplanners/sow/dict/mck/McK_okved_industries';
$OKVAD_classes        = '//home/vipplanners/sow/dict/okved';

-- Output tables
$raw_data_for_cluster = $base_dir || '/' || 'dict/cluster/raw_data_for_cluster';    -- raw data
$all_signs_from_stat  = $base_dir || '/' || 'dict/cluster/mck/all_signs_from_stat'; -- indicators for clustering
$all_clusters         = $base_dir || '/' || 'dict/cluster/all_clusters';
$final_table          = $base_dir || '/' || 'dict/cluster/final_table';

---------- Подготовка сырых данных--------------

-- Expands one finance record into a list of dicts, one per entry of its
-- string_list. Each dict carries the record's date_begin, date_end and
-- period_name together with the entry's name and its value cast to String.
-- Missing fields are replaced with an empty string.
$get_one_element_from_finance = ($p_str_yson) ->
{
    $period_begin = COALESCE($p_str_yson.date_begin, "");
    $period_end = COALESCE($p_str_yson.date_end, "");
    $period_label = COALESCE($p_str_yson.period_name, "");

    -- Builds the flat dict for a single string_list entry.
    $entry_to_dict = ($entry) -> {
        RETURN AsDict(
            AsTuple("date_begin", $period_begin),
            AsTuple("date_end", $period_end),
            AsTuple("period_name", $period_label),
            AsTuple("name", COALESCE($entry.name, "")),
            AsTuple("value", COALESCE(CAST($entry.value AS String), ""))
        )
    };

    RETURN ListMap($p_str_yson.string_list, $entry_to_dict)
};

-- Applies $get_one_element_from_finance to every element of $p_finance,
-- producing a list of lists of dicts.
$get_finance = ($p_finance) ->
{
    RETURN ListMap($p_finance, $get_one_element_from_finance)
};

-- Compiled pattern whose first capture group is a run of digits.
$parse_class_okvad = Re2::Capture("(\\d+)\.*.*");

-- Returns the first capture group of $parse_class_okvad applied to $p_str,
-- cast to Int64.
$get_class_okvad = ($p_str) ->
{
    $captured = $parse_class_okvad($p_str);
    RETURN CAST($captured._1 AS Int64)
};

-- Company attributes from the Spark table, with the nested size/okato fields
-- pulled up to top-level columns and the finance column expanded by $get_finance.
$tmp =
(
SELECT
    spark_id,
    inn,
    domain,
    company_type,
    main_okved2_code,
    main_okved2_name,
    population,
    workers_range,
    company_size.revenue AS company_size_revenue,
    company_size.description AS company_size_description,
    okato.region_code AS okato_region_code,
    $get_finance(finance) AS finance
FROM $spark_all AS spark_all
);

-- First flattening step: one row per element of the outer finance list.
$tmp1 =
(
SELECT
    spark_id,
    inn,
    domain,
    company_type,
    main_okved2_code,
    main_okved2_name,
    population,
    workers_range,
    company_size_revenue,
    company_size_description,
    okato_region_code,
    finance_temp1
FROM $tmp
FLATTEN LIST BY finance AS finance_temp1
);

-- Second flattening step: one row per dict of the inner list, with the dict
-- entries exposed as separate *_finance columns.
$tmp2 =
(
SELECT
    spark_id,
    inn,
    domain,
    company_type,
    main_okved2_code,
    main_okved2_name,
    population,
    workers_range,
    company_size_revenue,
    company_size_description,
    okato_region_code,
    finance_temp2['date_begin'] AS date_begin_finance,
    finance_temp2['date_end'] AS date_end_finance,
    finance_temp2['period_name'] AS period_name_finance,
    finance_temp2['name'] AS name_finance,
    finance_temp2['value'] AS value_finance
FROM $tmp1
FLATTEN LIST BY finance_temp1 AS finance_temp2
);

-- Flattened finance rows enriched with a formatted domain and with the
-- classifier entry matched by the numeric class taken from main_okved2_code.
$res =
(
SELECT
    spark_id,
    inn,
    domain,
    Url::GetDomain(domain, 2) AS domain_formatted,
    company_type,
    main_okved2_code,
    main_okved2_name,
    $get_class_okvad(main_okved2_code) AS main_class_okvad,
    okvad_name AS main_class_okvad_name,
    population,
    workers_range,
    company_size_revenue,
    company_size_description,
    okato_region_code,
    date_begin_finance AS finance_date_begin,
    date_end_finance AS finance_date_end,
    period_name_finance AS finance_period_name,
    name_finance AS finance_name,
    value_finance AS finance_value
FROM $tmp2 AS tmp2
LEFT JOIN $OKVAD_classes AS OKVAD_classes
    ON $get_class_okvad(tmp2.main_okved2_code) == OKVAD_classes.okvad_id
);

-- Rewrites the raw-data table with the distinct rows of $res
-- (grouping by every selected column removes exact duplicates).
INSERT INTO $raw_data_for_cluster WITH TRUNCATE
SELECT
    spark_id,
    inn,
    domain,
    domain_formatted,
    company_type,
    main_okved2_code,
    main_okved2_name,
    main_class_okvad,
    main_class_okvad_name,
    population,
    workers_range,
    company_size_revenue,
    company_size_description,
    okato_region_code,
    finance_date_begin,
    finance_date_end,
    finance_period_name,
    finance_name,
    finance_value
FROM $res
GROUP BY
    spark_id,
    inn,
    domain,
    domain_formatted,
    company_type,
    main_okved2_code,
    main_okved2_name,
    main_class_okvad,
    main_class_okvad_name,
    population,
    workers_range,
    company_size_revenue,
    company_size_description,
    okato_region_code,
    finance_date_begin,
    finance_date_end,
    finance_period_name,
    finance_name,
    finance_value
;
COMMIT;
---------- Подготовка метрик для кластеризации--------------

-- All distinct (spark_id, reporting period) pairs for 2017-2019
$all_spark_id =
(
SELECT
    spark_id,
    finance_period_name
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
GROUP BY
    spark_id,
    finance_period_name
);


-- Current assets: raw rows for periods 2017-2019
$obortniy_activi =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Оборотные активы"
);

-- Emit the selection as a query result
SELECT *
FROM $obortniy_activi
;

-- Non-current assets: raw rows for periods 2017-2019
$vneobortniy_activi =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Внеоборотные активы"
);

-- Emit the selection as a query result
SELECT *
FROM $vneobortniy_activi
;
  
-- Other current assets: raw rows for periods 2017-2019
$prochie_obortniy_activi =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Прочие оборотные активы"
);

-- Emit the selection as a query result
SELECT *
FROM $prochie_obortniy_activi
;
  

-- Other non-current assets: raw rows for periods 2017-2019
$prochie_vneobortniy_activi =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Прочие внеоборотные активы"
);

-- Emit the selection as a query result
SELECT *
FROM $prochie_vneobortniy_activi
;

-- Total assets: raw rows for periods 2017-2019
-- (the indicator name in the filter contains two consecutive spaces)
$activi_vsego =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Активы  всего"
);

-- Emit the selection as a query result
SELECT *
FROM $activi_vsego
;

-- Net profit (loss): raw rows for periods 2017-2019
$chistaya_pribil =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Чистая прибыль (убыток)"
);

-- Emit the selection as a query result
SELECT *
FROM $chistaya_pribil
;

-- Profit (loss) before tax: raw rows for periods 2017-2019
$pribil_do_naloga =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Прибыль (убыток) до налогообложения"
);

-- Emit the selection as a query result
SELECT *
FROM $pribil_do_naloga
;

-- Profit (loss) from sales: raw rows for periods 2017-2019
$pribil_ot_prodag =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Прибыль (убыток) от продажи"
);

-- Emit the selection as a query result
SELECT *
FROM $pribil_ot_prodag
;

-- Revenue: raw rows for periods 2017-2019
$viruchka =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Выручка"
);

-- Emit the selection as a query result
SELECT *
FROM $viruchka
;

-- Cash and cash equivalents: raw rows for periods 2017-2019
$denegnie_sredstva =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Денежные средства и денежные эквиваленты"
);

-- Emit the selection as a query result
SELECT *
FROM $denegnie_sredstva
;

-- Short-term financial investments: raw rows for periods 2017-2019
$short_fin_vlogeniya =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Краткосрочные финансовые вложения"
);

-- Emit the selection as a query result
SELECT *
FROM $short_fin_vlogeniya
;

-- Accounts receivable: raw rows for periods 2017-2019
$deb_zadolgennost =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Дебиторская задолженность"
);

-- Emit the selection as a query result
SELECT *
FROM $deb_zadolgennost
;

-- Inventories: raw rows for periods 2017-2019
$zapasi =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Запасы"
);

-- Emit the selection as a query result
SELECT *
FROM $zapasi
;

-- Authorized (charter) capital: raw rows for periods 2017-2019
$ustavnoi_capital =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Уставный капитал"
);

-- Emit the selection as a query result
SELECT *
FROM $ustavnoi_capital
;

-- Current assets: raw rows for periods 2017-2019.
-- NOTE(review): this filter is identical to the one used for $obortniy_activi
-- earlier in the file; confirm whether a different indicator was intended here.
$oborotnii_active =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Оборотные активы"
);

-- Emit the selection as a query result
SELECT *
FROM $oborotnii_active
;


-- Cost of sales: raw rows for periods 2017-2019
$sebestoimost_prodag =
(
SELECT
    finance_name,
    finance_period_name,
    spark_id,
    finance_value
FROM $raw_data_for_cluster AS raw_data_for_cluster
WHERE spark_id IS NOT NULL
  AND finance_value IS NOT NULL
  AND finance_period_name IN ("2017", "2018", "2019")
  AND finance_name == "Себестоимость продаж"
);

-- Emit the selection as a query result
SELECT *
FROM $sebestoimost_prodag
;

------------------------
-- Reference: indicator -> named expression
-- Current assets
--$obortniy_activi
-- Non-current assets
--$vneobortniy_activi
-- Other current assets
--$prochie_obortniy_activi
-- Other non-current assets
--$prochie_vneobortniy_activi
-- Total assets
--$activi_vsego
-- Net profit (loss)
--$chistaya_pribil
-- Profit (loss) before tax
--$pribil_do_naloga
-- Profit (loss) from sales
--$pribil_ot_prodag
-- Revenue
--$viruchka
-- Cash and cash equivalents
--$denegnie_sredstva
-- Short-term financial investments
--$short_fin_vlogeniya
-- Accounts receivable
--$deb_zadolgennost
-- Inventories
--$zapasi
-- Authorized (charter) capital
--$ustavnoi_capital
-- Current assets
--$oborotnii_active
-- Cost of sales
--$sebestoimost_prodag


-- Per (spark_id, reporting period) indicator set used for clustering.
--
-- For every pair from $all_spark_id the raw indicator values are attached with
-- LEFT JOINs (value_* columns, still strings) and the derived sign_* columns are
-- computed from them. When the inputs of a sign_* column are missing, or its
-- denominator is zero, the column falls back to the constant 1
-- (true for sign_active_more_stav).
--
-- Asset ratios divide by the sum of current and non-current assets of the same
-- period. NOTE(review): the original note described the denominator as the
-- average of assets at the start and end of the period; that averaging is not
-- implemented here - confirm which definition is wanted.
--
-- Zero denominators are detected numerically (CAST to Float, then compare with
-- 0.0) in every ratio, so values such as "0" or "0.00" are treated as zero as well.
$all_meat_coef = 
(
Select all_spark_id.spark_id As spark_id
,      all_spark_id.finance_period_name As finance_period_name

-- Raw values, no transformation
,      obortniy_activi.finance_value As value_obortniy_activi
,      vneobortniy_activi.finance_value As value_vneobortniy_activi
,      pribil_do_naloga.finance_value As value_pribil_do_naloga
,      chistaya_pribil.finance_value As value_chistaya_pribil
,      pribil_ot_prodag.finance_value As value_pribil_ot_prodag
,      sebestoimost_prodag.finance_value As value_sebestoimost_prodag
,      viruchka.finance_value As value_viruchka
,      denegnie_sredstva.finance_value As value_denegnie_sredstva
,      short_fin_vlogeniya.finance_value As value_short_fin_vlogeniya
,      deb_zadolgennost.finance_value As value_deb_zadolgennost
,      zapasi.finance_value As value_zapasi
,      activi_vsego.finance_value As value_activi_vsego
,      ustavnoi_capital.finance_value As value_ustavnoi_capital
,      oborotnii_active.finance_value As value_oborotnii_active


-- COEFFICIENTS FOR CLUSTERING
-- Gross return on assets, %: profit (loss) before tax / (current + non-current assets)
,      if (pribil_do_naloga.finance_value is not null and (obortniy_activi.finance_value is not null or vneobortniy_activi.finance_value is not null) and CAST(NVL(obortniy_activi.finance_value,"0.0") As Float) + CAST(NVL(vneobortniy_activi.finance_value,"0.0") As Float) <> 0.0,
           CAST(NVL(pribil_do_naloga.finance_value,"0.0") As Float) / (CAST(NVL(obortniy_activi.finance_value,"0.0") As Float) + CAST(NVL(vneobortniy_activi.finance_value,"0.0") As Float)) *100,
           1) As sign_rentabelnost_activov_obshaya 
           
-- Net return on assets, %: net profit (loss) / (current + non-current assets)
,      if (chistaya_pribil.finance_value is not null and (obortniy_activi.finance_value is not null or vneobortniy_activi.finance_value is not null) and CAST(NVL(obortniy_activi.finance_value,"0.0") As Float) + CAST(NVL(vneobortniy_activi.finance_value,"0.0") As Float) <> 0.0,
            CAST(NVL(chistaya_pribil.finance_value,"0.0") As Float) / (CAST(NVL(obortniy_activi.finance_value,"0.0") As Float) + CAST(NVL(vneobortniy_activi.finance_value,"0.0") As Float)) *100,
            1) As sign_rentabelnost_activov_chistaya 

-- Profitability of goods/services sold, %: profit (loss) from sales / cost of sales.
-- The denominator is checked as a number, not as the string "0.0".
,      if (pribil_ot_prodag.finance_value is not null and sebestoimost_prodag.finance_value is not null and CAST(sebestoimost_prodag.finance_value As Float) <> 0.0,
            CAST(NVL(pribil_ot_prodag.finance_value,"0.0") As Float) / CAST(NVL(sebestoimost_prodag.finance_value,"0.0") As Float)*100,
            1) As sign_rentabelnost_prodannih_tovarov 

-- Net profitability of sales, %: net profit / revenue.
-- The denominator is checked as a number, not as the string "0.0".
,      if (chistaya_pribil.finance_value is not null and viruchka.finance_value is not null and CAST(viruchka.finance_value As Float) <> 0.0,
            CAST(NVL(chistaya_pribil.finance_value,"0.0") As Float) / CAST(NVL(viruchka.finance_value,"0.0") As Float)*100,
            1) As sign_rentabelnost_prodag_chstaya 
            
-- Cash and cash equivalents
,      if (denegnie_sredstva.finance_value is not null, CAST(denegnie_sredstva.finance_value As Float),1) As sign_denegnie_sredstva

-- Short-term financial investments
,      if (short_fin_vlogeniya.finance_value is not null, CAST(short_fin_vlogeniya.finance_value As Float),1) As sign_short_fin_vlogeniya

-- Accounts receivable
,      if (deb_zadolgennost.finance_value is not null, CAST(deb_zadolgennost.finance_value As Float),1) As sign_deb_zadolgennost

-- Inventories
,      if (zapasi.finance_value is not null, CAST(zapasi.finance_value As Float),1) As sign_zapasi

-- Flag: total assets exceed authorized capital (true when either value is missing)
,      if (activi_vsego.finance_value is not null and ustavnoi_capital.finance_value is not null,
            CAST(NVL(activi_vsego.finance_value,"0.0") As Float) > CAST(NVL(ustavnoi_capital.finance_value,"0.0") As Float)
            ,true) As sign_active_more_stav

-- Current assets value taken from $oborotnii_active
,      if (oborotnii_active.finance_value is not null, CAST(oborotnii_active.finance_value As Float),1) As sign_oborotnii_active
from $all_spark_id As all_spark_id
-- Current assets
left join $obortniy_activi As obortniy_activi
ON all_spark_id.spark_id == obortniy_activi.spark_id and all_spark_id.finance_period_name == obortniy_activi.finance_period_name  
-- Non-current assets
left join $vneobortniy_activi As vneobortniy_activi
ON all_spark_id.spark_id == vneobortniy_activi.spark_id and all_spark_id.finance_period_name == vneobortniy_activi.finance_period_name
-- Profit (loss) before tax
left join $pribil_do_naloga As pribil_do_naloga
ON all_spark_id.spark_id == pribil_do_naloga.spark_id and all_spark_id.finance_period_name == pribil_do_naloga.finance_period_name
-- Net profit (loss)
left join $chistaya_pribil As chistaya_pribil
ON all_spark_id.spark_id == chistaya_pribil.spark_id and all_spark_id.finance_period_name == chistaya_pribil.finance_period_name
-- Profit (loss) from sales
left join $pribil_ot_prodag As pribil_ot_prodag
ON all_spark_id.spark_id == pribil_ot_prodag.spark_id and all_spark_id.finance_period_name == pribil_ot_prodag.finance_period_name
-- Cost of sales
left join $sebestoimost_prodag As sebestoimost_prodag
ON all_spark_id.spark_id == sebestoimost_prodag.spark_id and all_spark_id.finance_period_name == sebestoimost_prodag.finance_period_name
-- Revenue
left join $viruchka As viruchka
ON all_spark_id.spark_id == viruchka.spark_id and all_spark_id.finance_period_name == viruchka.finance_period_name
-- Cash and cash equivalents
left join $denegnie_sredstva As denegnie_sredstva
ON all_spark_id.spark_id == denegnie_sredstva.spark_id and all_spark_id.finance_period_name == denegnie_sredstva.finance_period_name
-- Short-term financial investments
left join $short_fin_vlogeniya As short_fin_vlogeniya
ON all_spark_id.spark_id == short_fin_vlogeniya.spark_id and all_spark_id.finance_period_name == short_fin_vlogeniya.finance_period_name
-- Accounts receivable
left join $deb_zadolgennost As deb_zadolgennost
ON all_spark_id.spark_id == deb_zadolgennost.spark_id and all_spark_id.finance_period_name == deb_zadolgennost.finance_period_name
-- Inventories
left join $zapasi As zapasi
ON all_spark_id.spark_id == zapasi.spark_id and all_spark_id.finance_period_name == zapasi.finance_period_name
-- Total assets
left join $activi_vsego As activi_vsego
ON all_spark_id.spark_id == activi_vsego.spark_id and all_spark_id.finance_period_name == activi_vsego.finance_period_name
-- Authorized capital
left join $ustavnoi_capital As ustavnoi_capital
ON all_spark_id.spark_id == ustavnoi_capital.spark_id and all_spark_id.finance_period_name == ustavnoi_capital.finance_period_name
-- Current assets ($oborotnii_active)
left join $oborotnii_active As oborotnii_active
ON all_spark_id.spark_id == oborotnii_active.spark_id and all_spark_id.finance_period_name == oborotnii_active.finance_period_name
);

-- Rewrites the indicator table with the distinct rows of $all_meat_coef
-- (grouping by every selected column removes exact duplicates).
INSERT INTO $all_signs_from_stat WITH TRUNCATE
SELECT
    spark_id,
    finance_period_name,

    -- Raw values, no transformation
    value_obortniy_activi,
    value_vneobortniy_activi,
    value_pribil_do_naloga,
    value_chistaya_pribil,
    value_pribil_ot_prodag,
    value_sebestoimost_prodag,
    value_viruchka,
    value_denegnie_sredstva,
    value_short_fin_vlogeniya,
    value_deb_zadolgennost,
    value_zapasi,
    value_activi_vsego,
    value_ustavnoi_capital,
    value_oborotnii_active,

    -- Coefficients for clustering
    sign_rentabelnost_activov_obshaya,    -- profit before tax / (current + non-current assets)
    sign_rentabelnost_activov_chistaya,   -- net profit / (current + non-current assets)
    sign_rentabelnost_prodannih_tovarov,  -- profit from sales / cost of sales
    sign_rentabelnost_prodag_chstaya,     -- net profit / revenue
    sign_denegnie_sredstva,               -- cash and cash equivalents
    sign_short_fin_vlogeniya,             -- short-term financial investments
    sign_deb_zadolgennost,                -- accounts receivable
    sign_zapasi,                          -- inventories
    sign_active_more_stav,                -- total assets > authorized capital
    sign_oborotnii_active                 -- current assets
FROM $all_meat_coef
GROUP BY
    spark_id,
    finance_period_name,
    value_obortniy_activi,
    value_vneobortniy_activi,
    value_pribil_do_naloga,
    value_chistaya_pribil,
    value_pribil_ot_prodag,
    value_sebestoimost_prodag,
    value_viruchka,
    value_denegnie_sredstva,
    value_short_fin_vlogeniya,
    value_deb_zadolgennost,
    value_zapasi,
    value_activi_vsego,
    value_ustavnoi_capital,
    value_oborotnii_active,
    sign_rentabelnost_activov_obshaya,
    sign_rentabelnost_activov_chistaya,
    sign_rentabelnost_prodannih_tovarov,
    sign_rentabelnost_prodag_chstaya,
    sign_denegnie_sredstva,
    sign_short_fin_vlogeniya,
    sign_deb_zadolgennost,
    sign_zapasi,
    sign_active_more_stav,
    sign_oborotnii_active
;
COMMIT;

---------- Формирование кластеров--------------

-- (period, spark_id) pairs that have enough raw data: the inputs of at least
-- one profitability ratio AND at least one of the remaining indicators.
$has_minimum_coef =
(
SELECT
    finance_period_name,
    spark_id
FROM $all_signs_from_stat
WHERE
    -- inputs of at least one profitability ratio are present
    (
        (value_pribil_do_naloga IS NOT NULL AND (value_obortniy_activi IS NOT NULL OR value_vneobortniy_activi IS NOT NULL))
        OR (value_chistaya_pribil IS NOT NULL AND (value_obortniy_activi IS NOT NULL OR value_vneobortniy_activi IS NOT NULL))
        OR (value_pribil_ot_prodag IS NOT NULL AND value_sebestoimost_prodag IS NOT NULL)
        OR (value_chistaya_pribil IS NOT NULL AND value_viruchka IS NOT NULL)
    )
    AND
    -- at least one of the other indicators is present
    (
        value_denegnie_sredstva IS NOT NULL
        OR value_short_fin_vlogeniya IS NOT NULL
        OR value_deb_zadolgennost IS NOT NULL
        OR value_zapasi IS NOT NULL
        OR (value_activi_vsego IS NOT NULL AND value_ustavnoi_capital IS NOT NULL)
        OR value_oborotnii_active IS NOT NULL
    )
GROUP BY
    finance_period_name,
    spark_id
);

-- Emit the selection as a query result
SELECT *
FROM $has_minimum_coef
;

-- Companies whose 2019 row passes the minimum-data filter and has every
-- clustering indicator positive. The finance_period_name output column holds
-- the number of distinct periods matched.
$companies_w_positive_metrics_2019 =
(
SELECT
    spark_id,
    COUNT(DISTINCT all_signs_from_stat.finance_period_name) AS finance_period_name
FROM $all_signs_from_stat AS all_signs_from_stat
LEFT SEMI JOIN $has_minimum_coef AS has_minimum_coef
    ON all_signs_from_stat.spark_id == has_minimum_coef.spark_id
   AND all_signs_from_stat.finance_period_name == has_minimum_coef.finance_period_name
WHERE NVL(sign_rentabelnost_activov_chistaya, 0.0) > 0   -- net return on assets (%) > 0
  AND NVL(sign_rentabelnost_activov_obshaya, 0.0) > 0    -- gross return on assets (%) > 0
  AND NVL(sign_rentabelnost_prodannih_tovarov, 0.0) > 0  -- profitability of goods/services sold (%) > 0
  AND NVL(sign_rentabelnost_prodag_chstaya, 0.0) > 0     -- net profitability of sales (%) > 0
  AND sign_active_more_stav                              -- total assets > authorized capital
  AND NVL(sign_oborotnii_active, 0.0) > 0                -- current assets > 0
  AND NVL(sign_denegnie_sredstva, 0.0) > 0               -- no cash deficit
  AND NVL(sign_short_fin_vlogeniya, 0.0) > 0             -- no deficit of short-term financial investments
  AND NVL(sign_deb_zadolgennost, 0.0) > 0                -- no deficit of accounts receivable
  AND NVL(sign_zapasi, 0.0) > 0                          -- no deficit of inventories
  AND finance_period_name IN ("2019")
GROUP BY all_signs_from_stat.spark_id AS spark_id
);

-- "Excellent" cluster: companies from $companies_w_positive_metrics_2019 that
-- also qualify on BOTH 2017 and 2018 (Count(distinct period) == 2) through one
-- of two UNION ALL branches:
--   1) every indicator is positive in each of the two years;
--   2) the liquidity/capital indicators are positive and the HAVING clause
--      holds for the profitability indicators and current assets.
-- The finance_period_name output column holds the number of distinct periods.
--
-- NOTE(review): in branch 2 each HAVING term is abs(SUM(x) / AVG(x) - 1) < 0.1.
-- For a group of N rows SUM(x) / AVG(x) equals N (x is never NULL because of
-- NVL), and N >= 2 is forced by the distinct-period check, so the term looks
-- unsatisfiable and branch 2 appears to return no rows. The intended stability
-- measure should be confirmed.
$excellent_companies = 
(
Select spark_id
,      Count(distinct all_signs_from_stat.finance_period_name) As finance_period_name
from $all_signs_from_stat As all_signs_from_stat
left semi join $companies_w_positive_metrics_2019 As companies_w_positive_metrics_2019
ON all_signs_from_stat.spark_id == companies_w_positive_metrics_2019.spark_id
left semi join $has_minimum_coef As has_minimum_coef
ON all_signs_from_stat.spark_id == has_minimum_coef.spark_id and all_signs_from_stat.finance_period_name == has_minimum_coef.finance_period_name
where NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0)>0 -- net return on assets (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0)>0 -- gross return on assets (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0)>0 -- profitability of goods/services sold (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0)>0 -- net profitability of sales (%) > 0
  and all_signs_from_stat.sign_active_more_stav -- total assets > authorized capital
  and NVL(all_signs_from_stat.sign_oborotnii_active,0.0)>0 -- current assets > 0
  and NVL(all_signs_from_stat.sign_denegnie_sredstva,0.0)>0 -- no cash deficit
  and NVL(all_signs_from_stat.sign_short_fin_vlogeniya,0.0)>0 -- no deficit of short-term financial investments
  and NVL(all_signs_from_stat.sign_deb_zadolgennost,0.0)>0 -- no deficit of accounts receivable
  and NVL(all_signs_from_stat.sign_zapasi,0.0)>0 -- no deficit of inventories
  and all_signs_from_stat.finance_period_name in ("2017","2018")
group by all_signs_from_stat.spark_id As spark_id
having Count(distinct all_signs_from_stat.finance_period_name) == 2
union all
Select spark_id
,      Count(distinct all_signs_from_stat.finance_period_name) As finance_period_name
from $all_signs_from_stat As all_signs_from_stat
left semi join $companies_w_positive_metrics_2019 As companies_w_positive_metrics_2019
ON all_signs_from_stat.spark_id == companies_w_positive_metrics_2019.spark_id
left semi join $has_minimum_coef As has_minimum_coef
ON all_signs_from_stat.spark_id == has_minimum_coef.spark_id and all_signs_from_stat.finance_period_name == has_minimum_coef.finance_period_name
where NVL(all_signs_from_stat.sign_denegnie_sredstva,0.0)>0 -- no cash deficit
  and NVL(all_signs_from_stat.sign_short_fin_vlogeniya,0.0)>0 -- no deficit of short-term financial investments
  and NVL(all_signs_from_stat.sign_deb_zadolgennost,0.0)>0 -- no deficit of accounts receivable
  and NVL(all_signs_from_stat.sign_zapasi,0.0)>0 -- no deficit of inventories
  and NVL(all_signs_from_stat.sign_oborotnii_active,0.0)>0 -- current assets > 0
  and all_signs_from_stat.sign_active_more_stav -- total assets > authorized capital
  and all_signs_from_stat.finance_period_name in ("2017","2018")
group by all_signs_from_stat.spark_id As spark_id
having Count(distinct all_signs_from_stat.finance_period_name) == 2
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0))-1.0) <0.1 -- net return on assets (%): SUM/AVG check, see NOTE(review) above
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0))-1.0) < 0.1 -- gross return on assets (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0))-1.0) < 0.1 -- profitability of goods/services sold (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0))-1.0) < 0.1 -- net profitability of sales (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_oborotnii_active,0.0)) / AVG(NVL(all_signs_from_stat.sign_oborotnii_active,0.0))-1.0) < 0.1
);

-- Emit the selection as a query result
Select *
from $excellent_companies
;

-- "Good" cluster: companies from $companies_w_positive_metrics_2019 that are
-- NOT in $excellent_companies (left only join) and qualify on 2018 through one
-- of two UNION ALL branches:
--   1) every indicator is positive in 2018;
--   2) the liquidity/capital indicators are positive in 2018 and the HAVING
--      clause holds for the profitability indicators and current assets.
-- A company matching both branches is emitted twice (UNION ALL).
-- The finance_period_name output column holds the number of distinct periods.
--
-- NOTE(review): each HAVING term is abs(SUM(x) / AVG(x) - 1) < 0.1, and
-- SUM(x) / AVG(x) equals the number of rows in the group. With a single 2018
-- row per company the ratio is 1, so the term presumably passes whenever the
-- value is non-zero - confirm the intended stability measure.
$good_companies = 
(
Select spark_id
,      Count(distinct all_signs_from_stat.finance_period_name) As finance_period_name
from $all_signs_from_stat As all_signs_from_stat
left semi join $companies_w_positive_metrics_2019 As companies_w_positive_metrics_2019
ON all_signs_from_stat.spark_id == companies_w_positive_metrics_2019.spark_id
left only join $excellent_companies As excellent_companies
ON all_signs_from_stat.spark_id == excellent_companies.spark_id
left semi join $has_minimum_coef As has_minimum_coef
ON all_signs_from_stat.spark_id == has_minimum_coef.spark_id and all_signs_from_stat.finance_period_name == has_minimum_coef.finance_period_name
where NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0)>0 -- net return on assets (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0)>0 -- gross return on assets (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0)>0 -- profitability of goods/services sold (%) > 0
  and NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0)>0 -- net profitability of sales (%) > 0
  and all_signs_from_stat.sign_active_more_stav -- total assets > authorized capital
  and NVL(all_signs_from_stat.sign_oborotnii_active,0.0)>0 -- current assets > 0
  and NVL(all_signs_from_stat.sign_denegnie_sredstva,0.0)>0 -- no cash deficit
  and NVL(all_signs_from_stat.sign_short_fin_vlogeniya,0.0)>0 -- no deficit of short-term financial investments
  and NVL(all_signs_from_stat.sign_deb_zadolgennost,0.0)>0 -- no deficit of accounts receivable
  and NVL(all_signs_from_stat.sign_zapasi,0.0)>0 -- no deficit of inventories
  and all_signs_from_stat.finance_period_name in ("2018")
group by all_signs_from_stat.spark_id As spark_id
union all
Select spark_id
,      Count(distinct all_signs_from_stat.finance_period_name) As finance_period_name
from $all_signs_from_stat As all_signs_from_stat
left semi join $companies_w_positive_metrics_2019 As companies_w_positive_metrics_2019
ON all_signs_from_stat.spark_id == companies_w_positive_metrics_2019.spark_id
left only join $excellent_companies As excellent_companies
ON all_signs_from_stat.spark_id == excellent_companies.spark_id
left semi join $has_minimum_coef As has_minimum_coef
ON all_signs_from_stat.spark_id == has_minimum_coef.spark_id and all_signs_from_stat.finance_period_name == has_minimum_coef.finance_period_name
where NVL(all_signs_from_stat.sign_denegnie_sredstva,0.0)>0 -- no cash deficit
  and NVL(all_signs_from_stat.sign_short_fin_vlogeniya,0.0)>0 -- no deficit of short-term financial investments
  and NVL(all_signs_from_stat.sign_deb_zadolgennost,0.0)>0 -- no deficit of accounts receivable
  and NVL(all_signs_from_stat.sign_zapasi,0.0)>0 -- no deficit of inventories
  and NVL(all_signs_from_stat.sign_oborotnii_active,0.0)>0 -- current assets > 0
  and all_signs_from_stat.sign_active_more_stav -- total assets > authorized capital
  and all_signs_from_stat.finance_period_name in ("2018")
group by all_signs_from_stat.spark_id As spark_id
having abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_activov_chistaya,0.0))-1.0) <0.1 -- net return on assets (%): SUM/AVG check, see NOTE(review) above
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_activov_obshaya,0.0))-1.0) < 0.1 -- gross return on assets (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_prodannih_tovarov,0.0))-1.0) < 0.1 -- profitability of goods/services sold (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0)) / AVG(NVL(all_signs_from_stat.sign_rentabelnost_prodag_chstaya,0.0))-1.0) < 0.1 -- net profitability of sales (%): SUM/AVG check
  and abs(SUM(NVL(all_signs_from_stat.sign_oborotnii_active,0.0)) / AVG(NVL(all_signs_from_stat.sign_oborotnii_active,0.0))-1.0) < 0.1
);

-- Emit the selection as a query result
Select *
from $good_companies
;

-- "Adequate" companies: spark_ids that
--   * have a matching (spark_id, finance_period_name) row in $has_minimum_coef,
--   * are present in neither $excellent_companies nor $good_companies,
--   * have all four profitability indicators strictly positive for period "2019".
-- Output columns: spark_id, finance_period_name (the latter holds a COUNT, see below).
$adequately_companies =
(
    SELECT
        spark_id,
        -- Number of distinct periods that survived the filter; the output column
        -- keeps the name finance_period_name even though it holds a count.
        COUNT(DISTINCT signs.finance_period_name) AS finance_period_name
    FROM $all_signs_from_stat AS signs
    LEFT SEMI JOIN $has_minimum_coef AS min_coef
        ON  signs.spark_id == min_coef.spark_id
        AND signs.finance_period_name == min_coef.finance_period_name
    LEFT ONLY JOIN $excellent_companies AS excellent
        ON signs.spark_id == excellent.spark_id
    LEFT ONLY JOIN $good_companies AS good
        ON signs.spark_id == good.spark_id
    WHERE signs.finance_period_name == "2019"
      AND COALESCE(signs.sign_rentabelnost_activov_chistaya, 0.0) > 0   -- net return on assets (%) > 0
      AND COALESCE(signs.sign_rentabelnost_activov_obshaya, 0.0) > 0    -- total return on assets (%) > 0
      AND COALESCE(signs.sign_rentabelnost_prodannih_tovarov, 0.0) > 0  -- profitability of goods, products, works and services sold (%) > 0
      AND COALESCE(signs.sign_rentabelnost_prodag_chstaya, 0.0) > 0     -- net profitability of sales (%) > 0
      -- The deficit checks on sign_denegnie_sredstva, sign_short_fin_vlogeniya and
      -- sign_deb_zadolgennost are currently disabled for this cluster.
    GROUP BY signs.spark_id AS spark_id
    -- The WHERE clause already restricts rows to a single period.
    HAVING COUNT(DISTINCT signs.finance_period_name) == 1
);

-- Debug output: return every row of $adequately_companies as a query result.
SELECT *
FROM $adequately_companies;

-- "Negative" companies: spark_ids that are present in none of
-- $excellent_companies, $good_companies, $adequately_companies and that have
-- at least one of the four profitability indicators strictly negative for
-- period "2019".
-- Output columns: spark_id, finance_period_name (the latter holds a COUNT, see below).
$negative_companies =
(
    SELECT
        spark_id,
        -- Number of distinct periods that survived the filter; the output column
        -- keeps the name finance_period_name even though it holds a count.
        COUNT(DISTINCT signs.finance_period_name) AS finance_period_name
    FROM $all_signs_from_stat AS signs
    LEFT ONLY JOIN $excellent_companies AS excellent
        ON signs.spark_id == excellent.spark_id
    LEFT ONLY JOIN $good_companies AS good
        ON signs.spark_id == good.spark_id
    LEFT ONLY JOIN $adequately_companies AS adequate
        ON signs.spark_id == adequate.spark_id
    WHERE signs.finance_period_name == "2019"
      AND (
            COALESCE(signs.sign_rentabelnost_activov_chistaya, 0.0) < 0   -- net return on assets (%) < 0
         OR COALESCE(signs.sign_rentabelnost_activov_obshaya, 0.0) < 0    -- total return on assets (%) < 0
         OR COALESCE(signs.sign_rentabelnost_prodannih_tovarov, 0.0) < 0  -- profitability of goods, products, works and services sold (%) < 0
         OR COALESCE(signs.sign_rentabelnost_prodag_chstaya, 0.0) < 0     -- net profitability of sales (%) < 0
      )
      -- The deficit checks on sign_denegnie_sredstva, sign_short_fin_vlogeniya and
      -- sign_deb_zadolgennost are currently disabled for this cluster.
    GROUP BY signs.spark_id AS spark_id
    -- The WHERE clause already restricts rows to a single period.
    HAVING COUNT(DISTINCT signs.finance_period_name) == 1
);

-- Debug output: return every row of $negative_companies as a query result.
SELECT *
FROM $negative_companies;

-- Rewrite $all_clusters (WITH TRUNCATE replaces previous contents) with one row
-- per (cluster, spark_id). Each branch deduplicates spark_id via GROUP BY.
INSERT INTO $all_clusters WITH TRUNCATE
-- Cluster 1: excellent companies.
SELECT
    'excellent_companies' AS cluster,
    spark_id
FROM $excellent_companies
GROUP BY spark_id

UNION ALL

-- Cluster 2: good companies.
SELECT
    'good_companies' AS cluster,
    spark_id
FROM $good_companies
GROUP BY spark_id

UNION ALL

-- Cluster 3: adequate companies.
SELECT
    'adequately_companies' AS cluster,
    spark_id
FROM $adequately_companies
GROUP BY spark_id

UNION ALL

-- Cluster 4: negative companies.
SELECT
    'negative_companies' AS cluster,
    spark_id
FROM $negative_companies
GROUP BY spark_id

UNION ALL

-- Fallback cluster: every spark_id with a "2019" row in $all_signs_from_stat
-- that is present in none of the four clusters above.
SELECT
    'other' AS cluster,
    spark_id
FROM $all_signs_from_stat AS signs
LEFT ONLY JOIN $excellent_companies AS excellent
    ON signs.spark_id == excellent.spark_id
LEFT ONLY JOIN $good_companies AS good
    ON signs.spark_id == good.spark_id
LEFT ONLY JOIN $adequately_companies AS adequate
    ON signs.spark_id == adequate.spark_id
LEFT ONLY JOIN $negative_companies AS negative
    ON signs.spark_id == negative.spark_id
WHERE signs.finance_period_name == "2019"
GROUP BY signs.spark_id AS spark_id
;

-- Commit the write before any later statement reads $all_clusters.
COMMIT;

---------- Подготовка финальной таблицы--------------

-- Distinct (spark_id, main_class_okvad, main_class_okvad_name) combinations
-- taken from $raw_data_for_cluster; GROUP BY is used purely for deduplication.
$all_main_class_okvad =
(
    SELECT
        spark_id,
        main_class_okvad,
        main_class_okvad_name
    FROM $raw_data_for_cluster
    GROUP BY
        spark_id,
        main_class_okvad,
        main_class_okvad_name
);


-- Rewrite $final_table (WITH TRUNCATE replaces previous contents): every
-- (cluster, spark_id) row of $all_clusters, enriched via LEFT JOINs with
--   * the company's main OKVED class          ($all_main_class_okvad),
--   * company attributes                      ($spark_all),
--   * the OKVED-to-industry dictionary        ($McK_okved_industries),
--   * per-industry figures                    ($McK_main_industries).
-- The GROUP BY lists every output column, so it both names the output columns
-- and removes duplicate rows.
INSERT INTO $final_table WITH TRUNCATE
SELECT
    cluster,
    Spark_id,
    Spark_main_class_okvad,
    Spark_main_class_okvad_name,

    -- Attributes taken from $spark_all.
    Spark_inn,
    Spark_domain,
    Spark_company_type,
    Spark_main_okved2_code,
    Spark_main_okved2_name,
    Spark_population,
    Spark_workers_range,
    Spark_company_size_revenue,
    Spark_company_size_description,
    Spark_okato_region_code,

    -- Attributes taken from $McK_okved_industries.
    McKinsey_okved_number,
    McKinsey_okved_name,
    McKinsey_okved_detail,
    McKinsey_sector,
    McKinsey_sector2,
    McKinsey_sector3,
    Yandex_industry,

    -- Attributes taken from $McK_main_industries.
    McKinsey_main_industry,
    McKinsey_industry,
    McKinsey_qty_ur_lic,
    McKinsey_revenue,
    McKinsey_ads_to_revenue,
    McKinsey_profit_pool,
    McKinsey_0_20_and_ip,
    McKinsey_20_350,
    McKinsey_350_1000

FROM $all_clusters AS clusters
LEFT JOIN $all_main_class_okvad AS okvad_class
    ON clusters.spark_id == okvad_class.spark_id
LEFT JOIN $spark_all AS spark
    ON clusters.spark_id == spark.spark_id
LEFT JOIN $McK_okved_industries AS mck_okved
    ON okvad_class.main_class_okvad == mck_okved.McKinsey_okved_number
LEFT JOIN $McK_main_industries AS mck_main
    ON mck_okved.McKinsey_sector3 == mck_main.McKinsey_industry
GROUP BY
    clusters.cluster AS cluster,
    clusters.spark_id AS Spark_id,
    okvad_class.main_class_okvad AS Spark_main_class_okvad,
    okvad_class.main_class_okvad_name AS Spark_main_class_okvad_name,

    spark.inn AS Spark_inn,
    spark.domain AS Spark_domain,
    spark.company_type AS Spark_company_type,
    spark.main_okved2_code AS Spark_main_okved2_code,
    spark.main_okved2_name AS Spark_main_okved2_name,
    spark.population AS Spark_population,
    spark.workers_range AS Spark_workers_range,
    spark.company_size.revenue AS Spark_company_size_revenue,
    spark.company_size.description AS Spark_company_size_description,
    spark.okato.region_code AS Spark_okato_region_code,

    mck_okved.McKinsey_okved_number AS McKinsey_okved_number,
    mck_okved.McKinsey_okved_name AS McKinsey_okved_name,
    mck_okved.McKinsey_okved_detail AS McKinsey_okved_detail,
    mck_okved.McKinsey_sector AS McKinsey_sector,
    mck_okved.McKinsey_sector2 AS McKinsey_sector2,
    mck_okved.McKinsey_sector3 AS McKinsey_sector3,
    mck_okved.Yandex_industry AS Yandex_industry,

    mck_main.McKinsey_main_industry AS McKinsey_main_industry,
    mck_main.McKinsey_industry AS McKinsey_industry,
    mck_main.McKinsey_qty_ur_lic AS McKinsey_qty_ur_lic,
    mck_main.McKinsey_revenue AS McKinsey_revenue,
    mck_main.McKinsey_ads_to_revenue AS McKinsey_ads_to_revenue,
    mck_main.McKinsey_profit_pool AS McKinsey_profit_pool,
    mck_main.McKinsey_0_20_and_ip AS McKinsey_0_20_and_ip,
    mck_main.McKinsey_20_350 AS McKinsey_20_350,
    mck_main.McKinsey_350_1000 AS McKinsey_350_1000
;
