use arnold;

-- pragma yt.AutoMerge="disabled";
-- pragma yt.Pool="robot-webmaster";
pragma yt.InferSchema='1';

-- The 500000 highest-Popularity rows read from the tables under the
-- rival_queries_report directory. `Date` carries the name of the table
-- each row was read from.
-- NOTE(review): order by / limit are applied to the whole range at once,
-- not per table — confirm that a global top-N is what is intended.
$table_with_dates =
select
    Host,
    Rival,
    Popularity,
    TableName(TablePath()) as `Date`
from range(`//home/webmaster/prod/searchqueries/niche/import/rival_queries_report`)
order by Popularity desc
limit 500000
;

-- Rows of $table_with_dates whose Rival is not the empty string,
-- reduced to the (Date, Host, Rival) columns.
$niche_rivals =
select
    `Date`,
    Host,
    Rival
from $table_with_dates
where Rival != ""
;

-- (Host, Analogy) pairs read from the two host2vec source tables.
-- NOTE(review): if the same pair is present in both tables it would appear
-- twice here and be counted twice by the join downstream — confirm whether
-- that is intended.
$host2vec_rivals =
select
    Host,
    Analogy
from concat(
    `//home/webmaster/prod/searchqueries/niche/source/host2vec/similargroup-300`,
    `//home/webmaster/prod/searchqueries/niche/source/host2vec/spylog-300`
)
;

-- Niche (Host, Rival) pairs that also occur in host2vec as (Host, Analogy),
-- keeping the Date of the niche row.
$intersection_rivals =
select
    niche.Host as `Host`,
    niche.Rival as `Rival`,
    niche.`Date` as `Date`
from $niche_rivals as niche
inner join $host2vec_rivals as h2v
    on niche.Host = h2v.Host
    and niche.Rival = h2v.Analogy
;

-- Number of rows of $intersection_rivals for each Date.
$intersection_cnts_by_date =
select
    `Date`,
    count(*) as cnt
from $intersection_rivals
group by `Date`
;

-- Row count of $niche_rivals over all dates; used below as a scalar
-- denominator.
$total =
select
    count(*)
from $niche_rivals
;

-- Rewrites host2vec_stats (existing contents are truncated) with one row per Date:
--   intersection_cnt — number of rows of $intersection_rivals for that date;
--   p                — intersection_cnt divided by $total, i.e. by the row
--                      count of $niche_rivals over all dates.
-- cnt and $total are both integer counts, and `/` on two integers is integer
-- division in YQL, which would truncate the ratio (to 0 whenever cnt < $total).
-- The numerator is therefore cast to Double so that p is a real fraction.
insert into `//home/webmaster/prod/analytics/niche/host2vec_stats` with truncate
select
    `Date`,
    cnt as intersection_cnt,
    cast(cnt as Double) / $total as p
from $intersection_cnts_by_date
;