# -*- coding: utf-8 -*-

import os
import time
import json
import logging
import requests

from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

from sandbox import sandboxsdk
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process

from sandbox.projects import resource_types
from sandbox.projects.common import utils
from sandbox.projects.common.mediasearch import ban as mediaban
from sandbox.projects.common.nanny import auto_deploy
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.projects.VideoSearch import video_resource_types as video_resource_types

from timestamped_promo import produce_timestamped_promo_trie
from dump_promo import dump_promo_to_yt


def format_videohub_data(line):
    """
        Convert one tab-separated trie entry into a '#query' line.

        :param line: raw text line; after stripping surrounding whitespace it
            must hold exactly 4 tab-separated columns:
            query, tld, ipregion, json_data
        :return: '#query<TAB>query<TAB>tld<TAB>ipregion<TAB>json_data' followed
            by a newline, or "" when the line is blank or malformed
            (malformed lines are also reported via logging.error)
    """
    stripped = line.strip()
    if not stripped:
        # str.split('\t') never returns an empty list ('' -> ['']), so blank
        # lines must be detected before splitting to be skipped silently
        return ""
    columns = stripped.split('\t')
    if len(columns) != 4:
        logging.error("Expected 4 columns, got {} in trie entry: {}".format(len(columns), stripped))
        return ""
    query, tld, ipregion, json_data = columns
    return "#query\t%s\t%s\t%s\t%s\n" % (query, tld, ipregion, json_data)


class TrieKeyEntrySize(parameters.SandboxIntegerParameter):
    # Integer task parameter: trie key entry size (units are not visible here).
    name = 'trie_key_entry_size'
    description = 'Trie Key Entry Size'
    group = 'TrieParams'
    required = True
    default_value = 4096


class TrieValueEntrySize(parameters.SandboxIntegerParameter):
    # Integer task parameter: trie value entry size (units are not visible here).
    name = 'trie_value_entry_size'
    description = 'Trie Value Entry Size'
    group = 'TrieParams'
    required = True
    default_value = 32768


class SpecialEventsYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for special events; _load_special_events uses it as the YT proxy url.
    name = 'special_events_yt_server'
    description = 'Special Events YT server'
    group = 'SpecialEvents'
    required = True
    default_value = 'hahn'


class SpecialEventsFutureYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future special events.
    name = 'special_events_future_yt_path'
    description = 'Special Events Future YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/future_filtered'


class SpecialEventsFutureTestYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future special events for the test category.
    name = 'special_events_future_test_yt_path'
    description = 'Special Events Future Test YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/future_test_filtered'


class SpecialEventsFutureSportYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future sport special events.
    name = 'special_events_future_sport_yt_path'
    description = 'Special Events Future Sport YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/future_sport_filtered'


class SpecialEventsFutureCyberSportYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future cyber sport special events.
    name = 'special_events_future_cyber_sport_yt_path'
    description = 'Special Events Future Cyber Sport YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/future_cyber_sport_filtered'


class SpecialEventsFutureMusicYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future music special events.
    name = 'special_events_future_music_yt_path'
    description = 'Special Events Future Music YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/future_music_filtered'


class SpecialEventsFutureStayhomeYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future "stayhome" special events.
    name = 'special_events_future_stayhome_yt_path'
    description = 'Special Events Future Stayhome YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/stayhome_future'


class SpecialEventsFutureAnnouncedEventYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with future announced special events.
    name = 'special_events_future_announced_event_yt_path'
    description = 'Special Events Future Announced Event YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/announced_event_future'


class SpecialEventsPastYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with past special events.
    name = 'special_events_past_yt_path'
    description = 'Special Events Past YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/past_filtered'


class SpecialEventsPastSportYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with past sport special events.
    name = 'special_events_past_sport_yt_path'
    description = 'Special Events Past Sport YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/past_sport_filtered'


class SpecialEventsPastCyberSportYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with past cyber sport special events.
    name = 'special_events_past_cyber_sport_yt_path'
    description = 'Special Events Past Cyber Sport YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/past_cyber_sport_filtered'


class SpecialEventsPastMusicYTPathParameter(parameters.SandboxStringParameter):
    # YT table path with past music special events.
    name = 'special_events_past_music_yt_path'
    description = 'Special Events Past Music YT table'
    group = 'SpecialEvents'
    required = True
    default_value = '//home/ether_prod/specevents/past_music_filtered'


class SpecialEventsFutureDocsLimitParameter(parameters.SandboxIntegerParameter):
    # Max number of docs _add_special_events keeps for a future-events category.
    name = 'special_events_future_docs_limit'
    description = 'Special Events Future Docs Limit'
    group = 'SpecialEvents'
    required = True
    default_value = 100


class SpecialEventsPastDocsLimitParameter(parameters.SandboxIntegerParameter):
    # Max number of docs _add_special_events keeps for a past-events category.
    name = 'special_events_past_docs_limit'
    description = 'Special Events Past Docs Limit'
    group = 'SpecialEvents'
    required = True
    default_value = 200


class SpecialEventsFutureCategIsBloggerParameter(parameters.SandboxBoolParameter):
    # Value written as 'is_blogger' into future-events categories by _add_special_events.
    name = "special_events_future_categ_is_blogger"
    description = "Special Events Future Categ Is Blogger"
    required = True
    default_value = False
    group = 'SpecialEvents'


class SpecialEventsPastCategIsBloggerParameter(parameters.SandboxBoolParameter):
    # Value written as 'is_blogger' into past-events categories by _add_special_events.
    name = "special_events_past_categ_is_blogger"
    description = "Special Events Past Categ Is Blogger"
    required = True
    default_value = False
    group = 'SpecialEvents'


class SpecialEventsFutureCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future special events.
    name = "special_events_future_categ_name"
    description = "Special Events Future Categ Name"
    required = True
    default_value = 'specevents_future'
    group = 'SpecialEvents'


class SpecialEventsFutureTestCategNameParameter(parameters.SandboxStringParameter):
    # Category name for the future special events test category.
    name = "special_events_future_test_categ_name"
    description = "Special Events Future Test Categ Name"
    required = True
    default_value = 'specevents_future_test'
    group = 'SpecialEvents'


class SpecialEventsFutureSportCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future sport special events.
    name = "special_events_future_sport_categ_name"
    description = "Special Events Future Sport Categ Name"
    required = True
    default_value = 'specevents_future_sport'
    group = 'SpecialEvents'


class SpecialEventsFutureCyberSportCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future cyber sport special events.
    name = "special_events_future_cyber_sport_categ_name"
    description = "Special Events Future Cyber Sport Categ Name"
    required = True
    default_value = 'specevents_future_cyber_sport'
    group = 'SpecialEvents'


class SpecialEventsFutureMusicCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future music special events.
    name = "special_events_future_music_categ_name"
    description = "Special Events Future Music Categ Name"
    required = True
    default_value = 'specevents_future_music'
    group = 'SpecialEvents'


class SpecialEventsFutureStayhomeCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future "stayhome" special events.
    name = "special_events_future_stayhome_categ_name"
    description = "Special Events Future Stayhome Categ Name"
    required = True
    default_value = 'specevents_future_stayhome'
    group = 'SpecialEvents'


class SpecialEventsFutureAnnouncedEventCategNameParameter(parameters.SandboxStringParameter):
    # Category name for future announced special events.
    name = "special_events_future_announced_event_categ_name"
    description = "Special Events Future Announced Event Categ Name"
    required = True
    default_value = 'specevents_future_announced_event'
    group = 'SpecialEvents'


class SpecialEventsPastCategNameParameter(parameters.SandboxStringParameter):
    # Category name for past special events.
    name = "special_events_past_categ_name"
    description = "Special Events Past Categ Name"
    required = True
    default_value = 'specevents_past'
    group = 'SpecialEvents'


class SpecialEventsPastSportCategNameParameter(parameters.SandboxStringParameter):
    # Category name for past sport special events.
    name = "special_events_past_sport_categ_name"
    description = "Special Events Past Sport Categ Name"
    required = True
    default_value = 'specevents_past_sport'
    group = 'SpecialEvents'


class SpecialEventsPastCyberSportCategNameParameter(parameters.SandboxStringParameter):
    # Category name for past cyber sport special events.
    name = "special_events_past_cyber_sport_categ_name"
    description = "Special Events Past Cyber Sport Categ Name"
    required = True
    default_value = 'specevents_past_cyber_sport'
    group = 'SpecialEvents'


class SpecialEventsPastMusicCategNameParameter(parameters.SandboxStringParameter):
    # Category name for past music special events.
    name = "special_events_past_music_categ_name"
    description = "Special Events Past Music Categ Name"
    required = True
    default_value = 'specevents_past_music'
    group = 'SpecialEvents'


class SpecialEventsFutureCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future special events (presumably the trie key prefix
    # passed as categ_term to _add_special_events - caller not visible here).
    name = "special_events_future_categ_term"
    description = "Special Events Future Categ Term"
    required = True
    default_value = 'hhqfmpzcqbpqgdcdhh'  # specevents_future
    group = 'SpecialEvents'


class SpecialEventsFutureTestCategTermParameter(parameters.SandboxStringParameter):
    # Category term for the future special events test category.
    name = "special_events_future_test_categ_term"
    description = "Special Events Future Test Categ Term"
    required = True
    default_value = 'hhdjcufxvvvtmldbhh'  # specevents_future_test
    group = 'SpecialEvents'


class SpecialEventsFutureSportCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future sport special events.
    name = "special_events_future_sport_categ_term"
    description = "Special Events Future Sport Categ Term"
    required = True
    default_value = 'hhiidfiixzyvsuhh'  # specevents_future_sport
    group = 'SpecialEvents'


class SpecialEventsFutureCyberSportCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future cyber sport special events.
    name = "special_events_future_cyber_sport_categ_term"
    description = "Special Events Future Cyber Sport Categ Term"
    required = True
    default_value = 'hhruyfnvxqzgbrjchh'  # specevents_future_cyber_sport
    group = 'SpecialEvents'


class SpecialEventsFutureMusicCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future music special events.
    name = "special_events_future_music_categ_term"
    description = "Special Events Future Music Categ Term"
    required = True
    default_value = 'hhxakcdpjlfbrizbhh'  # specevents_future_music
    group = 'SpecialEvents'


class SpecialEventsFutureStayhomeCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future "stayhome" special events.
    name = "special_events_future_stayhome_categ_term"
    description = "Special Events Future Stayhome Categ Term"
    required = True
    default_value = 'hhwnihpigqsrqthchh'  # specevents_future_stayhome
    group = 'SpecialEvents'


class SpecialEventsFutureAnnouncedEventCategTermParameter(parameters.SandboxStringParameter):
    # Category term for future announced special events.
    name = "special_events_future_announced_event_categ_term"
    description = "Special Events Future Announced Event Categ Term"
    required = True
    default_value = 'hhwofltnqbkrctddhh'  # specevents_future_announced_event
    group = 'SpecialEvents'


class SpecialEventsPastCategTermParameter(parameters.SandboxStringParameter):
    # Category term for past special events.
    name = "special_events_past_categ_term"
    description = "Special Events Past Categ Term"
    required = True
    default_value = 'hhjqlifzdpxzqphh'  # specevents_past
    group = 'SpecialEvents'


class SpecialEventsPastSportCategTermParameter(parameters.SandboxStringParameter):
    # Category term for past sport special events.
    name = "special_events_past_sport_categ_term"
    description = "Special Events Past Sport Categ Term"
    required = True
    default_value = 'hhturixqswpzgrdchh'  # specevents_past_sport
    group = 'SpecialEvents'


class SpecialEventsPastCyberSportCategTermParameter(parameters.SandboxStringParameter):
    # Category term for past cyber sport special events.
    name = "special_events_past_cyber_sport_categ_term"
    description = "Special Events Past Cyber Sport Categ Term"
    required = True
    default_value = 'hhyqyylwtafuhznchh'  # specevents_past_cyber_sport
    group = 'SpecialEvents'


class SpecialEventsPastMusicCategTermParameter(parameters.SandboxStringParameter):
    # Category term for past music special events.
    name = "special_events_past_music_categ_term"
    description = "Special Events Past Music Categ Term"
    required = True
    default_value = 'hhbpicuzqzybueuchh'  # specevents_past_music
    group = 'SpecialEvents'


class SpecialEventsFutureTitleParameter(parameters.SandboxStringParameter):
    # Title of future-events categories; _add_special_events strips surrounding
    # double quotes before writing it as 'title'.
    name = "special_events_future_title"
    description = "Special Events Future Title"
    required = True
    default_value = 'Ближайшие события'
    group = 'SpecialEvents'


class SpecialEventsPastTitleParameter(parameters.SandboxStringParameter):
    # Title of past-events categories; _add_special_events strips surrounding
    # double quotes before writing it as 'title'.
    name = "special_events_past_title"
    description = "Special Events Past Title"
    required = True
    default_value = 'Недавние события'
    group = 'SpecialEvents'


class SpecialEventsThumbnailParameter(parameters.SandboxStringParameter):
    # Optional thumbnail string; _add_special_events strips outer braces and,
    # when the remainder is non-empty, stores it re-wrapped in '{...}'.
    name = "special_events_thumbnail"
    description = "Special Events Thumbnail"
    required = False
    default_value = ''
    group = 'SpecialEvents'


class CustomUrlsYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for custom urls; _add_custom_urls uses it as the YT proxy url.
    name = 'custom_urls_yt_server'
    description = 'Custom Urls YT server'
    group = 'CustomUrls'
    required = True
    default_value = 'hahn'


class CustomUrlsYTPathsParameter(parameters.ListRepeater, parameters.SandboxStringParameter):
    # List of YT tables read by _add_custom_urls (rows provide 'key' and 'value').
    name = 'custom_urls_yt_paths'
    description = 'Custom Urls YT tables'
    group = 'CustomUrls'
    required = True
    default_value = ['//home/videoindex/recommender/custom_tags/yandex_show']


class CustomUrlsCategUseReleaseDateParameter(parameters.SandboxBoolParameter):
    # Boolean switch "use release date" for custom urls (its consumer is not visible here).
    name = 'custom_urls_use_release_date'
    description = 'Custom Urls: Use Release Date'
    group = 'CustomUrls'
    required = True
    default_value = False


class OntoIdAssocYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for ontoId associations; _add_ontoid_assoc uses it as the YT proxy url.
    name = 'ontoid_assoc_yt_server'
    description = 'OntoId Assoc YT Server'
    group = 'OntoIdAssoc'
    required = True
    default_value = 'hahn'


class OntoIdAssocYTPathParameter(parameters.SandboxStringParameter):
    # YT table read by _add_ontoid_assoc.
    name = 'ontoid_assoc_yt_path'
    description = 'OntoId Assoc YT Path'
    group = 'OntoIdAssoc'
    required = True
    default_value = '//home/videoindex/recommender/ontoid_assoc/assoc_grouped'


class OntoIdAssocMinDocsParameter(parameters.SandboxIntegerParameter):
    # Rows with fewer episodes than this are skipped by _add_ontoid_assoc.
    name = 'ontoid_assoc_min_docs'
    description = 'OntoId Assoc Min Docs'
    group = 'OntoIdAssoc'
    required = True
    default_value = 4


class OntoIdAssocMaxDocsParameter(parameters.SandboxIntegerParameter):
    # Upper bound on docs written per category by _add_ontoid_assoc.
    name = 'ontoid_assoc_max_docs'
    description = 'OntoId Assoc Max Docs'
    group = 'OntoIdAssoc'
    required = True
    default_value = 16


class OntoIdAssocHavePosters(parameters.SandboxBoolParameter):
    # When set, _add_ontoid_assoc marks every category with 'has_poster': True.
    name = 'ontoid_assoc_have_posters'
    description = 'OntoId Assoc Have Posters'
    group = 'OntoIdAssoc'
    required = True
    default_value = True


class TimestampedPromoYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for timestamped promo data.
    name = 'timestamped_promo_yt_server'
    description = 'Timestamped Promo YT server'
    group = 'TimestampedPromo'
    required = True
    default_value = 'hahn'


class TimestampedPromoYTPathDictParameter(parameters.DictRepeater, parameters.SandboxStringParameter):
    # Mapping "<supertag>;<request-type>" -> YT table with timestamped promo.
    name = 'timestamped_promo_yt_path'
    description = 'Timestamped Promo YT table dict per supertag;request-type'
    group = 'TimestampedPromo'
    required = True
    default_value = {'videohub;prod': '//home/videoindex/recommender/timestamped_promo_all'}


class DumpPromoYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for promo dumps.
    name = 'dump_promo_yt_server'
    description = 'Dump Promo YT server'
    group = 'DumpPromo'
    required = True
    default_value = 'hahn'


class DumpPromoDirParameter(parameters.SandboxStringParameter):
    # YT directory for promo dumps.
    name = 'dump_promo_dir'
    description = 'Dump Promo Dir'
    group = 'DumpPromo'
    required = True
    default_value = '//home/videoindex/recommender/promo_dumps'


class KpSelectionsYtServerParameter(parameters.SandboxStringParameter):
    # YT cluster used as the proxy url by _add_kp_selections_watch_now.
    # NOTE(review): the context key is the generic 'yt_server' - check it does
    # not clash with parameters declared elsewhere.
    name = 'yt_server'
    description = 'yt server'
    group = 'KpSelections'
    required = True
    default_value = 'arnold'


class KpSelectionsWatchNowCarouselTableParameter(parameters.SandboxStringParameter):
    # YT table read by _add_kp_selections_watch_now (rows provide 'categ_id' and 'url').
    name = 'watch_now_carousel_table'
    description = 'Path to yt table containing docs for watch now carousel'
    group = 'KpSelections'
    required = True
    default_value = '//home/videorecom/projects/kp_selections/special_selections/markup'


class RankedPromoYTServerParameter(parameters.SandboxStringParameter):
    # YT cluster for ranked promo; _read_promo_docs uses it as the YT proxy url.
    name = 'ranked_promo_yt_server'
    description = 'Ranked Promo YT server'
    group = 'RankedPromo'
    required = True
    default_value = 'hahn'


class Promo_PersChan_Docs_YTPathParameter(parameters.SandboxStringParameter):
    # YT path of the Promo-Pers-Chan docs table.
    name = 'promo_pers_chan_docs_yt_path'
    description = 'Promo-Pers-Chan docs YT Path'
    group = 'RankedPromo'
    required = True
    default_value = '//home/ether_prod/promo/ranked_promo/promo_pers_chan_docs'


class MordaPromo_M_YTPathParameter(parameters.SandboxStringParameter):
    # YT path of the Morda Promo-M table.
    name = 'morda_promo_M_yt_path'
    description = 'Morda Promo-M YT Path'
    group = 'RankedPromo'
    required = True
    default_value = '//home/ether_prod/promo/ranked_promo/promo_M'


class MordaPromo_M_CategNameParameter(parameters.SandboxStringParameter):
    # Category name for Morda Promo-M.
    name = 'morda_promo_M_categ_name'
    description = 'Morda Promo-M Categ Name'
    group = 'RankedPromo'
    required = True
    default_value = 'morda_promo_M'


class MordaPromo_M_CategTermParameter(parameters.SandboxStringParameter):
    # Category term for Morda Promo-M.
    name = 'morda_promo_M_categ_term'
    description = 'Morda Promo-M Categ Term'
    group = 'RankedPromo'
    required = True
    default_value = 'hhcwdrremhjnqohh'


class MordaPromo_L_YTPathParameter(parameters.SandboxStringParameter):
    # YT path of the Morda Promo-L table.
    name = 'morda_promo_L_yt_path'
    description = 'Morda Promo-L YT Path'
    group = 'RankedPromo'
    required = True
    default_value = '//home/ether_prod/promo/ranked_promo/promo_L'


class PersChan_YTPathParameter(parameters.SandboxStringParameter):
    # YT path of the Pers Channel promo table.
    name = 'pers_channel_yt_path'
    description = 'Pers Channel YT Path'
    group = 'RankedPromo'
    required = True
    default_value = '//home/ether_prod/promo/ranked_promo/promo_pers_chan'


class MordaPromo_L_CategNameParameter(parameters.SandboxStringParameter):
    # Category name for Morda Promo-L.
    name = 'morda_promo_L_categ_name'
    description = 'Morda Promo-L Categ Name'
    group = 'RankedPromo'
    required = True
    default_value = 'morda_promo_L'


class MordaPromo_L_CategTermParameter(parameters.SandboxStringParameter):
    # Category term for Morda Promo-L.
    name = 'morda_promo_L_categ_term'
    description = 'Morda Promo-L Categ Term'
    group = 'RankedPromo'
    required = True
    default_value = 'hhrymyzhmbvmmethh'


class MordaPromo_XL_YTPathParameter(parameters.SandboxStringParameter):
    # YT path of the Morda Promo-XL table.
    name = 'morda_promo_XL_yt_path'
    description = 'Morda Promo-XL YT Path'
    group = 'RankedPromo'
    required = True
    default_value = '//home/ether_prod/promo/ranked_promo/promo_XL'


class MordaPromo_XL_CategNameParameter(parameters.SandboxStringParameter):
    # Category name for Morda Promo-XL.
    name = 'morda_promo_XL_categ_name'
    description = 'Morda Promo-XL Categ Name'
    group = 'RankedPromo'
    required = True
    default_value = 'morda_promo_XL'


class MordaPromo_XL_CategTermParameter(parameters.SandboxStringParameter):
    # Category term for Morda Promo-XL.
    name = 'morda_promo_XL_categ_term'
    description = 'Morda Promo-XL Categ Term'
    group = 'RankedPromo'
    required = True
    default_value = 'hhcnjgrbgxoetxkdhh'


class VideohubPromoCategNameParameter(parameters.SandboxStringParameter):
    # Category name for the videohub promo carousel.
    name = 'videohub_promo_categ_name'
    description = 'Videohub Promo Videohub Categ Name'
    group = 'RankedPromo'
    required = True
    default_value = 'videohub_promo_carousel'


class VideohubPromoCategTermParameter(parameters.SandboxStringParameter):
    # Category term for the videohub promo carousel.
    name = 'videohub_promo_categ_term'
    description = 'Videohub Promo Categ Term'
    group = 'RankedPromo'
    required = True
    default_value = 'hhmelmsbpmtbgmchh'


class AddSmartTvCategsParameter(parameters.SandboxBoolParameter):
    # Boolean switch: add SmartTV categories (its consumer is not visible here).
    name = 'add_smart_tv_categs'
    description = 'Add SmartTV categories'
    group = 'SmartTv'
    required = True
    default_value = False


class VideohubFastArcadiaUrlParameter(parameters.SandboxStringParameter):
    # Arcadia url of the videohub.fast data.
    name = 'videohub_fast_arcadia_url'
    description = 'arcadia url to videohub.fast'
    group = 'TrieParams'
    required = True
    default_value = 'arcadia:/arc/trunk/data/extsearch/video/quality/recommender/videohub.fast'


class AddKpCarouselParameter(parameters.SandboxBoolParameter):
    # Boolean switch: add Kinopoisk carousels (its consumer is not visible here).
    name = 'add_kp_carousel'
    description = 'Add Kinopoisk carousels'
    group = 'Kinopoisk'
    required = True
    default_value = False


class KpCarouselSourceUrlParameter(parameters.SandboxStringParameter):
    # Source url of the Kinopoisk carousel.
    name = 'kp_carousel_source_url'
    description = 'Kinopoisk carousel source url'
    group = 'Kinopoisk'
    required = True
    default_value = 'https://api.ott.kinopoisk.ru/v10/selections/5e3bfb22a77eac00232c2d5b?limit=100&offset=0&selectionWindowId=ya_external&serviceId=42'


class VideoReleaseVideohubTrie(auto_deploy.AutoNannyDeployTask, mediaban.VideoBaseReleaseBanTask):
    """
        Builds videohub trie for Yandex.Ether service
    """

    type = "VIDEO_RELEASE_VIDEOHUB_TRIE"

    environment = (
        sandboxsdk.environments.PipEnvironment('yandex-yt'),
        sandboxsdk.environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
    )

    execution_space = 3072

    input_parameters = (
        TrieKeyEntrySize,
        TrieValueEntrySize,
        SpecialEventsYTServerParameter,
        SpecialEventsFutureYTPathParameter,
        SpecialEventsFutureTestYTPathParameter,
        SpecialEventsFutureSportYTPathParameter,
        SpecialEventsFutureCyberSportYTPathParameter,
        SpecialEventsFutureMusicYTPathParameter,
        SpecialEventsFutureStayhomeYTPathParameter,
        SpecialEventsFutureAnnouncedEventYTPathParameter,
        SpecialEventsPastYTPathParameter,
        SpecialEventsPastSportYTPathParameter,
        SpecialEventsPastCyberSportYTPathParameter,
        SpecialEventsPastMusicYTPathParameter,
        SpecialEventsFutureDocsLimitParameter,
        SpecialEventsPastDocsLimitParameter,
        SpecialEventsFutureTitleParameter,
        SpecialEventsPastTitleParameter,
        SpecialEventsFutureCategNameParameter,
        SpecialEventsFutureTestCategNameParameter,
        SpecialEventsFutureSportCategNameParameter,
        SpecialEventsFutureCyberSportCategNameParameter,
        SpecialEventsFutureMusicCategNameParameter,
        SpecialEventsFutureStayhomeCategNameParameter,
        SpecialEventsFutureAnnouncedEventCategNameParameter,
        SpecialEventsPastCategNameParameter,
        SpecialEventsPastSportCategNameParameter,
        SpecialEventsPastCyberSportCategNameParameter,
        SpecialEventsPastMusicCategNameParameter,
        SpecialEventsFutureCategIsBloggerParameter,
        SpecialEventsPastCategIsBloggerParameter,
        SpecialEventsFutureCategTermParameter,
        SpecialEventsFutureTestCategTermParameter,
        SpecialEventsFutureSportCategTermParameter,
        SpecialEventsFutureCyberSportCategTermParameter,
        SpecialEventsFutureMusicCategTermParameter,
        SpecialEventsFutureStayhomeCategTermParameter,
        SpecialEventsFutureAnnouncedEventCategTermParameter,
        SpecialEventsPastCategTermParameter,
        SpecialEventsPastSportCategTermParameter,
        SpecialEventsPastCyberSportCategTermParameter,
        SpecialEventsPastMusicCategTermParameter,
        SpecialEventsThumbnailParameter,
        CustomUrlsYTServerParameter,
        CustomUrlsYTPathsParameter,
        OntoIdAssocYTServerParameter,
        OntoIdAssocYTPathParameter,
        OntoIdAssocMinDocsParameter,
        OntoIdAssocMaxDocsParameter,
        OntoIdAssocHavePosters,
        TimestampedPromoYTServerParameter,
        TimestampedPromoYTPathDictParameter,
        DumpPromoYTServerParameter,
        DumpPromoDirParameter,
        RankedPromoYTServerParameter,
        KpSelectionsWatchNowCarouselTableParameter,
        KpSelectionsYtServerParameter,
        Promo_PersChan_Docs_YTPathParameter,
        MordaPromo_M_YTPathParameter,
        MordaPromo_M_CategNameParameter,
        MordaPromo_M_CategTermParameter,
        MordaPromo_L_YTPathParameter,
        MordaPromo_L_CategNameParameter,
        MordaPromo_L_CategTermParameter,
        MordaPromo_XL_YTPathParameter,
        MordaPromo_XL_CategNameParameter,
        MordaPromo_XL_CategTermParameter,
        PersChan_YTPathParameter,
        VideohubPromoCategNameParameter,
        VideohubPromoCategTermParameter,
        AddSmartTvCategsParameter,
        VideohubFastArcadiaUrlParameter,
        AddKpCarouselParameter,
        KpCarouselSourceUrlParameter,
    ) + mediaban.VideoBaseReleaseBanTask.input_parameters

    release_subject = "video/middle/videohub-trie-{timestamp}"
    release_comment = "videohub trie release"
    release_resources = (
        video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE,
        video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE_PLAIN,
    )

    def get_stable_services(self):
        return [self._SERVICE_ID]

    def get_nanny_oauth_token(self):
        return self.get_vault_data('VIDEO-ROBOT', 'robot-video-crawl-nanny-oauth')

    def _load_special_events(self, table):
        """
            Read special event docs from a YT table.

            :param table: YT table path; rows must provide 'episode_uuid',
                'start_time_ts', 'finish_time_ts' and 'expiration_date_ts'
            :return: list of dicts with 'url', 'tm_start', 'tm_end' and
                'tm_expire' (one per row); empty list if the table is missing
        """
        import yt.wrapper as yt

        yt.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')
        yt.config['proxy']['url'] = self.ctx[SpecialEventsYTServerParameter.name]

        if yt.exists(table):
            return [
                {
                    "url": "frontend.vh.yandex.ru/player/" + event["episode_uuid"],
                    "tm_start": event["start_time_ts"],
                    "tm_end": event["finish_time_ts"],
                    "tm_expire": event["expiration_date_ts"],
                }
                for event in yt.read_table(table)
            ]
        return []

    def _add_special_events(self, file_name, table, categ_name, categ_term, for_future):
        """
            Append one special events category to the plain trie file.

            Docs come from _load_special_events(table) and are cut to the
            future/past docs limit. The same JSON payload is appended twice,
            under '<categ_term>;prod' and '<categ_term>;test'. Nothing is
            written (only a warning is logged) when no docs were loaded.

            :param file_name: path of the plain trie file to append to
            :param table: YT table with the events
            :param categ_name: value stored as 'name' in the payload
            :param categ_term: key prefix of the appended lines
            :param for_future: selects the Future* parameters and the
                'check_not_finished' flag; otherwise the Past* parameters and
                the 'check_started' / 'reverse_sort' flags are used
        """
        ctx = self.ctx
        thumbnail = ctx[SpecialEventsThumbnailParameter.name].strip('{').strip('}')

        if for_future:
            title_param = SpecialEventsFutureTitleParameter
            blogger_param = SpecialEventsFutureCategIsBloggerParameter
            limit_param = SpecialEventsFutureDocsLimitParameter
        else:
            title_param = SpecialEventsPastTitleParameter
            blogger_param = SpecialEventsPastCategIsBloggerParameter
            limit_param = SpecialEventsPastDocsLimitParameter

        title = ctx[title_param.name].strip('"')
        categ_is_blogger = ctx[blogger_param.name]
        limit = ctx[limit_param.name]

        events = self._load_special_events(table)
        if not events:
            logging.warning("No special events loaded from table " + table)
            return

        data = {
            'docs': events[:limit],
            'name': categ_name,
            'is_blogger': categ_is_blogger,
        }
        if title:
            data["title"] = title
        if thumbnail:
            # braces were stripped above, so the stored value is wrapped exactly once
            data["thumbnail"] = '{' + thumbnail + '}'

        if for_future:
            data["check_not_finished"] = True
        else:
            data["check_started"] = True
            data["reverse_sort"] = True

        with open(file_name, "a") as ofile:
            payload = json.dumps(data)
            # every record starts with a newline: trie file lines are not newline-terminated.
            # 'test' is written as well so promo can be added for other tested supertags
            for request_type in ("prod", "test"):
                ofile.write("\n" + categ_term + ";" + request_type + "\t*\t0\t" + payload)

    def _add_custom_urls(self, file_name):
        """
            Append custom url entries from the configured YT tables.

            For every row of every table in CustomUrlsYTPathsParameter one
            '<key>;prod' line with the row's 'value' is appended to file_name.

            :param file_name: path of the plain trie file to append to
        """
        import yt.wrapper as yt

        yt.config['proxy']['url'] = self.ctx[CustomUrlsYTServerParameter.name]
        yt.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')

        tables = utils.get_or_default(self.ctx, CustomUrlsYTPathsParameter)
        key_suffix = ";prod\t*\t0\t"

        with open(file_name, "a") as ofile:
            emit = ofile.write
            for table in tables:
                for entry in yt.read_table(table):
                    # every record starts with a newline: trie file lines are not newline-terminated
                    emit("\n" + entry["key"] + key_suffix + entry["value"])

    def _add_kp_selections_watch_now(self, file_name):
        """
            Append "watch now" carousel categories to the plain trie file.

            Rows of the KpSelectionsWatchNowCarouselTableParameter table are
            grouped by 'categ_id'; for each group one '<categ_id>;prod' line
            is appended whose JSON payload lists the group's urls as 'doc_ids'.

            :param file_name: path of the plain trie file to append to
        """
        import yt.wrapper as yt

        yt.config['proxy']['url'] = self.ctx[KpSelectionsYtServerParameter.name]
        yt.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')

        table_path = utils.get_or_default(self.ctx, KpSelectionsWatchNowCarouselTableParameter)

        grouped = {}
        for record in yt.read_table(table_path):
            grouped.setdefault(record['categ_id'], []).append(record['url'])

        with open(file_name, "a") as ofile:
            for categ_id in grouped:
                payload = json.dumps({"doc_ids": grouped[categ_id]})
                ofile.write("\n" + categ_id + ";prod\t*\t0\t" + payload)

    def _add_ontoid_assoc(self, file_name):
        """
            Append ontoId association categories from YT to the plain trie file.

            Each table row must provide 'title', 'thumbnail', 'categ_name',
            'categ_term' and 'assoc_episodes' (an iterable of JSON strings with
            'uuid', 'group_id', 'drm' and 'ugc_type'). Rows with an empty title
            or thumbnail, or with fewer episodes than
            OntoIdAssocMinDocsParameter, are skipped; docs are cut to
            OntoIdAssocMaxDocsParameter. One '<categ_term>;prod' line is
            appended per remaining row. The file is not touched when the table
            is missing or no row qualifies.

            :param file_name: path of the plain trie file to append to
        """
        import yt.wrapper as yt

        cluster = self.ctx[OntoIdAssocYTServerParameter.name]
        table = self.ctx[OntoIdAssocYTPathParameter.name]

        yt.config['proxy']['url'] = cluster
        yt.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')
        if not yt.exists(table):
            # report through logging, like the other loaders of this task, not bare stdout
            logging.warning("No ontoId assoc table: [{}] '{}'".format(cluster, table))
            return

        out = []
        for row in yt.read_table(table):
            title = row['title']
            thumb = row['thumbnail']
            categ_name = row['categ_name']
            categ_term = row['categ_term']
            if not title or not thumb:
                # skip ontoids with no title or no thumbnail
                continue
            episodes = [json.loads(s) for s in row['assoc_episodes']]
            if len(episodes) < self.ctx[OntoIdAssocMinDocsParameter.name]:
                continue
            docs = [dict(url='frontend.vh.yandex.ru/player/' + ep['uuid'],
                         group_url='frontend.vh.yandex.ru/player/' + ep['group_id'],
                         drm=ep['drm'], ugc_type=ep['ugc_type'], tm_start=0, tm_end=0, tm_expire=0) for ep in episodes]
            data = {}
            data['docs'] = docs[:self.ctx[OntoIdAssocMaxDocsParameter.name]]
            data['name'] = categ_name
            data['title'] = title
            if self.ctx[OntoIdAssocHavePosters.name]:
                data['has_poster'] = True
            # TODO: get publisher avatar and background
            # thumb is embedded verbatim, so it presumably already holds a JSON value
            data['thumbnail'] = '{"thumbnail": ' + thumb + '}'
            out.append(categ_term + ";prod\t*\t0\t" + json.dumps(data))

        if not out:
            return

        with open(file_name, "a") as ofile:
            for line in out:
                # prepend a newline - in trie file lines do not end with a new line
                ofile.write("\n" + line)

    def _read_promo_docs(self, table, fields=('tm_start', 'tm_end', 'tm_expire')):
        """
            Read promo docs from a YT table on the ranked promo cluster.

            :param table: YT table path; rows must provide 'url'
            :param fields: names of optional row fields copied into each doc
                when present and not None (immutable default, so it cannot be
                shared-and-mutated across calls)
            :return: list of dicts with 'url' plus the copied fields; rows with
                an empty url are skipped. When a doc has 'tm_end' but no
                'tm_expire', 'tm_expire' is set to 'tm_end'. Empty list when
                the table does not exist.
        """
        import yt.wrapper as yt

        cluster = self.ctx[RankedPromoYTServerParameter.name]
        yt.config['proxy']['url'] = cluster
        yt.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')

        docs = []
        if not yt.exists(table):
            logging.info("No ranked promo table: [{}] '{}'".format(cluster, table))
            return docs

        for row in yt.read_table(table):
            url = row['url']
            if not url:
                continue
            doc = {'url': url}
            for k in fields:
                if k in row and row[k] is not None:
                    doc[k] = row[k]

            # a doc without an explicit expiration expires when it ends
            if 'tm_end' in doc and 'tm_expire' not in doc:
                doc['tm_expire'] = doc['tm_end']
            docs.append(doc)

        return docs

    def _add_ranked_promo_carousel(self, file_name, table, categ_name, categ_term, is_promo):
        docs = self._read_promo_docs(table)
        if not docs:
            return

        data = {"name": categ_name, "docs": docs}
        data["check_started"] = True
        data["check_not_finished"] = True
        # temporary fix for categ metadata to apply (requires docs non-empty)
        if is_promo:
            data["is_promo"] = True

        with open(file_name, "a") as ofile:
            # prepend a newline - in trie file lines do not end with a new line
            ofile.write("\n" + categ_term + ";test\t*\t0\t" + json.dumps(data))
            ofile.write("\n" + categ_term + ";prod\t*\t0\t" + json.dumps(data))

    def _add_feed_videohub_promo_carousel(self, file_name, table_xl, table_pers_chan, table_l, table_m, categ_name, categ_term):
        docs = self._read_promo_docs(table_xl)
        docs += self._read_promo_docs(table_pers_chan)

        base_docs = self._read_promo_docs(table_l)
        base_docs += self._read_promo_docs(table_m)

        data = {"name": categ_name, "base_docs": base_docs, "docs": docs}
        data["check_started"] = True
        data["check_not_finished"] = True
        data["is_promo"] = True

        with open(file_name, "a") as ofile:
            ofile.write("\n" + categ_term + ";test\t*\t0\t" + json.dumps(data))
            ofile.write("\n" + categ_term + ";prod\t*\t0\t" + json.dumps(data))

    def _add_timestamped_promo(self, file_name):
        """Run ``produce_timestamped_promo_trie`` against the trie file.

        The YT token is taken from the ``yt_token`` vault secret; the proxy and
        the path dict come from the task context (or the parameter defaults).
        """
        import yt.wrapper as yt_client
        yt_client.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')

        # file_name is passed twice on purpose; presumably as both the input
        # and the output path of the producer - TODO confirm
        produce_timestamped_promo_trie(
            utils.get_or_default(self.ctx, TimestampedPromoYTServerParameter),
            utils.get_or_default(self.ctx, TimestampedPromoYTPathDictParameter),
            file_name,
            file_name)

    def _read_pers_channel_promo_docs(self,  table):
        pers_chan_promo = self._read_promo_docs(table, fields=['tm_start', 'tm_end', 'promo_position'])
        pers_chan_promo.sort(key=lambda x: x.get("promo_position", float('inf')), reverse=True)

        ts = int(time.time())
        filtered = filter(lambda x: x.get("tm_end", 0) > ts and x.get("tm_start", 0) < ts, pers_chan_promo)
        return [doc["url"] for doc in filtered]

    def _update_mixed_promo_url_data(self, filename, table):
        pers_channel_promo_tags = {"videohub;prod",
                                   "videohub;blog",
                                   "videohub;no_bloggers",
                                   "videohub_author;prod",
                                   "videohub_author;no_bloggers",
                                   "videohub_author;with_bloggers"}
        pers_chan_promo_docs = self._read_pers_channel_promo_docs(table)
        patched_file_lines = []
        try:
            with open(filename, "r") as f:
                for line in f:
                    record = line.split('\t')
                    obj = json.loads(record[-1])

                    if record[0] not in pers_channel_promo_tags:
                        patched_file_lines.append(line.strip())
                        continue

                    if "mixed_promo_url_data" not in obj:
                        obj["mixed_promo_url_data"] = {}

                    if "docs" not in obj["mixed_promo_url_data"]:
                        obj["mixed_promo_url_data"]["docs"] = []

                    svn_docs = obj["mixed_promo_url_data"]["docs"]
                    svn_urls = set([doc["url"] for doc in svn_docs if "url" in doc])

                    result_docs = [{"url": url} for url in filter(lambda x: x not in svn_urls, pers_chan_promo_docs)]
                    result_docs.extend(svn_docs)
                    result_docs.reverse()

                    obj["mixed_promo_url_data"]["docs"] = result_docs
                    record[-1] = json.dumps(obj)
                    patched_file_lines.append("\t".join(record))
        except Exception:
            return

        with open(filename, "w") as f:
            f.write("\n".join(patched_file_lines))

    def _dump_promo(self, file_name):
        """Hand the given file to ``dump_promo_to_yt``.

        The YT token is taken from the ``yt_token`` vault secret; the proxy and
        the dump directory come from the task context (or the parameter defaults).
        """
        import yt.wrapper as yt_client
        yt_client.config['token'] = self.get_vault_data('VIDEODEV', 'yt_token')

        dump_promo_to_yt(
            utils.get_or_default(self.ctx, DumpPromoYTServerParameter),
            utils.get_or_default(self.ctx, DumpPromoDirParameter),
            file_name)

    def _add_smart_tv_categories(self, file_name):
        """Append the smart TV carousels exported from Arcadia to the trie file.

        Does nothing unless ``AddSmartTvCategsParameter`` is set in the context.
        The carousel description is exported next to the plain trie file (with
        a ``.smarttv`` suffix), parsed as JSON, and every entry of its
        ``carousels`` list becomes one appended record.
        """
        if not self.ctx[AddSmartTvCategsParameter.name]:
            return

        plain_trie_path = self.abs_path(video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE_PLAIN.basename)
        export_path = plain_trie_path + ".smarttv"
        Arcadia.export("arcadia:/arc/trunk/data/extsearch/video/quality/recommender/smarttv.fast", export_path)

        with open(export_path, 'r') as exported:
            smarttv = json.load(exported)

        with open(file_name, "a") as ofile:
            for carousel in smarttv["carousels"]:
                # mandatory keys first, then the optional ones with their defaults
                name = carousel["carousel_id"]
                term = carousel["carousel_term"]
                title = carousel.get("title", "")
                exp_name = carousel.get("exp", "prod")
                region = carousel.get("region", 0)
                domain = carousel.get("domain", "*")

                payload = {
                    "name": name,
                    "title": title,
                    "docs": [{"url": "frontend.vh.yandex.ru/player/" + item["uuid"]} for item in carousel["set"]],
                    "is_local": carousel.get("is_local", True),
                    "has_poster": carousel.get("has_poster", True),
                }

                # records are newline-prefixed rather than newline-terminated
                ofile.write("\n{};{}\t{}\t{}\t{}".format(term, exp_name, domain, region, json.dumps(payload)))

    def _add_kp_categories(self, file_name):
        """Append the ``kp_new_and_popular`` carousel fetched over HTTP to the trie file.

        Does nothing unless ``AddKpCarouselParameter`` is set in the context.
        The carousel JSON is requested from the url stored under
        ``KpCarouselSourceUrlParameter`` (up to 5 retries with backoff on
        429/5xx responses, 30 second timeout). One record is appended, built
        from the response's ``data`` list and ``title``.

        The step is best-effort: any failure is logged as a warning and the
        file is left without the carousel.

        :param file_name: path of the plain trie file to append to.
        """
        if not self.ctx[AddKpCarouselParameter.name]:
            return

        try:
            url = self.ctx[KpCarouselSourceUrlParameter.name]
            retries = Retry(total=5, backoff_factor=1.0, status_forcelist=[429, 500, 502, 503, 504])
            # the context manager closes the session and releases its pooled connections
            with requests.Session() as session:
                session.mount('https://', HTTPAdapter(max_retries=retries))
                session.mount('http://', HTTPAdapter(max_retries=retries))
                session.headers.update({
                    'Accept': '*/*',
                    'X-Forwarded-For': '2a00:65e0:6:1007:2031:77a3:378e:fcb',
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 YaBrowser/19.12.0.769 Yowser/2.5 Safari/537.3',
                    })
                content = session.get(url, timeout=30).json()

            carousel_name = "kp_new_and_popular"
            carousel_term = "hhzuuhkotdwbqzxhh"
            exp_name = "prod"
            region = 0
            domain = "*"

            docs = [{"url": "frontend.vh.yandex.ru/player/" + doc["filmId"]} for doc in content["data"]]

            carousel_data = {}
            carousel_data["name"] = carousel_name
            carousel_data["title"] = content["title"]
            carousel_data["order"] = "personal"
            carousel_data["has_poster"] = True
            carousel_data["docs"] = docs

            # build the full record first so the file is only opened when there is something to write
            record = "\n{};{}\t{}\t{}\t{}".format(carousel_term, exp_name, domain, region, json.dumps(carousel_data))
            with open(file_name, "a") as ofile:
                ofile.write(record)

        except Exception as e:
            # deliberate best-effort: the carousel is optional, so log and carry on
            logging.warning("Failed to add KP carousel, skipping it: %s", e, exc_info=True)

    def _build_ban(self):
        """Assemble the plain videohub trie data and build the binary trie from it.

        Steps, in order: export the base data from Arcadia; patch it and append
        every promo/carousel source to the exported file; format it into the
        plain trie file and update the plain trie resource. When
        ``_update_resource`` reports no changes, stop and return 0. Otherwise
        dump promo, run the indexer and viewer tools, register the trie
        resource and return the size of the built trie file in bytes.
        """
        def ctx_value(param):
            # look up the context value stored under the parameter's name
            return self.ctx[param.name]

        trie_data_path = self.abs_path(video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE_PLAIN.basename)
        svn_data_path = trie_data_path + ".svn"
        Arcadia.export(ctx_value(VideohubFastArcadiaUrlParameter), svn_data_path)

        self._update_mixed_promo_url_data(svn_data_path, ctx_value(Promo_PersChan_Docs_YTPathParameter))
        self._add_timestamped_promo(svn_data_path)

        # (YT path, categ name, categ term, flag) per special events carousel, in processing order;
        # the flag is forwarded as-is to _add_special_events: True for Future* tables, False for Past* ones
        special_events = (
            # general specevents page
            (SpecialEventsFutureYTPathParameter,
             SpecialEventsFutureCategNameParameter,
             SpecialEventsFutureCategTermParameter,
             True),
            (SpecialEventsPastYTPathParameter,
             SpecialEventsPastCategNameParameter,
             SpecialEventsPastCategTermParameter,
             False),
            # test specevents
            (SpecialEventsFutureTestYTPathParameter,
             SpecialEventsFutureTestCategNameParameter,
             SpecialEventsFutureTestCategTermParameter,
             True),
            # sport events
            (SpecialEventsFutureSportYTPathParameter,
             SpecialEventsFutureSportCategNameParameter,
             SpecialEventsFutureSportCategTermParameter,
             True),
            (SpecialEventsPastSportYTPathParameter,
             SpecialEventsPastSportCategNameParameter,
             SpecialEventsPastSportCategTermParameter,
             False),
            # cyber sport events
            (SpecialEventsFutureCyberSportYTPathParameter,
             SpecialEventsFutureCyberSportCategNameParameter,
             SpecialEventsFutureCyberSportCategTermParameter,
             True),
            (SpecialEventsPastCyberSportYTPathParameter,
             SpecialEventsPastCyberSportCategNameParameter,
             SpecialEventsPastCyberSportCategTermParameter,
             False),
            # music events
            (SpecialEventsFutureMusicYTPathParameter,
             SpecialEventsFutureMusicCategNameParameter,
             SpecialEventsFutureMusicCategTermParameter,
             True),
            (SpecialEventsPastMusicYTPathParameter,
             SpecialEventsPastMusicCategNameParameter,
             SpecialEventsPastMusicCategTermParameter,
             False),
            # stayhome events
            (SpecialEventsFutureStayhomeYTPathParameter,
             SpecialEventsFutureStayhomeCategNameParameter,
             SpecialEventsFutureStayhomeCategTermParameter,
             True),
            # announced events
            (SpecialEventsFutureAnnouncedEventYTPathParameter,
             SpecialEventsFutureAnnouncedEventCategNameParameter,
             SpecialEventsFutureAnnouncedEventCategTermParameter,
             True),
        )
        for path_param, name_param, term_param, flag in special_events:
            self._add_special_events(svn_data_path,
                                     ctx_value(path_param),
                                     ctx_value(name_param),
                                     ctx_value(term_param),
                                     flag)

        self._add_custom_urls(svn_data_path)

        self._add_kp_selections_watch_now(svn_data_path)

        self._add_ontoid_assoc(svn_data_path)

        # morda promo carousels in M, L, XL order; none of them is marked as promo
        ranked_promo = (
            (MordaPromo_M_YTPathParameter, MordaPromo_M_CategNameParameter, MordaPromo_M_CategTermParameter),
            (MordaPromo_L_YTPathParameter, MordaPromo_L_CategNameParameter, MordaPromo_L_CategTermParameter),
            (MordaPromo_XL_YTPathParameter, MordaPromo_XL_CategNameParameter, MordaPromo_XL_CategTermParameter),
        )
        for path_param, name_param, term_param in ranked_promo:
            self._add_ranked_promo_carousel(svn_data_path,
                                            ctx_value(path_param),
                                            ctx_value(name_param),
                                            ctx_value(term_param),
                                            False)

        self._add_feed_videohub_promo_carousel(svn_data_path,
                                               ctx_value(MordaPromo_XL_YTPathParameter),
                                               ctx_value(PersChan_YTPathParameter),
                                               ctx_value(MordaPromo_L_YTPathParameter),
                                               ctx_value(MordaPromo_M_YTPathParameter),
                                               ctx_value(VideohubPromoCategNameParameter),
                                               ctx_value(VideohubPromoCategTermParameter))

        self._add_smart_tv_categories(svn_data_path)

        self._add_kp_categories(svn_data_path)

        self._format_file(svn_data_path, trie_data_path, format_func=format_videohub_data)

        has_changes = self._update_resource(video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE_PLAIN, path=trie_data_path)
        if not has_changes:
            return 0

        self._dump_promo(svn_data_path)

        trie_path = self.abs_path(video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE.basename)
        entry_sizes = ",".join([str(ctx_value(TrieKeyEntrySize)), str(ctx_value(TrieValueEntrySize))])
        indexer_cmd = [
            self._tool(resource_types.VIDEO_QUERYDATAINDEXER_EXECUTABLE),
            "-S", "videohub",
            "-N", "exacturl,tld,ipregregion",
            "-J", "-j",
            "-f", entry_sizes,
            "-i", trie_data_path,
            "-o", trie_path,
        ]
        process.run_process(indexer_cmd, outputs_to_one_file=False, log_prefix="indexer")

        viewer_cmd = [
            self._tool(resource_types.VIDEO_QUERYDATAVIEWER_EXECUTABLE),
            "-H",
            "-i", trie_path,
        ]
        process.run_process(viewer_cmd, outputs_to_one_file=False, log_prefix="viewer")

        self.create_resource(self.descr, trie_path, video_resource_types.VIDEO_MIDDLESEARCH_VIDEOHUB_TRIE)

        return os.path.getsize(trie_path)


# Module-level alias of the task class; presumably the hook the Sandbox
# task loader uses to find the task in this module - TODO confirm.
__Task__ = VideoReleaseVideohubTrie
