#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Crypta user profile model training runner

Usage:
  main.py get_labeled_socdem [--log-file-path=<path>]
  main.py download_sessions [--log-file-path=<path>] [--source-path=<path>]
  main.py prepare_vocabulary [--log-file-path=<path>] [--source-path=<path>]
  main.py upload_vectors [--log-file-path=<path>] [--source-path=<path>]
  main.py update_yandexuid2vec [--log-file-path=<path>]
  main.py train_and_upload_custom_models --custom-model-name=<name> [--log-file-path=<path>]
  main.py train_email_gender_tf_model [--prepare-train-sample] [--log-file-path=<path>]
  main.py (-h | --help)

Options:
  main                                  Main export profile task
  train_and_upload_custom_models        Do everything to train and upload custom classification models
  train_email_gender_tf_model           Do everything to train and upload email-gender tf model
  download_sessions                     Download sessions for site2vec retrain
  prepare_vocabulary                    Prepare vocabulary for site2vec retrain
  upload_vectors                        Upload site2vec vectors to Hahn
  get_labeled_socdem                    Run get_labeled_socdem before main task for performance optimization
  update_yandexuid2vec                  Update yandexuid2vec after site2vec retraining
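  --log-file-path=<path>                Directory used for local logs, task logs and local storage
  --source-path=<path>                  Local directory with site2vec sessions, vocabulary and vectors files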
  --custom-model-name=<name>            Name of custom classification pipeline
  --prepare-train-sample                Prepare new train sample for email-gender model training
"""

import datetime
import logging
import os

import luigi
from docopt import docopt

from crypta.lib.python.juggler.juggler_helpers import report_event_to_juggler
from crypta.lib.python.logging import logging_helpers
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.profile.lib import (
    date_helpers,
    luigi_helpers,
)
from crypta.profile.utils.config import config
from crypta.profile.utils.yt_utils import get_yt_client
from crypta.profile.utils.luigi_utils import YtDailyRewritableTarget

from crypta.profile.runners.segments.lib.coded_segments.custom_classification.bed_model_training import (
    BedModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.cash_settlement_services_model_training import (
    CashSettlementServicesModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.cosmetics_buyers_model_training import (
    CosmeticsModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.credit_card_model_training import (
    CreditCardModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.custom_income_model_training import (
    CustomIncomeModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.dating_model_training import (
    DatingModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.debit_cards_activation_model_training import (
    DebitCardsActivationModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.ebook_model_training import (
    EbookModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.electronic_model_training import (
    ElectronicModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.english_online_education_model_training import (
    EnglishOnlineEducationModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.food_delivery_model_training import (
    FoodDeliveryModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.furniture_buyers_model_training import (
    FurnitureModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.game_login_model_training import (
    GameLoginModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.generic_car_credit_approval_model_training import (
    GenericCarCreditApprovalModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.generic_credit_approval_model_training import (
    GenericCreditApprovalModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.generic_insurance_model_training import (
    GenericInsuranceModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.generic_microfinance_approval_model_training import (
    GenericMicrofinanceApprovalModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.generic_scoring_model_training import (
    GenericScoringModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.grocery_delivery_model_training import (
    GroceryDeliveryModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.healthy_food_model_training import (
    HealthyFoodModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.home_depot_model_training import (
    HomeDepotModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.hotel_model_training import (
    HotelModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.investment_model_training import (
    InvestmentModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.legal_entities_model_training import (
    LegalEntitiesModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.legal_office_visits_model_training import (
    LegalOfficeVisitsModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.market_model_training import (
    MarketModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.market_rfm_model_training import (
    MarketRfmModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.marriage_model_training import (
    MarriageModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.medical_clinic_model_training import (
    MedicalClinicModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.mortgage_approval_model_training import (
    MortgageApprovalModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.online_cinema_model_training import (
    OnlineCinemaModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.online_payment_model_training import (
    OnlinePaymentModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.online_sales_register_model_training import (
    OnlineSalesRegisterModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.online_shopping_model_training import (
    OnlineShoppingModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.pharmacy_model_training import (
    PharmacyModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.realty_visit_model_training import (
    RealtyVisitModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.tv_viewers_model_training import (
    TvViewersModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_classification.windows_installation_model_training import (
    WindowsInstallationModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.custom_regression.heavy_suite_model_training import (
    HeavySuiteModelTrainHelper,
)
from crypta.profile.runners.segments.lib.coded_segments.ml_tools.utils import (
    get_custom_segment_model_train_sample,
    nirvana_train_custom_segment_model,
    upload_models_and_features_mapping,
)

from crypta.profile.runners.training.lib.train_email_gender_tf_model import EmailGenderTrainHelper

from crypta.profile.manual_start_scripts.vectors_learning import (
    get_session_tables,
    download,
    train,
    upload,
)
from crypta.profile.tasks.features.calculate_id_vectors import GetMonthlyYandexuidVectors


# Module-level logger; the root logger handlers are configured in main().
logger = logging.getLogger(__name__)

# Minimal number of days between two refreshes of the custom features mapping
# and between two trainings of the same custom model.
custom_models_retrain_frequency_days = 7


def run_custom_model_training(train_helper):
    """Prepare the train sample and start Nirvana training for one custom model.

    Everything runs inside a ``NirvanaTransaction`` opened on ``train_helper.yt``.
    The ``model_training_date`` attribute of the train sample table (read with
    ``'1970-01-01'`` passed as the default) is compared with the date lying
    ``custom_models_retrain_frequency_days`` days before ``train_helper.date``.
    When the previous training is more recent than that date, the function
    returns without doing anything.

    :param train_helper: model train helper providing ``yt``, ``date`` and
        ``model_params.train_sample_path``.
    """
    with NirvanaTransaction(train_helper.yt) as transaction:
        previous_training_date = train_helper.yt.get_attribute(
            train_helper.model_params.train_sample_path,
            'model_training_date',
            '1970-01-01',
        )
        logger.info('last training date: {}'.format(previous_training_date))

        # Retrain only when the previous training happened at least
        # custom_models_retrain_frequency_days days before the helper date.
        oldest_fresh_date = date_helpers.get_date_from_past(
            current_date=train_helper.date,
            days=custom_models_retrain_frequency_days,
        )
        trained_recently = oldest_fresh_date < previous_training_date
        if trained_recently:
            return

        get_custom_segment_model_train_sample(train_helper, transaction)

        sample_ready_message = '{} training sample is ready'.format(train_helper.model_params.train_sample_path)
        logger.info(sample_ready_message)

        nirvana_train_custom_segment_model(train_helper)


def _custom_model_train_helpers():
    """Return the registry of custom models trainable via ``--custom-model-name``.

    Maps a model name to a tuple of:
      * the train helper class;
      * the name of the ``config`` attribute passed as ``storage_sample_path``;
      * the name of the ``config`` attribute passed as ``train_sample_path``.

    Config attributes are stored by name and resolved with ``getattr`` only for
    the requested model, so the paths of other models are never read.
    """
    return {
        'market': (
            MarketModelTrainHelper,
            'MARKET_TRAIN_SAMPLE_BY_PUID',
            'MARKET_TRAIN_SAMPLE_BY_YUID',
        ),
        'marriage': (
            MarriageModelTrainHelper,
            'MARRIAGE_TRAIN_SAMPLE_BY_PUID',
            'MARRIAGE_TRAIN_SAMPLE_BY_YUID',
        ),
        'tv_viewers': (
            TvViewersModelTrainHelper,
            'TV_VIEWERS_TRAIN_SAMPLE_BY_PUID',
            'TV_VIEWERS_TRAIN_SAMPLE_BY_YUID',
        ),
        'generic_scoring': (
            GenericScoringModelTrainHelper,
            'GENERIC_SCORING_TRAIN_SAMPLE_BY_PUID',
            'GENERIC_SCORING_TRAIN_SAMPLE_BY_YUID',
        ),
        'generic_credit_approval': (
            GenericCreditApprovalModelTrainHelper,
            'GENERIC_CREDIT_APPROVAL_TRAIN_SAMPLE_BY_PUID',
            'GENERIC_CREDIT_APPROVAL_TRAIN_SAMPLE_BY_YUID',
        ),
        'generic_insurance': (
            GenericInsuranceModelTrainHelper,
            'GENERIC_INSURANCE_TRAIN_SAMPLE_BY_PUID',
            'GENERIC_INSURANCE_TRAIN_SAMPLE_BY_YUID',
        ),
        'generic_microfinance_approval': (
            GenericMicrofinanceApprovalModelTrainHelper,
            'GENERIC_MICROFINANCE_APPROVAL_TRAIN_SAMPLE_BY_PUID',
            'GENERIC_MICROFINANCE_APPROVAL_TRAIN_SAMPLE_BY_YUID',
        ),
        'game_login': (
            GameLoginModelTrainHelper,
            'GAME_LOGIN_TRAIN_SAMPLE_BY_PUID',
            'GAME_LOGIN_TRAIN_SAMPLE_BY_YUID',
        ),
        'english_online_education': (
            EnglishOnlineEducationModelTrainHelper,
            'ENGLISH_ONLINE_EDUCATION_TRAIN_SAMPLE_BY_PUID',
            'ENGLISH_ONLINE_EDUCATION_TRAIN_SAMPLE_BY_YUID',
        ),
        'custom_income': (
            CustomIncomeModelTrainHelper,
            'CUSTOM_INCOME_TRAIN_SAMPLE_BY_PUID',
            'CUSTOM_INCOME_TRAIN_SAMPLE_BY_YUID',
        ),
        'legal_entities': (
            LegalEntitiesModelTrainHelper,
            'LEGAL_ENTITIES_TRAIN_SAMPLE_BY_PUID',
            'LEGAL_ENTITIES_TRAIN_SAMPLE_BY_YUID',
        ),
        'medical_clinics': (
            MedicalClinicModelTrainHelper,
            'MEDICAL_CLINIC_TRAIN_SAMPLE_BY_PUID',
            'MEDICAL_CLINIC_TRAIN_SAMPLE_BY_YUID',
        ),
        'furniture': (
            FurnitureModelTrainHelper,
            'FURNITURE_TRAIN_SAMPLE_BY_PUID',
            'FURNITURE_TRAIN_SAMPLE_BY_YUID',
        ),
        'heavy_suite': (
            HeavySuiteModelTrainHelper,
            'HEAVY_SUITE_TRAIN_SAMPLE_BY_PUID',
            'HEAVY_SUITE_TRAIN_SAMPLE_BY_YUID',
        ),
        'hotel': (
            HotelModelTrainHelper,
            'HOTEL_TRAIN_SAMPLE_BY_PUID',
            'HOTEL_TRAIN_SAMPLE_BY_YUID',
        ),
        'investment': (
            InvestmentModelTrainHelper,
            'INVESTMENT_TRAIN_SAMPLE_BY_PUID',
            'INVESTMENT_TRAIN_SAMPLE_BY_YUID',
        ),
        'online_shopping': (
            OnlineShoppingModelTrainHelper,
            'ONLINE_SHOPPING_TRAIN_SAMPLE_BY_PUID',
            'ONLINE_SHOPPING_TRAIN_SAMPLE_BY_YUID',
        ),
        'ebook': (
            EbookModelTrainHelper,
            'EBOOK_TRAIN_SAMPLE_BY_PUID',
            'EBOOK_TRAIN_SAMPLE_BY_YUID',
        ),
        'electronic': (
            ElectronicModelTrainHelper,
            'ELECTRONIC_TRAIN_SAMPLE_BY_PUID',
            'ELECTRONIC_TRAIN_SAMPLE_BY_YUID',
        ),
        'cosmetics': (
            CosmeticsModelTrainHelper,
            'COSMETICS_TRAIN_SAMPLE_BY_PUID',
            'COSMETICS_TRAIN_SAMPLE_BY_YUID',
        ),
        'bed': (
            BedModelTrainHelper,
            'BED_TRAIN_SAMPLE_BY_PUID',
            'BED_TRAIN_SAMPLE_BY_YUID',
        ),
        # The only model whose storage sample is not a *_TRAIN_SAMPLE_BY_PUID path.
        'market_rfm': (
            MarketRfmModelTrainHelper,
            'MARKET_RFM_STORAGE',
            'MARKET_RFM_TRAIN_SAMPLE_BY_YUID',
        ),
        'food_delivery': (
            FoodDeliveryModelTrainHelper,
            'FOOD_DELIVERY_TRAIN_SAMPLE_BY_PUID',
            'FOOD_DELIVERY_TRAIN_SAMPLE_BY_YUID',
        ),
        'online_sales_register': (
            OnlineSalesRegisterModelTrainHelper,
            'ONLINE_SALES_REGISTER_TRAIN_SAMPLE_BY_PUID',
            'ONLINE_SALES_REGISTER_TRAIN_SAMPLE_BY_YUID',
        ),
        'realty_visit': (
            RealtyVisitModelTrainHelper,
            'REALTY_VISIT_TRAIN_SAMPLE_BY_PUID',
            'REALTY_VISIT_TRAIN_SAMPLE_BY_YUID',
        ),
        'healthy_food': (
            HealthyFoodModelTrainHelper,
            'HEALTHY_FOOD_TRAIN_SAMPLE_BY_PUID',
            'HEALTHY_FOOD_TRAIN_SAMPLE_BY_YUID',
        ),
        'cash_settlement_services': (
            CashSettlementServicesModelTrainHelper,
            'CASH_SETTLEMENT_SERVICES_TRAIN_SAMPLE_BY_PUID',
            'CASH_SETTLEMENT_SERVICES_TRAIN_SAMPLE_BY_YUID',
        ),
        'pharmacy': (
            PharmacyModelTrainHelper,
            'PHARMACY_TRAIN_SAMPLE_BY_PUID',
            'PHARMACY_TRAIN_SAMPLE_BY_YUID',
        ),
        'online_cinema': (
            OnlineCinemaModelTrainHelper,
            'ONLINE_CINEMA_TRAIN_SAMPLE_BY_PUID',
            'ONLINE_CINEMA_TRAIN_SAMPLE_BY_YUID',
        ),
        'legal_office_visits': (
            LegalOfficeVisitsModelTrainHelper,
            'LEGAL_OFFICE_VISITS_TRAIN_SAMPLE_BY_PUID',
            'LEGAL_OFFICE_VISITS_TRAIN_SAMPLE_BY_YUID',
        ),
        'online_payment': (
            OnlinePaymentModelTrainHelper,
            'ONLINE_PAYMENT_TRAIN_SAMPLE_BY_PUID',
            'ONLINE_PAYMENT_TRAIN_SAMPLE_BY_YUID',
        ),
        'home_depot': (
            HomeDepotModelTrainHelper,
            'HOME_DEPOT_TRAIN_SAMPLE_BY_PUID',
            'HOME_DEPOT_TRAIN_SAMPLE_BY_YUID',
        ),
        'grocery_delivery': (
            GroceryDeliveryModelTrainHelper,
            'GROCERY_DELIVERY_TRAIN_SAMPLE_BY_PUID',
            'GROCERY_DELIVERY_TRAIN_SAMPLE_BY_YUID',
        ),
        'windows_installation': (
            WindowsInstallationModelTrainHelper,
            'WINDOWS_INSTALLATION_TRAIN_SAMPLE_BY_PUID',
            'WINDOWS_INSTALLATION_TRAIN_SAMPLE_BY_YUID',
        ),
        'generic_car_credit_approval': (
            GenericCarCreditApprovalModelTrainHelper,
            'GENERIC_CAR_CREDIT_APPROVAL_TRAIN_SAMPLE_BY_PUID',
            'GENERIC_CAR_CREDIT_APPROVAL_TRAIN_SAMPLE_BY_YUID',
        ),
        'mortgage_approval': (
            MortgageApprovalModelTrainHelper,
            'MORTGAGE_APPROVAL_TRAIN_SAMPLE_BY_PUID',
            'MORTGAGE_APPROVAL_TRAIN_SAMPLE_BY_YUID',
        ),
        'debit_cards_activation': (
            DebitCardsActivationModelTrainHelper,
            'DEBIT_CARDS_ACTIVATION_TRAIN_SAMPLE_BY_PUID',
            'DEBIT_CARDS_ACTIVATION_TRAIN_SAMPLE_BY_YUID',
        ),
        'dating': (
            DatingModelTrainHelper,
            'DATING_TRAIN_SAMPLE_BY_PUID',
            'DATING_TRAIN_SAMPLE_BY_YUID',
        ),
        'credit_card': (
            CreditCardModelTrainHelper,
            'CREDIT_CARD_TRAIN_SAMPLE_BY_PUID',
            'CREDIT_CARD_TRAIN_SAMPLE_BY_YUID',
        ),
    }


def _train_and_upload_custom_models(arguments, yt, today):
    """Handle the ``train_and_upload_custom_models`` command.

    ``--custom-model-name`` selects one of:
      * ``features_mapping`` - copy the categorical features matching directory
        to the custom ML matching directory when the copy is at least
        ``custom_models_retrain_frequency_days`` days old;
      * ``upload_models_and_features_mapping`` - call the upload helper;
      * a model name from ``_custom_model_train_helpers()`` - run its training.

    :param arguments: parsed docopt arguments.
    :param yt: YT client.
    :param today: current date as a ``YYYY-MM-DD`` string.
    :raises ValueError: if the model name is not known.
    """
    custom_model_name = arguments['--custom-model-name']
    logger.info('custom model name: {}'.format(custom_model_name))

    custom_model_date = yt.get_attribute(
        config.CATEGORICAL_FEATURES_CUSTOM_ML_MATCHING_DIR,
        'generate_date',
        '1970-01-01',
    )
    logger.info('custom model date: {}'.format(custom_model_date))

    train_helpers = _custom_model_train_helpers()

    if custom_model_name == 'features_mapping':
        output_matching_dir = config.CATEGORICAL_FEATURES_CUSTOM_ML_MATCHING_DIR
        refresh_threshold = date_helpers.get_date_from_past(
            current_date=today,
            days=custom_models_retrain_frequency_days,
        )

        # custom_model_date is the 'generate_date' of output_matching_dir read above.
        if refresh_threshold >= custom_model_date:
            yt.copy(
                config.CATEGORICAL_FEATURES_MATCHING_DIR,
                output_matching_dir,
                recursive=True,
                force=True,
            )

            yt.set_attribute(
                output_matching_dir,
                'generate_date',
                today,
            )

    elif custom_model_name == 'upload_models_and_features_mapping':
        upload_models_and_features_mapping(logger, yt)

    elif custom_model_name in train_helpers:
        helper_class, storage_sample_attr, train_sample_attr = train_helpers[custom_model_name]
        run_custom_model_training(
            train_helper=helper_class(
                yt=yt,
                logger=logger,
                date=custom_model_date,
                storage_sample_path=getattr(config, storage_sample_attr),
                train_sample_path=getattr(config, train_sample_attr),
            ),
        )

    else:
        # A mistyped name used to be silently ignored and reported as success.
        known_names = sorted(
            ['features_mapping', 'upload_models_and_features_mapping'] + list(train_helpers)
        )
        raise ValueError(
            'Unknown custom model name: {!r}. Known names: {}'.format(
                custom_model_name,
                ', '.join(known_names),
            )
        )


def _update_yandexuid2vec(yt, today):
    """Recalculate yandexuid2vec tables and force custom models retraining.

    Sets ``generate_date`` of the daily and monthly yandexuid2vec tables to
    yesterday, runs ``GetMonthlyYandexuidVectors`` through luigi until both
    tables are reported as existing for ``today``, then moves
    ``model_training_date`` of every ``*by_yuid`` table found in the map nodes
    of the segment external data folder one month back.

    :param yt: YT client.
    :param today: current date as a ``YYYY-MM-DD`` string.
    """
    logger.info('Updating yandexuid2vec')

    yesterday = date_helpers.get_yesterday(today)
    yt.set_attribute(
        config.DAILY_YANDEXUID2VEC,
        'generate_date',
        yesterday,
    )
    yt.set_attribute(
        config.MONTHLY_YANDEXUID2VEC,
        'generate_date',
        yesterday,
    )

    # NOTE(review): there is no retry limit here, so a task that never produces
    # both targets keeps this loop running - confirm this is intended.
    while not (YtDailyRewritableTarget(table=config.DAILY_YANDEXUID2VEC, date=today).exists() and
               YtDailyRewritableTarget(table=config.MONTHLY_YANDEXUID2VEC, date=today).exists()):
        luigi.run(
            [
                '--scheduler-url', config.LUIGI_SCHEDULER_URL,
                '--workers', '1',
                '--date', today,
            ],
            main_task_cls=GetMonthlyYandexuidVectors,
        )

    # force train custom models next run
    month_ago = date_helpers.get_date_from_past(current_date=today, months=1)
    for external_data in yt.list(config.SEGMENT_EXTERNAL_DATA_FOLDER, absolute=True):
        if yt.get_attribute(external_data, 'type') != 'map_node':
            continue
        for table in yt.list(external_data, absolute=True):
            if table.endswith('by_yuid'):
                yt.set_attribute(
                    table,
                    attribute='model_training_date',
                    value=month_ago,
                )


def _retrain_site2vec(arguments):
    """Run the site2vec retrain step selected on the command line.

    ``download_sessions`` downloads sessions, ``prepare_vocabulary`` runs
    training, any other command reaching this function uploads the vectors.
    Local files live in the ``--source-path`` directory.

    :param arguments: parsed docopt arguments.
    """
    logger.info('Retraining site2vec')

    source_path = arguments['--source-path']
    if arguments['download_sessions']:
        sessions_yt_directory = config.BAR_SESSIONS_DIRECTORY
    else:
        sessions_yt_directory = config.BAR_HOST_APP_SESSIONS_DIRECTORY

    data_sources_for_site2vec_learning = {
        'bar': {
            'sessions_yt_directory': sessions_yt_directory,
            'sessions_file': os.path.join(source_path, 'bar_sessions'),
            'vocabulary_file': os.path.join(source_path, 'bar_vocabulary'),
            'start_vectors_file': os.path.join(source_path, 'bar_start_vectors.bin'),
            'vectors_file': os.path.join(source_path, 'bar_output_vectors.bin'),
            'vectors_yt_path': config.BAR_VECTORS_TABLE,
            'site_counter_table': config.BAR_SITE_COUNTER_TABLE,
            'last_n_days': 7,
            'min_site_size': 20,
            'use_start_vectors': 'True',
            'vector_size': 512,
        },
        'with_apps': 'True',
        'replace_old_vectors': 'True',
    }

    data_sources_for_site2vec_learning.update(get_session_tables(data_sources_for_site2vec_learning))

    if arguments['download_sessions']:
        logger.info('Start download sessions')
        download(data_sources_for_site2vec_learning)
    elif arguments['prepare_vocabulary']:
        logger.info('Start prepare vocabulary')
        train(data_sources_for_site2vec_learning, not_nirvana_launch=False)
    else:
        logger.info('Start upload vectors')
        upload(data_sources_for_site2vec_learning)


def _run_site2vec_block(arguments, yt, today):
    """Handle the site2vec commands and report the outcome to Juggler.

    Covers ``download_sessions``, ``prepare_vocabulary``, ``upload_vectors``
    and ``update_yandexuid2vec``. Any exception is logged and turned into a
    Juggler ``WARN`` event followed by exit code 1. An "empty run" (vectors
    generated less than a month ago) reports ``OK`` and also exits with code 1.

    :param arguments: parsed docopt arguments.
    :param yt: YT client.
    :param today: current date as a ``YYYY-MM-DD`` string.
    :raises SystemExit: with code 1 on failure or on an empty run.
    """
    logger.info('Executing retrain site2vec block')
    is_site2vec_retrain_ok = True  # For Juggler monitoring
    empty_run = False  # Run every day for Juggler but retrain once a month

    try:
        site2vec_app2vec_table = config.SITE2VEC_APP2VEC_VECTORS_TABLE

        if arguments['update_yandexuid2vec']:
            _update_yandexuid2vec(yt, today)

        # Retrain at most once a month: skip while the vectors table was
        # generated less than a month ago.
        elif date_helpers.get_date_from_past(current_date=today, months=1) < yt.get_attribute(
                site2vec_app2vec_table, 'generate_date'):
            logger.info('Empty run')
            empty_run = True
        else:
            _retrain_site2vec(arguments)
    except Exception as e:
        # Exceptions have no `.message` on Python 3 and it is empty for
        # multi-argument exceptions on Python 2, so log the exception itself
        # together with the traceback.
        logger.warning('Regular site2vec retrain failed because of %s', e, exc_info=True)
        is_site2vec_retrain_ok = False

    service_name = 'site2vec_retrain'
    if is_site2vec_retrain_ok:
        report_event_to_juggler(
            status='OK',
            service=service_name,
            host=config.CRYPTA_ML_JUGGLER_HOST,
            logger=logger,
        )
        logger.info('Sent OK to juggler')
    else:
        message = 'Regular site2vec retrain failed'
        report_event_to_juggler(
            status='WARN',
            service=service_name,
            host=config.CRYPTA_ML_JUGGLER_HOST,
            description=message,
            logger=logger,
        )
        raise SystemExit(1)

    if empty_run:
        raise SystemExit(1)  # Do not run word2vec


def main():
    """Parse the command line and run the requested training task.

    Common setup: configure the root logger, create the YT client, set up
    luigi external tasks retry and, when ``--log-file-path`` is given, point
    the local logs, task logs and local storage directories of ``config`` to it.

    NOTE: ``get_labeled_socdem`` is accepted by the usage pattern but has no
    handler here; for that command only the common setup is performed.

    :raises ValueError: for an unknown ``--custom-model-name``.
    :raises SystemExit: with code 1 when a site2vec command fails or is an
        empty run.
    """
    arguments = docopt(__doc__)

    # configure root logger
    logging_helpers.configure_stderr_logger(logging.getLogger(), level=logging.INFO)

    logger.info('Start training')

    yt = get_yt_client()
    today = str(datetime.date.today())

    luigi_helpers.setup_external_tasks_retry()

    log_file_path = arguments['--log-file-path']
    if log_file_path:
        config.LOCAL_LOGS_DIRECTORY = config.TASKS_LOGS_DIRECTORY = log_file_path
        config.LOCAL_STORAGE_DIRECTORY = log_file_path

    if arguments['train_and_upload_custom_models']:
        _train_and_upload_custom_models(arguments, yt, today)

    elif arguments['train_email_gender_tf_model']:
        prepare_sample = arguments['--prepare-train-sample']
        train_helper = EmailGenderTrainHelper()
        train_helper.nirvana_train_model(
            dataset_path=config.EMAIL_GENDER_TRAINING_SAMPLE_TABLE,
            prepare_sample=prepare_sample,
        )

    elif arguments['download_sessions'] or arguments['prepare_vocabulary'] or \
            arguments['upload_vectors'] or arguments['update_yandexuid2vec']:
        _run_site2vec_block(arguments, yt, today)


# Run the command-line entry point only when the file is executed as a script.
if __name__ == '__main__':
    main()
