import os
import sys
from library.python.nyt import client as nyt_client
from datetime import datetime, timedelta
from datacloud.config.yt import PRODUCTION_ROOT
from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.time.patterns import FMT_DATE
from datacloud.features.dssm import dssm_main


def step1(yt_client, config):
    """Run ``step_1_run_prepare_title_url`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_1_run_prepare_title_url()


def step2(yt_client, config):
    """Run ``step_2_dssm_step`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_2_dssm_step()


def step3(yt_client, config):
    """Run ``step_3_run_dot_product`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_3_run_dot_product()


def step4(yt_client, config):
    """Run ``step_4_join_cids`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_4_join_cids()


def step5(yt_client, config):
    """Run ``step_5_run_get_max_features`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_5_run_get_max_features()


def step6(yt_client, config):
    """Run ``step_6_run_join_scores`` on a fresh ``DSSMProcessor``."""
    processor = dssm_main.DSSMProcessor(config, yt_client)
    processor.step_6_run_join_scores()


def build_weekly_dssm(yt_client, config):
    """Run pipeline steps 1 through 5, in order, for a single config.

    Step 6 is intentionally not part of this sequence; callers that need it
    invoke ``step6`` themselves.
    """
    ordered_steps = (step1, step2, step3, step4, step5)
    for run_step in ordered_steps:
        run_step(yt_client, config)


def compute_175(yt_client, date_str, n_steps=25, days_in_step=7):
    """Run steps 1-5 once per date step, then run step 6 once.

    Starting at ``date_str`` and moving back ``days_in_step`` days per
    iteration, a separate ``DSSMConfig`` is built for each step date and
    passed to ``build_weekly_dssm``. After all steps, a "combine" config
    anchored at ``date_str`` is passed to ``step6``.

    Args:
        yt_client: YT client handed to every pipeline step.
        date_str: Most recent date, formatted according to ``FMT_DATE``.
        n_steps: Number of date steps to build (defaults cover 25 * 7 = 175
            days).
        days_in_step: Stride between step dates in days; also passed to
            ``DSSMConfig`` as ``days_to_take``.

    Raises:
        ValueError: If ``date_str`` does not match ``FMT_DATE``.
    """
    start_date = datetime.strptime(date_str, FMT_DATE)
    weekly_dir = '//projects/scoring/tmp/re9ulusv/fckn_dssm/weekly'
    ready_table_path = '//projects/scoring/tmp/re9ulusv/fckn_dssm/ready'
    yuid2cid_path = '//projects/scoring/tmp/re9ulusv/fckn_dssm/yuid_to_cid'

    for step in range(n_steps):
        step_date = start_date - timedelta(days=days_in_step * step)
        step_date_str = step_date.strftime(FMT_DATE)
        print('=== START {} / {}; Date is: {}'.format(
            step + 1, n_steps, step_date_str))

        # The config must be anchored at the shifted date. Passing the
        # unchanged ``date_str`` here would configure every iteration for the
        # same date, differing only in the tag.
        config = dssm_main.DSSMConfig(
            step_date_str, PRODUCTION_ROOT, days_to_take=days_in_step,
            retro_tag='-175-SAMPLE-{}-{}-{}'.format(
                step, n_steps, step_date_str),
            yuid2cid_path=yuid2cid_path,
            weekly_dir=weekly_dir,
            ready_table_path=ready_table_path
        )

        if not yt_client.exists(config.tmp_dir):
            yt_utils.create_folders([config.tmp_dir], yt_client)

        build_weekly_dssm(yt_client, config)

    # Final pass: step 6 runs once, with a config anchored at the base date.
    combine_config = dssm_main.DSSMConfig(
        date_str, PRODUCTION_ROOT, days_to_take=days_in_step,
        retro_tag='-175-SAMPLE-COMBINE',
        yuid2cid_path=yuid2cid_path,
        weekly_dir=weekly_dir,
        ready_table_path=ready_table_path
    )
    step6(yt_client, combine_config)


def main():
    """Run steps 1-5 for a single hard-coded date with ``days_to_take=1``.

    Raises:
        KeyError: If the ``YT_TOKEN`` environment variable is not set.
    """
    nyt_client.initialize(sys.argv)
    # Expose the YT token under the YQL_TOKEN name as well.
    os.environ['YQL_TOKEN'] = os.environ['YT_TOKEN']

    client = yt_utils.get_yt_client()
    target_date = '2019-07-18'

    # Multi-step build over the same date, currently disabled:
    # compute_175(client, target_date)

    sample_config = dssm_main.DSSMConfig(
        target_date,
        PRODUCTION_ROOT,
        days_to_take=1,
        retro_tag='-175-SAMPLE',
        yuid2cid_path='//projects/scoring/tmp/re9ulusv/fckn_dssm/yuid_to_cid',
    )
    build_weekly_dssm(client, sample_config)


def compute_retro():
    """Build retro DSSM vectors under the XPROD-1730 reproduce root.

    Initializes nyt, creates a YT client, wraps the run parameters in a
    ``dssm_main.DSSMTables`` object and passes it to
    ``dssm_main.build_retro_vectors``.
    """
    nyt_client.initialize(sys.argv)

    # Only the options below are passed. yql_client, model_url and
    # use_cloud_nodes (earlier taken from settings) are not supplied here.
    # An earlier variant built a DSSMConfig with retro_tag='-DSSM-REPRODUCE'
    # and called build_weekly_dssm instead; it is no longer used.
    tables_kwargs = {
        'date_str': 'retro',
        'base_root': '//projects/scoring/vtb/XPROD-1730-reproduce',
        'is_retro': True,
        'yt_client': yt_utils.get_yt_client(),
        'retro_tag': '-REPRODUCE',
    }
    retro_tables = dssm_main.DSSMTables(**tables_kwargs)
    dssm_main.build_retro_vectors(retro_tables)


if __name__ == '__main__':
    # Script entry point: only the retro vector build runs; the main()
    # flow is currently disabled.
    # main()
    compute_retro()
