import click
import logging
from customer_service.ml.chats.zeliboba.lib.preprocessing import generate_query_doc_data, save_data


# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
# Configure the root logger so INFO-and-above records from this script are emitted.
logging.basicConfig(level=logging.INFO)


@click.command()
@click.option('--input_data', required=True, help='Path to input dataset in YT')
@click.option('--output_data', required=True, help='Path to output dataset in YT')
@click.option('--product_tag', required=True, help='Name of the product')
@click.option('--knowledges_url',
              default='https://support-private.s3.mds.yandex.net/knowledge-base/snapshots/snapshot_2022-04-25.gz',
              help='URL to knowledges snapshot')
@click.option('--cluster', default='hahn', help='Cluster in YT')
@click.option('--yt_alias', default='customer-service-yt', help='Alias for YT in yandex vault')
def main(input_data: str,
         output_data: str,
         product_tag: str,
         knowledges_url: str,
         cluster: str,
         yt_alias: str) -> None:
    """Generate query-doc data from the input dataset and save it to the output path."""
    # The docstring above is kept to one line on purpose: click shows it as the
    # command description in `--help`; per-option docs live in the `help=` strings.
    #
    # NOTE(review): `yt_alias` is accepted on the command line but is not passed
    # to either helper below. It is kept so existing invocations keep working;
    # confirm whether YT credentials are resolved elsewhere or should be wired in.

    # Pass values as logging arguments so formatting is deferred to the logging
    # framework instead of being done eagerly with an f-string.
    logger.info('Preprocessing data from %s to %s', input_data, output_data)

    # Step 1: build the dataset from the knowledge snapshot and the input table.
    query_doc_data = generate_query_doc_data(knowledges_url, product_tag, input_data, cluster)

    # Step 2: persist the result to the requested output path on the same cluster.
    save_data(query_doc_data, output_data, cluster)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    # Called with no arguments: click parses the command-line options itself.
    main()
