import logging

from intranet.femida.src.yt.management.commands.base import SubmissionsYtJobCommand

from intranet.femida.src.candidates.models import CryptaVector


logger = logging.getLogger(__name__)


class Command(SubmissionsYtJobCommand):
    """YT job command that stores Crypta vectors for submissions.

    Rows read from the input table are matched to submissions by their
    ``yandexuid``; a ``CryptaVector`` is created for every matched
    submission that does not have one yet.
    """

    help = 'Get vectors by submission_id or from_dt'
    command_code = 'CRYPTA_VECTORS'

    # Note: the vectors we receive are binary, so the encoding is explicitly
    # set to None to keep YT from trying to turn a binary string into
    # a unicode one.
    yson_encoding = None

    def get_input_table(self):
        """Return the path of the YT table the vectors are read from."""
        return '//home/crypta/production/profiles/export/vectors/yandexuid_vectors_monthly'

    def process_rows(self, rows):
        """Create ``CryptaVector`` records for the given table rows.

        :param rows: iterable of row mappings whose keys are bytes
            (they are decoded as UTF-8 here) and which provide the
            ``yandexuid`` and ``vector`` columns.
        """
        # Submissions from the current selection that already have a vector;
        # they are skipped below.
        stored_vectors = CryptaVector.objects.filter(
            submission_id__in=self.submissions.values('id'),
        )
        known_submission_ids = set(stored_vectors.values_list('submission', flat=True))

        new_vectors = []
        for raw_row in rows:
            # With yson_encoding = None the column names arrive as bytes.
            decoded = {key.decode('utf-8'): value for key, value in raw_row.items()}
            yandexuid = decoded['yandexuid']
            matched_submissions = self.uid_submissions_map.get(yandexuid, [])
            new_vectors.extend(
                CryptaVector(
                    submission=matched,
                    yandexuid=str(yandexuid),
                    vector=decoded['vector'],
                )
                for matched in matched_submissions
                if matched.id not in known_submission_ids
            )

        logger.info('[%s] %d vectors to create', self.command_code, len(new_vectors))
        CryptaVector.objects.bulk_create(new_vectors)
