# -*- coding: utf-8 -*-

import logging

import sandbox.sandboxsdk.task as sdk_task
import sandbox.sandboxsdk.copy as sdk_copy
import sandbox.sandboxsdk.process as sdk_process
from sandbox.sandboxsdk.channel import channel

from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers

# Remote (svn+ssh) location of the ratedurls.py script.
# GetImagesRatedUrls.on_execute copies it to ./ratedurls.py and runs it.
SVN_RATEDURLS = 'svn+ssh://arcadia.yandex.ru/arc/trunk/arcadia/extsearch/images/robot/scripts/cm/mrindex/ratedurls.py'


def readFile(filename, lines):
    """Add every non-blank line of a text file to ``lines``.

    Each line is stripped of leading/trailing whitespace first; lines that
    are empty after stripping are skipped.

    :param filename: path of the text file to read
    :param lines: collection exposing ``add`` (a ``set`` in this module);
        it is mutated in place
    :return: None
    """
    with open(filename, 'r') as source:
        for raw_line in source:
            stripped = raw_line.strip()
            if not stripped:
                # Blank or whitespace-only line: nothing to record.
                continue
            lines.add(stripped)


def mergeFiles(appendFrom, appendTo):
    """Merge the lines of ``appendFrom`` into ``appendTo``.

    Both files are read through ``readFile`` (stripped, non-blank lines),
    duplicates are collapsed, and ``appendTo`` is rewritten with the union
    in sorted order, one line per entry, each terminated by a newline.

    :param appendFrom: path of the file whose lines are merged in
    :param appendTo: path of the file that is read and then overwritten
    :return: None
    """
    merged = set()
    # Both inputs are fully read before the destination is truncated.
    for path in (appendFrom, appendTo):
        readFile(path, merged)

    with open(appendTo, 'w') as destination:
        destination.writelines(entry + '\n' for entry in sorted(merged))


class GetImagesRatedUrls(sdk_task.SandboxTask):
    """Sandbox task producing the images rated-URLs file and its hashes.

    The task creates two resources: ``ratedurls.txt``
    (``IMAGES_RATED_URLS``) and ``ratedurlshashes.bin``
    (``IMAGES_RATED_URL_HASHES``).
    """

    type = 'GET_IMAGES_RATED_URLS'

    input_parameters = []

    def on_execute(self):
        """Run the whole pipeline.

        Steps, in order:

        1. Copy the script at ``SVN_RATEDURLS`` to ``ratedurls.py``.
        2. Create the rated-URLs resource and run the script so that it
           writes its output to the resource path.
        3. Merge the last released ``IMAGES_ADULT_RATED_URLS`` resource
           into that file and mark the resource ready.
        4. Run the last released ``IMAGES_MR_INDEX_RATEDCONVERT`` resource
           on the merged file to fill the hashes resource, then mark it
           ready.
        """
        # Step 1: bring the helper script into the working directory.
        fetch_script = sdk_copy.RemoteCopy(
            SVN_RATEDURLS,
            'ratedurls.py',
            log_dir=self.log_path(),
        )
        fetch_script()

        # Step 2: register the output resource, then let the script fill it.
        logging.info('Get resource object')
        urls_stub = self.create_resource(
            "images  rated urls",
            'ratedurls.txt',
            resource_types.IMAGES_RATED_URLS,
        )
        urls_resource = channel.sandbox.get_resource(urls_stub.id)

        # Tokens are read from the vault and handed to the script as arguments.
        robot_token = self.get_vault_data('IMAGES-ROBOT', 'IMAGES_RATEDURLS_OAUTH')
        yt_token = self.get_vault_data('IMAGES-ROBOT', 'yt_token')

        collect_cmd = [
            '/skynet/python/bin/python', './ratedurls.py',
            '--oauth', robot_token,
            '--yt-oauth', yt_token,
            '--output-urls', urls_resource.path,
            '--add-similar-urls',
        ]
        sdk_process.run_process(collect_cmd, log_prefix='ratedurls')

        # Step 3: fold the adult rated URLs into the freshly produced file.
        logging.info('Merging adult rated urls with relevance rated urls')
        adult_release = apihelpers.get_last_released_resource(
            resource_types.IMAGES_ADULT_RATED_URLS
        )
        # sync_resource's result is used as a file path by mergeFiles.
        adult_urls_path = self.sync_resource(adult_release.id)
        mergeFiles(adult_urls_path, urls_resource.path)

        channel.task.mark_resource_ready(urls_resource)

        # Step 4: convert the merged URL list into hashes.
        converter_release = apihelpers.get_last_released_resource(
            resource_types.IMAGES_MR_INDEX_RATEDCONVERT
        )
        # The synced converter resource is invoked directly as the executable.
        converter_path = self.sync_resource(converter_release.id)

        hashes_stub = self.create_resource(
            "images rated url hashes",
            'ratedurlshashes.bin',
            resource_types.IMAGES_RATED_URL_HASHES,
        )
        hashes_resource = channel.sandbox.get_resource(hashes_stub.id)

        logging.info('Convert URLs to hashes')
        convert_cmd = [
            converter_path,
            '--input-file', urls_resource.path,
            '--output-file', hashes_resource.path,
        ]
        sdk_process.run_process(convert_cmd, log_prefix='ratedurls')

        channel.task.mark_resource_ready(hashes_resource)


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = GetImagesRatedUrls
