import logging
from xml.etree import cElementTree

from sandbox import sdk2
from sandbox.projects import resource_types
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import errors


# Page ids that do not make an advert count as "advertised": an advert is only
# taken into account when it has at least one pageId outside this set.
EXCLUDED_PAGE_IDS = {'testsuite'}


def get_advertised_org_ids_from_ads_xml(xml_file):
    '''Collect ids of advertised organizations from an adverts XML document.

    The document is parsed incrementally. Every element whose tag ends with
    "AdvertData" is inspected; its company ids are collected when both hold:

    * it has no ``ad:prioritized`` child, or that child's text is ``'true'``;
    * at least one of its ``ad:pageId`` children has a text that is not
      listed in EXCLUDED_PAGE_IDS.

    :param xml_file: source handed to ``iterparse`` as is (a file name or a
        file object with the XML content).
    :return: set with the text of every matching ``ad:Companies/ad:id``
        element. Values are not converted or validated here, so the set
        contains strings (and ``None`` for an empty ``ad:id`` element).
    '''
    xml_ns = {'ad': 'http://maps.yandex.ru/advert/1.x'}
    org_ids = set()
    for _, elem in cElementTree.iterparse(xml_file):
        # Only "end" events are delivered by default, so the element is
        # complete here; the suffix match ignores the namespace prefix.
        if not elem.tag.endswith('AdvertData'):
            continue

        prioritized_flag = elem.find('./ad:prioritized', xml_ns)
        # A missing flag is treated the same way as an explicit 'true'.
        is_prioritized = prioritized_flag is None or prioritized_flag.text == 'true'
        if is_prioritized and any(
            page.text not in EXCLUDED_PAGE_IDS
            for page in elem.findall('./ad:pageId', xml_ns)
        ):
            org_ids.update(
                company.text
                for company in elem.findall('./ad:Companies/ad:id', xml_ns)
            )

        # Drop the processed advert's children and text so that memory use
        # does not grow with the size of the document.
        elem.clear()
    return org_ids


class UploadAdvertisedOrgsIdsToYt(sdk2.Task):
    '''Finds latest TYCOON_ADVERTS resource and uploads advertised orgs ids to YT'''

    class Requirements(sdk2.Task.Requirements):
        # Pip packages requested for the task; presumably they provide the
        # `yt` package that on_execute imports.
        environments = [
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
        ]

    def on_execute(self):
        '''Run the whole pipeline: find resource, parse ads.xml, write ids to YT.

        Steps, each surrounded by log messages:

        1. Pick the TYCOON_ADVERTS resource released as stable that comes
           first when ordered by descending resource id.
        2. Parse ``ads.xml`` from that resource into a set of org ids.
        3. Write one ``{'id': <int>}`` row per org id to the YT table.

        Raises ``errors.SandboxTaskFailureError`` when no resource is found
        or when the resource has no ``ads.xml`` file.
        '''
        # Imported inside the method rather than at module level; presumably
        # the package only becomes available through Requirements.environments.
        from yt import wrapper as yt

        logging.info('UploadAdvertisedOrgsIdsToYt launched')

        logging.info('Searching for latest stable TYCOON_ADVERTS resource...')
        stable_adverts = sdk2.Resource.find(
            resource_types.TYCOON_ADVERTS,
            attrs={'released': 'stable'},
        )
        latest_adverts = stable_adverts.order(-sdk2.Resource.id).first()
        if latest_adverts is None:
            raise errors.SandboxTaskFailureError('No TYCOON_ADVERTS resource found')
        logging.info('Found TYCOON_ADVERTS resource with id={}'.format(latest_adverts.id))

        logging.info('Parsing advertised org ids from TYCOON_ADVERTS XML...')
        resource_dir = sdk2.ResourceData(latest_adverts).path
        ads_xml_path = resource_dir.joinpath('ads.xml')
        if not ads_xml_path.exists():
            raise errors.SandboxTaskFailureError('TYCOON_ADVERTS XML file "{}" not found'.format(ads_xml_path))
        advertised_ids = get_advertised_org_ids_from_ads_xml(str(ads_xml_path))
        logging.info('TYCOON_ADVERTS XML parsing complete, got {} org ids'.format(len(advertised_ids)))

        logging.info('Creating YT client...')
        yt_token = sdk2.Vault.data('robot-maps-search', 'YT_TOKEN')
        yt_client = yt.YtClient(proxy='hahn', token=yt_token)
        logging.info('YT client created')

        logging.info('Uploading advertised org ids to YT...')
        destination = '//home/maps-search/toponym_adverts/toponym_adverts_data/advertised_orgs_ids'
        # The parser returns id texts; each one is converted to int here, so
        # a non-numeric or empty id makes the task fail before anything is
        # written.
        rows = []
        for org_id in advertised_ids:
            rows.append({'id': int(org_id)})
        yt_client.write_table(destination, rows)
        logging.info('Advertised org ids uploading complete')

        logging.info('UploadAdvertisedOrgsIdsToYt finished')