# -*- coding: utf-8 -*-
import gzip
import logging
import os
import re
import sys
from optparse import OptionParser
from collections import namedtuple
from datetime import datetime
from subprocess import check_call

import requests
from unidecode import unidecode
from urllib import quote_plus, urlencode

import django
from django.conf import settings
from django.contrib.sitemaps import Sitemap
from django.utils.encoding import smart_str
from django.template import Template, Context

from avia_data.models import AviaDirectionNational, AviaSettlementNational, TopFlight
from common.models.geo import Settlement
from common.models.holidays import HolidayDirection
from common.models.schedule import Company, make_slug
from common.models.staticpages import StaticPage
from common.utils.geobase import geobase

from projects.avia.lib.s3sync import S3Client


# Initialise Django at import time.
django.setup()


# Module logger: a StreamHandler is attached and the level is fixed at DEBUG.
log = logging.getLogger(__name__)
log.addHandler(logging.StreamHandler())
log.setLevel(logging.DEBUG)


# Timestamp captured once at import; CommonSitemap uses it as ``lastmod``.
NOW = datetime.now()
# Root directory under MEDIA_ROOT where all sitemap files are written.
FILE_PREFIX = os.path.join(settings.MEDIA_ROOT, 'avia')
# Directory with the XML templates, taken from the environment.
# NOTE(review): os.getenv returns None when AVIA_TEMPLATE_DIR is unset, in
# which case the os.path.join calls below fail at import time.
TEMPLATE_DIR = os.getenv('AVIA_TEMPLATE_DIR')


# Django template used to render a single sitemap page (context: ``urlset``).
with open(os.path.join(TEMPLATE_DIR, 'xml', 'sitemap.xml'), 'rb') as inp:
    SITEMAP_TEMPLATE = Template(inp.read())


# Django template used to render a sitemap index (context: ``pages``).
with open(os.path.join(TEMPLATE_DIR, 'xml', 'sitemap_index.xml'), 'rb') as inp:
    SITEMAP_INDEX_TEMPLATE = Template(inp.read())


def get_main_domain(tld):
    """Return the scheme-qualified main host for the given TLD.

    ``com.tr`` and ``com`` have dedicated hosts; every other TLD maps to
    ``https://avia.yandex.<tld>``.
    """
    dedicated_hosts = (
        ('com.tr', 'https://bilet.yandex.com.tr'),
        ('com', 'https://flights.yandex.com'),
    )
    for special_tld, host in dedicated_hosts:
        if tld == special_tld:
            return host

    return 'https://avia.yandex.{}'.format(tld)


def make_sitemap(sitemap, tld, country_id):
    """Render one sitemap page to an XML byte string.

    ``sitemap`` is either a ready sitemap object or a callable (typically a
    sitemap class) that is invoked as ``sitemap(tld, country_id)`` to build
    one. The object's ``get_urls()`` result is passed to SITEMAP_TEMPLATE
    as ``urlset``.
    """
    source = sitemap(tld, country_id) if callable(sitemap) else sitemap
    urls = list(source.get_urls())
    rendered = SITEMAP_TEMPLATE.render(Context({'urlset': urls}))
    return smart_str(rendered)


def queryparams(**params):
    """Build a URL query string from keyword arguments.

    Parameters whose value is ``None`` are dropped. Returns ``''`` when
    nothing is left, otherwise ``'?'`` followed by the url-encoded pairs.
    Pairs are emitted in key order so the resulting URL is deterministic
    regardless of dict ordering.
    """
    # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
    pairs = sorted((k, v) for k, v in params.items() if v is not None)
    if not pairs:
        return ''
    return '?' + urlencode(pairs)


class CommonSitemap(Sitemap):
    """Base sitemap bound to one TLD / country.

    Stores the TLD, the country id, the main domain for the TLD and the
    national version looked up in ``settings.SITEMAP_NATIONAL_MAP``.
    """
    changefreq = "weekly"
    priority = 0.5
    lastmod = NOW

    def __init__(self, tld, country_id, *args, **kwargs):
        """Remember the TLD/country and derive domain and national version.

        Raises KeyError if ``tld`` is missing from
        ``settings.SITEMAP_NATIONAL_MAP``.
        """
        super(CommonSitemap, self).__init__(*args, **kwargs)
        self.country_id = country_id
        self.main_domain = get_main_domain(tld)
        self.tld = tld
        self.national_version = settings.SITEMAP_NATIONAL_MAP[tld]

    def get_urls(self, page=1, site=None, protocol=None):
        """Return the url-info dicts for one paginator page.

        ``site`` and ``protocol`` are accepted for signature compatibility
        and are not used: ``location`` is emitted exactly as the sitemap
        returns it.
        """
        urls = []
        for item in self.paginator.page(page).object_list:
            # ``_Sitemap__get`` is the name-mangled private ``__get`` helper
            # of the Django ``Sitemap`` base class.
            priority = self._Sitemap__get('priority', item, None)
            url_info = {
                'item': item,
                'location': self._Sitemap__get('location', item),
                'lastmod': self._Sitemap__get('lastmod', item, None),
                'changefreq': self._Sitemap__get('changefreq', item, None),
                # Only a missing priority becomes ''; a priority of 0 is a
                # valid value and must not be dropped.
                'priority': str(priority if priority is not None else ''),
            }
            urls.append(url_info)
        return urls


def paginate(iterable, size):
    """Yield consecutive lists of at most ``size`` items from ``iterable``.

    A page is emitted every time the running item count is a multiple of
    ``size``; a final shorter page is emitted if any items remain.
    """
    chunk = []
    seen = 0
    for element in iterable:
        seen += 1
        chunk.append(element)
        if seen % size == 0:
            yield chunk
            chunk = []
    if chunk:
        yield chunk


class FromUrlsListSitemap(CommonSitemap):
    """Sitemap over a precomputed list of URLs.

    Each item is already the final location, so ``location`` returns it
    unchanged.
    """

    def __init__(self, urls_list, *args, **kwargs):
        # Remaining arguments (tld, country_id, ...) go to CommonSitemap.
        self.urls_list = urls_list
        super(FromUrlsListSitemap, self).__init__(*args, **kwargs)

    def location(self, ready_url):
        """Return the item itself: it is already a complete URL."""
        return ready_url

    def items(self):
        """Return the URL list given to the constructor."""
        return self.urls_list


class PaginatedSitemap(CommonSitemap):
    """Sitemap that is rendered as a sequence of numbered pages."""

    def gen_urls_by_pages(self, page_size):
        """Yield ``(page_number, urls)`` with page numbers starting at 1.

        Items are split into chunks of ``page_size``; each item is mapped
        through ``self.location`` and falsy locations are dropped.
        """
        source = self.items
        if callable(source):
            source = source()

        page_number = 0
        for chunk in paginate(source, page_size):
            page_number += 1
            locations = [self.location(entry) for entry in chunk]
            yield page_number, [url for url in locations if url]

    def gen_pages(self, page_size):
        """Yield ``(page_number, xml)`` for every page of this sitemap."""
        for page_number, page_urls in self.gen_urls_by_pages(page_size):
            page_sitemap = FromUrlsListSitemap(page_urls, self.tld, self.country_id)
            yield page_number, make_sitemap(page_sitemap, self.tld, self.country_id)


# One sitemap entry candidate: the source object (``item``), the language
# used for titles (``lang``) and the value for the ``lang`` query parameter
# (``lang_for_url``; None means the parameter is omitted).
SitemapLocItem = namedtuple('SitemapLocItem', ['item', 'lang', 'lang_for_url'])


def packed_item_generator(items, national_version):
    """Expand every item into one SitemapLocItem per configured language.

    Languages come from ``settings.SITEMAP_NATIONAL_LANGS`` (no entry means
    no languages). ``lang_for_url`` is None for the national version's
    default language and the language code otherwise. Despite the name the
    result is a list, not a generator.

    Raises KeyError if ``national_version`` has no default language in
    ``settings.SITEMAP_NATIONAL_VERSION_DEFAULT_LANG``.
    """
    default_lang = settings.SITEMAP_NATIONAL_VERSION_DEFAULT_LANG[national_version]
    return [
        SitemapLocItem(
            item=entry,
            lang=lang,
            lang_for_url=None if lang == default_lang else lang,
        )
        for entry in items
        for lang in settings.SITEMAP_NATIONAL_LANGS.get(national_version, [])
    ]


class CitiesSitemap(PaginatedSitemap):
    """City pages (``/city/<iata>/...``) for settlements of the country."""

    def items(self):
        """Yield packed items for the country's settlements with an IATA code."""
        queryset = Settlement.objects.filter(
            country_id=self.country_id,
            iata__isnull=False,
        ).select_related('new_L_title')
        for packed in packed_item_generator(queryset, self.national_version):
            yield packed

    def location(self, loc):
        """Build the city page URL for one packed item."""
        city = loc.item
        title = city.L_title(lang=loc.lang).lower()
        # Transliterate to ASCII, drop apostrophes, then url-quote.
        slug = unidecode('-%s' % (title,)).replace("'", "")
        appendix = quote_plus(slug)

        return '{main_domain}/city/{iata}/{appendix}/{queryparams}'.format(
            main_domain=self.main_domain,
            iata=city.iata.lower(),
            appendix=appendix,
            queryparams=queryparams(lang=loc.lang_for_url),
        )


class DirectionSitemap(PaginatedSitemap):
    """Route pages (``/routes/<from>/<to>/...``) for national directions.

    Directions are ordered by a rank tuple: those touching the country's
    capital first, then those touching the country, then by the smaller
    ``majority_id`` of the two settlements.
    """

    def items(self):
        """Yield packed items for all directions of the national version."""
        directions = AviaDirectionNational.objects.filter(
            national_version=self.national_version
        ).select_related(
            'departure_settlement', 'arrival_settlement',
            'departure_settlement__new_L_title', 'arrival_settlement__new_L_title',
        )

        ranked = [(self._sort_index(d), d) for d in directions]
        # Sort on the rank only. Sorting the (rank, model) tuples directly
        # would compare model instances whenever two ranks tie, which gives
        # an arbitrary order. list.sort is stable, so ties keep query order.
        ranked.sort(key=lambda pair: pair[0])

        for i in packed_item_generator([d for _, d in ranked], self.national_version):
            yield i

    def _in_country_by_geobase(self, d):
        """Return True if either settlement has the country among its geobase parents.

        An HTTP 400 from geobase is logged and treated as "not in country";
        any other HTTP error is re-raised.
        """
        geo_ids = (
            d.departure_settlement._geo_id,
            d.arrival_settlement._geo_id,
        )
        try:
            # A list (not a generator) on purpose: both lookups are always
            # performed, as before.
            return any([
                self.country_id in geobase.parents(geo_id) if geo_id else False
                for geo_id in geo_ids
            ])
        except requests.HTTPError as e:
            if e.response.status_code != 400:
                # Bare raise keeps the original traceback.
                raise
            log.warning('Geobase error: %s', e.response.content)
            return False

    def _sort_index(self, d):
        """Return the rank tuple ``(not capital, not in_country, majority)``."""
        departure = d.departure_settlement
        arrival = d.arrival_settlement

        # The com version is special: RASPTICKETS-4587
        if self.national_version == 'com':
            in_country = self._in_country_by_geobase(d)
            country_capital = None
        else:
            in_country = any([
                departure.country_id == self.country_id,
                arrival.country_id == self.country_id,
            ])

            if self.national_version == 'tr':
                country_capital = any([
                    # In the schedule database Ankara is not marked as the
                    # country's capital :(
                    d.departure_settlement_id == 11503,
                    d.arrival_settlement_id == 11503,
                ])
            else:
                # majority_id == 1 is what this code treats as "capital".
                country_capital = any([
                    departure.country_id == self.country_id and departure.majority_id == 1,
                    arrival.country_id == self.country_id and arrival.majority_id == 1,
                ])

        majority = min([
            departure.majority_id,
            arrival.majority_id,
        ])

        return (not country_capital, not in_country, majority)

    def location(self, loc):
        """Build the route URL, or return None if either IATA code is missing."""
        item = loc.item

        if not item.departure_settlement.iata or not item.arrival_settlement.iata:
            return

        departure = item.departure_settlement.iata.lower()

        arrival = item.arrival_settlement.iata.lower()

        # RASPTICKETS-16284: when the URL language is 'en', titles are
        # requested with lang=None instead of the item's language.
        lang_for_appendix = None if loc.lang_for_url == 'en' else loc.lang

        # Transliterate to ASCII, drop apostrophes, then url-quote.
        appendix = quote_plus(unidecode(
            '%s-%s' % (
                item.departure_settlement.L_title(lang=lang_for_appendix).lower(),
                item.arrival_settlement.L_title(lang=lang_for_appendix).lower()
            )
        ).replace("'", ""))

        location_url = '{main_domain}/routes/{departure}/{arrival}/{appendix}/{queryparams}'.format(
            main_domain=self.main_domain,
            departure=departure,
            arrival=arrival,
            appendix=appendix,
            queryparams=queryparams(lang=loc.lang_for_url),
        )

        return location_url


class GeoSettlementSitemap(PaginatedSitemap):
    """Geo pages (``/geo/<code>/...``) ordered by descending popularity."""

    def items(self):
        """Yield packed items for settlements with ``arrival=False``."""
        queryset = AviaSettlementNational.objects.filter(
            national_version=self.national_version,
            arrival=False,
        )
        queryset = queryset.order_by('-popularity')
        queryset = queryset.select_related('settlement', 'settlement__new_L_title')

        for packed in packed_item_generator(queryset, self.national_version):
            yield packed

    def location(self, loc):
        """Build the geo page URL; the code is the IATA code or, failing that, the id."""
        settlement = loc.item.settlement
        if settlement.iata:
            code = settlement.iata.lower()
        else:
            code = settlement.id

        title = settlement.L_title(lang=loc.lang).lower()
        # Transliterate to ASCII, drop apostrophes, then url-quote.
        appendix = quote_plus(unidecode('-%s' % (title,)).replace("'", ""))

        return '{main_domain}/geo/{code}/{appendix}/{queryparams}'.format(
            main_domain=self.main_domain,
            code=code,
            appendix=appendix,
            queryparams=queryparams(lang=loc.lang_for_url),
        )


class AirlineSitemap(PaginatedSitemap):
    """Airline pages (``/airline/<slug>/``) for companies of type ``plane``."""

    def items(self):
        """Yield an empty slug followed by each distinct airline slug.

        Airlines are expanded per language by ``packed_item_generator``, but
        the airline URL has no language component, so every slug is yielded
        only once to avoid duplicate URLs in the sitemap.
        """
        # NOTE(review): the empty slug renders as '<domain>/airline//' in
        # ``location`` -- presumably meant as the airlines index page; confirm.
        yield ''
        airlines = Company.objects.filter(t_type__code='plane')
        seen_slugs = set()
        for i in packed_item_generator(airlines, self.national_version):
            slug = make_slug(i.item)
            if slug and slug not in seen_slugs:
                seen_slugs.add(slug)
                yield slug

    def location(self, loc):
        """Build the airline page URL; ``loc`` is a slug string here."""
        return '{main_domain}/airline/{slug}/'.format(
            main_domain=self.main_domain,
            slug=loc,
        )


class HolidaysSitemap(PaginatedSitemap):
    """Holiday pages (``/holiday/[<holiday_id>/]<code>/...``)."""

    def items(self):
        """Yield packed items for holiday directions.

        For every distinct (holiday, departure settlement) pair of an active
        holiday with a tanker key one item is produced, followed by one item
        with ``holiday_id=None`` per distinct settlement.
        """
        rows = HolidayDirection.objects.filter(
            holiday__is_active=True,
            holiday__name_tanker_key__isnull=False
        ).values_list('holiday_id', 'settlement_from_id')
        pairs = set((row[0], row[1]) for row in rows)
        settlement_ids = set(pair[1] for pair in pairs)

        id_to_settlement = dict(
            (settlement.id, settlement)
            for settlement in list(Settlement.objects.filter(id__in=settlement_ids))
        )

        entries = []
        for holiday_id, settlement_id in pairs:
            entries.append({
                'holiday_id': holiday_id,
                'settlement': id_to_settlement[settlement_id],
            })
        for settlement_id in settlement_ids:
            entries.append({
                'holiday_id': None,
                'settlement': id_to_settlement[settlement_id],
            })

        for packed in packed_item_generator(entries, self.national_version):
            yield packed

    def location(self, loc):
        """Build the holiday page URL for one packed item."""
        settlement = loc.item['settlement']
        title = settlement.L_title(lang=loc.lang).lower()
        # Transliterate to ASCII, drop apostrophes, then url-quote.
        appendix = quote_plus(unidecode('-%s' % (title,)).replace("'", ""))

        # Prefer the IATA code, fall back to the settlement id.
        code = settlement.iata.lower() if settlement.iata else settlement.id

        parts = [self.main_domain + '/holiday/']
        holiday_id = loc.item['holiday_id']
        if holiday_id is not None:
            parts.append('{}/'.format(holiday_id))
        parts.append('{}/'.format(code))
        if appendix:
            parts.append('{}/'.format(appendix))
        parts.append(queryparams(lang=loc.lang_for_url))

        return ''.join(parts)


class KrymSitemap(PaginatedSitemap):
    """Route pages to the ``krym`` destination; produced for ``ru`` only."""

    def items(self):
        """Yield packed items for departure settlements (``ru`` version only).

        For any other national version no items are produced.
        """
        settlements = []

        # Compare by value: ``is`` on strings tests object identity and only
        # works by accident of interning.
        if self.national_version == 'ru':
            settlements = AviaSettlementNational.objects.filter(
                national_version=self.national_version,
                arrival=False,
            ).order_by('-popularity').select_related('settlement', 'settlement__new_L_title')

        for i in packed_item_generator(settlements, self.national_version):
            yield i

    def get_title(self, lang):
        """Return the destination title: Latin for ``en``, Cyrillic otherwise."""
        if lang == 'en':
            return u'Krym'

        return u'Крым'

    def location(self, loc):
        """Build the ``/routes/<departure>/krym/...`` URL for one packed item."""
        item = loc.item
        # Prefer the IATA code, fall back to the settlement id.
        departure = item.settlement.iata.lower() \
            if item.settlement.iata else item.settlement.id
        arrival = 'krym'
        # Transliterate to ASCII, drop apostrophes, then url-quote.
        appendix = quote_plus(unidecode(
            '%s-%s' % (
                item.settlement.L_title(lang=loc.lang).lower(),
                self.get_title(lang=loc.lang).lower()
            )
        ).replace("'", ""))
        return '{main_domain}/routes/{departure}/{arrival}/{appendix}/{queryparams}'.format(
            main_domain=self.main_domain,
            departure=departure,
            arrival=arrival,
            appendix=appendix,
            queryparams=queryparams(lang=loc.lang_for_url),
        )


class InfoSitemap(PaginatedSitemap):
    """Info pages (``/info/[<slug>/]``) built from published static pages."""

    def items(self):
        """Yield packed items for the ``avia`` root page and its published children.

        If the pages cannot be loaded for any reason the sitemap is empty;
        the reason is logged instead of being silently discarded.
        """
        try:
            main_page = StaticPage.admin_manager.get(
                slug='avia',
                is_ticket_page=True,
                is_published=True,
                national_version=self.national_version
            )

            children = list(StaticPage.admin_manager.filter(
                parent_id=main_page.id, is_published=True
            ).order_by('order', 'title'))

        except Exception as e:
            # Best effort: a missing or unreadable info tree must not abort
            # the whole generation run, but it should leave a trace.
            log.warning('Skip info sitemap for %s: %r', self.national_version, e)
            return

        for i in packed_item_generator([main_page] + children, self.national_version):
            yield i

    def location(self, loc):
        """Build the info page URL, or return None for the English variant."""
        # RASPTICKETS-16289 We have no info pages in English
        if loc.lang_for_url == 'en':
            return None

        # The root page lives at /info/ itself, without a slug segment.
        if loc.item.slug == 'avia':
            return '{main_domain}/info/{queryparams}'.format(
                main_domain=self.main_domain,
                queryparams=queryparams(lang=loc.lang_for_url),
            )

        return '{main_domain}/info/{slug}/{queryparams}'.format(
            main_domain=self.main_domain,
            slug=loc.item.slug,
            queryparams=queryparams(lang=loc.lang_for_url),
        )


class FlightSitemap(PaginatedSitemap):
    """Flight landing pages (``/flights/<number>/``) from TopFlight rows."""

    # Two upper-case letters/digits, one space, then digits.
    FLIGHT_RE = re.compile('^[A-Z0-9]{2} [0-9]+$')
    # National versions for which this sitemap produces nothing.
    DISABLED_NATIONAL_VERSIONS = {'ru'}

    def items(self):
        """Yield packed items for every distinct, well-formed flight number.

        The ``flights`` field is split on ``;``; parts that do not match
        FLIGHT_RE are skipped and the space is replaced with a dash.
        """
        if self.national_version in self.DISABLED_NATIONAL_VERSIONS:
            return

        numbers = set()
        for row in TopFlight.objects.distinct().values_list('flights'):
            numbers.update(
                flight.replace(' ', '-')
                for flight in row[0].split(';')
                if self.FLIGHT_RE.match(flight)
            )

        for packed in packed_item_generator(numbers, self.national_version):
            yield packed

    def location(self, loc):
        # type: (SitemapLocItem) -> str
        """Build the flight landing URL for one packed item."""
        base = '{}/flights/{}/'.format(self.main_domain, loc.item)
        return base + queryparams(lang=loc.lang_for_url)


# National version -> path of the text file with settlement ids for the
# "to city" landing pages (data/to_city_landing/<nv>.txt next to this module).
TO_CITY_FILES = {
    nv: os.path.join(os.path.dirname(__file__), 'data', 'to_city_landing', '{}.txt'.format(nv))
    for nv in ['ru', 'com', 'ua', 'kz', 'tr']
}


class ToCitiesSitemap(PaginatedSitemap):
    """"To city" landing pages (``/city-to/<code>-<title>/``)."""

    def items(self):
        """Yield packed items for the settlements listed in the data file.

        The file for the national version is read one integer id per line.
        """
        with open(TO_CITY_FILES[self.national_version], 'rb') as inp:
            settlement_ids = [int(line) for line in inp]

        queryset = Settlement.objects.filter(id__in=settlement_ids)
        queryset = queryset.select_related('new_L_title')
        for packed in packed_item_generator(queryset, self.national_version):
            yield packed

    def location(self, loc):
        """Build the landing URL; the prefix is the IATA code or, if None, the id."""
        settlement = loc.item
        if settlement.iata is None:
            first_part = settlement.id
        else:
            first_part = settlement.iata.lower()

        title = settlement.L_title(lang=loc.lang).lower()
        # Transliterate to ASCII, drop apostrophes, then url-quote.
        slug = unidecode(u'{}-{}'.format(first_part, title)).replace("'", "")
        appendix = quote_plus(slug)

        return '{main_domain}/city-to/{appendix}/{queryparams}'.format(
            main_domain=self.main_domain,
            appendix=appendix,
            queryparams=queryparams(lang=loc.lang_for_url),
        )


def gzip_to_file(filepath, content):
    """Write ``content`` gzip-compressed to ``filepath`` and fsync it.

    The gzip stream is closed before the fsync so that the gzip trailer is
    part of what gets synced, and both handles are released even when
    writing fails.
    """
    with open(filepath, 'wb') as raw:
        gz = gzip.GzipFile(filename=filepath, mode='wb', fileobj=raw)
        try:
            gz.write(content)
        finally:
            # Closing a GzipFile built on ``fileobj`` writes the trailer but
            # leaves ``raw`` open for the flush/fsync below.
            gz.close()
        raw.flush()
        os.fsync(raw.fileno())


# Non-paginated sitemaps: file name -> sitemap (class or instance).
sitemaps = {
    # Empty for now
}


# Paginated sitemaps: file name template (``%s`` is the page number) -> class.
paginated_sitemaps = {
    # City landing pages
    'cities_%s.xml': CitiesSitemap,
    # Traveller's guide (info) pages
    'info_%s.xml': InfoSitemap,
    # City index pages
    # 'geosettlements_%s.xml': GeoSettlementSitemap,
    # Direction (route) pages
    'directions_%s.xml': DirectionSitemap,
    # Pages for Crimea
    'krym_directions_%s.xml': KrymSitemap,
    # Airlines
    'airlines_%s.xml': AirlineSitemap,
    # Holidays
    # 'holidays_%s.xml': HolidaysSitemap,
    # Flight landing pages
    'flights_%s.xml': FlightSitemap,
    # "To city" landing pages
    'to_city_%s.xml': ToCitiesSitemap,
}


def generate_national(tld, country_id):
    """Generate every sitemap file and the sitemap index for one TLD.

    Page files go to ``<FILE_PREFIX>/sitemaps/<tld>/`` and the index to
    ``<FILE_PREFIX>/sitemap_<tld>.xml.gz``; all files are gzip-compressed.
    """
    log.info('Start generate national sitemap for %s %d', tld, country_id)

    domain = get_main_domain(tld)
    output_dir = os.path.join(FILE_PREFIX, 'sitemaps', tld)
    sitemap_pages = []

    def publish(gz_name, xml):
        # Write one page file and register its public URL in the index.
        gzip_to_file(os.path.join(output_dir, gz_name), xml)
        sitemap_pages.append('%s/sitemaps/%s' % (domain, gz_name))

    for name, map_ in sitemaps.items():
        log.info('Start generate sitemap [%s] for %s %d',
                 name, tld, country_id)
        gz_name = name + '.gz'

        # RASPTICKETS-4810 Remove the guide from the com-version sitemap
        if tld == 'com' and gz_name == 'info.xml.gz':
            continue

        publish(gz_name, make_sitemap(map_, tld, country_id))
        log.info('Finish generate sitemap [%s] for %s %d', gz_name, tld,
                 country_id)

    # A one-URL sitemap holding the geo index page.
    others = FromUrlsListSitemap(['%s/geo' % domain], tld, country_id)
    publish('others.xml.gz', make_sitemap(others, tld, country_id))

    for name_template, sitemap_class in paginated_sitemaps.items():
        for pagenum, xml in sitemap_class(tld, country_id).gen_pages(10000):
            publish((name_template % pagenum) + '.gz', xml)

    index_context = Context({'pages': sitemap_pages})
    index_xml = smart_str(SITEMAP_INDEX_TEMPLATE.render(index_context))

    gzip_to_file(os.path.join(FILE_PREFIX, 'sitemap_%s.xml.gz' % tld), index_xml)
    log.info('Finish generate national sitemap for %s %d', tld, country_id)


def generate():
    """Create the output directories, build all national sitemaps, sync to S3.

    Directories are created with ``os.makedirs`` rather than a shell
    ``mkdir -p`` string, so paths with shell metacharacters are handled
    correctly and no subprocess is spawned.
    """
    # (tld, country id) for every national version, in generation order.
    national_versions = (
        ('ru', 225),
        ('ua', 187),
        ('com.tr', 983),
        ('com', 111),
        ('kz', 159),
    )

    log.info('Start create sitemaps directories')
    for tld, _ in national_versions:
        directory = os.path.join(FILE_PREFIX, 'sitemaps', tld)
        try:
            os.makedirs(directory)
        except OSError:
            # Like ``mkdir -p``: an already existing directory is fine,
            # anything else is a real failure.
            if not os.path.isdir(directory):
                raise
    log.info('Finish create sitemaps directories')

    for tld, country_id in national_versions:
        generate_national(tld, country_id)

    s3client = S3Client(settings.MDS_S3_ENDPOINT_URL, log)
    s3client.sync(settings.S3_CONFIGS['sitemaps'], settings.MDS_S3_AVIA_ADMIN_BUCKET)


if __name__ == '__main__':
    # Script entry point: generate everything, exit with status 1 on failure.
    optparser = OptionParser()
    # NOTE(review): --verbose is parsed but never read; the log level is
    # fixed at DEBUG at module level.
    optparser.add_option('-v', '--verbose', action='store_true')

    options, args = optparser.parse_args()

    log.info('Start')
    log.debug('Databases: %r', settings.DATABASES)
    try:
        generate()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as generation errors.
        log.exception('ERROR:')
        sys.exit(1)
    log.info('Finish')

"""
Проверить валидность результатов:
wget -O /tmp/sitemap.xsd http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd
wget -O /tmp/siteindex.xsd http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd

rm -f /tmp/err1

файл индекса:
zcat media/avia/sitemap.xml.gz | xmllint --noout --schema /tmp/siteindex.xsd - 2>>/tmp/err1

cat /tmp/err1 | grep -v validates
"""
