# -*- coding: utf-8 -*-
import logging
import gzip
import os
import re
import shutil
from xml.etree import ElementTree
import datetime
from typing import Set, List, Text

import boto3
import botocore
from more_itertools import chunked

from travel.avia.avia_statistics.landing_cities import LandingCity, iter_landing_cities
from travel.avia.avia_statistics.landing_routes import LandingRoute, get_landing_routes
from travel.avia.library.python.lib_yt.client import configured_client

# Upper bound on the number of URLs written into a single sitemap file; longer
# URL lists are split into several numbered files. The sitemaps.org protocol
# allows at most 50,000 URLs per sitemap file.
MAX_URLS_PER_FILE = 50000


class SitemapGenerator(object):
    def __init__(
        self,
        portal_url,
        yt_proxy,
        yt_token,
        changefreq,
        add_lastmod,
        s3_endpoint,
        s3_prefix,
        s3_access_key,
        s3_access_secret_key,
        s3_bucket,
        settlement_repository,
        top_flight_repository,
        need_generate_flight_sitemap=False,
        need_add_avia_root=False,
        need_generate_city_to_sitemap=False,
    ):
        """Remember the generator settings and create the YT client.

        :param portal_url: base URL every generated link starts with.
        :param yt_proxy: proxy passed to ``configured_client``.
        :param yt_token: token passed to ``configured_client``.
        :param changefreq: text of the ``<changefreq>`` tag; the tag is
            omitted when the value is falsy.
        :param add_lastmod: when truthy, every URL entry gets a ``<lastmod>``
            tag holding the date this object was created.
        :param s3_endpoint: endpoint URL of the S3 client.
        :param s3_prefix: key prefix the files are uploaded under.
        :param s3_access_key: S3 access key id.
        :param s3_access_secret_key: S3 secret access key.
        :param s3_bucket: name of the target bucket.
        :param settlement_repository: object whose ``get(id)`` returns an item
            with a ``Slug`` attribute.
        :param top_flight_repository: object whose ``itervalues()`` yields
            items with a ``;``-separated ``Flights`` string.
        :param need_generate_flight_sitemap: also write the flights sitemap.
        :param need_add_avia_root: also write the sitemap of the avia root page.
        :param need_generate_city_to_sitemap: also write the "flights to city"
            sitemap.
        """
        # Content of the generated sitemaps.
        self.portal_url = portal_url
        self.changefreq = changefreq
        self.add_lastmod = add_lastmod
        self.today_str = datetime.date.today().isoformat()

        # Which optional sitemaps to produce.
        self.need_add_avia_root = need_add_avia_root
        self.need_generate_flight_sitemap = need_generate_flight_sitemap
        self.need_generate_city_to_sitemap = need_generate_city_to_sitemap

        # Data sources.
        self.yt_client = configured_client(proxy=yt_proxy, token=yt_token)
        self.settlement_repository = settlement_repository
        self.top_flight_repository = top_flight_repository

        # Upload destination.
        self.s3_endpoint = s3_endpoint
        self.s3_bucket = s3_bucket
        self.s3_prefix = s3_prefix
        self.s3_access_key = s3_access_key
        self.s3_access_secret_key = s3_access_secret_key

        self.temp_path = None

    def _recreate_local_dir(self, path):
        """Leave an empty directory at ``path``, removing any existing tree first."""
        has_leftovers = os.path.exists(path)
        if has_leftovers:
            shutil.rmtree(path)
        os.mkdir(path)

    def generate_sitemap_files(self, landing_routes, landing_cities_to, sitemaps_local_root):
        # type: (Set[LandingRoute], Set[LandingCity], str) -> List[str]
        """Write all sitemap files into ``sitemaps_local_root``.

        Depending on the ``need_*`` flags this produces the avia root sitemap,
        the routes sitemaps, the flights sitemaps and the "flights to city"
        sitemaps, followed by the ``sitemap.xml.gz`` index that links to each
        of them under ``<portal_url>/sitemaps/avia/``.

        :param landing_routes: routes to build route landing URLs for.
        :param landing_cities_to: cities to build "flights to" URLs for.
        :param sitemaps_local_root: existing directory the files are written to.
        :return: names of the written files (relative to the directory), the
            index file being the last one.
        """
        sitemap_files = []

        # travel/avia/ root
        if self.need_add_avia_root:
            filename = 'avia.xml.gz'
            avia_root_url = '{}/{}'.format(self.portal_url, 'avia/')
            self.generate_sitemap_file(sitemaps_local_root, filename, [avia_root_url], is_root_file=False)
            sitemap_files.append(filename)

        # Routes pages sitemap
        sitemap_files.extend(self._generate_chunked_sitemaps(
            sitemaps_local_root,
            'sitemap.routes_{}.xml.gz',
            self.generate_routes_urls(landing_routes),
        ))

        # Flight pages sitemap
        if self.need_generate_flight_sitemap:
            sitemap_files.extend(self._generate_chunked_sitemaps(
                sitemaps_local_root,
                'sitemap.flights_{}.xml.gz',
                self.generate_flights_urls(),
            ))

        # City to landing sitemap
        if self.need_generate_city_to_sitemap:
            sitemap_files.extend(self._generate_chunked_sitemaps(
                sitemaps_local_root,
                'sitemap.flights_to_{}.xml.gz',
                self.generate_city_to_urls(landing_cities_to),
            ))

        # Root sitemap
        root_filename = 'sitemap.xml.gz'
        root_urls = [u'{}/sitemaps/avia/{}'.format(self.portal_url, f) for f in sitemap_files]
        self.generate_sitemap_file(sitemaps_local_root, root_filename, root_urls, is_root_file=True)
        return sitemap_files + [root_filename]

    def _generate_chunked_sitemaps(self, sitemaps_local_root, filename_template, urls):
        """Split ``urls`` into files of at most MAX_URLS_PER_FILE entries.

        ``filename_template`` receives the 1-based chunk number through
        ``str.format``. Returns the names of the written files, in order;
        nothing is written for an empty ``urls``.
        """
        filenames = []
        if not urls:
            return filenames
        for index, chunk in enumerate(chunked(urls, MAX_URLS_PER_FILE), start=1):
            filename = filename_template.format(index)
            self.generate_sitemap_file(sitemaps_local_root, filename, chunk, is_root_file=False)
            filenames.append(filename)
        return filenames

    def generate_routes_urls(self, landing_routes):
        # type: (Set[LandingRoute]) -> List[Text]
        """Return the route landing URL of every route, in iteration order."""
        urls = []
        for landing_route in landing_routes:
            urls.append(self.route2url(landing_route))
        return urls

    def route2url(self, route):
        # type: (LandingRoute) -> Text
        """Build the landing URL ``<portal>/avia/routes/<from>--<to>/`` of a route."""
        from_slug = self.id_to_slug(route.from_id)
        to_slug = self.id_to_slug(route.to_id)
        return u'{}/avia/routes/{}--{}/'.format(self.portal_url, from_slug, to_slug)

    def id_to_slug(self, settlement_id):
        """Return the ``Slug`` of the settlement stored under ``settlement_id``."""
        settlement = self.settlement_repository.get(settlement_id)
        return settlement.Slug

    def generate_sitemap_file(self, sitemaps_local_root, filename, urls, is_root_file):
        """Write ``urls`` as a gzipped sitemap XML file.

        :param sitemaps_local_root: directory the file is created in.
        :param filename: name of the file inside that directory.
        :param urls: iterable of URLs, one entry is written per URL.
        :param is_root_file: write a ``sitemapindex`` document when true,
            a ``urlset`` document otherwise.
        """
        # https://www.sitemaps.org/protocol.html
        if is_root_file:
            root_tag = 'sitemapindex'
        else:
            root_tag = 'urlset'
        namespace = {'xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}
        root = ElementTree.Element(root_tag, attrib=namespace)
        root.extend([self.url2xml(url, is_root_file) for url in urls])

        target = os.path.join(sitemaps_local_root, filename)
        document = ElementTree.ElementTree(root)
        with open(target, 'wb') as raw, gzip.GzipFile(fileobj=raw, mode="w") as packed:
            document.write(packed, encoding='utf-8', xml_declaration=True)

    def url2xml(self, url, is_root_file):
        """Build the XML element describing one sitemap entry.

        Index entries (``is_root_file`` true) are ``<sitemap>`` elements that
        hold only ``<loc>``. Regular entries are ``<url>`` elements that also
        get ``<changefreq>`` when ``self.changefreq`` is set and ``<lastmod>``
        when ``self.add_lastmod`` is set.
        """
        entry = ElementTree.Element('sitemap' if is_root_file else 'url')
        ElementTree.SubElement(entry, 'loc').text = url
        if is_root_file:
            return entry

        if self.changefreq:
            ElementTree.SubElement(entry, 'changefreq').text = self.changefreq
        if self.add_lastmod:
            ElementTree.SubElement(entry, 'lastmod').text = self.today_str
        return entry

    def sync_with_s3(self, sitemaps_local_root, file_list):
        """Mirror the generated files into the S3 bucket.

        Every file of ``file_list`` is uploaded from ``sitemaps_local_root``
        under ``self.s3_prefix`` (a trailing ``/`` is added when missing).
        Afterwards every other object found under that prefix is deleted, so
        the prefix ends up holding exactly the uploaded files.

        :param sitemaps_local_root: local directory holding the files.
        :param file_list: file names relative to ``sitemaps_local_root``.
        """
        s3_pfx = self.s3_prefix
        if not s3_pfx.endswith('/'):
            s3_pfx += '/'
        session = boto3.session.Session(
            aws_access_key_id=self.s3_access_key,
            aws_secret_access_key=self.s3_access_secret_key,
        )
        # NOTE(review): verify=False turns off TLS certificate verification for
        # the S3 endpoint. Kept as is; confirm it is still required.
        s3 = session.client(service_name='s3', endpoint_url=self.s3_endpoint, verify=False)
        try:
            s3.create_bucket(Bucket=self.s3_bucket)
        except botocore.exceptions.ClientError as exc:
            # Best effort: the bucket normally exists already. The error itself
            # is logged so that other failures are not mistaken for that case.
            logging.info(
                'bucket %s was not created (it probably already exists): %s',
                self.s3_bucket, exc,
            )

        allowed_keys = set()
        for fn in file_list:
            local_fn = os.path.join(sitemaps_local_root, fn)
            key = s3_pfx + fn
            logging.info('Uploading %s -> %s', local_fn, key)

            # Close the local file as soon as the upload has finished.
            with open(local_fn, 'rb') as body:
                s3.put_object(
                    Bucket=self.s3_bucket,
                    Key=key,
                    Body=body,
                )
            allowed_keys.add(key)

        # A single list_objects response is truncated, so walk every page, and
        # tolerate pages that carry no 'Contents' entry at all. Keys are
        # collected first so that deleting does not interfere with the listing.
        stale_keys = []
        paginator = s3.get_paginator('list_objects')
        for page in paginator.paginate(Bucket=self.s3_bucket, Prefix=s3_pfx):
            for obj in page.get('Contents', []):
                if obj['Key'] not in allowed_keys:
                    stale_keys.append(obj['Key'])

        for key in stale_keys:
            logging.info("Remove '%s'", key)
            s3.delete_object(Bucket=self.s3_bucket, Key=key)

    # Accepts flight numbers written as two uppercase letters/digits
    # (presumably the airline code), one space and a run of digits,
    # e.g. "SU 1234".
    FLIGHT_RE = re.compile('^[A-Z0-9]{2} [0-9]+$')

    def generate_flights_urls(self):
        """Return the flight page URLs of all distinct top flights.

        Each repository item carries a ``;``-separated ``Flights`` string.
        Entries that do not match ``FLIGHT_RE`` are dropped, the space in the
        remaining ones is replaced with ``-`` and duplicates are collapsed.
        The order of the result is unspecified.
        """
        unique_numbers = set()
        for entry in self.top_flight_repository.itervalues():
            valid = (raw for raw in entry.Flights.split(';') if self.FLIGHT_RE.match(raw))
            unique_numbers.update(raw.replace(' ', '-') for raw in valid)
        return list(map(self.flight_number2url, unique_numbers))

    def flight_number2url(self, flight_number):
        """Build the flight page URL ``<portal>/avia/flights/<number>/``."""
        template = u'{}/avia/flights/{}/'
        return template.format(self.portal_url, flight_number)

    def generate_city_to_urls(self, cities):
        # type: (Set[LandingCity]) -> List[Text]
        """Return the "flights to" URL of every city, in iteration order."""
        urls = []
        for landing_city in cities:
            urls.append(self.city_to2url(landing_city))
        return urls

    def city_to2url(self, city):
        # type: (LandingCity) -> Text
        """Build the URL ``<portal>/avia/flights-to/<slug>/`` of a destination city."""
        destination_slug = self.id_to_slug(city.to_id)
        return u'{}/avia/flights-to/{}/'.format(self.portal_url, destination_slug)

    def generate(self):
        """Run the whole pipeline.

        Recreates the local ``sitemaps`` directory, loads landing routes and
        landing cities through the YT client, writes the sitemap files and
        syncs them with S3.
        """
        local_root = 'sitemaps'
        self._recreate_local_dir(local_root)

        routes = get_landing_routes(self.yt_client)
        cities_to = set(iter_landing_cities(self.yt_client))

        generated_files = self.generate_sitemap_files(
            landing_routes=routes,
            landing_cities_to=cities_to,
            sitemaps_local_root=local_root,
        )
        self.sync_with_s3(local_root, generated_files)
