import zlib
from HTMLParser import HTMLParser

import requests
import logging
from infra.dist.cacus.lib.dbal import ubuntu_upstream

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Base URL of the upstream Ubuntu archive that is scanned.
repo_url = 'http://archive.ubuntu.com/ubuntu'
# Archive components whose package indexes are downloaded.
ubuntu_components = ['main', 'restricted', 'universe', 'multiverse']
# Adding 16 to the window size tells zlib to expect a gzip header/trailer.
zlib_gz_wbits = zlib.MAX_WBITS + 16
# Prefix of the line that names a package inside a Packages index.
pkg_ln_pattern = 'Package: '


class ListingHTMLParser(HTMLParser, object):
    """Collect directory names from an HTML index page.

    An ``<img>`` tag whose ``alt`` attribute is ``[DIR]`` marks the next
    ``<a href=...>`` tag as a directory link. The href of that link, with
    every ``/`` removed, is appended to ``self.listing``.

    Attributes are documented here rather than inline:
        listing:  list of directory names found so far, in page order.
        dir_flag: True between a ``[DIR]`` image and the link that follows.
    """

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this class is inherited from and then recurses
        # into this very __init__ forever.
        super(ListingHTMLParser, self).__init__()
        self.listing = []
        self.dir_flag = False

    def handle_starttag(self, tag, attrs):
        """Track ``[DIR]`` icons and record the href of the link after one.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for an attribute written
        without a value.
        """
        if tag == 'img':
            if any(name == 'alt' and value == '[DIR]'
                   for name, value in attrs):
                self.dir_flag = True
        elif tag == 'a' and self.dir_flag:
            for name, value in attrs:
                # A bare ``href`` has no value; skip it instead of failing
                # on ``None.replace`` and keep waiting for a usable link.
                if name == 'href' and value is not None:
                    self.listing.append(value.replace('/', ''))
                    # One directory icon accounts for exactly one link.
                    self.dir_flag = False
                    break


def _fetch_upstream_package_names(url, timeout):
    """Download one gzip-compressed Packages index and list its packages.

    Returns the text following ``pkg_ln_pattern`` on every line that starts
    with it. Raises whatever ``requests`` raises on a transport error or a
    non-success HTTP status.
    """
    log.info('Fetching: {}'.format(url))
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # zlib_gz_wbits makes zlib accept the gzip container of Packages.gz.
    packages_file = zlib.decompress(response.content, zlib_gz_wbits)
    prefix_len = len(pkg_ln_pattern)
    return [
        line[prefix_len:]
        for line in packages_file.splitlines()
        if line.startswith(pkg_ln_pattern)
    ]


def update_upstream_packages_list(upstream_store=ubuntu_upstream.default_store,
                                  request_timeout=60):
    """Record upstream packages that are not yet present in the store.

    The ``dists/`` listing of ``repo_url`` is parsed for directory names.
    Names without a ``-`` are treated as distributions; each distribution
    owns the directory with its own name plus every ``<name>-...``
    directory. For each such directory and each entry of
    ``ubuntu_components`` the ``binary-amd64/Packages.gz`` index is
    downloaded, and the union of package names is compared with what
    ``upstream_store.find(dist=...)`` returns. Every missing name is saved
    as an ``ubuntu_upstream.UpstreamPackage``.

    Args:
        upstream_store: object providing ``find(dist=...)``, yielding
            records with a ``package`` attribute, and ``save(obj)``.
        request_timeout: timeout in seconds passed to every
            ``requests.get`` call so a stalled server cannot block the
            update indefinitely.

    Raises:
        Whatever ``requests`` raises on a transport error, timeout or
        non-success HTTP status; the update is aborted at that point.
    """
    response = requests.get('{}/dists/'.format(repo_url),
                            timeout=request_timeout)
    response.raise_for_status()
    lp = ListingHTMLParser()
    lp.feed(response.text)

    dist_names = [name for name in lp.listing if '-' not in name]

    dists = {}
    for dist in dist_names:
        # Match the dist itself or "<dist>-<suffix>". A plain substring test
        # would also pick up directories of any other dist whose name
        # merely contains this one.
        suffix_prefix = dist + '-'
        dists[dist] = {
            'dirs': [
                entry for entry in lp.listing
                if entry == dist or entry.startswith(suffix_prefix)
            ],
            'upstream_packages': set(),
        }

    for dist in dists:
        upstream_packages = dists[dist]['upstream_packages']
        for directory in dists[dist]['dirs']:
            for component in ubuntu_components:
                url = '{}/dists/{}/{}/binary-amd64/Packages.gz'.format(
                    repo_url, directory, component)
                upstream_packages.update(
                    _fetch_upstream_package_names(url, request_timeout))

    for dist in dists:
        log.info(
            'Distributive {} has {} packages'.format(
                dist, len(dists[dist]['upstream_packages'])
            )
        )

    for dist in dists:
        db_packages = {r.package for r in upstream_store.find(dist=dist)}
        new_packages = dists[dist]['upstream_packages'] - db_packages
        for package_name in new_packages:
            upstream_store.save(
                ubuntu_upstream.UpstreamPackage(dist, package_name))
