# coding: utf8

import re

from urllib2 import urlopen
from lxml.html import document_fromstring as parser


class Vendor(object):
    """Container for the book metadata scraped from one vendor page.

    An instance remembers the page ``url`` it was created with; the
    ``isbn``, ``title``, ``authors``, ``publisher``, ``year`` and ``cover``
    attributes all start out as ``None``.
    """

    def __init__(self, url):
        """Store *url* and reset every metadata attribute to ``None``."""
        self.url = url
        for field in ('isbn', 'title', 'authors',
                      'publisher', 'year', 'cover'):
            setattr(self, field, None)


class Amazon(Vendor):
    """Book metadata scraper for amazon.com product pages."""

    def resolve(self):
        """Download ``self.url`` and fill in the metadata found on the page.

        Sets ``isbn``, ``title``, ``authors``, ``publisher`` and ``year``.
        Each field is extracted independently, so one missing element does
        not prevent the others from being populated. ``isbn``, ``authors``
        and ``publisher`` are set to ``None`` when absent; ``title`` and
        ``year`` are left untouched when their element is missing.

        Returns:
            False if the page could not be downloaded, True otherwise
            (even when no field could be extracted).
        """
        try:
            raw = urlopen(self.url).read()
        except Exception:
            return False

        html = parser(raw)

        # Collect the "Label: value" bullets into a dict keyed by the
        # lower-cased label without its trailing colon.
        info = {}
        for item in html.cssselect('#detail-bullets ul > li'):
            try:
                key = item.cssselect('b')[0].text.strip().strip(':').lower()
                value = item.xpath('text()')[0].strip()
            except Exception:
                # Best effort: skip bullets that lack a <b> label or a
                # text value instead of aborting the whole page.
                continue
            info[key] = value

        self.isbn = info.get('isbn-13', '').replace('-', '') or None

        title_nodes = html.cssselect('#productTitle')
        if title_nodes:
            self.title = title_nodes[0].text

        # Skip links without text so the join cannot fail on None.
        names = [link.text for link in html.cssselect('a.contributorNameID')
                 if link.text]
        self.authors = ', '.join(names) or None

        # .get() rather than [...]: a missing bullet must not raise KeyError.
        self.publisher = info.get('publisher')

        # The year is taken as the last four characters of the last span
        # inside #title.
        spans = html.cssselect('#title > span')
        if spans and spans[-1].text:
            self.year = spans[-1].text[-4:]

        return True


class Ozon(Vendor):
    """Book metadata scraper for ozon.ru product pages."""

    def resolve(self):
        """Download ``self.url`` and fill in the metadata found on the page.

        Sets ``cover`` (when the inline model script carries a URL),
        ``isbn``, ``title``, ``authors``, ``publisher`` and ``year``.
        Fields are extracted independently; ``isbn``, ``authors``,
        ``publisher`` and ``year`` are set to ``None`` when absent, while
        ``title`` and ``cover`` are left untouched.

        Returns:
            False if the page could not be downloaded or decoded as
            WINDOWS-1251, True otherwise.
        """
        try:
            raw = urlopen(self.url).read().decode('WINDOWS-1251')
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            return False

        # The cover path is read from the first "Url" entry following the
        # inline "var model = {" script marker.
        st = raw.find('var model = {')
        if st > -1:
            st = raw.find('"Url":"', st)
            if st > -1:
                st += len('"Url":"')
                end = raw.find('"', st)
                # Without a closing quote the slice would run to the end of
                # the document, so only accept a terminated value.
                if end > -1:
                    self.cover = ('http://static1.ozone.ru/multimedia/' +
                                  raw[st:end])

        html = parser(raw)

        # Map each itemprop name to its text; when the text contains a
        # colon, keep only the part after the first one.
        info = {}
        for item in html.cssselect('.bDetailLogoBlock p[itemprop]'):
            try:
                key = item.get('itemprop')
                value = item.xpath('string(normalize-space(.))')

                if ':' in value:
                    value = value.split(':', 1)[1]
            except Exception:
                continue
            else:
                info[key] = value

        # The last digit/dash run in the "isbn" field is used as the year;
        # the runs before it are the ISBNs. A missing field yields no runs
        # instead of raising KeyError.
        numbers = re.findall('[0-9-]+', info.get('isbn', ''))
        year = numbers.pop() if numbers else None
        self.isbn = ', '.join(n.replace('-', '') for n in numbers) or None

        title_nodes = html.cssselect('h1[itemprop="name"]')
        if title_nodes:
            self.title = title_nodes[0].text

        self.authors = info.get('author')
        self.publisher = info.get('publisher')
        self.year = year

        return True


class Books(Vendor):
    """Book metadata scraper for books.ru product pages.

    The page is scanned with ``str.find`` offsets instead of an HTML parser,
    so extraction depends on the exact markup fragments searched for.
    """

    def resolve(self):
        """Download ``self.url`` and fill in the metadata found on the page.

        Sets ``authors``, ``title``, ``publisher``, ``year`` and ``isbn``
        when the corresponding markup is present; a field whose markup is
        not found keeps its previous value.

        Returns:
            False if the page could not be downloaded or decoded as UTF-8,
            True otherwise (even when no field could be extracted).
        """
        try:
            html = urlopen(self.url).read().decode('UTF-8')
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            return False

        st = html.find('<p class="author">')
        if st > -1:
            st = html.find('>', st) + 1
            border = html.find('</p>', st)
            if html.find('<', st) == border:
                # No nested tag before </p>: the paragraph text itself is
                # the author string.
                self.authors = html[st:border].strip()
            else:
                # Nested tags: take the text after the first tag, then the
                # text after each following "<a ...>" up to </p>. Collected
                # in a local so repeated resolve() calls do not append to a
                # previous result.
                authors = None
                st = html.find('>', st) + 1
                while -1 < st < border:
                    end = html.find('<', st)
                    if authors:
                        authors += ', ' + html[st:end]
                    else:
                        authors = html[st:end]
                    st = html.find('<a', st)
                    if st > -1:
                        st = html.find('>', st) + 1
                if authors is not None:
                    self.authors = authors

        # Search for the heading after the author paragraph when there is
        # one, otherwise from the top of the page. A -1 start offset would
        # make str.find look at the last character only.
        st = html.find('<h1', max(html.find('<p class="author"'), 0))
        if st > -1:
            st = html.find('>', st) + 1
            end = html.find('<', st)
            self.title = html[st:end].strip()

        # Same clamping: keep looking for the details table even when no
        # heading was found.
        st = html.find('class="additional_data"', max(st, 0))
        if st > -1:
            st1 = html.find(u'Издательство:', st)
            if st1 > -1:
                # The publisher name is the text of the first link after
                # the label.
                link = html.find('<a', st1)
                if link > -1:
                    st1 = html.find('>', link) + 1
                    end = html.find('<', st1)
                    self.publisher = html[st1:end]

            st1 = html.find(u'Дата выхода:', st)
            if st1 > -1:
                cell = html.find('<td>', st1)
                if cell > -1:
                    st1 = cell + len('<td>')
                    end = html.find('<', st1)
                    # Only the first four-digit run of the cell is kept.
                    m = re.search(r'\d{4}', html[st1:end])
                    if m:
                        self.year = m.group(0)

            st1 = html.find('ISBN:', st)
            if st1 > -1:
                cell = html.find('<td>', st1)
                if cell > -1:
                    st1 = cell + len('<td>')
                    end = html.find('<', st1)
                    self.isbn = html[st1:end].replace('-', '')
        return True


def get_vendor(url):
    """Build the scraper object that handles *url*.

    The URL is matched by prefix against the three supported sites
    (``http://www.amazon.com``, ``http://www.ozon.ru``,
    ``http://www.books.ru``).

    Returns:
        An ``Amazon``, ``Ozon`` or ``Books`` instance created with *url*,
        or ``None`` when *url* is empty/``None`` or matches no known prefix.
    """
    vendor = None
    if url:
        if url.startswith('http://www.amazon.com'):
            vendor = Amazon(url)
        elif url.startswith('http://www.ozon.ru'):
            vendor = Ozon(url)
        elif url.startswith('http://www.books.ru'):
            vendor = Books(url)
    return vendor
