# -*- coding: utf-8 -*-
import gzip
import os

from sandbox.sandboxsdk.paths import make_folder

from sandbox.projects import resource_types
from sandbox.projects.common.geosearch.utils import unpack_file
from sandbox.projects.common.geosearch.indexing import generate_single_url_updater

# Archive URL used for every key below: 'stable' and 'testing' currently
# point at the same file.
_RATINGS_ARCHIVE_URL = 'http://vs-elliptics.http.yandex.net:81/get/review/biz/rating-detailed.tar'

# Default download URL for each source key ('stable' / 'testing').
DEFAULT_SOURCES = dict(
    stable=_RATINGS_ARCHIVE_URL,
    testing=_RATINGS_ARCHIVE_URL,
)

# Base class for the task below, produced by the shared single-URL updater
# factory.  NOTE(review): presumably the factory supplies the download and
# release logic and calls create_index_from_data() -- confirm in
# sandbox.projects.common.geosearch.indexing.
BaseUpdater = generate_single_url_updater(
    index_type=resource_types.BUSINESS_DETAILED_RATINGS,
    release_subject='Geosearch business detailed ratings update',
    default_sources=DEFAULT_SOURCES,
)


class UpdateBusinessDetailedRatings(BaseUpdater):
    """
        Update business detailed ratings, just ask status and download.

        The downloaded file is unpacked and, if the unpacked content does not
        already contain ``reviews.xml``, that file is generated from
        ``rating-info.xml.gz``.
    """
    type = 'UPDATE_BUSINESS_DETAILED_RATINGS'

    def create_index_from_data(self, resource_path):
        """
            Unpack ``resource_path`` and return the path to ``reviews.xml``.

            :param resource_path: path of the file to unpack (presumably the
                archive fetched by the base updater -- confirm against caller)
            :return: path to ``reviews.xml`` inside the unpack directory
        """
        unpack_dir = make_folder(os.path.join(self.abs_path(), 'unpacked_ratings'))
        unpack_file(resource_path, unpack_dir)
        self.postprocess_ratings(unpack_dir)
        return os.path.join(unpack_dir, 'reviews.xml')

    def postprocess_ratings(self, rating_dir):
        """
            Make sure ``reviews.xml`` exists in ``rating_dir``.

            Nothing is done when the file is already present.  Otherwise it is
            generated from ``rating-info.xml.gz``, which is read line by line;
            every line is split on tabs, the first column is used as the
            business id and the second one as an XML fragment.

            Errors raised while opening or reading the source file are
            propagated to the caller.

            :param rating_dir: directory holding the unpacked files
        """
        ratings_xml_path = os.path.join(rating_dir, 'reviews.xml')
        if os.path.exists(ratings_xml_path):
            return

        rating_info_xml_path = os.path.join(rating_dir, 'rating-info.xml.gz')

        # Write to a temporary name and rename only after the whole file has
        # been produced.  Otherwise a failure in the middle of the conversion
        # would leave a truncated reviews.xml behind, and the existence check
        # above would accept it as a finished result on the next call.
        tmp_path = ratings_xml_path + '.tmp'

        with gzip.open(rating_info_xml_path, 'r') as source:
            with open(tmp_path, 'w') as dest:
                dest.write('<DetailedReviews>\n')
                for line in source:
                    record = self._format_business_record(line)
                    if record is not None:
                        dest.write(record)
                dest.write('</DetailedReviews>\n')

        os.rename(tmp_path, ratings_xml_path)

    def _format_business_record(self, line):
        """
            Convert one source line into a ``<Business>`` element.

            :param line: raw tab-separated line
            :return: text to append to the output (newline terminated), or
                ``None`` when the line has to be skipped
        """
        columns = line.split('\t')
        if len(columns) < 2:
            return None

        oid = columns[0]
        xml = columns[1]

        score = self._extract_rating_score(xml)
        # A fragment containing "Reviews/" (a self-closing <Reviews/>) carries
        # no review bodies.
        has_review = 'Reviews/' not in xml
        if score is None and not has_review:
            return None

        # A missing score is rendered as -1.
        rendered_score = -1 if score is None else score
        parts = ['<Business oid="{0}" score="{1}"><Reviews>'.format(oid, rendered_score)]
        if has_review:
            # NOTE(review): when either marker is missing find() returns -1
            # and the slice degenerates (usually to an empty string); this
            # is intentionally kept as in the previous implementation.
            parts.append(xml[xml.find('<Review '):xml.find('</Reviews>')])
        parts.append('</Reviews></Business>')
        return '\n'.join(parts) + '\n'

    @staticmethod
    def _extract_rating_score(xml):
        """
            Return the text between ``<score>`` and ``</score>``.

            :param xml: XML fragment to search
            :return: the score text, or ``None`` when there is no complete
                ``<score>...</score>`` element
        """
        open_tag = '<score>'
        start = xml.find(open_tag)
        if start == -1:
            return None
        start += len(open_tag)

        # Look for the closing tag only after the opening one.  Without a
        # closing tag there is no usable value: slicing up to find()'s -1
        # would put the rest of the line into the score attribute.
        end = xml.find('</score>', start)
        if end == -1:
            return None
        return xml[start:end]


# Module-level alias for the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm.
__Task__ = UpdateBusinessDetailedRatings
