import codecs
import os
import shutil

import subprocess
import time
from collections import Counter

from SerpParser import Serp, SerpElement, SerpElementExampleSet
from settings import SerpSettings

__author__ = 'irlab'

class SerpSetSimpleHtm:
    ONTO_HTM_SLICE_GROUPS_COUNT = 100

    def __init__(self, serp_settings, uuid_folder):
        self.serp_settings = serp_settings
        assert isinstance(self.serp_settings, SerpSettings)
        self.uuid_folder = uuid_folder

    def create_out_dir(self, out_dir, serp_list, example_sets):
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(os.path.join(out_dir + '/' + self.uuid_folder))

        for serp in serp_list:
            assert isinstance(serp, Serp)
            serp_dir = os.path.join(out_dir, self.uuid_folder, serp.id)
            os.mkdir(serp_dir)
            with open(os.path.join(serp_dir, "d.html"), 'wb') as fo:
                fo.write(serp.html_with_seanid)
        for example_set in example_sets:
            assert isinstance(example_set, SerpElementExampleSet)
            for serp_element in example_set.serp_elem_list:
                image = serp_element.image
                if image and image.binary_content:
                    serp_dir = os.path.join(out_dir, self.uuid_folder, serp_element.serp_id)
                    image.filename = os.path.join(serp_dir, os.path.basename(image.filename))
                    if not os.path.exists(serp_dir):
                        os.mkdir(serp_dir)
                    with open(image.filename, 'wb') as fo:
                        fo.write(image.binary_content)
        self.create_simple_htm(out_dir, example_sets)
        subprocess.check_call('tar czf examples.tgz ' + self.uuid_folder, shell=True, cwd=out_dir)

    def create_simple_htm(self, out_dir, example_sets):
        print time.ctime(), "create_simple_htm"
        example_sets.sort(key=lambda ex: (ex.short_skeleton, ex.skeleton))

        short_skeleton2example_count = Counter()
        for example_set in example_sets:
            short_skeleton2example_count[example_set.short_skeleton] += example_set.serp_elem_count

        serp_elements_count = sum(ex.serp_elem_count for ex in example_sets)
        serp_element_groups_count = len(example_sets)
        serp_element_groups_ge2_count = len([1 for example_set in example_sets if example_set.serp_elem_count >= 2])
        short_skeleton_groups_count = len(short_skeleton2example_count)
        short_skeleton_groups_ge2_count = len([1 for cnt in short_skeleton2example_count.values() if cnt >= 2])

        example_sets = filter(lambda g: not self.do_skip_group(g), example_sets)

        index_htm = os.path.join(out_dir, "index.htm")
        print time.ctime(), "write", index_htm
        with codecs.open(index_htm, "wb", encoding="utf8", errors='xmlcharrefreplace') as htm:
            print >>htm, "<html>"
            print >>htm, "<body>"
            for example_set_slice_start in range(0, len(example_sets), self.ONTO_HTM_SLICE_GROUPS_COUNT):
                print >> htm, "<a href=\"onto_%d.htm\">onto_%d.htm</a><br>" % (example_set_slice_start, example_set_slice_start)
            print >>htm, "</body>"
            print >>htm, "</html>"

        for example_set_slice_start in range(0, len(example_sets), self.ONTO_HTM_SLICE_GROUPS_COUNT):
            onto_htm = os.path.join(out_dir, "onto_%d.htm" % example_set_slice_start)
            print time.ctime(), "write", onto_htm
            with codecs.open(onto_htm, "wb", encoding="utf8", errors='xmlcharrefreplace') as htm:
                print >>htm, "<html>"
                print >>htm, " <head>"
                print >>htm, '  <meta http-equiv=Content-Type content="text/html;charset=UTF-8">'
                print >>htm, "  <script src='https://yastatic.net/jquery/2.1.4/jquery.min.js'></script>"
                print >>htm, "  <style>"
                with open("SerpSetSimpleHtm_resource.css") as f:
                    print >>htm, f.read()
                print >>htm, "  </style>"
                print >>htm, "  <script>"
                with open("SerpSetSimpleHtm_resource.js") as f:
                    print >>htm, f.read()
                print >>htm, "  </script>"
                print >>htm, " </head>"
                print >>htm, "<body>"
                # print >>htm, "SERPs processed:", len(serp_list), "<br>"
                print >>htm, "serp_elements count:", serp_elements_count, "<br>"
                print >>htm, "serp_element groups count:", serp_element_groups_count, "<br>"
                print >>htm, "serp_element groups count with >=2 examples:", serp_element_groups_ge2_count, "<br>"
                print >>htm, "serp_element short_skeleton groups count:", short_skeleton_groups_count, "<br>"
                print >>htm, "serp_element short_skeleton groups with >=2 examples:", short_skeleton_groups_ge2_count, "<br>"
                print >>htm, "<table id='tab_serponto' cellspacing=0 cellpadding=0 border=1>"
                print >>htm, " <thead>"
                print >>htm, "  <tr>"
                row = [
                    "#",
                    "subgroup#",
                    "name",
                    "examples",
                    "short_skeleton",
                    "skeleton"
                ]
                print >>htm, "   " + ''.join("<td><b>%s</b></td>" % t for t in row)
                print >>htm, "  </tr>"
                print >>htm, " </thead>"
                print >>htm, " <tbody>"

                last_short_skeleton = None
                short_skeleton_num = -1
                td_colors = ["#FF0000", "#FF7F00", "#FFFF00", "#00FF00", "#0000FF", "#4B0082", "#8B00FF"]
                last_color_num = len(td_colors) - 1

                for skel_num, example_set in enumerate(example_sets[example_set_slice_start : example_set_slice_start + self.ONTO_HTM_SLICE_GROUPS_COUNT]):
                    # sort examples by seanid

                    if self.do_skip_group(example_set):
                        continue

                    curr_short_skeleton = example_set.short_skeleton

                    if curr_short_skeleton != last_short_skeleton:
                        last_short_skeleton = curr_short_skeleton
                        last_color_num = (last_color_num + 1) % len(td_colors)
                        short_skeleton_num += 1

                    print >>htm, "  <tr>"
                    print >>htm, "   <td>", example_set_slice_start + skel_num, "</td>"
                    print >>htm, "   <td bgcolor='", td_colors[last_color_num],"'>", short_skeleton_num, "</td>"
                    print >>htm, "   <td>&nbsp;</td>"
                    print >>htm, "   <td>"
                    print >>htm, "queries:", example_set.query_count, "<br>"
                    print >>htm, "serp elements:", example_set.serp_elem_count, "<br>"
                    self.out_htm_examples(htm, example_set.serp_elem_list)
                    print >>htm, "   </td>"
                    short_skeleton_htm = self.escape_html(example_set.short_skeleton)
                    print >>htm, u"   <td class='td_short_skeleton'><div>%s</pre></td>" % short_skeleton_htm
                    skeleton_htm = self.escape_html(example_set.skeleton)
                    print >>htm, u"   <td class='td_skeleton'><div>%s</pre></td>" % skeleton_htm
                    print >>htm, "  </tr>"

                print >>htm, " </tbody>"
                print >>htm, "</table>"
                print >>htm, "</body>"
                print >>htm, "</html>"

    def out_htm_examples(self, htm, serp_elem_list):
        if not serp_elem_list:
            return
        div_id = "divex_" + serp_elem_list[0].seanid
        print >>htm, "<div class='div_examples_collapse' id='%s'>" % div_id
        print >>htm, " <table id='tab_example'>"
        for serp_elem_num, serp_elem in enumerate(serp_elem_list):
            if serp_elem_num >= self.serp_settings.HTM_SERP_ELEMENTS_SAMPLE_COUNT:
                break
            image = serp_elem.image
            basename = self.uuid_folder + '/' + serp_elem.serp_id
            print >>htm, "  <tr>"
            print >>htm, "   <td>", serp_elem_num, "</td>"
            print >>htm, "   <td>"
            print >>htm, "     <a href='" + basename + "/d.html'>" + self.escape_html(serp_elem.query_text) + "</a>"
            print >>htm, "    ", serp_elem.seanid
            print >>htm, "     <br/>"
            if not image:
                print >>htm, "     no image "
            elif os.path.isfile(image.filename):
                print >>htm, "     <img src='" + basename + "/" + os.path.basename(image.filename) + "'/>"
            elif image.height == 0 or image.width == 0:
                print >>htm, "     invisible element ", image.filename
            else:
                print >>htm, "     no image ", image.filename
            print >>htm, "   </td>"
            print >>htm, "  </tr>"
        print >>htm, " </table>"
        print >>htm, "</div>"

        print >>htm, "<div class='div_expand' id='%s_expand'>" % div_id
        print >>htm, """ <a href='javascript:div_examples_expand("{div_id}")' class='a_expand'>expand</a>""".format(div_id=div_id)
        print >>htm, """ <a href='javascript:div_examples_collapse("{div_id}")' class='a_collapse' style='display:none'>collapse</a>""".format(div_id=div_id)
        print >>htm, "</div>"

    def do_skip_group(self, example_set):
        assert isinstance(example_set, SerpElementExampleSet)
        if example_set.query_count <= self.serp_settings.HTM_SERP_ELEMENTS_SKIP_IF_QUERIES_LE:
            return True
        if example_set.count_good_images <= self.serp_settings.HTM_SERP_ELEMENTS_SKIP_IF_QUERIES_LE:
            return True
        return False

    @staticmethod
    def escape_html(s):
        if isinstance(s, str):
            s = unicode(s, 'utf8', 'xmlcharrefreplace')
        return s.replace('<', '&lt;').replace('>', '&gt;')
