import os
import logging

from sandbox import sdk2
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils
from sandbox.projects.suggest.dicts import SuggestDictTask
from sandbox.projects.answers.resources import AnswersSuggestUsersData


class BuildAnswersUsersDicts(sdk2.Task, SuggestDictTask):
    """ Build suggest dictionary for Answers users """

    @staticmethod
    def normalize(string):
        """Lowercase ``string`` and turn every non-alphanumeric run into one space.

        Leading and trailing separators are dropped, so the result is the
        alphanumeric words of the input joined by single spaces. The result is
        an empty string when the input has no alphanumeric characters.
        """
        chars = [c if c.isalnum() else ' ' for c in string.lower()]
        # Every separator is already a plain space, so a whitespace split
        # yields exactly the non-empty words.
        return ' '.join(''.join(chars).split())

    @staticmethod
    def _normalized_alias(name):
        """Return the normalized spelling of ``name``, or None if there is none.

        ``name`` is a UTF-8 byte string of the form ``<base>:<suffix>``. Only
        the base is normalized; the suffix after the last ':' is kept verbatim.
        None is returned when the name has no ':' separator, when the base is
        not valid UTF-8, when the normalized base is empty, or when the
        normalized spelling is identical to ``name``.
        """
        try:
            base, suffix = name.rsplit(':', 1)
            normalized_base = BuildAnswersUsersDicts.normalize(base.decode('utf-8')).encode('utf-8')
        except ValueError:
            # Covers both a missing ':' (unpacking error) and undecodable
            # bytes (UnicodeDecodeError is a ValueError). Such names simply
            # get no alias, as before.
            logging.debug('No normalized alias for name %r', name)
            return None

        # Check the base, not the joined string: with ':' + suffix appended
        # the joined string is never empty, which would let a key like
        # ':suffix' through for punctuation-only names.
        if not normalized_base:
            return None

        alias = normalized_base + ':' + suffix
        if alias == name:
            return None
        return alias

    def on_execute(self):
        """Build the 'pre' and 'prod' dictionaries and publish them together.

        Both dictionaries are built under ``<cwd>/dicts`` and that directory is
        then passed to ``publish_dict`` with autodeploy enabled.
        """
        cwd = os.getcwd()
        root_dicts_path = os.path.join(cwd, 'dicts')
        os.makedirs(root_dicts_path)

        self.build_dict_for_env(root_dicts_path, 'pre', 'prestable')
        self.build_dict_for_env(root_dicts_path, 'prod', 'production')
        self.publish_dict('answers_users',
                          'User suggest dictionary for answers service',
                          root_dicts_path,
                          autodeploy=True)

    def build_dict_for_env(self, root_dicts_path, dict_name, env_name):
        """Build one dictionary from the latest users data of an environment.

        The latest ``AnswersSuggestUsersData`` resource whose ``env`` attribute
        equals ``env_name`` is read line by line. Each line must hold exactly
        four tab-separated fields: name, data, frequency and a fourth field
        that is ignored. For every line this produces:

        * a data record ``name<TAB>data``;
        * a query record ``name<TAB><TAB>freq``;
        * when the name has a distinct normalized spelling, an extra query
          record ``normalized<TAB>name<TAB>freq``.

        The intermediate ``queries``, ``groups``, ``streams`` and ``data``
        files are written to the current working directory and are overwritten
        on every call.

        :param root_dicts_path: directory in which ``<dict_name>/`` is created
        :param dict_name: name of the dictionary subdirectory and file prefix
        :param env_name: value of the resource ``env`` attribute to look up
        :raises ValueError: if an input line does not have four fields
        """
        resource = utils.get_and_check_last_resource_with_attribute(AnswersSuggestUsersData,
                                                                    attr_name='env',
                                                                    attr_value=env_name)
        logging.info('Env = %s, resource id = %s', env_name, resource.id)
        resource_path = utils.sync_resource(resource.id)
        # 0o777 is the same mode as the legacy 0777 literal and is also
        # valid syntax on Python 3.
        os.chmod(resource_path, 0o777)

        queries = []
        datas = []

        with open(resource_path) as source:
            # Iterate the file lazily instead of loading it with readlines().
            for line in source:
                name, data, freq, _ = line.split('\t')
                datas.append(name + '\t' + data + '\n')
                queries.append(name + '\t\t' + freq + '\n')

                logging.debug('Processing name: %s', name)
                alias = BuildAnswersUsersDicts._normalized_alias(name)
                if alias is not None:
                    queries.append(alias + '\t' + name + '\t' + freq + '\n')

        cwd = os.getcwd()

        queries_path = os.path.join(cwd, 'queries')
        queries.sort()
        fu.write_lines(queries_path, queries)

        groups_path = os.path.join(cwd, 'groups')
        fu.write_lines(groups_path, ['\n'])

        streams_path = os.path.join(cwd, 'streams')
        fu.write_lines(streams_path, ['ALL\t10\n'])

        data_path = os.path.join(cwd, 'data')
        fu.write_lines(data_path, datas)

        dict_path = os.path.join(root_dicts_path, dict_name)
        os.makedirs(dict_path)

        dict_prefix = os.path.join(dict_path, dict_name)
        self.run_data_builder(dict_prefix,
                              queries_path,
                              groups_path,
                              streams_path,
                              data_path,
                              word_index=True)
