# coding=utf-8

import math
import json
import logging
from collections import defaultdict
from sandbox.projects.sandbox_ci import pulse


class AccessLogParser(object):
    """Collect request texts from an access log, most frequent first.

    The log is read from ``access_log.path``; every non-blank line is decoded
    as a JSON document whose ``path`` value is passed to
    ``pulse.get_text_from_url`` to obtain the text.
    """

    def __init__(self, access_log):
        """
        :param access_log: object whose ``path`` attribute locates the log
            file (it is passed through ``str()`` before opening)
        """
        self._access_log = access_log
        # Distinct texts ordered by descending frequency; filled by parse().
        self.texts = []

    def parse(self):
        """
        Read the access log and rebuild ``self.texts``.

        Lines without a ``path`` value, and paths from which no text can be
        extracted, are skipped with a warning. Blank lines are skipped
        silently.

        :raises ValueError: when a non-blank line is not valid JSON
        """
        texts_stats = defaultdict(int)
        with open(str(self._access_log.path)) as fp:
            for line in fp:
                # A blank line is not a JSON document; do not let it abort
                # the whole parse.
                if not line.strip():
                    continue

                request_data = json.loads(line)
                url_path = request_data.get('path')
                if not url_path:
                    logging.warning('There is no path in %s', line)
                    continue

                text = pulse.get_text_from_url(url_path)
                if not text:
                    logging.warning('There is no text in %s', url_path)
                    continue

                texts_stats[text] += 1

        # sorted() accepts both the Python 2 list and the Python 3 view
        # returned by items(); the sort is stable, so equally frequent texts
        # keep the dict's iteration order.
        ranked = sorted(
            texts_stats.items(), key=lambda item: item[1], reverse=True)
        self.texts = [text for text, _ in ranked]

    def get_texts(self, texts_count):
        """
        Return exactly ``texts_count`` texts, most frequent first.

        When fewer distinct texts are known than requested, the known texts
        are repeated cyclically until the requested amount is reached.

        :param texts_count: number of texts to return
        :type texts_count: int
        :return: ``texts_count`` texts, or an empty list when ``texts_count``
            is not positive or no texts are known
        :rtype: list of basestring
        """
        if texts_count <= 0 or not self.texts:
            # Nothing requested or nothing to cycle through; this also keeps
            # the repeat factor below from dividing by zero.
            return []

        results = self.texts[:texts_count]
        if len(results) < texts_count:
            repeats = int(math.ceil(texts_count / float(len(results))))
            results = (results * repeats)[:texts_count]

        return results
