import requests
import re
from cStringIO import StringIO

from sandbox.projects import resource_types as rt
from sandbox.sandboxsdk.task import SandboxTask
#from sandbox.sandboxsdk import parameters


# Endpoint the task downloads the blockstat dictionary from.
BLOCKSTAT_URL = 'https://stat.yandex-team.ru/export_something_to_search.cgi?type=blockstat_dict'
# A valid dictionary line: decimal digits, a semicolon, then one or more word
# characters or hyphens. Written as a raw string so that the regex escapes
# (\d, \w) are not interpreted as (invalid) string-literal escapes.
LINE_RE = re.compile(r'^\d+;[\w-]+$')
# Downloads with fewer lines than this are rejected by the content check.
MIN_LINE_COUNT = 1500
# Local file the validated dictionary is written to before it is published.
OUTPUT_FILENAME = 'blockstat.dict'


class BuildBlockstatDict(SandboxTask):
    '''blockstat.dict builder

    Downloads the dictionary from BLOCKSTAT_URL, validates its contents and
    registers the resulting file as a YA_BLOCKSTAT_DICT resource.
    '''

    # Task type name (presumably the identifier this task is registered
    # under in Sandbox -- confirm against the SDK).
    type = 'BUILD_BLOCKSTAT_DICT'

    def on_execute(self):
        '''Fetch, validate and store the dictionary, then create the resource.

        Returns:
            Whatever ``create_resource`` returns for the written file.

        Raises:
            RuntimeError: the HTTP response status code is not 200.
            ValueError: the downloaded text fails ``check_content``.
        '''
        response = requests.get(BLOCKSTAT_URL, timeout=30)
        status = response.status_code
        if status != 200:
            raise RuntimeError("Response status code is %s" % status)

        payload = response.text
        # Validation happens before the file is opened, so a rejected
        # download never reaches the disk.
        self.check_content(payload)

        with open(OUTPUT_FILENAME, 'w') as dict_file:
            dict_file.write(payload)
            dict_file.flush()

        resource_attrs = {'ttl': 'inf'}
        return self.create_resource(
            'blockstat.dict',
            OUTPUT_FILENAME,
            rt.YA_BLOCKSTAT_DICT,
            attributes=resource_attrs
        )

    @staticmethod
    def check_content(text):
        '''Validate the downloaded dictionary text.

        Each line, with its trailing newline removed, must match LINE_RE, and
        the text must contain at least MIN_LINE_COUNT lines.

        Args:
            text: the full dictionary contents as a single string.

        Raises:
            ValueError: on the first line that does not match LINE_RE, or,
                if every line matches, when there are fewer than
                MIN_LINE_COUNT lines.
        '''
        seen = 0
        # enumerate() starts at 1, so after the loop `seen` is the number of
        # lines read (and stays 0 when the text is empty).
        for seen, raw_line in enumerate(StringIO(text), 1):
            entry = raw_line.rstrip('\n')
            if LINE_RE.match(entry) is None:
                raise ValueError("Malformed line in blockstat.dict: '%s'" % entry)

        if MIN_LINE_COUNT > seen:
            raise ValueError("Too small line count in blockstat: %d < %d" % (seen, MIN_LINE_COUNT))


# Module-level alias for the task class defined in this file; presumably the
# name the Sandbox task loader looks up to discover the task -- TODO confirm.
__Task__ = BuildBlockstatDict
