import sys
import cyson


def get_te_part(gsid):
    """Return the first whitespace-separated token of ``gsid`` starting with ``'TE'``.

    ``gsid`` is split on arbitrary whitespace; tokens are examined in order.
    Returns ``None`` when no token has the ``'TE'`` prefix.
    """
    te_tokens = (token for token in gsid.split() if token.startswith('TE'))
    return next(te_tokens, None)


def check_id(gsid):
    """Extract the check id from a UTF-8 encoded gsid.

    The bytes are decoded as UTF-8, the first whitespace-separated token
    starting with ``'TE'`` is looked up with ``get_te_part``, and the second
    ``':'``-separated field of that token is returned.

    Returns ``'Unknown check id'`` when there is no such token or when the
    token has no ``':'``-separated second field.
    """
    te_part = get_te_part(gsid.decode('utf-8'))
    if te_part:
        fields = te_part.split(':')
        # A token such as 'TEST' matches the 'TE' prefix but has no id field;
        # fall through to the fallback instead of raising IndexError.
        if len(fields) > 1:
            return fields[1]
    return 'Unknown check id'


def get_sb_part(gsid):
    """Return the first whitespace-separated token of ``gsid`` starting with ``'SB'``.

    Returns ``None`` when no token carries the ``'SB'`` prefix.
    """
    sb_tokens = [piece for piece in gsid.split() if piece.startswith('SB')]
    if not sb_tokens:
        return None
    return sb_tokens[0]


def parent_task(gsid):
    """Extract the parent task field from a UTF-8 encoded gsid.

    The bytes are decoded as UTF-8 and the first whitespace-separated token
    starting with ``'SB'`` is looked up with ``get_sb_part``. When that token
    contains ``'AUTOCHECK_BUILD_PARENT_2'``, its third ``':'``-separated field
    is returned.

    Returns ``'Unknown parent task'`` when there is no such token, when the
    token lacks the marker, or when it has fewer than three ``':'``-separated
    fields.
    """
    sb_part = get_sb_part(gsid.decode('utf-8'))
    if sb_part and 'AUTOCHECK_BUILD_PARENT_2' in sb_part:
        fields = sb_part.split(':')
        # The marker can be present without a trailing task field
        # (e.g. 'SB:AUTOCHECK_BUILD_PARENT_2'); use the fallback rather than
        # raising IndexError.
        if len(fields) > 2:
            return fields[2]
    return 'Unknown parent task'


def chop_name(stage_name):
    """Strip a trailing ``b"_started"`` or ``b"_finished"`` from ``stage_name``.

    ``stage_name`` is a bytes object. When neither suffix is present, a
    diagnostic line is written to stderr and the name is returned unchanged.
    """
    known_suffixes = (b"_started", b"_finished")
    matched = next(
        (suffix for suffix in known_suffixes if stage_name.endswith(suffix)),
        None,
    )
    if matched is not None:
        keep = len(stage_name) - len(matched)
        return stage_name[:keep]

    message = 'Uknown suffix for stage_name: {}'.format(stage_name)
    print(message, file=sys.stderr)
    return stage_name


def get_stages_durations(payload):
    """Compute per-stage durations from ``*_started`` / ``*_finished`` marks.

    ``payload.get(b'payload')`` is expected to map mark names (bytes ending
    in ``b'_started'`` or ``b'_finished'``) to values that can be ordered and
    subtracted (called timestamps here). Keys with any other suffix are
    reported on stderr and ignored.

    For a stage with both marks the result holds
    ``{stage: finished - started}``.

    A stage with a start mark only is reported under
    ``stage + b'_wo_finished_mark'``. Its end is estimated as the start of
    the next stage in start-time order or, for the last started stage, as
    the latest finish timestamp in the payload. If the payload has no finish
    marks at all, that last stage has no reference end time and is left out
    of the result.

    Returns an empty dict when the ``b'payload'`` entry is missing, empty,
    or contains ``b'error'``.
    """
    stages = payload.get(b'payload')
    if not stages or b'error' in stages:
        return {}

    started_timeline = []
    finished = {}
    for stage, timestamp in stages.items():
        # chop_name is only called once the suffix is recognised, so an
        # unusual key produces a single diagnostic line instead of two.
        if stage.endswith(b"_started"):
            started_timeline.append((chop_name(stage), timestamp))
        elif stage.endswith(b"_finished"):
            finished[chop_name(stage)] = timestamp
        else:
            print('skip unusaul stage: {}'.format(stage), file=sys.stderr)

    started_timeline.sort(key=lambda mark: mark[1])
    # Latest finish mark, used as the estimated end of the last started stage
    # when that stage has no finish mark of its own.
    last_finish_time = max(finished.values()) if finished else None

    result = {}
    for i, (stage_name, start_time) in enumerate(started_timeline):
        if stage_name in finished:
            result[stage_name] = finished[stage_name] - start_time
            continue

        if i + 1 < len(started_timeline):
            finish_time = started_timeline[i + 1][1]
        elif last_finish_time is not None:
            finish_time = last_finish_time
        else:
            # No later start and no finish marks at all: there is nothing to
            # estimate the end from (previously this raised IndexError).
            print('no finish time for stage: {}'.format(stage_name), file=sys.stderr)
            continue
        result[stage_name + b'_wo_finished_mark'] = finish_time - start_time

    return result


def unclosed_stages(payload):
    """Return the names of stages whose start/finish marks are unpaired.

    ``payload.get(b'payload')`` is expected to map mark names (bytes ending
    in ``b'_started'`` or ``b'_finished'``) to values; only the keys are
    used. The result is the symmetric difference of the started and finished
    stage names, so it contains stages that were started but not finished
    as well as stages that have a finish mark without a start mark.

    Keys with any other suffix are ignored (``chop_name`` reports them on
    stderr). Returns an empty set when the ``b'payload'`` entry is missing
    or empty.
    """
    stages = payload.get(b'payload')
    if not stages:
        # Missing or empty payload: nothing is recorded, so nothing can be
        # unpaired (previously a missing entry raised AttributeError).
        return set()

    open_stages = set()
    finished_stages = set()

    for stage in stages:
        stage_name = chop_name(stage)
        if stage.endswith(b'_started'):
            open_stages.add(stage_name)
        elif stage.endswith(b'_finished'):
            finished_stages.add(stage_name)

    return open_stages ^ finished_stages


# Attach cyson's (loads, dumps) pair to both stage helpers under the
# `_yql_convert_yson` attribute.
# NOTE(review): presumably read by the YQL Python UDF runtime to decode YSON
# arguments and encode results -- confirm against the YQL documentation.
get_stages_durations._yql_convert_yson = (cyson.loads, cyson.dumps)
unclosed_stages._yql_convert_yson = (cyson.loads, cyson.dumps)

