import datetime
import pytz
import shutil
import sys
import time
import boto3
from dateutil.relativedelta import relativedelta

from django.db import connection

import yt.wrapper as yt

from intranet.dogma.dogma.core.tasks import *

# Module-level side effect: importing this module attaches a stream handler to
# the 'boto3' logger (boto3 uses DEBUG as the default level for this call).
boto3.set_stream_logger('boto3')


def clean_sqs(
    endpoint='http://sqs.yandex.net:8771',
    access_key='dogma',
    secret_key='not used yet',
    current_node=False,
    verbose=True,
):
    """Purge SQS queues, optionally only those that belong to the current node.

    Args:
        endpoint: URL of the SQS-compatible endpoint to talk to.
        access_key: access key id handed to the boto3 client.
        secret_key: secret access key handed to the boto3 client.
        current_node: when true, purge only queues whose URL contains the
            first dot-separated label of ``get_current_node().hostname``;
            when false, purge every queue returned by ``list_queues``.
        verbose: print each queue URL right before it is purged.
    """
    hostname = None
    if current_node:
        hostname = get_current_node().hostname.split(".")[0]

    sqs = boto3.client(
        'sqs',
        region_name='yandex',
        endpoint_url=endpoint,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )

    # ``list_queues`` leaves the ``QueueUrls`` key out of the response when
    # there are no queues, so indexing it directly would raise KeyError.
    for queue in sqs.list_queues().get('QueueUrls', []):
        if current_node and hostname not in queue:
            continue
        if verbose:
            print(queue)
        sqs.purge_queue(QueueUrl=queue)


def get_clones_distribution(d=None, verbose=True):
    """Count not-yet-refreshed clones of active repos, grouped by node.

    A clone is counted when its ``modified`` timestamp is earlier than
    midnight (UTC+03:00) of day ``d`` and later than 2019-10-01 00:00 +03,
    and its repository is active.

    Args:
        d: date/datetime whose year, month and day define the upper bound.
            Defaults to the current moment shifted to UTC+3.
        verbose: print the result as a table. The parameter used to be
            ignored; the default keeps the previous always-print behaviour.

    Returns:
        A list of ``(count, node name, node hostname)`` tuples ordered by
        hostname.
    """
    if d is None:
        d = datetime.datetime.utcnow() + datetime.timedelta(hours=3)

    day_start = datetime.datetime(d.year, d.month, d.day, 0, 0, 0, 0, pytz.FixedOffset(180))

    with connection.cursor() as c:
        c.execute(
            """
            SELECT count(*), core_node.name, core_node.hostname
            FROM core_clone
            join core_node on core_clone.node_id = core_node.id, core_repo
            WHERE core_clone.modified < %s AND core_clone.modified > '2019-10-01 00:00:00+03'
            AND core_clone.repo_id = core_repo.id AND core_repo.is_active=true
            GROUP BY 2, 3
            ORDER BY 3
            """,
            [day_start],
        )
        res = [(r[0], r[1], r[2]) for r in c.fetchall()]

    if verbose:
        row_format = '%7s\t%12s\t%40s'
        print(row_format % ('Count', 'Name', 'Host'))
        for row in res:
            print(row_format % row)

    return res


def force_fetch_clones(d=None, curnode=None):
    """Synchronously fetch every stale clone of an active repo on one node.

    A clone is selected when ``modified`` is not later than midnight
    (UTC+03:00) of day ``d`` and not earlier than 2019-10-01.

    The clones are walked in ``id`` order in batches of 15, each batch
    starting after the last id already handled. The previous implementation
    re-ran the same "first 15" query until it came back empty, which never
    terminated if a fetch kept failing (or did not move ``modified``), since
    the same clones were selected again and again.

    Args:
        d: date/datetime whose year, month and day define the upper bound.
            Defaults to the current moment shifted to UTC+3.
        curnode: node whose clones are fetched; defaults to
            ``get_current_node()``.
    """
    if curnode is None:
        curnode = get_current_node()

    if d is None:
        d = datetime.datetime.utcnow() + datetime.timedelta(hours=3)

    stale_clones = Clone.objects.filter(
        modified__lte=datetime.datetime(d.year, d.month, d.day, 0, 0, 0, 0, pytz.FixedOffset(180)),
        modified__gte=datetime.datetime(2019, 10, 1),
        node=curnode,
        repo__is_active=True,
    ).order_by('id')

    batch_size = 15
    last_id = None
    while True:
        batch = stale_clones if last_id is None else stale_clones.filter(id__gt=last_id)
        batch = list(batch[:batch_size])
        if not batch:
            break
        for clone in batch:
            # Advance the cursor before fetching so a failing clone is
            # skipped instead of being retried forever.
            last_id = clone.id
            try:
                fetch_clone(clone.id)
            except Exception as exc:
                # Best effort: report the failure and carry on with the rest.
                print('ERROR:', clone, exc)


def force_fetch_clones_async(d=None, curnode=None, queue="clone"):
    """Enqueue a ``fetch_clone`` task for every stale clone on one node.

    A clone is selected when ``modified`` is not later than midnight
    (UTC+03:00) of day ``d`` and not earlier than 2019-10-01, it belongs to
    ``curnode`` and its repository is active.

    Args:
        d: date/datetime whose year, month and day define the upper bound.
            Defaults to the current moment shifted to UTC+3.
        curnode: node whose clones are selected; defaults to
            ``get_current_node()``.
        queue: queue name passed to ``get_node_queue`` to resolve the
            destination queue of each task.
    """
    if curnode is None:
        curnode = get_current_node()

    if d is None:
        d = datetime.datetime.utcnow() + datetime.timedelta(hours=3)

    stale_clones = Clone.objects.filter(
        modified__lte=datetime.datetime(d.year, d.month, d.day, 0, 0, 0, 0, pytz.FixedOffset(180)),
        modified__gte=datetime.datetime(2019, 10, 1),
        node=curnode,
        repo__is_active=True,
    ).order_by('id')

    for clone in stale_clones:
        try:
            fetch_clone.apply_async(
                args=[clone.id],
                queue=get_node_queue(queue)
            )
        except Exception as exc:
            # Best effort, but do not hide the failure: a silently dropped
            # clone would simply never be fetched.
            print('ERROR:', clone, exc)


def flush_lost_commits_to_yt():
    """Append to the YT audit table the pushed commits that are missing there.

    Outline:
      1. Take the ``flush_lost_commits_to_yt`` lock (raise if it is not
         acquired).
      2. Read all ``(commit_id, repo_id)`` pairs from the YT table into memory.
      3. Select repos whose newest pushed commit is younger than
         ``days_interval`` days.
      4. For each such repo, collect DB commits (since a fixed start date)
         that are absent from YT, re-check them under a per-repo lock against
         rows appended to YT in the meantime, and buffer them for writing.
      5. Write the buffer to YT in batches (when it exceeds 10000 rows, and
         once more at the end).

    Raises:
        Exception: when the top-level lock is not acquired, or, after all
            writes, when at least one repo was skipped because its per-repo
            lock was not acquired.

    NOTE(review): ``calendar``, ``platform``, ``now``, ``settings``,
    ``locked_context``, ``Repo`` and ``PushedCommit`` are not imported
    explicitly in this file; presumably they arrive through the star import
    from ``core.tasks`` -- confirm.
    """
    with locked_context('flush_lost_commits_to_yt') as acquired:
        if acquired:
            days_interval=90
            print('Start flush lost commits to yt')

            # Number of YT rows consumed so far; later reads resume from here
            # so that only newly appended rows are fetched.
            yt_table_index = 0
            yt_table_path = settings.YT_AUDIT_PATH + settings.YT_AUDIT_TABLE
            # str(repo_id) -> set of commit ids already present in YT.
            commit_ids_by_repo = dict()
            yt_client = yt.YtClient(proxy=settings.YT_AUDIT_CLUSTER, token=settings.YT_AUDIT_TOKEN)

            print('Start load previous data from yt')

            # Initial full scan of the YT table (start_index is 0 here).
            for row in yt_client.read_table(yt_client.TablePath(yt_table_path, columns=['commit_id', 'repo_id'], start_index=yt_table_index)):
                yt_repo_id = row['repo_id']
                if commit_ids_by_repo.get(str(yt_repo_id)) is None:
                    commit_ids_by_repo[str(yt_repo_id)] = set()
                commit_ids_by_repo.get(str(yt_repo_id)).add(row['commit_id'])
                yt_table_index = yt_table_index + 1

            print('Finish load previous data from yt')

            # Ids of repos whose latest pushed commit is newer than
            # "now (UTC+03:00) minus days_interval days", ordered by id.
            repos_for_process = [repo.id for repo in Repo.objects.raw(
                    'SELECT id '
                    'FROM core_repo '
                    'WHERE ( '
                    '   SELECT max(commit_time) > %s '
                    '   FROM core_pushedcommit WHERE core_pushedcommit.repo_id = core_repo.id '
                    ') '
                    'ORDER BY id ', (
                        datetime.datetime.now(pytz.FixedOffset(180)) - datetime.timedelta(days=days_interval),
                    )
                )]

            print('Repos for process count: {}'.format(len(repos_for_process)))

            # Repos whose per-repo lock could not be taken; reported at the end.
            skipped_repos = []
            # Rows buffered for the next YT write.
            result_commits = []
            # Only commits at or after this fixed moment are considered.
            commits_start_dt = datetime.datetime(2020, 3, 2, 0, 0, 0, 0, pytz.FixedOffset(180))
            for repo_id in repos_for_process:
                if PushedCommit.objects.filter(repo__id=repo_id, commit_time__gte=commits_start_dt).exists():
                    # Commit ids known to be in YT for this repo (empty list
                    # when the repo has no rows there yet).
                    commit_ids = commit_ids_by_repo.get(str(repo_id), [])

                    db_commits = PushedCommit.objects.select_related('author', 'committer', 'repo', 'repo__source').filter(repo__id=repo_id, commit_time__gte=commits_start_dt)
                    # Candidate rows: commits not yet seen in YT whose message
                    # does not contain 'git-svn-id'.
                    iteration_commits = [
                        {
                            'commit_id': commit.commit,
                            'commit_time': calendar.timegm(commit.commit_time.utctimetuple()),
                            'author': commit.author.login,
                            'committer': commit.committer.login,
                            'lines_added': commit.lines_added,
                            'lines_deleted': commit.lines_deleted,
                            'repo_vcs_name': commit.repo.vcs_name,
                            'repo_id': commit.repo.id,
                            'source_host': platform.node(),
                            'repo_vcs_type': commit.repo.source.host,
                            'branch_name': commit.branch_name,
                        }
                        for commit in db_commits
                        if 'git-svn-id' not in commit.message and not (commit.commit in commit_ids)
                    ]

                    # Nothing is missing: just stamp the repo as synced.
                    if not iteration_commits:
                        repo = Repo.objects.get(pk=repo_id)
                        repo.last_yt_sync_time = now()
                        repo.save()
                        continue
                    lock_key = 'clone_action(' + str(repo_id) + ')_create_commits'
                    # NOTE: this rebinds ``acquired``, shadowing the outer
                    # lock's flag; the outer flag is not read again afterwards.
                    with locked_context(lock_key) as acquired:
                        if acquired:
                            # Catch up with rows appended to YT since the last
                            # read, resuming from yt_table_index.
                            for row in yt_client.read_table(yt_client.TablePath(
                                    yt_table_path,
                                    columns=['commit_id', 'repo_id'],
                                    start_index=yt_table_index
                            )):
                                yt_table_index = yt_table_index + 1
                                yt_repo_id = row['repo_id']
                                if commit_ids_by_repo.get(str(yt_repo_id)) is None:
                                    commit_ids_by_repo[str(yt_repo_id)] = set()
                                commit_ids_by_repo.get(str(yt_repo_id)).add(row['commit_id'])

                            # Re-filter the candidates against the refreshed
                            # set of commit ids.
                            commit_ids = commit_ids_by_repo.get(str(repo_id), [])
                            iteration_commits = [e for e in iteration_commits if e['commit_id'] not in commit_ids]
                            if iteration_commits:
                                print('Found {} lost commits for repository {}'.format(len(iteration_commits), repo_id))
                                result_commits += iteration_commits
                                # Flush once the buffer grows past 10000 rows.
                                if len(result_commits) > 10000:
                                    print('Start flush to yt {} lost commits'.format(len(result_commits)))
                                    yt_client.write_table(yt_client.TablePath(yt_table_path, append=True), result_commits)
                                    result_commits = []
                            # The repo is stamped as synced here even if its
                            # rows are still only buffered in result_commits.
                            repo = Repo.objects.get(pk=repo_id)
                            repo.last_yt_sync_time = now()
                            repo.save()

                        else:
                            print('Lock `{}` is acquired by another process'.format(lock_key))
                            skipped_repos.append(repo_id)

            # Final flush of whatever is left in the buffer.
            if result_commits:
                print('Start flush to yt {} lost commits'.format(len(result_commits)))
                yt_client.write_table(yt_client.TablePath(yt_table_path, append=True), result_commits)
            # Raised only after the final flush, so the work done for other
            # repos is not lost.
            if skipped_repos:
                raise Exception('Can\'t flush commits for repos: {}'.format(skipped_repos))
        else:
            raise Exception('Lock `flush_lost_commits_to_yt` is acquired by another process')


def create_snapshot(d=None, create_link=True):
    """Build the daily ``commits_<yy-mm-dd>`` snapshot table in YT.

    The snapshot is named after the day before ``d`` and receives the rows of
    the audit table whose ``commit_time`` lies in the six months ending at
    midnight (UTC+03:00) of day ``d``, minus a fixed set of excluded
    repositories and one excluded branch of repo 50016.

    Args:
        d: date/datetime whose year, month and day define the snapshot.
            Defaults to the current moment shifted to UTC+3.
        create_link: when true, (re)point the ``commits`` link in the audit
            directory to the snapshot table.
    """
    client = yt.YtClient(proxy=settings.YT_AUDIT_CLUSTER, token=settings.YT_AUDIT_TOKEN)
    audit_table = settings.YT_AUDIT_PATH + settings.YT_AUDIT_TABLE

    if d is None:
        d = datetime.datetime.utcnow() + datetime.timedelta(hours=3)

    one_day = datetime.timedelta(days=1)
    day_start = datetime.datetime(d.year, d.month, d.day, 0, 0, 0, 0, pytz.FixedOffset(180)) - one_day
    window_end = day_start + one_day
    # Half-open window [six months before window_end, window_end), in epoch seconds.
    upper_bound_sec = calendar.timegm(window_end.utctimetuple())
    lower_bound_sec = calendar.timegm((window_end + relativedelta(months=-6)).utctimetuple())

    snapshot_table = settings.YT_AUDIT_PATH + 'commits_' + day_start.strftime('%y-%m-%d')
    if not yt_exists(client, snapshot_table):
        log.info('Table {} does not exist, creating'.format(snapshot_table))
        columns = (
            ('commit_id', 'string'),
            ('commit_time', 'datetime'),
            ('author', 'string'),
            ('committer', 'string'),
            ('lines_added', 'uint32'),
            ('lines_deleted', 'uint32'),
            ('repo_vcs_name', 'string'),
            ('repo_id', 'int32'),
            ('source_host', 'string'),
            ('repo_vcs_type', 'string'),
            ('branch_name', 'string'),
        )
        client.create(
            'table',
            snapshot_table,
            attributes={
                'schema': [{'name': name, 'type': type_name} for name, type_name in columns],
                'optimize_for': 'scan'
            },
            recursive=True
        )

    # Repositories left out of the snapshot entirely (the original marks the
    # first one as "mobile/monorepo bb").
    excluded_repo_ids = frozenset((143554, 147593, 95983, 35641, 168378, 116542))

    def belongs_to_snapshot(row):
        commit_sec = int(row['commit_time'])
        if not (lower_bound_sec <= commit_sec < upper_bound_sec):
            return False
        # not browser bb
        if row['repo_vcs_type'] == 'bitbucket.browser.yandex-team.ru':
            return False
        if row['repo_id'] in excluded_repo_ids:
            return False
        return row['repo_id'] != 50016 or row['branch_name'] != 'releases/experimental/mobile/master'

    snapshot_rows = [
        row for row in client.read_table(client.TablePath(audit_table))
        if belongs_to_snapshot(row)
    ]
    client.write_table(client.TablePath(snapshot_table, append=True), snapshot_rows)

    if create_link:
        client.link(target_path=snapshot_table, link_path=settings.YT_AUDIT_PATH + 'commits', force=True)


def yt_exists(client, path):
    """Return whether ``path`` exists according to the given YT client."""
    return client.exists(path)


def merge_chunks(table_path='//statbox/qpulse/dogma/raw_commits'):
    """Run a YT merge of a table onto itself with ``combine_chunks`` enabled.

    Args:
        table_path: YT path used as both source and destination of the
            merge. Defaults to the previously hard-coded raw commits table,
            so existing zero-argument calls behave as before.
    """
    yt_client = yt.YtClient(proxy=settings.YT_AUDIT_CLUSTER, token=settings.YT_AUDIT_TOKEN)
    yt_client.run_merge(
        table_path,
        table_path,
        spec={'combine_chunks': True},
    )


def add_clones_to_sqs(d=None):
    """Enqueue a ``fetch_clone`` task for every stale clone on every node.

    A clone is selected when ``modified`` is not later than midnight
    (UTC+03:00) of day ``d`` and not earlier than 2019-10-01, and its
    repository is active. Each task goes to the ``clone`` queue of the node
    that owns the clone. Progress and enqueue errors are printed.

    Args:
        d: date/datetime whose year, month and day define the upper bound.
            Defaults to the current moment shifted to UTC+3.
    """
    if d is None:
        d = datetime.datetime.utcnow() + datetime.timedelta(hours=3)

    # ``clone.node`` is read for every clone below; joining it here avoids
    # one extra query per clone.
    clones = Clone.objects.filter(
        modified__lte=datetime.datetime(d.year, d.month, d.day, 0, 0, 0, 0, pytz.FixedOffset(180)),
        modified__gte=datetime.datetime(2019, 10, 1),
        repo__is_active=True
    ).select_related('node').order_by('id')

    # len() evaluates the queryset once; the loop below reuses its cache.
    nclones = len(clones)

    for i, clone in enumerate(clones):
        print(d, "{}/{}".format(i, nclones))
        try:
            fetch_clone.apply_async(args=[clone.id], queue=get_node_queue('clone', clone.node))
        except Exception as exc:
            print('ERROR:', clone, exc)


def run_sql(sql):
    """Execute ``sql`` on the Django connection and return every fetched row."""
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
    return rows


def timed_run_sql(sql):
    """Run ``sql`` via ``run_sql`` and write the elapsed seconds to stderr.

    Returns whatever ``run_sql`` returned.
    """
    started_at = time.time()
    rows = run_sql(sql)
    elapsed = time.time() - started_at
    sys.stderr.write("Duration: {}s\n".format(elapsed))
    return rows


def get_top_arcadia_committers(
    skip_contrib=True,
    skip_junk=True,
    interval="1 months",
    limit=200,
    extensions=None,
):
    """Rank authors of repo 50016 by lines changed over a recent interval.

    All variable parts of the query (interval, path patterns, extensions,
    limit) are passed as query parameters rather than interpolated into the
    SQL text, so values containing quotes cannot break or alter the
    statement.

    Args:
        skip_contrib: exclude changed files whose name matches
            ``(a|b)/(contrib|vendor)/%``.
        skip_junk: exclude changed files whose name matches ``(a|b)/junk/%``.
        interval: SQL interval text (e.g. ``"1 months"``); only commits newer
            than ``CURRENT_TIMESTAMP`` minus this interval are counted.
        limit: maximum number of rows returned.
        extensions: optional iterable of file extensions; when non-empty,
            only changed files with one of these extensions are counted.

    Returns:
        Rows of ``(login, distinct commit count, lines added, lines
        deleted)``, ordered by added + deleted lines, descending.
    """
    predicates = []
    params = [interval]

    # The patterns contain '%', which would need escaping inside a
    # parameterized statement, so they are passed as parameters too.
    if skip_contrib:
        predicates.append("AND core_changedfile.name NOT SIMILAR TO %s")
        params.append('(a|b)/(contrib|vendor)/%')
    if skip_junk:
        predicates.append("AND core_changedfile.name NOT SIMILAR TO %s")
        params.append('(a|b)/junk/%')
    if extensions:
        extension_list = list(extensions)
        placeholders = ", ".join(["%s"] * len(extension_list))
        predicates.append("AND core_changedfile.extension IN ({})".format(placeholders))
        params.extend(extension_list)

    params.append(limit)

    sql = """
        SELECT
            core_user.login,
            COUNT(DISTINCT core_pushedcommit.id),
            SUM(core_changedfile.lines_added),
            SUM(core_changedfile.lines_deleted)
        FROM core_pushedcommit
        JOIN core_user ON core_pushedcommit.author_id = core_user.id
        JOIN core_changedfile ON core_pushedcommit.id = core_changedfile.commit_id
        WHERE core_pushedcommit.repo_id = 50016
        AND core_pushedcommit.commit_time > CURRENT_TIMESTAMP - CAST(%s AS INTERVAL)
        {predicates}
        GROUP BY core_user.login
        ORDER BY SUM(core_changedfile.lines_added) + SUM(core_changedfile.lines_deleted) DESC
        LIMIT %s
    """.format(predicates="\n".join(predicates))

    with connection.cursor() as c:
        c.execute(sql, params)
        return c.fetchall()


def dump_bb_commits_from_staff(yt_table_path=None, file_path=None):
    """Export pushed commits of source 8 whose author is marked ``from_staff``.

    Every commit becomes a dict with ``commit_id``, ``vcs_name``, ``login``,
    ``email`` and ``timestamp`` (epoch seconds). Progress is printed as a
    fraction every 10000 commits.

    Args:
        yt_table_path: when set, create this YT table and write the rows to it.
        file_path: when set, dump the rows to this file as indented JSON.
    """
    commits = list(
        PushedCommit.objects
        .filter(repo__source_id=8, author__from_staff=True)
        .prefetch_related('repo', 'author')
    )

    rows = []
    for position, commit in enumerate(commits):
        if position % 10000 == 0:
            print(float(position)/len(commits))
        row = {
            "commit_id": commit.commit,
            "vcs_name": commit.repo.vcs_name,
            "login": commit.author.login,
            "email": commit.author.email,
            "timestamp": calendar.timegm(commit.commit_time.utctimetuple())
        }
        rows.append(row)

    if file_path:
        with open(file_path, "w") as out:
            import json
            json.dump(rows, out, indent=4)

    if not yt_table_path:
        return

    client = yt.YtClient(proxy=settings.YT_AUDIT_CLUSTER, token=settings.YT_AUDIT_TOKEN)
    columns = (
        ('commit_id', 'string'),
        ('timestamp', 'datetime'),
        ('vcs_name', 'string'),
        ('login', 'string'),
        ('email', 'string'),
    )
    client.create(
        'table',
        yt_table_path,
        attributes={
            'schema': [{'name': name, 'type': type_name} for name, type_name in columns],
            'optimize_for': 'scan'
        },
        recursive=True
    )
    client.write_table(client.TablePath(yt_table_path), rows)


def mark_repos_flushed(ids):
    """Set ``last_yt_sync_time`` to the current time on the given repos.

    Args:
        ids: iterable of ``Repo`` primary keys. ``Repo.objects.get`` raises
            if one of them does not exist.
    """
    # Local import keeps this block self-contained. timezone.now() returns an
    # aware datetime when USE_TZ is enabled (and a naive one otherwise),
    # unlike datetime.datetime.now(), which is always naive local time.
    from django.utils import timezone

    for repo_id in ids:
        repo = Repo.objects.get(pk=repo_id)
        repo.last_yt_sync_time = timezone.now()
        repo.save()


def deduplicate_repos():
    """Drop current-node clones of repositories that have more than one clone.

    For each clone returned by ``Clone.objects.on_current_node()`` whose
    repository has more than one ``Clone`` row at that moment, the row is
    deleted and its directory is removed from disk if it exists.

    Returns:
        The number of clones removed.
    """
    removed = 0
    for local_clone in Clone.objects.on_current_node():
        clones_of_repo = Clone.objects.filter(repo_id=local_clone.repo_id).count()
        if clones_of_repo <= 1:
            continue
        print(local_clone)
        # Remember the path before the row goes away.
        clone_dir = local_clone.path
        local_clone.delete()
        if os.path.exists(clone_dir):
            shutil.rmtree(clone_dir)
        removed += 1
    return removed


def refetch_failed_clones(node=None):
    """Delete failed clones and enqueue a fresh ``clone_repo`` for each.

    Args:
        node: when given, only failed clones on this node are processed;
            otherwise failed clones on all nodes are.
    """
    # The node is needed for every clone to pick the queue; join it up front
    # instead of fetching it clone by clone.
    failed_clones = Clone.objects.filter(status=Clone.STATUSES.fail).select_related('node')
    if node:
        failed_clones = failed_clones.filter(node=node)

    for failed_clone in failed_clones:
        # Capture what the new task needs before the row is deleted. The
        # foreign-key column is read directly (no need to load the Repo row),
        # and a separate name avoids overwriting the ``node`` argument.
        repo_id = failed_clone.repo_id
        clone_node = failed_clone.node
        failed_clone.delete()
        clone_repo.apply_async(args=(repo_id,), queue=get_node_queue("clone", clone_node))


def find_repos_without_clones():
    """Return ids of active repositories that have no clone in active status."""
    active_clone_rows = Clone.objects.filter(status=Clone.STATUSES.active).values("repo_id").distinct()
    repo_ids_with_clone = set(row["repo_id"] for row in active_clone_rows)

    active_repo_rows = Repo.objects.filter(is_active=True).values("id")
    active_repo_ids = set(row["id"] for row in active_repo_rows)

    missing = []
    for repo_id in active_repo_ids:
        if repo_id not in repo_ids_with_clone:
            missing.append(repo_id)
    return missing


def remove_ticket(ticket):
    """Strip ``ticket`` from the ``tickets`` list of every commit that has it.

    Each affected commit is printed with its ticket list before the change
    and then saved.
    """
    lookup_value = "{%s}" % ticket
    for commit in PushedCommit.objects.filter(tickets__contains=lookup_value):
        print(commit, commit.tickets)
        remaining = []
        for existing in commit.tickets:
            if existing != ticket:
                remaining.append(existing)
        commit.tickets = remaining
        commit.save()
