#!/usr/bin/env python
# coding: utf-8
import os
import sys
import subprocess
import time
import re
import gzip
import pwd
import urllib
import logging
import json

def read_config():
    """Load the JSON configuration stored next to this script.

    The file is named ``bsexport.json`` and is looked up in the directory
    that contains the resolved (symlink-free) path of ``sys.argv[0]``.

    Returns the decoded JSON document.
    """
    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    config_path = os.path.join(script_dir, "bsexport.json")
    # The context manager closes the file; no explicit close() is needed.
    with open(config_path, "r") as config_file:
        return json.load(config_file)


# Configuration keys that the script entry point copies, one by one, from
# the loaded JSON config into module-level names of the same spelling.
varnames = [
    "rsync",
    "rsync_wrkdir",
    "mark_file",
    "flight_file",
    "database_path",
    "outdata_dir",
    "effective_username",
    "rsync_sources",
    "custom_logpath",
    "custom_format_query",
    "ts_field",
    "enabled_logs",
]

# Log line layout: timestamp left-aligned in a 15-character field, then the
# message text.
logformat = '%(asctime)-15s %(message)s'
# Configure the root logger with that format.
logging.basicConfig(format=logformat)
# Logger used throughout this script; INFO and above are emitted.
logger = logging.getLogger('bs_sync')
logger.setLevel(logging.INFO)

def load_mark(mfile):
    """Return the distinct entries recorded in a mark file.

    The file is read as text, surrounding whitespace is stripped and the
    remainder is split on newlines; duplicate lines are collapsed.

    :param mfile: path of the mark file.
    :returns: list of unique lines in unspecified order; an empty list when
        the file cannot be opened/read or contains only whitespace.
    """
    try:
        with open(mfile) as mf:
            mark = mf.read()
    except IOError:
        # A missing/unreadable mark file simply means "nothing recorded".
        # (The old fallback value 0 made the .strip() call below blow up.)
        return []

    mark = mark.strip()
    if not mark:
        return []
    return list(set(mark.split('\n')))

def rsync_file(url, filename):
    """Fetch one remote file into the rsync work directory and publish it.

    Steps:

    1. Unless ``filename`` already exists in ``rsync_wrkdir``, run
       ``rsync -z <url><filename> <rsync_wrkdir>``.
    2. If ``filename`` ends in ``.gz`` and the decompressed file is not yet
       present, decompress it in place with ``/usr/bin/gunzip -f``.
    3. If the resulting name ends in ``.frm`` and the sibling ``.MYD`` and
       ``.MYI`` files are present in the work directory, hard-link all
       three into ``database_path`` (skipping links that already exist).

    Relies on the module-level names ``rsync``, ``rsync_wrkdir``,
    ``database_path`` and ``logger``.

    :param url: remote directory URL, expected to end with ``/``.
    :param filename: name of the file inside that directory.
    """
    local_path = os.path.join(rsync_wrkdir, filename)
    if not os.path.exists(local_path):
        subprocess.call([rsync, "-z", url + filename, rsync_wrkdir])

    stem, suffix = os.path.splitext(filename)
    if suffix == '.gz':
        fname = stem
        if not os.path.exists(os.path.join(rsync_wrkdir, fname)):
            # Pass an argument list instead of a shell string: the file name
            # originates from a remote listing and must never be interpreted
            # by a shell. gunzip writes its output next to the input file,
            # so giving the full path is equivalent to "cd <dir> && gunzip".
            try:
                status = subprocess.call(["/usr/bin/gunzip", "-f", local_path])
            except OSError as exc:
                # Keep the original best-effort behaviour: report, don't abort.
                logger.warning("gunzip could not be started for %s: %s",
                               local_path, exc)
            else:
                if status != 0:
                    logger.warning("gunzip exited with status %s for %s",
                                   status, local_path)
    else:
        fname = filename

    fbase, fext = os.path.splitext(fname)
    if fext != ".frm":
        return
    # Only publish once the two companion files have arrived as well.
    for companion in (".MYD", ".MYI"):
        if not os.path.exists(os.path.join(rsync_wrkdir, fbase + companion)):
            return
    for e in (".MYD", ".MYI", ".frm"):
        target = os.path.join(database_path, fbase + e)
        if not os.path.exists(target):
            os.link(os.path.join(rsync_wrkdir, fbase + e), target)

def split_all_ext(fname, ext=True):
    """Strip every trailing extension from ``fname``.

    ``os.path.splitext`` is applied repeatedly until it reports no further
    extension, e.g. ``"a.tar.gz"`` becomes ``"a"``.

    :param fname: file name or path.
    :param ext: loop seed; any falsy value returns ``fname`` unchanged.
    :returns: ``fname`` without its extensions.
    """
    stem = fname
    remaining = ext
    while remaining:
        stem, remaining = os.path.splitext(stem)
    return stem

def rsync_list_tables(url, logtype):
    """List complete three-file table sets available at an rsync URL.

    Runs ``rsync --list-only`` against ``url`` (the rsync binary path comes
    from the ``rsync`` key of the config file), keeps the entries whose name
    starts with ``Rotate<logtype>Log<digits>``, orders them by that digit
    run and reports every base name (name without any extensions) that
    occurs on three consecutive entries.

    :param url: rsync URL of the remote directory.
    :param logtype: log type inserted into the name pattern (used as a
        regular-expression fragment, exactly as before).
    :returns: dict mapping base name -> tuple of the three file URLs.
    """
    config = read_config()
    # Raw strings keep the argument bytes identical to the previous
    # non-raw literals without relying on an invalid "\." escape.
    # NOTE(review): no shell is involved, so the double quotes are passed to
    # rsync literally as part of the pattern -- confirm this is intended.
    cmd = [
        config["rsync"], '--list-only',
        r'--exclude="\.*"', r'--exclude="\."',
        url,
    ]
    # universal_newlines gives text output on both Python 2 and Python 3.
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
    out, _ = p.communicate()

    name_re = re.compile("Rotate%sLog(?P<num>[0-9]+)" % logtype)
    entries = []
    for line in out.splitlines():
        fields = line.split()
        # Listing lines carry the file name in the fifth column; blank or
        # shorter lines (empty output, banners) are skipped instead of
        # raising IndexError.
        if len(fields) < 5:
            continue
        match = name_re.match(fields[4])
        if match:
            entries.append((match.group("num"), fields[4]))

    # Stable sort on the digit run only, so files of one table stay adjacent
    # in their listing order.
    entries.sort(key=lambda entry: entry[0])
    named = [(split_all_ext(name), name) for _, name in entries]

    prefix = url.rstrip("/") + "/"
    rdict = {}
    for (b0, n0), (b1, n1), (b2, n2) in zip(named, named[1:], named[2:]):
        if b0 == b1 == b2:
            rdict[b0] = (prefix + n0, prefix + n1, prefix + n2)
    return rdict


if __name__ == '__main__':
    # Script entry point: discover remote tables, drop to the configured
    # user, then sync every table that is neither done nor in flight.
    config = read_config()

    # Publish each configured value as a module-level name; functions such
    # as rsync_file() read them as globals.
    module_namespace = globals()
    for var in varnames:
        module_namespace[var] = config[var]

    # Collect the table sets of every enabled log type from every source.
    remote_tables_dict = {}
    for log_type in enabled_logs:
        for source_url in rsync_sources:
            remote_tables_dict.update(rsync_list_tables(source_url, log_type))

    # Switch both real and effective uid to the configured user.
    target_uid = pwd.getpwnam(effective_username).pw_uid
    os.setreuid(target_uid, target_uid)

    logger.info("Remote tables: %s" % len(remote_tables_dict))
    done_tables = load_mark(mark_file)
    logger.info("Done tables: %s" % len(done_tables))
    inflight_tables = load_mark(flight_file)
    logger.info("Inflight tables: %s" % len(inflight_tables))

    already_handled = set(done_tables) | set(inflight_tables)
    todo_tables = sorted(set(remote_tables_dict) - already_handled)
    logger.info("Todo tables: %s" % len(todo_tables))

    for table in todo_tables:
        for file_url in remote_tables_dict[table]:
            # Split "<scheme>://<host><dir>/<file>" into its parts.
            schema, hostnpath = urllib.splittype(file_url)
            host, path = urllib.splithost(hostnpath)
            pathprefix, filename = os.path.split(path)

            rsync_file("%s://%s%s/" % (schema, host, pathprefix), filename)
        logger.info("Table synced: %s" % table)

#for k,v in rsync_list_tables('rsync://bsgate01e.yandex.ru/yabsdb/', 'Watch').items():
#    print k
#    for e in v: print "\t %s" % e
