#!/usr/bin/env python
# Script for backing up logs from MapReduce
#
# $Id$

import os,sys,time
import subprocess
import ConfigParser

# File that backup_logs() appends scheduled table names to; get_log_list()
# also hands it to `grep -v -f` to drop those names from the mr_ls listing.
STATE_FILE = '/Berkanavt/webscripts/admscripts/scripts/mrbackup/mrbackup.state'
# Configuration file parsed below with ConfigParser.
CFG_FILE = '/Berkanavt/webscripts/admscripts/scripts/mrbackup/mrbackup.cfg'
# File that logging() appends its timestamped messages to.
LOG_PATH = "/var/log/mrbackup/master.log"

config = ConfigParser.ConfigParser()
config.read(CFG_FILE)

# [common] N: passed to `head -n` in get_log_list().
N = int(config.get('common', 'N'))
# [common] FSIZE: total a server has to exceed in get_free_space().
FS = int(config.get('common', 'FSIZE'))
# [common] MR_SRV: passed as the `-s` argument of mr_backup.sh in backup_logs().
MR_SRV = config.get('common', 'MR_SRV')
# Both lists are split on the exact separator ", " (comma plus one space).
logs = config.get('MRlogs', 'logs').split(', ')
srv = config.get('backup servers', 'servers').split(', ')

# Opened for appending as soon as the module is loaded; written by
# backup_logs() and closed by main().
state = open(STATE_FILE, 'a')

def listTostr(list):
    """Join the strings in *list* into one space-separated string.

    Trailing spaces and commas are removed from the result, so an empty
    input yields an empty string and the last element is not followed by
    a separator.
    """
    # str.rstrip() returns a new string instead of modifying in place, so
    # the stripped value has to be the one that is returned.
    return " ".join(list).rstrip(" ,")

def sortedDictValues(adict):
    """Return the values of *adict* as a list, ordered by descending key.

    Always returns a real list (callers take len() of it and index into
    it), independent of whether dict.keys()/map() return lists or views.
    """
    return [adict[key] for key in sorted(adict, reverse=True)]

def invertDict(dict):
    """Group the keys of *dict* by their value.

    Returns a new dict that maps every distinct value of *dict* to the
    list of keys that carried it. Values must therefore be hashable.
    """
    idict = {}
    # items() instead of iteritems(): same pairs, available on every
    # Python version.
    for k, v in dict.items():
        idict.setdefault(v, []).append(k)
    return idict

def logging(message, level="INFO"):
    """Append *message* to the log file at LOG_PATH.

    The message is split on newlines and every line is written as its own
    record of the form "<dd-mm-YYYY HH:MM:SS> <level>:\\t <line>". All
    lines of one call share the same timestamp.

    message -- text to log; may contain several lines
    level   -- label written after the timestamp (default "INFO")

    Returns None.
    """
    ts = time.strftime("%d-%m-%Y %H:%M:%S")
    # open() rather than the Python-2-only file() builtin; try/finally so
    # the handle is released even if a write fails.
    mlog = open(LOG_PATH, "a+")
    try:
        for s in message.split("\n"):
            mlog.write("%s %s:\t %s\n" % (ts, level, s))
    finally:
        mlog.close()
    return None


def get_log_list(logs):
    '''Collect the tables that mr_ls reports for every entry of logs.

    For each entry the mr_ls listing is filtered through
    `grep -v -f STATE_FILE` and cut to the first N lines. From every
    remaining line the 5th whitespace-separated column (converted to int)
    becomes a key and the 10th column its value -- presumably table size
    and table name. Lines with too few columns are reported through
    logging() as a warning.

    NOTE: entries that share the same key overwrite each other.
    '''

    found = {}
    for table in logs:
        lscmd = "/Berkanavt/bin/mr_ls -l -s abies00:8013 \
                 %s | grep -v -f %s | head -n %s" \
                    % (table, STATE_FILE, N)
        proc = subprocess.Popen(lscmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        listing = proc.communicate()[0]
        for line in listing.strip().split('\n'):
            fields = line.split()
            try:
                found.update({int(fields[4]): fields[9]})
            except IndexError:
                logging("No such tables %s" % table, level="WARNING")
    return found


def get_free_space(srv):
    '''Return the backup servers whose /s[0-9] total exceeds FS.

    For every host in srv, `df -g` is run over ssh, the output lines
    matching /s[0-9] are kept and their 4th column is summed up
    (presumably the available space in gigabytes). Hosts whose command
    pipeline exits non-zero are logged as unreachable and skipped.

    srv -- iterable of host names

    Returns a dict {total: host} holding only hosts with total > FS.
    NOTE: hosts that share the same total overwrite each other.
    '''

    freespace = {}
    # Raw string, same value as before: the backslash keeps the local
    # shell from glob-expanding the bracket expression meant for grep.
    dfcmd = r"df -g | grep /s\[0-9]"

    for s in srv:
        free_space = subprocess.Popen("/usr/bin/ssh -i .ssh/id_dsa\
                                backup@%s %s" %(s, dfcmd),
                                stdout = subprocess.PIPE,
                                stderr = subprocess.PIPE,
                                shell=True)
        # Drain the pipes before checking the exit status: wait() on a
        # child that writes to a PIPE can block forever once the pipe
        # buffer is full.
        output = free_space.communicate()[0]
        if free_space.returncode:
            logging("%s is unreachable now!"%s, level="ERROR")
            continue

        t_size = 0
        for m in output.strip().split('\n'):
            t_size += int(m.split()[3])
        if t_size > FS:
            freespace.update({t_size:s})
    return freespace


def backup_logs(mrlogs, freespace):
    '''Assign tables to backup servers and start the remote backup script.

    mrlogs    -- dict whose values are table names (see get_log_list())
    freespace -- dict whose values are host names (see get_free_space())

    The values of both dicts are taken in descending key order and the
    tables are handed out to the hosts round-robin. Every assigned table
    name is appended to the state file, then mr_backup.sh is started over
    ssh once per host with that host's tables as arguments. The
    assignment is printed and logged, followed by a 10 second pause
    before the next host.

    When freespace is empty nothing is written to the state file; an
    error is logged if there were tables to back up.
    '''

    logs_sorted = sortedDictValues(mrlogs)
    srv_sorted = sortedDictValues(freespace)
    n_servers = len(srv_sorted)
    if not n_servers:
        # Without this guard `i % n_servers` below raises
        # ZeroDivisionError after the first table has already been
        # recorded in the state file.
        if logs_sorted:
            logging("No backup servers available, nothing was backed up",
                    level="ERROR")
        return
    master_srv = os.uname()[1]
    log2srv = {}

    for i, s in enumerate(logs_sorted):
        # The state file doubles as the grep exclusion list of
        # get_log_list(), hence one name per line.
        state.write(" %s\n" %s)
        log2srv.update({s:srv_sorted[(i % n_servers)]})
    ilog2srv = invertDict(log2srv)
    for host in ilog2srv:
        tables = listTostr(ilog2srv[host])
        mr_backup_cmd = "/usr/bin/ssh -n -i\
                     /home/backup/.ssh/id_dsa \
                     backup@%s \
                     /Berkanavt/webscripts/admscripts/scripts/mrbackup/mr_backup.sh \
                     -s %s -m %s %s  >/dev/null 2>&1 & " \
                     %(host, MR_SRV, master_srv, tables)
        mr_backup = subprocess.Popen(mr_backup_cmd,
                     stdout = subprocess.PIPE,
                     stderr = subprocess.PIPE,
                     shell=True)
        # Drain the pipes first, then look at the exit status; calling
        # wait() before communicate() can block on a full pipe.
        mr_backup.communicate()
        if mr_backup.returncode:
            # Report the host that failed, not the last table name left
            # over from the assignment loop above.
            logging("%s is unreachable now!"%host, level="ERROR")

        print("%s will be stored at %s" \
             %(tables, host))
        logging("%s will be stored at %s" \
          %(tables, host))
        time.sleep(10)

def main(argc, *argv):
    '''Run one backup pass: query servers, list tables, start backups.

    argc and argv are accepted for the caller's sake but are not used.
    The state file is closed on the way out, also when one of the steps
    raises.
    '''
    try:
        freespace = get_free_space(srv)
        mrlogs = get_log_list(logs)
        backup_logs(mrlogs, freespace)
    finally:
        state.close()


# Start the backup only when the file is executed as a script.
# main() does not use either of the two arguments passed here.
if __name__ == "__main__":
    main(len(sys.argv), sys.argv)
