/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zookeeper.server;

import java.io.IOException;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.LinkedList;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;

import org.apache.jute.Index;
import org.apache.jute.InputArchive;
import org.apache.jute.OutputArchive;
import org.apache.jute.Record;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.Code;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.KeeperException.SystemErrorException;
import org.apache.zookeeper.Quotas;
import org.apache.zookeeper.StatsTrack;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.Watcher.Event;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.common.PathTrie;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.data.StatPersisted;
import org.apache.zookeeper.txn.CheckVersionTxn;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.DeleteTxn;
import org.apache.zookeeper.txn.ErrorTxn;
import org.apache.zookeeper.txn.MultiTxn;
import org.apache.zookeeper.txn.SetDataTxn;
import org.apache.zookeeper.txn.Txn;
import org.apache.zookeeper.txn.TxnHeader;
import org.jctools.maps.NonBlockingHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;

import ru.yandex.concurrent.WeighableRunnable;
import ru.yandex.logger.PrefixedLogger;
import ru.yandex.util.timesource.TimeSource;

/**
 * This class maintains the tree data structure. It doesn't have any networking
 * or client connection code in it so that it can be tested in a stand alone
 * way.
 * <p>
 * The tree maintains two parallel data structures: a hashtable that maps from
 * full paths to DataNodes and a tree of DataNodes. All accesses to a path is
 * through the hashtable. The tree is traversed only when serializing to disk.
 */
public class DataTree {
    // NOTE(review): presumably the width of the zero-padded numeric suffix of
    // queue child names (it equals the length of ZEROES) -- confirm at use sites.
    public static final int QUEUE_NODE_DECIMAL_COUNT = 20;

    // Number of highest positions of a queue that Queue.hasDeletes() never
    // offers for deletion. Read once, at class initialization, from the system
    // property "zoolooser.min-docs-in-memory-queue"; defaults to 2.
    private static final int MIN_DOCS_IN_MEMORY_QUEUE =
        Integer.parseInt(
            System.getProperty(
                "zoolooser.min-docs-in-memory-queue",
                "2"));

    // Twenty '0' characters: the padding QueueChildPath.toString() appends and
    // then overwrites, right to left, with the decimal digits of the position.
    private static final char[] ZEROES = "00000000000000000000".toCharArray();

    // Class-wide slf4j logger, used by the static hashKey() helpers.
    private static final Logger LOG = LoggerFactory.getLogger(DataTree.class);

    // In the visible part of this file the counter is referenced only from
    // commented-out code in the constructor.
    private static final AtomicLong INITALIZE_COUNTER = new AtomicLong(0L);
    /**
     * This hashtable provides a fast lookup to the datanodes. The tree is the
     * source of truth and is where all the locking occurs
     */
    private final NonBlockingHashMap<PathKey, DataNode> nodes =
        new NonBlockingHashMap<PathKey, DataNode>();

    // Maps a HashKey to the PathKey of a node; emptied by clear().
    // NOTE(review): where entries are added is outside the visible part.
    private final NonBlockingHashMap<HashKey, PathKey> nodesHashes =
        new NonBlockingHashMap<HashKey, PathKey>();

    // Queue descriptors keyed by the path of the queue node; getNode() and
    // pathKey() look up a path's parent here to detect queue children.
    private final ConcurrentHashMap<String, Queue> queues =
        new ConcurrentHashMap<String, Queue>();

    // Per-instance logger supplied through the constructor.
    private final PrefixedLogger logger;

    /**
     * Key type of the {@code nodes} map. The interface re-declares the three
     * {@link Object} methods that every key implementation is expected to
     * provide: value equality, a matching hash code, and the textual path.
     */
    interface PathKey {
        @Override
        boolean equals(Object other);

        @Override
        int hashCode();

        @Override
        String toString();
    }

    /**
     * Key for a node that is addressed by its complete path string.
     * Two keys are equal when their path strings are equal.
     */
    static class RegularPath implements PathKey {
        public final String path;

        public RegularPath(final String path) {
            this.path = path;
        }

        /** Equal only to another {@code RegularPath} with the same path. */
        public boolean equals(Object o) {
            if (!(o instanceof RegularPath)) {
                return false;
            }
            final RegularPath that = (RegularPath) o;
            return path.equals(that.path);
        }

        /** Hash code of the path string. */
        public int hashCode() {
            return path.hashCode();
        }

        /** Returns the path itself. */
        public String toString() {
            return path;
        }
    }

    /**
     * Key for a child of a queue node: the (interned) parent path plus the
     * numeric position of the child. The textual form is
     * {@code parent + '/' + QueueChildSet.PREFIX + <20-digit zero-padded num>}.
     */
    static class QueueChildPath implements PathKey {
        public final String parent;
        public final long num;

        public QueueChildPath(final String parent, final long num) {
            // Interned so that all children of one queue share a single
            // parent string instance.
            this.parent = parent.intern();
            this.num = num;
        }

        /** Equal only to another {@code QueueChildPath} with same parent and number. */
        public boolean equals(Object o) {
            if (!(o instanceof QueueChildPath)) {
                return false;
            }
            final QueueChildPath that = (QueueChildPath) o;
            return parent.equals(that.parent) && num == that.num;
        }

        /** Parent hash plus the low 32 bits of the position. */
        public int hashCode() {
            final int low = (int) num;
            return parent.hashCode() + low;
        }

        public String toString() {
            return toString(parent, num);
        }

        /**
         * Builds the textual path of a queue child.
         *
         * @param parent path of the queue node
         * @param num position of the child; values that are not positive are
         *     rendered as twenty zeroes
         * @return the child's full path
         */
        public static String toString(final String parent, long num) {
            // Start from twenty zeroes and fill in decimal digits right to left.
            final char[] digits = ZEROES.clone();
            for (int pos = digits.length - 1; num > 0; --pos) {
                digits[pos] = (char) ((num % 10L) + '0');
                num /= 10;
            }
            final int capacity =
                parent.length() + 21 + QueueChildSet.PREFIX.length();
            return new StringBuilder(capacity)
                .append(parent)
                .append('/')
                .append(QueueChildSet.PREFIX)
                .append(digits)
                .toString();
        }
    }

    /**
     * Bookkeeping for one queue node: its path, its child set and a cursor
     * ({@code nextDeletePos} / {@code nextDeleteNode}) over the children that
     * may be deleted next. Queues are ordered by the creation time of their
     * current deletion candidate.
     */
    class Queue implements Comparable<Queue> {
        public final DataNode queueNode;
        public final String path;
        public final QueueChildSet children;
        public long nextDeletePos = 0;
        public DataNode nextDeleteNode;

        public Queue(final DataNode queueNode, final String path) {
            this.queueNode = queueNode;
            this.path = path;
            this.children = (QueueChildSet) queueNode.getChildren();
        }

        /**
         * Creation time of the current deletion candidate. Within this class
         * {@code nextDeleteNode} is only assigned by {@link #hasDeletes()}, so
         * calling this before a successful {@code hasDeletes()} dereferences
         * null (an earlier null guard returning Long.MAX_VALUE is disabled).
         */
        public long ctime() {
            final DataNode candidate = nextDeleteNode;
            return candidate.getCtime();
        }

        /**
         * Advances the cursor to the next existing child position that is at
         * most {@code lastPosition() - MIN_DOCS_IN_MEMORY_QUEUE} and loads the
         * corresponding node into {@code nextDeleteNode}.
         *
         * @return true when a candidate was found, false when the cursor moved
         *     past the highest deletable position
         * @throws RuntimeException if the child set contains a position for
         *     which the {@code nodes} map has no entry
         */
        public boolean hasDeletes() {
            final long lowest = children.firstPosition();
            final long highestDeletable =
                children.lastPosition() - MIN_DOCS_IN_MEMORY_QUEUE;
            if (nextDeletePos < lowest) {
                nextDeletePos = lowest;
            }
            for (; nextDeletePos <= highestDeletable; nextDeletePos++) {
                if (!children.contains(nextDeletePos)) {
                    continue;
                }
                nextDeleteNode =
                    nodes.get(new QueueChildPath(path, nextDeletePos));
                if (nextDeleteNode == null) {
                    throw new RuntimeException(
                        "QueueChildSet inconsistence: position "
                            + nextDeletePos
                            + " is present in set but nodes.get() returned null. dump: "
                            + children.dumpIntervals());
                }
                // The cursor stays on the found position.
                return true;
            }
            return false;
        }

        /** Moves the cursor one position past the current candidate. */
        public void nextDelete() {
            nextDeletePos++;
        }

        /** Full path of the child stored at {@code position}. */
        public String positionToPath(final long position) {
            return QueueChildPath.toString(path, position);
        }

        @Override
        public int compareTo(final Queue other) {
            final long mine = ctime();
            final long theirs = other.ctime();
            return Long.compare(mine, theirs);
        }
    }

    /**
     * Deferred deletion of a single node: when run, it deletes {@code path}
     * through {@code deleteNode(path, -1)} and treats an already missing node
     * as success.
     */
    private class DeleteNodeCallback implements WeighableRunnable {
        private final String path;

        DeleteNodeCallback(final String path) {
            this.path = path;
        }

        /** Weight of this task: a fixed part of 64 plus the path length. */
        @Override
        public int weight() {
            // The path is always Latin-1 encoded, so the length does not need
            // to be multiplied by 2.
            final int fixedPart = 32 + 32;
            return fixedPart + path.length();
        }

        public void run() {
            try {
                deleteNode(path, -1);
            } catch (KeeperException.NoNodeException ignored) {
                // The node is already gone; nothing left to do.
            }
        }
    }

    /**
     * Pairs a node with its path and caches the node's size at a point in
     * time ({@code fixedSize}). Identity is defined by the path alone: two
     * instances with the same path are equal regardless of node or size.
     */
    static class NodeAndPath {
        public final DataNode node;
        public final String path;
        /** Size of {@code node} as of construction or the last {@link #updateSize()}. */
        public long fixedSize;

        /**
         * @param node the node; its current {@code size()} is cached
         * @param path the node's path, used for equality and hashing
         */
        public NodeAndPath(DataNode node, String path) {
            this.node = node;
            this.path = path;
            fixedSize = node.size();
        }

        /**
         * Compares by path. Returns false (instead of throwing) for
         * {@code null} and for objects of any other type, as the
         * {@link Object#equals(Object)} contract requires.
         */
        @Override
        public final boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof NodeAndPath)) {
                return false;
            }
            return path.equals(((NodeAndPath) o).path);
        }

        @Override
        public final int hashCode() {
            return path.hashCode();
        }

        /** Re-reads the node's size into {@code fixedSize}. */
        public final void updateSize() {
            fixedSize = node.size();
        }
    }

    /**
     * Orders {@link NodeAndPath} instances by ascending {@code fixedSize}.
     */
    private static class NodeAndPathComparator implements Comparator<NodeAndPath> {
        /**
         * Uses {@link Long#compare(long, long)} rather than a subtraction
         * narrowed to int: narrowing a long difference can flip its sign or
         * collapse it to 0 (e.g. a difference of exactly 2^32), which breaks
         * the ordering for large sizes.
         */
        @Override
        public int compare(NodeAndPath n1, NodeAndPath n2) {
            return Long.compare(n1.fixedSize, n2.fixedSize);
        }
    }

    /**
     * Daemon thread that, while {@code purgeRun} is set, calls
     * {@code purgeOldestQueueNodes()} whenever the floating queue storage size
     * exceeds {@code maxQueueMemSize - queueStorageSizeDelta}; otherwise it
     * waits on itself for up to one second or until {@link #wakeup()}.
     */
    private class PurgeThread extends Thread {
        public PurgeThread() {
            super("QueuePurgeThread");
            setDaemon(true);
        }

        @Override
        public void run() {
            logger.info("QueuePurgeThread started");
            while (purgeRun) {
                try {
                    final long floating = floatingQueueStorageSize.get();
                    final long threshold =
                        maxQueueMemSize - queueStorageSizeDelta;
                    if (floating <= threshold) {
                        // Nothing to purge: sleep until woken up or timed out.
                        synchronized (this) {
                            wait(1000);
                        }
                        continue;
                    }
                    logger.info("FloatingQueueStorageSize = "
                        + floatingQueueStorageSize.get()
                        + ". commited: "
                        + commitedQueueStorageSize.get()
                        + " Running purge.");
                    purgeOldestQueueNodes();
                    logger.info("FloatingQueueStorageSize = "
                        + floatingQueueStorageSize.get()
                        + " commited: "
                        + commitedQueueStorageSize.get()
                        + " After purge.");
                } catch (Exception e) {
                    // Any failure (including an interrupt of wait()) is
                    // printed and the loop simply goes on.
                    e.printStackTrace();
                }
            }
            logger.info("QueuePurgeThread stopped");
        }

        /** Ends a pending {@code wait} in {@link #run()} early. */
        public synchronized void wakeup() {
            notify();
        }
    }

    // Background purge worker; created by startPurgeThread() and reset to
    // null by stopPurgeThread().
    private PurgeThread queuePurgeThread;
    // Loop flag polled by PurgeThread.run(); set by startPurgeThread() and
    // cleared by stopPurgeThread().
    private volatile boolean purgeRun;

    // Queue storage size counters; both are reset to 0 by clear(). The purge
    // thread compares the floating one with maxQueueMemSize minus
    // queueStorageSizeDelta. NOTE(review): where they are incremented is
    // outside the visible part of the file.
    private final AtomicLong commitedQueueStorageSize = new AtomicLong(0);
    private final AtomicLong floatingQueueStorageSize = new AtomicLong(0);
    // Margin below maxQueueMemSize at which purging starts; computed by
    // setMaxQueueMemSize().
    private long queueStorageSizeDelta;

    // Upper bound configured through setMaxQueueMemSize().
    private long maxQueueMemSize;

    private final WatchManager dataWatches = new WatchManager();

    private final WatchManager childWatches = new WatchManager();

    // Created and started in the constructor; closed by clear().
    private final DataWatchManager dataWatchManager;

    /** the root of the zookeeper tree */
    private static final String rootZookeeper = "/";

    /** the zookeeper node that acts as the management and status node */
    private static final String procZookeeper = Quotas.procZookeeper;

    /** this will be the string that's stored as a child of root */
    private static final String procChildZookeeper = procZookeeper.substring(1);

    /**
     * the zookeeper quota node that acts as the quota management node for
     * zookeeper (currently disabled)
     */
//    private static final String quotaZookeeper = Quotas.quotaZookeeper;

    /** this will be the string that's stored as a child of /zookeeper (currently disabled) */
//    private static final String quotaChildZookeeper = quotaZookeeper
//            .substring(procZookeeper.length() + 1);

    /**
     * the zookeeper config node that acts as the config management node for
     * zookeeper
     */
    private static final String configZookeeper = ZooDefs.CONFIG_NODE;

    /** this will be the string that's stored as a child of /zookeeper */
    private static final String configChildZookeeper = configZookeeper
            .substring(procZookeeper.length() + 1);
    
    /**
     * the path trie that keeps track of the quota nodes in this datatree
     */
    private final PathTrie pTrie = new PathTrie();

    // Monitor held by clear() while it resets the maps and counters.
    private final Object clearLock = new Object();

    /**
     * This hashtable lists the paths of the ephemeral nodes of a session,
     * keyed by session id.
     */
    private final Map<Long, HashSet<String>> ephemerals =
        new ConcurrentHashMap<Long, HashSet<String>>();

    /**
     * this is map from longs to acl's. It saves acl's being stored for each
     * datanode.
     */
//    private final Map<Long, List<ACL>> longKeyMap =
//        new HashMap<Long, List<ACL>>();

    /**
     * this a map from acls to long.
     */
//    private final Map<List<ACL>, Long> aclKeyMap =
//        new HashMap<List<ACL>, Long>();

    /**
     * these are the number of acls that we have in the datatree
     */
//    private long aclIndex = 0;

    /**
     * Returns a private copy of the ephemeral node paths owned by a session.
     *
     * @param sessionId the session to look up
     * @return a new set with the session's ephemeral paths; a new empty set
     *     when the session has no entry
     */
    public Set<String> getEphemerals(long sessionId) {
        final HashSet<String> owned = ephemerals.get(sessionId);
        if (owned == null) {
            return new HashSet<String>();
        }
        // Copy under the set's own monitor so the snapshot is consistent.
        synchronized (owned) {
            return new HashSet<String>(owned);
        }
    }
/*
    int getAclSize() {
        return longKeyMap.size();
    }
*/
//    private long incrementIndex() {
//        return ++aclIndex;
//    }

    /**
     * converts the list of acls to a list of longs.
     *
     * @param acls
     * @return a list of longs that map to the acls
     */
/*
    public synchronized Long convertAcls(List<ACL> acls) {
        if (acls == null)
            return -1L;
        // get the value from the map
        Long ret = aclKeyMap.get(acls);
        // could not find the map
        if (ret != null)
            return ret;
        long val = incrementIndex();
        longKeyMap.put(val, acls);
        aclKeyMap.put(acls, val);
        return val;
    }
*/
    /**
     * converts a list of longs to a list of acls.
     *
     * @param longVal
     *            the list of longs
     * @return a list of ACLs that map to longs
     */
/*
    public synchronized List<ACL> convertLong(Long longVal) {
        if (longVal == null)
            return null;
        if (longVal == -1L)
            return Ids.OPEN_ACL_UNSAFE;
        List<ACL> acls = longKeyMap.get(longVal);
        if (acls == null) {
            LOG.error("ERROR: ACL not available for long " + longVal);
            throw new RuntimeException("Failed to fetch acls for " + longVal);
        }
        return acls;
    }
*/
    /**
     * Returns the ids of all sessions that have an entry in the ephemerals
     * map. The result is the map's own key set, not a copy.
     */
    public Collection<Long> getSessions() {
        final Set<Long> sessionIds = ephemerals.keySet();
        return sessionIds;
    }

    /**
     * Returns the queue registry itself (not a copy), keyed by queue path.
     */
    public Map<String, Queue> getQueues() {
        final Map<String, Queue> registry = queues;
        return registry;
    }

    /**
     * Looks up a node by its textual path.
     * <p>
     * A path without any slash resolves to the entry stored under the empty
     * path. If the path's parent is a registered queue and the child name is
     * longer than {@code QueueChildSet.PREFIX}, the characters after the
     * prefix are parsed as the decimal queue position; otherwise the path is
     * used as a regular key.
     *
     * @param path path of the node
     * @return the node, or null if there is none
     * @throws NumberFormatException if a queue child's suffix is not a number
     */
    public DataNode getNode(String path) {
        final int slashPos = path.lastIndexOf('/');
        if (slashPos == -1) {
            return nodes.get(new RegularPath(""));
        }

        final String parentPath = path.substring(0, slashPos);
        if (!queues.containsKey(parentPath)) {
            return nodes.get(new RegularPath(path));
        }

        final int numberStart = slashPos + 1 + QueueChildSet.PREFIX.length();
        final int end = path.length();
        if (numberStart >= end) {
            // Child name is no longer than the prefix: not a numbered child.
            return nodes.get(new RegularPath(path));
        }

        final PathKey key;
        try {
            key = new QueueChildPath(
                parentPath,
                Long.parseLong(path, numberStart, end, 10));
        } catch (Exception e) {
            e.printStackTrace();
            logger.severe("Can't parse queue child: " + path + ", path=<" + path + '>');
            throw e;
        }
        return nodes.get(key);
    }

    /**
     * Returns the queue child that follows {@code fromPath}.
     * <p>
     * The queue storage of the snapshot log is asked first; only when it
     * returns null is the in-memory child set of the queue node consulted.
     *
     * @param fromPath path of a queue child; its parent must be a registered
     *     queue
     * @return the next child as reported by the queue storage or the child
     *     set; may be null if neither knows a successor
     * @throws NoNodeException if {@code fromPath} contains no slash, its
     *     parent is not a registered queue, the parent node is missing from
     *     the node map, or the parent node has no queue
     * @throws SystemErrorException declared for callers; presumably raised by
     *     the queue storage -- TODO confirm
     */
    public String getNextNode(String fromPath)
        throws NoNodeException, SystemErrorException
    {
        final int lastSlash = fromPath.lastIndexOf('/');
        if (lastSlash == -1) {
            throw new NoNodeException();
        }
        final String parentName = fromPath.substring(0, lastSlash);
        if (!queues.containsKey(parentName)) {
            throw new NoNodeException();
        }
        final DataNode queue = nodes.get(new RegularPath(parentName));
        // The queue registry and the node map are consulted separately, so
        // the node can be absent even though the queue is registered; report
        // that as a missing node rather than failing on a null dereference.
        if (queue == null || !queue.hasQueue) {
            throw new NoNodeException();
        }

        final String stored =
            snapLog.queueStorage().getNextNode(parentName, fromPath);
        if (stored != null) {
            return stored;
        }

        final String child = fromPath.substring(lastSlash + 1);
        synchronized (queue) {
            final ChildSet childs = queue.getChildren();
            return childs.nextChild(child);
        }
    }

//    private static final HashKey(String hash) {
//        String 
//    }

//    public String getNodeByHash(String hash) {
//        return nodesHashes.get(hashKey(hash));
//    }

    /**
     * Returns the number of entries in the node map. The root is stored
     * under two keys by the constructor and is therefore counted twice.
     */
    public int getNodeCount() {
        final int entryCount = nodes.size();
        return entryCount;
    }

    /**
     * Returns the combined size of the data watch manager and the child
     * watch manager.
     */
    public int getWatchCount() {
        final int dataCount = dataWatches.size();
        final int childCount = childWatches.size();
        return dataCount + childCount;
    }

    /**
     * Returns the total number of ephemeral paths over all sessions, i.e.
     * the sum of the sizes of every per-session set.
     */
    int getEphemeralsCount() {
        return ephemerals.values()
            .stream()
            .mapToInt(HashSet::size)
            .sum();
    }

    /**
     * Sets the queue memory limit and derives the purge margin
     * {@code queueStorageSizeDelta} from it.
     * <p>
     * The margin is {@code size / floor(ln(size))}, capped at 128 MiB. For
     * sizes 1 and 2 (and for negative sizes) {@code (int) Math.log(size)} is
     * 0, which used to make the division throw
     * {@link ArithmeticException}; the divisor is now at least 1, and a
     * non-positive size yields a margin of 0. For sizes of 3 and above, and
     * for 0, the result is unchanged.
     *
     * @param size the maximum queue memory size
     */
    public void setMaxQueueMemSize(long size) {
        final long maxDelta = 1024L * 1024L * 128L;

        maxQueueMemSize = size;

        long delta = 0;
        if (size > 0) {
            // Math.log(1) == 0 and Math.log(2) < 1, both truncate to 0.
            final int divisor = Math.max(1, (int) Math.log(size));
            delta = size / divisor;
        }
        queueStorageSizeDelta = Math.min(delta, maxDelta);
    }


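    /**
     * Formerly computed the approximate size of all nodes from their path
     * and data lengths. That computation is currently disabled: the method
     * only logs a warning and returns 0.
     *
     * @return always 0
     */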
    public long approximateDataSize() {
        // The real computation is switched off; callers get a constant.
        final long disabledResult = 0;
        logger.warning("DataTree: slow approximateDataSize() method is called");
        return disabledResult;
        // Disabled implementation, kept for reference:
        //
        // long result = 0;
        // for (Map.Entry<PathKey, DataNode> entry : nodes.entrySet()) {
        //     DataNode value = entry.getValue();
        //     synchronized (value) {
        //         result += entry.getKey().length();
        //         result += value.getApproximateDataSize();
        //     }
        // }
        // return result;
    }

    /**
     * This is a pointer to the root of the DataTree. It is the source of truth,
     * but we usually use the nodes hashmap to find nodes in the tree.
     * Set in the constructor and reset to null by clear().
     */
    private volatile DataNode root;

    /**
     * The /zookeeper node, which acts as the proc filesystem of zookeeper.
     * Created in the constructor.
     */
    private final DataNode procDataNode;

    /**
     * The /zookeeper/quota node for maintaining quota properties for
     * zookeeper (currently disabled).
     */
//    private final DataNode quotaDataNode = new DataNode(new byte[0], -1L, new StatPersisted());

    /**
     * The /zookeeper/config node for maintaining the configuration
     * (membership and quorum system) info for zookeeper. Created in the
     * constructor and registered by addConfigNode().
     */
    private DataNode configDataNode;
    // Snapshot/transaction log passed to the constructor; getNextNode() reads
    // its queue storage.
    private FileTxnSnapLog snapLog;
    // Most recently constructed DataTree; the constructor clears the previous
    // one before replacing it.
    private static DataTree instance = null;

    /**
     * Creates the tree with its built-in nodes (root, /zookeeper and the
     * config node), applies the queue memory limit taken from
     * {@code snapLog}, and starts the purge thread and the data watch
     * manager.
     * <p>
     * If another DataTree was constructed earlier, a stack trace is printed
     * and that earlier instance is cleared before this one takes its place
     * in the static {@code instance} field.
     *
     * @param snapLog source of the maximum queue memory size; also stored
     *     for later use
     * @param logger logger used by this instance
     */
    public DataTree(FileTxnSnapLog snapLog, final PrefixedLogger logger) {
        this.logger = logger;


        // Only one live tree is expected: tear down the previous one.
        if (instance != null) {
            (new Exception("DataTree double instantiation")).printStackTrace();
            instance.clear();
        }
        instance = this;
//        logger.warning(
//            "DataTree created, since jvm start data tree create counter: "
//                + INITALIZE_COUNTER.incrementAndGet());

        root = new DataNode(null, new StatPersisted());
        procDataNode = new DataNode(null, new StatPersisted());
        configDataNode = new DataNode(null, new StatPersisted());

        /* Rather than fight it, let root have an alias */
        nodesPut("", root);
        nodesPut(rootZookeeper, root);

        /* add the proc node (the quota node below is disabled) */
        root.addChild(procChildZookeeper, 0, false);
        nodesPut(procZookeeper, procDataNode);

//        procDataNode.addChild(quotaChildZookeeper, 0);
//        nodes.put(quotaZookeeper, quotaDataNode);

        setMaxQueueMemSize( snapLog.getMaxQueueMemSize() );

        addConfigNode();

        // NOTE(review): the purge thread is started before snapLog and
        // dataWatchManager are assigned -- confirm the thread does not
        // depend on either of them.
	startPurgeThread();
	this.snapLog = snapLog;
        dataWatchManager = new DataWatchManager();
        dataWatchManager.start();

        logger.info( "DataTree.ctor: maxQueueMemSize = " + maxQueueMemSize + ", delta: " + queueStorageSizeDelta );

    }

    /**
     * Registers {@code watcher} for {@code path} with the data watch manager.
     *
     * @param path path to watch
     * @param watcher watcher to register
     */
    public void registerWatcher(
        final String path,
        final DataWatchManager.Watcher watcher)
    {
        final DataWatchManager manager = dataWatchManager;
        manager.registerWatcher(path, watcher);
    }

    /**
     * Removes {@code watcher} for {@code path} from the data watch manager.
     *
     * @param path watched path
     * @param watcher watcher to remove
     */
    public void unregisterWatcher(
        final String path,
        final DataWatchManager.Watcher watcher)
    {
        final DataWatchManager manager = dataWatchManager;
        manager.unregisterWatcher(path, watcher);
    }


    /**
     * Parses the combined form {@code "hash" + parentPath + "/" + child}
     * into a {@link HashKey}.
     * <p>
     * The first four characters are skipped without being checked; the text
     * up to the last slash is the parent name and the rest is the child.
     * {@code HexByteHash.parse} is tried first; if it returns null or throws,
     * a {@code StringHash} is used instead.
     *
     * @param hash the combined string, may be null
     * @return the key, or null if {@code hash} is null
     * @throws RuntimeException if the string cannot be split (for example it
     *     has no slash); the failure is logged and rethrown
     */
    public static HashKey hashKey(final String hash) {
        if (hash == null) {
            return null;
        }
        try {
            final int slashPos = hash.lastIndexOf('/');
            final String parentName =
                hash.substring("hash".length(), slashPos);
            final String child = hash.substring(slashPos + 1);
            try {
                final HashKey parsed = HexByteHash.parse(parentName, child);
                return parsed != null
                    ? parsed
                    : new StringHash(parentName, child);
            } catch (Exception e) {
                LOG.info(
                    "Non sha-256 hash: " + hash,
                    e);
                return new StringHash(parentName, child);
            }
        } catch (Exception e) {
            LOG.info("HashKey.fromString exception: " + hash);
            throw e;
        }
    }

    /**
     * Builds a {@link HashKey} from a parent name and a hash string.
     * {@code HexByteHash.parse} is tried first; if it returns null or throws,
     * a {@code StringHash} is used instead (a thrown exception is logged).
     *
     * @param parentName name of the parent
     * @param hash the hash string
     * @return the resulting key
     */
    public static HashKey hashKey(final String parentName, final String hash) {
        try {
            final HashKey parsed = HexByteHash.parse(parentName, hash);
            return parsed != null
                ? parsed
                : new StringHash(parentName, hash);
        } catch (Exception e) {
            LOG.info("Non sha-256 hash: " + hash, e);
            return new StringHash(parentName, hash);
        }
    }

    /**
     * Builds the map key for {@code path} when the caller already knows
     * whether the node is a queue child.
     *
     * @param path the node path, may be null
     * @param queueChild true to build a {@code QueueChildPath} from the
     *     parent path and the number that follows {@code QueueChildSet.PREFIX}
     * @return the key, or null if {@code path} is null
     */
    private final static PathKey pathKey(
        final String path,
        final boolean queueChild) {
        if (path == null) {
            return null;
        }
        if (!queueChild) {
            return new RegularPath(path);
        }
        final int slashPos = path.lastIndexOf('/');
        final String parentPath = path.substring(0, slashPos);
        final int numberStart =
            slashPos + 1 + QueueChildSet.PREFIX.length();
        final long position =
            Long.parseLong(path, numberStart, path.length(), 10);
        return new QueueChildPath(parentPath, position);
    }

    /**
     * Builds the map key for {@code path}, deciding from the queue registry
     * whether it denotes a queue child.
     *
     * @param path the node path, may be null
     * @return null for a null path; the empty-path key when the path has no
     *     slash; a {@code QueueChildPath} when the parent is a registered
     *     queue; a {@code RegularPath} otherwise
     */
    private final PathKey pathKey(final String path) {
        if (path == null) {
            return null;
        }
        final int slashPos = path.lastIndexOf('/');
        if (slashPos == -1) {
            return new RegularPath("");
        }
        final String parentPath = path.substring(0, slashPos);
        if (!queues.containsKey(parentPath)) {
            return new RegularPath(path);
        }
        final int numberStart =
            slashPos + 1 + QueueChildSet.PREFIX.length();
        final long position =
            Long.parseLong(path, numberStart, path.length(), 10);
        return new QueueChildPath(parentPath, position);
    }

    /**
     * Stores {@code node} in the node map under the key derived from
     * {@code path} and the node's own {@code queueChild} flag.
     */
    private final void nodesPut(String path, DataNode node) {
        final PathKey key = pathKey(path, node.queueChild);
        nodes.put(key, node);
    }

    /**
     * Starts a new purge thread. If one is already registered, a stack trace
     * is printed and the old thread is stopped first.
     */
    private void startPurgeThread() {
        final boolean alreadyStarted = queuePurgeThread != null;
        if (alreadyStarted) {
            final Exception trace =
                new Exception("QueuePurgeThread double instantiation");
            trace.printStackTrace();
            stopPurgeThread();
        }
        // The flag must be set before the thread enters its loop.
        purgeRun = true;
        queuePurgeThread = new PurgeThread();
        queuePurgeThread.start();
    }

    /**
     * Stops the purge thread, if any: clears the run flag, wakes the thread
     * and waits for it to finish. An interrupt received while waiting is
     * restored on the calling thread; the thread reference is dropped in
     * either case.
     */
    private void stopPurgeThread() {
        if (queuePurgeThread != null) {
            purgeRun = false;
            queuePurgeThread.wakeup();
            try {
                queuePurgeThread.join();
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
            }
            queuePurgeThread = null;
        }
    }

    /**
     * Tears this tree down: stops the purge thread, drops the root and
     * empties all maps and size counters, then stops both watch managers
     * and closes the data watch manager.
     */
    public void clear()
    {
        // Stop the purge thread first, before the state is reset.
        stopPurgeThread();
        // The reset of maps and counters happens under clearLock.
        synchronized(clearLock) {
            root = null;
            nodes.clear();
            queues.clear();
            nodesHashes.clear();
            ephemerals.clear();
            commitedQueueStorageSize.set(0);
            floatingQueueStorageSize.set(0);
        }
        dataWatches.stop();
        childWatches.stop();
        dataWatchManager.close();
    }

     /**
      * Registers the config node: adds it as a child of the /zookeeper node
      * (logging an error if that node is missing) and stores it in the node
      * map in either case.
      */
     public void addConfigNode() {
         final DataNode zookeeperZnode =
             nodes.get(new RegularPath(procZookeeper));
         if (zookeeperZnode == null) {
             logger.severe("There's no /zookeeper znode - this should never happen");
         } else {
             // should always be the case
             zookeeperZnode.addChild(configChildZookeeper, 0, false);
         }
         nodesPut(configZookeeper, configDataNode);
     }

    /**
     * Builds the combined hash string {@code "hash" + parentPath + "/" + hash},
     * the form that {@code hashKey(String)} splits apart again.
     */
    private static String getHash( String parentPath, String hash )
    {
        final StringBuilder combined = new StringBuilder("hash");
        combined.append(parentPath);
        combined.append("/");
        combined.append(hash);
        return combined.toString();
    }

    /**
     * is the path one of the special paths owned by zookeeper.
     *
     * @param path
     *            the path to be checked
     * @return true if a special path. false if not.
     */
    boolean isSpecialPath(String path) {
        // Special paths are the root, /zookeeper and the config node; the
        // quota node (quotaZookeeper) is no longer part of this check.
        return rootZookeeper.equals(path)
            || procZookeeper.equals(path)
            || configZookeeper.equals(path);
    }

    /**
     * Copies every field of one {@link StatPersisted} into another.
     *
     * @param from source of the values
     * @param to object that receives the values
     */
    public static void copyStatPersisted(StatPersisted from, StatPersisted to) {
        // transaction ids
        to.setCzxid(from.getCzxid());
        to.setMzxid(from.getMzxid());
        to.setPzxid(from.getPzxid());
        // timestamps
        to.setCtime(from.getCtime());
        to.setMtime(from.getMtime());
        // version counters
        to.setVersion(from.getVersion());
        to.setCversion(from.getCversion());
        to.setAversion(from.getAversion());
        // ownership
        to.setEphemeralOwner(from.getEphemeralOwner());
    }

    /**
     * Copies every field of one {@link Stat} into another, including the
     * data length and the number of children.
     *
     * @param from source of the values
     * @param to object that receives the values
     */
    public static void copyStat(Stat from, Stat to) {
        // transaction ids
        to.setCzxid(from.getCzxid());
        to.setMzxid(from.getMzxid());
        to.setPzxid(from.getPzxid());
        // timestamps
        to.setCtime(from.getCtime());
        to.setMtime(from.getMtime());
        // version counters
        to.setVersion(from.getVersion());
        to.setCversion(from.getCversion());
        to.setAversion(from.getAversion());
        // ownership and sizes
        to.setEphemeralOwner(from.getEphemeralOwner());
        to.setDataLength(from.getDataLength());
        to.setNumChildren(from.getNumChildren());
    }

    /**
     * update the count of this stat datanode
     *
     * @param lastPrefix
     *            the path of the node that is quotaed.
     * @param diff
     *            the diff to be added to the count
     */
/*
    public void updateCount(String lastPrefix, int diff) {
        String statNode = Quotas.statPath(lastPrefix);
        DataNode node = nodes.get(statNode);
        StatsTrack updatedStat = null;
        if (node == null) {
            // should not happen
            LOG.error("Missing count node for stat " + statNode);
            return;
        }
        synchronized (node) {
            updatedStat = new StatsTrack(new String(node.data));
            updatedStat.setCount(updatedStat.getCount() + diff);
            node.data = updatedStat.toString().getBytes();
        }
        // now check if the counts match the quota
        String quotaNode = Quotas.quotaPath(lastPrefix);
        node = nodes.get(quotaNode);
        StatsTrack thisStats = null;
        if (node == null) {
            // should not happen
            LOG.error("Missing count node for quota " + quotaNode);
            return;
        }
        synchronized (node) {
            thisStats = new StatsTrack(new String(node.data));
        }
        if (thisStats.getCount() > -1 && (thisStats.getCount() < updatedStat.getCount())) {
            LOG
            .warn("Quota exceeded: " + lastPrefix + " count="
                    + updatedStat.getCount() + " limit="
                    + thisStats.getCount());
        }
    }
*/
    /**
     * update the count of bytes of this stat datanode
     *
     * @param lastPrefix
     *            the path of the node that is quotaed
     * @param diff
     *            the diff to added to number of bytes
     * @throws IOException
     *             if path is not found
     */
/*
    public void updateBytes(String lastPrefix, long diff) {
        String statNode = Quotas.statPath(lastPrefix);
        DataNode node = nodes.get(statNode);
        if (node == null) {
            // should never be null but just to make
            // findbugs happy
            LOG.error("Missing stat node for bytes " + statNode);
            return;
        }
        StatsTrack updatedStat = null;
        synchronized (node) {
            updatedStat = new StatsTrack(new String(node.data));
            updatedStat.setBytes(updatedStat.getBytes() + diff);
            node.data = updatedStat.toString().getBytes();
        }
        // now check if the bytes match the quota
        String quotaNode = Quotas.quotaPath(lastPrefix);
        node = nodes.get(quotaNode);
        if (node == null) {
            // should never be null but just to make
            // findbugs happy
            LOG.error("Missing quota node for bytes " + quotaNode);
            return;
        }
        StatsTrack thisStats = null;
        synchronized (node) {
            thisStats = new StatsTrack(new String(node.data));
        }
        if (thisStats.getBytes() > -1 && (thisStats.getBytes() < updatedStat.getBytes())) {
            LOG
            .warn("Quota exceeded: " + lastPrefix + " bytes="
                    + updatedStat.getBytes() + " limit="
                    + thisStats.getBytes());
        }
    }
*/
    /**
     * Add a new node to the DataTree.
     * @param path
     * 			  Path for the new node.
     * @param data
     *            Data to store in the node.
     * @param acl
     *            Node acls
     * @param ephemeralOwner
     *            the session id that owns this node. -1 indicates this is not
     *            an ephemeral node.
     * @param zxid
     *            Transaction ID
     * @param time
     * @throws NodeExistsException 
     * @throws NoNodeException 
     * @throws KeeperException
     */
//    public void createNode(final String path, byte data[], final String hash,
//                long ephemeralOwner, boolean queueChild, int parentCVersion, long zxid, long time)
//    		throws KeeperException, NoNodeException, NodeExistsException {
//    	createNode(path, data, hash, ephemeralOwner, queueChild, parentCVersion, zxid, time, null);
//    }
    
    /**
     * Add a new node to the DataTree.
     * <p>
     * Builds the persisted stat of the node from {@code zxid} and
     * {@code time}, attaches the node to its parent while holding the
     * parent's monitor, updates the hash index and the queue / ephemeral
     * bookkeeping where applicable, and finally triggers the data watch for
     * {@code path} and the child watch for the parent.
     *
     * @param path
     *            Path for the new node.
     * @param data
     *            Data to store in the node.
     * @param hash
     *            Optional hash under which the node is additionally indexed
     *            (together with the parent path); {@code null} skips the
     *            hash index.
     * @param ephemeralOwner
     *            the session id that owns this node. 0 indicates this is not
     *            an ephemeral node.
     * @param queueChild
     *            whether the node is created as a queue child; forced to
     *            {@code true} when the parent is already flagged as a queue.
     * @param parentCVersion
     *            child version to store on the parent; -1 means "the
     *            parent's current cversion plus one".
     * @param zxid
     *            Transaction ID
     * @param time
     *            used as both creation and modification time of the node
     * @param outputStat
     *            A Stat object to store Stat output results into; may be
     *            {@code null}, in which case nothing is copied.
     * @throws NodeExistsException
     *            if the parent already has a child with this name
     * @throws NoNodeException
     *            if the parent node does not exist
     * @throws KeeperException
     */
    private void createNode(
        final String path,
        final byte data[],
        final String hash,
        final long ephemeralOwner,
        boolean queueChild,
        long parentCVersion,
        final long zxid,
        final long time,
        final Stat outputStat)
        throws KeeperException, KeeperException.NoNodeException,
            KeeperException.NodeExistsException
    {
//        LOG.info("DataTree.createNode.parentCVersion = " + parentCVersion);
        // Split the path into the parent's path and the child's own name.
        int lastSlash = path.lastIndexOf('/');
        String parentName = path.substring(0, lastSlash);
        String childName = path.substring(lastSlash + 1);
        // A fresh node: creation and modification fields all start at this txn.
        StatPersisted stat = new StatPersisted();
        stat.setCtime(time);
        stat.setMtime(time);
        stat.setCzxid(zxid);
        stat.setMzxid(zxid);
        stat.setPzxid(zxid);
        stat.setVersion(0);
        stat.setAversion(0);
        stat.setEphemeralOwner(ephemeralOwner);
        // The parent is always looked up as a regular (non-queue) path.
        PathKey parentKey = new RegularPath(parentName);
        DataNode parent = nodes.get(parentKey);
        if (parent == null) {
            throw new KeeperException.NoNodeException();
        }
        // All structural changes to the parent happen under its monitor.
        synchronized (parent) {
            ChildSet children = parent.getChildren();
            if (children != null && children.contains(childName)) {
                throw new KeeperException.NodeExistsException();
            }

            // -1 asks for "current cversion + 1"; any other value is stored as is.
            if (parentCVersion == -1) {
                parentCVersion = parent.getCversion();
                parentCVersion++;
            }
            parent.setCversion(parentCVersion);
            parent.setPzxid(zxid);
//            Long longval = convertAcls(acl);
            DataNode child = new DataNode(data, stat);
            if (parent.hasQueue) {
                // someone wants to recreate a vanished queue node
                queueChild = true;
            }
            PathKey pathKey = pathKey(path, queueChild);
            if (hash != null) {
                // Index the node by (parent, hash) so it can be found via nodesHashes.
//                String fullHash = getHash( parentName, hash );
                HashKey hashKey = hashKey(parentName.intern(), hash);
                nodesHashes.put(hashKey, pathKey);
                child.hash = hashKey;
            }
            child.queueChild = queueChild;
            parent.addChild(childName, child.size(), queueChild);
            nodes.put(pathKey, child);
            if (queueChild) {
                // Queue children count towards both queue storage counters;
                // the first queue child also registers the parent as a queue.
                commitedQueueStorageSize.addAndGet(child.size());
                floatingQueueStorageSize.addAndGet(child.size());
                if (!parent.hasQueue) {
                    parent.hasQueue = true;
                    queues.put(parentName, new Queue(parent, parentName));
                }
            }
            if (ephemeralOwner != 0) {
                // Track the path under the session id that owns it.
                HashSet<String> list = ephemerals.get(ephemeralOwner);
                if (list == null) {
                    list = new HashSet<String>();
                    ephemerals.put(ephemeralOwner, list);
                }
                synchronized (list) {
                    list.add(path);
                }
            }
            if (outputStat != null) {
            	child.copyStat(outputStat);
            }
        }
        if (queueChild) {
            // Wake the purge thread once the floating queue size comes within
            // queueStorageSizeDelta of maxQueueMemSize.
            if (floatingQueueStorageSize.get()
                > maxQueueMemSize - queueStorageSizeDelta)
            {
                queuePurgeThread.wakeup();
            }
        }
/*
        // now check if its one of the zookeeper node child
        if (parentName.startsWith(quotaZookeeper)) {
            // now check if its the limit node
            if (Quotas.limitNode.equals(childName)) {
                // this is the limit node
                // get the parent and add it to the trie
                pTrie.addPath(parentName.substring(quotaZookeeper.length()));
            }
            if (Quotas.statNode.equals(childName)) {
                updateQuotaForPath(parentName
                        .substring(quotaZookeeper.length()));
            }
        }
        // also check to update the quotas for this node
        String lastPrefix = getMaxPrefixWithQuota(path);
        if(lastPrefix != null) {
            // ok we have some match and need to update
            updateCount(lastPrefix, 1);
            updateBytes(lastPrefix, data == null ? 0 : data.length);
        }
*/
        // Watches fire outside the parent's monitor; a root-level parent is
        // reported as "/" rather than the empty string.
        dataWatches.triggerWatch(path, Event.EventType.NodeCreated);
        childWatches.triggerWatch(parentName.equals("") ? "/" : parentName,
                Event.EventType.NodeChildrenChanged);
    }

    /**
     * Purges queue nodes from memory while
     * {@code floatingQueueStorageSize} stays above
     * {@code maxQueueMemSize - 2 * queueStorageSizeDelta} and
     * {@code purgeRun} is set.
     * <p>
     * Each round collects every queue that reports pending deletes into a
     * priority queue (ordered by {@code Queue}'s natural ordering, which is
     * defined outside this method; presumably oldest first, as the method
     * name suggests). It then repeatedly takes the head queue, hands its next
     * delete node to {@code snapLog.queueStorage().addData(...)} when the
     * node carries data, decrements {@code floatingQueueStorageSize} and
     * advances that queue. The round ends by waiting for all submitted
     * storage tasks.
     * <p>
     * If no queue has anything to delete, a warning is logged, the thread
     * sleeps for one second and the method returns.
     *
     * @throws Exception if waiting on a storage task fails (for example via
     *         {@code Future.get()}) or the storage layer throws
     */
    private void purgeOldestQueueNodes() throws Exception {
        long prevTime = TimeSource.INSTANCE.currentTimeMillis();
        long iters = 0L;
        while (floatingQueueStorageSize.get()
            > maxQueueMemSize - (queueStorageSizeDelta*2) && purgeRun)
        {
            // PriorityQueue(int) rejects an initial capacity below 1 with an
            // IllegalArgumentException, so clamp it: the set of registered
            // queues may be empty while the size counter is still over the
            // threshold.
            final PriorityQueue<Queue> priorityQueue =
                new PriorityQueue<>(Math.max(1, queues.size()));
            for (Queue queue : queues.values()) {
                if (queue.hasDeletes()) {
                    priorityQueue.add(queue);
                }
            }
            if (priorityQueue.isEmpty()) {
                logger.warning("Nothing to purge. "
                    + "Try to increase max mem storage size");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Preserve the interrupt for the owning thread's loop.
                    Thread.currentThread().interrupt();
                }
                return;
            }
            final long toPurge =
                floatingQueueStorageSize.get()
                    - (maxQueueMemSize - (queueStorageSizeDelta << 1));
            logger.warning("To purge: " + toPurge);
            long purged = 0;
            long purgedCount = 0;
            List<Future<Void>> indexTasks = new LinkedList<>();
            while (purged < toPurge && purgeRun) {
                final Queue top = priorityQueue.poll();
                if (top == null) {
                    logger.warning("Nothing else to purge while toPurge is not reached."
                        + "Try to increase max mem storage size");
                    break;
                }
                purged += top.nextDeleteNode.size();
                if (top.nextDeleteNode.data != null) {
                    // Only nodes that still carry data are written to the
                    // queue storage before being dropped from memory.
                    final String hashString =
                        top.nextDeleteNode.hash == null ? null
                            : top.nextDeleteNode.hash.toString();
                    final String deletePath =
                        top.positionToPath(top.nextDeletePos);
                    indexTasks.add(snapLog.queueStorage().addData(
                        top.path,
                        deletePath,
                        hashString,
                        top.nextDeletePos,
                        top.nextDeleteNode.getCtime(),
                        top.nextDeleteNode.data,
                        new DeleteNodeCallback(deletePath)));
                }
                floatingQueueStorageSize.addAndGet(-top.nextDeleteNode.size());
                purgedCount++;
                top.nextDelete();
                // Re-queue so the queue is re-ordered by its new head.
                if (top.hasDeletes()) {
                    priorityQueue.add(top);
                }
                // Every 1000 iterations, and at most once per second, report
                // progress and forget storage tasks that already completed.
                if (iters++ % 1000L == 0L
                    && (TimeSource.INSTANCE.currentTimeMillis() - prevTime) > 1000L)
                {
                    logger.info("Purged so far: size=" + purged
                        + ", count=" + purgedCount);
                    prevTime = TimeSource.INSTANCE.currentTimeMillis();
                    indexTasks.removeIf(f -> f.isDone() || f.isCancelled());
                }
            }
            // wait for tasks to finish to accommodate .queueStorageSize
            for (Future<Void> task : indexTasks) {
                task.get();
            }
            logger.info("Purged: size=" + purged + ", count=" + purgedCount);
        }
    }

    /**
     * Remove the node at {@code path} from the datatree.
     * <p>
     * The node is removed from {@code nodes} (and from {@code queues},
     * {@code nodesHashes} and the committed queue size counter where
     * applicable), detached from its parent under the parent's monitor, and
     * removed from its owner's ephemeral set. Afterwards the
     * {@code NodeDeleted} data and child watches for {@code path} are
     * triggered; the parent's {@code NodeChildrenChanged} watch is triggered
     * only when the deleted node was not a queue child.
     * <p>
     * Quota bookkeeping (pTrie / updateCount / updateBytes) is not performed
     * here.
     *
     * @param path
     *            the path of the node to be deleted
     * @param zxid
     *            the current zxid; -1 leaves the parent's pzxid untouched
     * @throws KeeperException.NoNodeException
     *            if the node or its parent is not present in the tree
     */
    public void deleteNode(String path, long zxid)
            throws KeeperException.NoNodeException {
        int lastSlash = path.lastIndexOf('/');
        String parentName = path.substring(0, lastSlash);
        String childName = path.substring(lastSlash + 1);
        // The parent is always addressed as a regular path.
        final PathKey parentKey = new RegularPath(parentName);
        final PathKey key;
        if (queues.containsKey(parentName)) {
            // Children of a registered queue are keyed by the number that
            // follows QueueChildSet.PREFIX in their name.
            key = new QueueChildPath(
                parentName,
                Long.parseLong(
                    childName,
                    QueueChildSet.PREFIX.length(),
                    childName.length(),
                    10));
        } else {
            key = new RegularPath(path);
        }
        DataNode node = nodes.get(key);
        if (node == null) {
            throw new KeeperException.NoNodeException();
        }
        nodes.remove(key);
        if (node.hasQueue) {
            queues.remove(path);
        }
        DataNode parent = nodes.get(parentKey);
        if (parent == null) {
            throw new KeeperException.NoNodeException();
        }
        if (node.hash != null) {
            nodesHashes.remove(node.hash);
        }
        if (node.queueChild) {
            commitedQueueStorageSize.addAndGet(-node.size());
        }
        synchronized (parent) {
            parent.removeChild(childName, node.size());
            if (zxid != -1) {
                parent.setPzxid(zxid);
            }
            long eowner = node.getEphemeralOwner();
            if (eowner != 0) {
                // Named so it does not shadow the `nodes` field.
                HashSet<String> ownedPaths = ephemerals.get(eowner);
                if (ownedPaths != null) {
                    synchronized (ownedPaths) {
                        ownedPaths.remove(path);
                    }
                }
            }
        }
        // Watchers already notified through the data watch are passed on so
        // the child-watch delivery can take them into account.
        Set<Watcher> processed = dataWatches.triggerWatch(path,
                EventType.NodeDeleted);
        childWatches.triggerWatch(path, EventType.NodeDeleted, processed);
        if (!node.queueChild) {
            childWatches.triggerWatch("".equals(parentName) ? "/" : parentName,
                    EventType.NodeChildrenChanged);
        }
    }

    /**
     * Replace the data of an existing node.
     * <p>
     * Under the node's monitor the data, mtime, mzxid and version are
     * overwritten with the given values and the resulting stat is copied
     * out. Afterwards the {@code NodeDataChanged} data watch is triggered
     * and {@code dataWatchManager} is notified. Quota byte accounting is not
     * performed here.
     *
     * @param path    path of the node to update
     * @param data    new node data (stored as is, may be {@code null})
     * @param version version number to store on the node
     * @param zxid    transaction id stored as the node's mzxid
     * @param time    timestamp stored as the node's mtime
     * @return a fresh {@code Stat} holding the node's stat after the update
     * @throws KeeperException.NoNodeException if no node exists at {@code path}
     * @throws KeeperException declared for callers; not thrown directly here
     */
    public Stat setData(String path, byte data[], int version, long zxid,
            long time) throws KeeperException, KeeperException.NoNodeException {
        Stat s = new Stat();
        DataNode n = nodes.get(pathKey(path));
        if (n == null) {
            throw new KeeperException.NoNodeException();
        }
        synchronized (n) {
            n.data = data;
            n.setMtime(time);
            n.setMzxid(zxid);
            n.setVersion(version);
            n.copyStat(s);
        }
        dataWatches.triggerWatch(path, EventType.NodeDataChanged);
        dataWatchManager.dataNotify(path, n);
        return s;
    }

    /**
     * If there is a quota set, return the appropriate prefix for that quota
     * Else return null
     * @param path The ZK path to check for quota
     * @return Max quota prefix, or null if none
     */
/*
    public String getMaxPrefixWithQuota(String path) {
        // do nothing for the root.
        // we are not keeping a quota on the zookeeper
        // root node for now.
        String lastPrefix = pTrie.findMaxPrefix(path);

        if (rootZookeeper.equals(lastPrefix) || "".equals(lastPrefix)) {
            return null;
        }
        else {
            return lastPrefix;
        }
    }
*/
    /**
     * Returns the path that follows {@code path}, obtained by adding one to
     * the decimal counter at the end of the path.
     * <p>
     * The carry is confined to the trailing run of {@code '9'} characters.
     * When the whole counter consists of nines (the carry would reach a
     * non-digit character or the start of the string), the counter grows by
     * one digit, e.g. {@code "/q/999"} becomes {@code "/q/1000"}. A path
     * whose last character is neither a nine nor part of a carry simply has
     * that character incremented.
     *
     * @param path the path to advance
     * @return the successor path
     */
    private String findNextPath( String path ) {
        final StringBuilder next = new StringBuilder(path);
        final int last = next.length() - 1;
        int pos = last;
        // Roll every trailing nine over to zero.
        while (pos >= 0 && next.charAt(pos) == '9') {
            next.setCharAt(pos, '0');
            --pos;
        }
        final boolean carried = pos < last;
        final boolean atDigit =
            pos >= 0 && next.charAt(pos) >= '0' && next.charAt(pos) <= '9';
        if (pos < 0 || (carried && !atDigit)) {
            // The counter was all nines: widen it instead of running off the
            // start of the string or bumping the separator in front of it.
            next.insert(pos + 1, '1');
        } else {
            next.setCharAt(pos, (char) (next.charAt(pos) + 1));
        }
        return next.toString();
    }

    /**
     * Resolve a hash-style path to the key of the node it refers to.
     * <p>
     * The text between the leading {@code "hash"} prefix and the last
     * {@code '/'} is taken as the parent path and the remainder as the
     * node's hash. The in-memory hash index is consulted first; on a miss,
     * and only if the parent exists and is flagged as a queue, the queue
     * storage is asked for the path.
     *
     * @param hash hash-style path, i.e. {@code "hash" + parentPath + "/" + nodeHash}
     * @return the key of the matching node, or {@code null} if none is known
     * @throws KeeperException.SystemErrorException declared for the queue
     *         storage lookup
     */
    public PathKey getPathByHash( String hash ) throws KeeperException.SystemErrorException
    {
        int lastSlash = hash.lastIndexOf('/');
        String parentName = hash.substring("hash".length(), lastSlash);
        String child = hash.substring(lastSlash + 1);
        PathKey path = nodesHashes.get(hashKey(parentName, child));
        if (path != null) {
            return path;
        }

        DataNode parent = nodes.get(new RegularPath(parentName));
        if (parent == null || !parent.hasQueue) {
            return null;
        }
        final String storedPath =
            snapLog.queueStorage().getPathByHash(parentName, hash);
        // NOTE(review): the storage lookup is treated as nullable (the
        // three-argument getNodePath returns its result unchecked) - confirm.
        // A miss is reported as null instead of being fed into pathKey().
        if (storedPath == null) {
            return null;
        }
        return pathKey(storedPath, true);
    }

    /**
     * Resolve a hash-style path to the string form of the node's key.
     *
     * @param hash hash-style path as accepted by {@code getPathByHash}
     * @return the resolved path, or {@code null} when nothing matches or the
     *         lookup fails with a {@code SystemErrorException}
     */
    public String getNodePath(final String hash) {
        final PathKey resolved;
        try {
            resolved = getPathByHash(hash);
        } catch (KeeperException.SystemErrorException e) {
            // Lookup failures are reported to the caller as "not found".
            return null;
        }
        return resolved == null ? null : resolved.toString();
    }

    /**
     * Look up the path of the node stored under {@code hash} below
     * {@code parentPath}.
     * <p>
     * The in-memory hash index is checked first. If it has no entry and
     * {@code checkDiskStorage} is set, the queue storage is consulted,
     * provided the parent exists and is flagged as a queue.
     *
     * @param parentPath       path of the parent node
     * @param hash             hash of the child; {@code null} yields {@code null}
     * @param checkDiskStorage whether to fall back to the queue storage
     * @return the node's path, or {@code null} when it cannot be resolved or
     *         the lookup fails with a {@code SystemErrorException}
     */
    public String getNodePath(
        final String parentPath,
        final String hash,
        final boolean checkDiskStorage)
    {
        if (hash == null) {
            return null;
        }
        try {
            final PathKey inMemory = nodesHashes.get(hashKey(parentPath, hash));
            if (inMemory != null) {
                return inMemory.toString();
            }
            if (!checkDiskStorage) {
                return null;
            }
            final DataNode parent = nodes.get(new RegularPath(parentPath));
            if (parent == null || !parent.hasQueue) {
                return null;
            }
            return snapLog.queueStorage().getPathByHash(
                parentPath, getHash(parentPath, hash));
        } catch (KeeperException.SystemErrorException ignored) {
            // Lookup failures are reported to the caller as "not found".
            return null;
        }
    }

    /**
     * Read the data of the node at {@code path}.
     * <p>
     * Paths starting with {@code "hash/"} are resolved through
     * {@code getPathByHash}. Otherwise, children of a registered queue are
     * addressed by the number following {@code QueueChildSet.PREFIX} in
     * their name, and everything else as a regular path.
     * <p>
     * If the node is not in memory but its parent is a queue whose first
     * in-memory position lies beyond the requested one, the data is read
     * from the queue storage; in that case {@code stat} is left untouched
     * and no watch is registered.
     * <p>
     * For an in-memory node the stat is copied into {@code stat}. A
     * non-null {@code watcher} is registered on {@code path} itself, or, for
     * queue children, on the path produced by {@code findNextPath}.
     *
     * @param path    node path, or a {@code "hash/..."} style path
     * @param stat    receives the node's stat when the node is in memory
     * @param watcher watcher to register, or {@code null}
     * @return the node's data
     * @throws KeeperException.NoNodeException if the node cannot be found
     * @throws KeeperException from the hash or storage lookups
     */
    public byte[] getData(String path, Stat stat, Watcher watcher)
            throws KeeperException, KeeperException.NoNodeException {
        final int slashPos = path.lastIndexOf('/');
        String parentName = path.substring(0, slashPos);
        final PathKey nodeKey;
        if (path.startsWith("hash/")) {
            nodeKey = getPathByHash(path);
            if (nodeKey == null) {
                throw new KeeperException.NoNodeException();
            }
            // For hash paths the parent sits between "hash" and the last '/'.
            parentName = path.substring("hash".length(), slashPos);
        } else if (queues.containsKey(parentName)) {
            nodeKey = new QueueChildPath(
                parentName,
                Long.parseLong(
                    path,
                    slashPos + 1 + QueueChildSet.PREFIX.length(),
                    path.length(),
                    10));
        } else {
            nodeKey = new RegularPath(path);
        }
        final PathKey parentKey = new RegularPath(parentName);

        final DataNode found = nodes.get(nodeKey);
        if (found == null) {
            final DataNode parent = nodes.get(parentKey);
            if (parent != null && parent.hasQueue) {
                final QueueChildSet queueChildren =
                    (QueueChildSet) parent.getChildren();
                // Positions below the first in-memory one may still be
                // available from the queue storage.
                if (queueChildren.firstPosition() > ((QueueChildPath) nodeKey).num) {
                    final byte[] stored =
                        snapLog.queueStorage().getData(parentName, path);
                    if (stored != null) {
                        return stored;
                    }
                }
            }
            // Stack trace capture is skipped for this exception.
            throw new KeeperException.NoNodeException() {
                @Override
                public Throwable fillInStackTrace() {
                    return this;
                }
            };
        }

        synchronized (found) {
            found.copyStat(stat);
            if (watcher != null) {
                if (found.queueChild) {
                    try {
                        final String nextQueuePath = findNextPath(path);
                        // NOTE(review): `nodes` is keyed by PathKey everywhere
                        // else in this class; a String argument here looks
                        // like it can never match - confirm the intent.
                        if (!nodes.containsKey(nextQueuePath)) {
                            dataWatches.addWatch(nextQueuePath, watcher);
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                } else {
                    dataWatches.addWatch(path, watcher);
                }
            }
            return found.data;
        }
    }

    /**
     * Return the stat of the node at {@code path}.
     * <p>
     * The key is resolved as in {@code getData}: {@code "hash/"} paths go
     * through {@code getPathByHash}, children of a registered queue are
     * addressed by their number, everything else is a regular path. A
     * non-null {@code watcher} is registered on {@code path} before the
     * existence check, so it is registered even when the node is missing.
     * <p>
     * If the node is not in memory but the parent is a queue and the queue
     * storage holds data for the path, an empty (default-initialised)
     * {@code Stat} is returned.
     *
     * @param path    node path, or a {@code "hash/..."} style path
     * @param watcher watcher to register, or {@code null}
     * @return the node's stat
     * @throws KeeperException.NoNodeException if the node cannot be found
     * @throws KeeperException.SystemErrorException from the hash or storage
     *         lookups
     */
    public Stat statNode(String path, Watcher watcher)
            throws KeeperException.NoNodeException, KeeperException.SystemErrorException {
        final int slashPos = path.lastIndexOf('/');
        String parentName = path.substring(0, slashPos);
        final PathKey nodeKey;
        if (path.startsWith("hash/")) {
            nodeKey = getPathByHash(path);
            if (nodeKey == null) {
                throw new KeeperException.NoNodeException();
            }
            // For hash paths the parent sits between "hash" and the last '/'.
            parentName = path.substring("hash".length(), slashPos);
        } else if (queues.containsKey(parentName)) {
            nodeKey = new QueueChildPath(
                parentName,
                Long.parseLong(
                    path,
                    slashPos + 1 + QueueChildSet.PREFIX.length(),
                    path.length(),
                    10));
        } else {
            nodeKey = new RegularPath(path);
        }
        final PathKey parentKey = new RegularPath(parentName);

        final Stat result = new Stat();
        final DataNode found = nodes.get(nodeKey);
        if (watcher != null) {
            dataWatches.addWatch(path, watcher);
        }
        if (found != null) {
            synchronized (found) {
                found.copyStat(result);
                return result;
            }
        }

        // Not in memory: a queue parent may still have the node in storage.
        final DataNode parent = nodes.get(parentKey);
        if (parent != null
            && parent.hasQueue
            && snapLog.queueStorage().getData(parentName, path) != null)
        {
            return result;
        }
        throw new KeeperException.NoNodeException();
    }

    /**
     * List the children of the node at {@code path}.
     * <p>
     * For a {@code "hash/"} path the result is a single-element list holding
     * the resolved path of the hashed node; neither {@code stat} nor
     * {@code watcher} is used in that case. For a node flagged as a queue
     * only the first and the last child are returned. Otherwise the full
     * child list of the node is returned.
     *
     * @param path    node path, or a {@code "hash/..."} style path
     * @param stat    receives the node's stat; may be {@code null}
     * @param watcher child watcher to register on {@code path}, or {@code null}
     * @return the child names as described above; empty if the node has none
     * @throws KeeperException.NoNodeException if the node cannot be found
     * @throws KeeperException.SystemErrorException from the hash lookup
     */
    public List<String> getChildren(String path, Stat stat, Watcher watcher)
            throws KeeperException.NoNodeException, KeeperException.SystemErrorException {
        if (path.startsWith("hash/")) {
            final PathKey hashed = getPathByHash(path);
            if (hashed == null) {
                // Stack trace capture is skipped for this exception.
                throw new KeeperException.NoNodeException() {
                    @Override
                    public Throwable fillInStackTrace() {
                        return this;
                    }
                };
            }
            final ArrayList<String> single = new ArrayList<String>(1);
            single.add(hashed.toString());
            return single;
        }

        final int slashPos = path.lastIndexOf('/');
        final String parentName = path.substring(0, slashPos);
        final PathKey nodeKey;
        if (queues.containsKey(parentName)) {
            nodeKey = new QueueChildPath(
                parentName,
                Long.parseLong(
                    path,
                    slashPos + 1 + QueueChildSet.PREFIX.length(),
                    path.length(),
                    10));
        } else {
            nodeKey = new RegularPath(path);
        }

        final DataNode target = nodes.get(nodeKey);
        if (target == null) {
            throw new KeeperException.NoNodeException();
        }
        synchronized (target) {
            if (stat != null) {
                target.copyStat(stat);
            }
            final ChildSet childSet = target.getChildren();
            final List<String> names;
            if (childSet == null || childSet.size() == 0) {
                names = new ArrayList<String>(0);
            } else if (target.hasQueue) {
                // Queues only expose their two end points.
                names = new ArrayList<String>(2);
                names.add(childSet.first());
                names.add(childSet.last());
            } else {
                names = childSet.getList();
            }

            if (watcher != null) {
                childWatches.addWatch(path, watcher);
            }
            return names;
        }
    }
/*
    public Stat setACL(String path, List<ACL> acl, int version)
            throws KeeperException.NoNodeException {
        Stat stat = new Stat();
        DataNode n = nodes.get(path);
        if (n == null) {
            throw new KeeperException.NoNodeException();
        }
        synchronized (n) {
            n.stat.setAversion(version);
            n.acl = convertAcls(acl);
            n.copyStat(stat);
            return stat;
        }
    }
*/
/*
    public List<ACL> getACL(String path, Stat stat)
            throws KeeperException.NoNodeException {
        DataNode n = nodes.get(path);
        if (n == null) {
            throw new KeeperException.NoNodeException();
        }
        synchronized (n) {
            n.copyStat(stat);
            return new ArrayList<ACL>(convertLong(n.acl));
        }
    }
*/
    /**
     * Outcome of applying a single transaction to the tree. Instances are
     * identified by the pair (clientId, cxid) only.
     */
    public static class ProcessTxnResult {
        public long clientId;

        public int cxid;

        public long zxid;

        public int err;

        public int type;

        public String path;

        public Stat stat;

        public List<ProcessTxnResult> multiResult;

        /**
         * Two results are equal when both their clientId and their cxid
         * match, which lets hash tables track completion of transactions.
         *
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override
        public boolean equals(Object o) {
            if (!(o instanceof ProcessTxnResult)) {
                return false;
            }
            final ProcessTxnResult that = (ProcessTxnResult) o;
            return clientId == that.clientId && cxid == that.cxid;
        }

        /**
         * Derived from the same two fields that {@link #equals(Object)}
         * compares.
         *
         * @see ProcessTxnResult#equals(Object)
         * @see java.lang.Object#hashCode()
         */
        @Override
        public int hashCode() {
            final long mixed = clientId ^ cxid;
            return (int) (mixed % Integer.MAX_VALUE);
        }

    }

    // NOTE(review): presumably the zxid of the most recent transaction applied
    // to this tree (the code that updates it is not visible here) - confirm.
    // Declared volatile, starts at 0.
    public volatile long lastProcessedZxid = 0;

    /**
     * Applies a single transaction to this tree and reports the outcome.
     *
     * <p>The header's client id, cxid, zxid and type are copied into the
     * result, then the transaction is dispatched on its op code. A
     * {@link KeeperException} raised while applying the change is not
     * propagated: its code is stored in {@code rc.err}. An
     * {@link IOException} is only logged at FINEST and leaves
     * {@code rc.err} unchanged.
     *
     * <p>After the change has been applied, {@code lastProcessedZxid} is
     * advanced if the transaction's zxid is larger, and a failed
     * {@code create} with {@code NODEEXISTS} triggers an adjustment of the
     * parent's cversion/pzxid (see the comments in the body).
     *
     * @param header transaction header (client id, cxid, zxid, time, type)
     * @param txn    transaction payload; it is cast to the concrete record
     *               type that matches {@code header.getType()}
     * @return the result record; for {@code multi} it also carries one
     *         sub-result per sub-transaction in {@code multiResult}
     */
    public ProcessTxnResult processTxn(TxnHeader header, Record txn)
    {
        ProcessTxnResult rc = new ProcessTxnResult();
//        System.err.println( "DataTree.processTXN: " + header.toString() );
        try {
            // Echo the header into the result before touching the tree.
            rc.clientId = header.getClientId();
            rc.cxid = header.getCxid();
            rc.zxid = header.getZxid();
            rc.type = header.getType();
            rc.err = 0;
            rc.multiResult = null;
            // Note: there is no default branch; op codes not listed below
            // leave the tree untouched and yield rc.err == 0.
            switch (header.getType()) {
                case OpCode.create:
                    CreateTxn createTxn = (CreateTxn) txn;
                    rc.path = createTxn.getPath();
                    // When the txn carries the "exist" flag nothing is created;
                    // only the path is reported back.
                    // NOTE(review): presumably the flag means the node was
                    // already present when the txn was prepared - confirm.
                    if( createTxn.getExist() )
                    {
                        break;
                    }
                    createNode(
                            createTxn.getPath(),
                            createTxn.getData(),
                            createTxn.getHash(),
//                            createTxn.getAcl(),
                            // ephemeral owner: the creating session, or 0 for a persistent node
                            createTxn.getEphemeral() ? header.getClientId() : 0,
                            createTxn.getQueue(),
                            createTxn.getParentCVersion(),
                            header.getZxid(), header.getTime(), null);
                    break;
                case OpCode.create2:
                    CreateTxn create2Txn = (CreateTxn) txn;
                    rc.path = create2Txn.getPath();
                    // With the "exist" flag set, return the stat of the node
                    // that is already in the tree. If the node is missing
                    // despite the flag, fall through and create it.
                    if( create2Txn.getExist() )
                    {
                        DataNode node = nodes.get( pathKey(rc.path) );
                        if( node != null )
                        {
                            rc.stat = new Stat();
                            node.copyStat(rc.stat);
                            break;
                        }
                    }
                    // Same as create, but the stat filled in by createNode is returned.
                    Stat stat = new Stat();
                    createNode(
                            create2Txn.getPath(),
                            create2Txn.getData(),
                            create2Txn.getHash(),
//                            create2Txn.getAcl(),
                            create2Txn.getEphemeral() ? header.getClientId() : 0,
                            create2Txn.getQueue(),
                            create2Txn.getParentCVersion(),
                            header.getZxid(), header.getTime(), stat);
                    rc.stat = stat;
                    break;
                case OpCode.delete:
                    DeleteTxn deleteTxn = (DeleteTxn) txn;
                    rc.path = deleteTxn.getPath();
                    deleteNode(deleteTxn.getPath(), header.getZxid());
                    break;
                // reconfig shares the setData handling: its payload is cast
                // to SetDataTxn and applied the same way.
                case OpCode.reconfig:
                case OpCode.setData:
                    SetDataTxn setDataTxn = (SetDataTxn) txn;
                    rc.path = setDataTxn.getPath();
                    rc.stat = setData(setDataTxn.getPath(), setDataTxn
                            .getData(), setDataTxn.getVersion(), header
                            .getZxid(), header.getTime());
                    break;
/*
                case OpCode.setACL:
                    SetACLTxn setACLTxn = (SetACLTxn) txn;
                    rc.path = setACLTxn.getPath();
                    rc.stat = setACL(setACLTxn.getPath(), setACLTxn.getAcl(),
                            setACLTxn.getVersion());
                    break;
*/
                case OpCode.closeSession:
                    // Drops every ephemeral node owned by the closing session.
                    killSession(header.getClientId(), header.getZxid());
                    break;
                case OpCode.error:
                    ErrorTxn errTxn = (ErrorTxn) txn;
                    rc.err = errTxn.getErr();
                    break;
                case OpCode.check:
                    // Only the path is recorded; no version comparison happens here.
                    CheckVersionTxn checkTxn = (CheckVersionTxn) txn;
                    rc.path = checkTxn.getPath();
                    break;
                case OpCode.multi:
                    MultiTxn multiTxn = (MultiTxn) txn ;
                    List<Txn> txns = multiTxn.getTxns();
                    rc.multiResult = new ArrayList<ProcessTxnResult>();
                    // First pass: the multi counts as failed as soon as any
                    // sub-transaction is an error record.
                    boolean failed = false;
                    for (Txn subtxn : txns) {
                        if (subtxn.getType() == OpCode.error) {
                            failed = true;
                            break;
                        }
                    }

                    // Second pass: decode and apply each sub-transaction.
                    // post_failed becomes true once the error record has been
                    // reached, so it distinguishes sub-txns located before
                    // the error from those located after it.
                    boolean post_failed = false;
                    for (Txn subtxn : txns) {
                        ByteBuffer bb = ByteBuffer.wrap(subtxn.getData());
                        Record record = null;
                        // Pick the empty record type to deserialize into.
                        switch (subtxn.getType()) {
                            case OpCode.create:
                                record = new CreateTxn();
                                break;
                            case OpCode.delete:
                                record = new DeleteTxn();
                                break;
                            case OpCode.setData:
                                record = new SetDataTxn();
                                break;
                            case OpCode.error:
                                record = new ErrorTxn();
                                post_failed = true;
                                break;
                            case OpCode.check:
                                record = new CheckVersionTxn();
                                break;
                            default:
                                throw new IOException("Invalid type of op: " + subtxn.getType());
                        }
                        assert(record != null);

                        ByteBufferInputStream.byteBuffer2Record(bb, record);

                        // In a failed multi no real change is applied: every
                        // non-error sub-txn is rewritten (in place, via
                        // setType) into an error record. Sub-txns before the
                        // failing one get OK, those after it get
                        // RUNTIMEINCONSISTENCY.
                        if (failed && subtxn.getType() != OpCode.error){
                            int ec = post_failed ? Code.RUNTIMEINCONSISTENCY.intValue()
                                                 : Code.OK.intValue();

                            subtxn.setType(OpCode.error);
                            record = new ErrorTxn(ec);
                        }

                        if (failed) {
                            assert(subtxn.getType() == OpCode.error) ;
                        }

                        // Apply the sub-txn recursively under a header that
                        // differs from the outer one only in its type.
                        TxnHeader subHdr = new TxnHeader(header.getClientId(), header.getCxid(),
                                                         header.getZxid(), header.getTime(),
                                                         subtxn.getType());
                        ProcessTxnResult subRc = processTxn(subHdr, record);
                        rc.multiResult.add(subRc);
                        // The first non-zero sub-result error becomes the multi's error.
                        if (subRc.err != 0 && rc.err == 0) {
                            rc.err = subRc.err ;
                        }
                    }
                    break;
            }
        } catch (KeeperException e) {
            if (logger.isLoggable(Level.FINEST)) {
                logger.log(Level.FINEST, "Failed: " + header + ":" + txn, e);
            }
            rc.err = e.code().intValue();
        } catch (IOException e) {
            // NOTE(review): rc.err is not updated on this path, so the caller
            // sees whatever error code was set before the exception (0 unless
            // an earlier sub-txn failed) - confirm this is intended.
            if (logger.isLoggable(Level.FINEST)) {
                logger.log(Level.FINEST, "Failed: " + header + ":" + txn, e);
            }
        }
        /*
         * A snapshot might be in progress while we are modifying the data
         * tree. If we set lastProcessedZxid prior to making corresponding
         * change to the tree, then the zxid associated with the snapshot
         * file will be ahead of its contents. Thus, while restoring from
         * the snapshot, the restore method will not apply the transaction
         * for zxid associated with the snapshot file, since the restore
         * method assumes that transaction to be present in the snapshot.
         *
         * To avoid this, we first apply the transaction and then modify
         * lastProcessedZxid.  During restore, we correctly handle the
         * case where the snapshot contains data ahead of the zxid associated
         * with the file.
         */
        if (rc.zxid > lastProcessedZxid) {
            lastProcessedZxid = rc.zxid;
        }

        /*
         * Snapshots are taken lazily. It can happen that the child
         * znodes of a parent are created after the parent
         * is serialized. Therefore, while replaying logs during restore, a
         * create might fail because the node was already
         * created.
         *
         * After seeing this failure, we should increment
         * the cversion of the parent znode since the parent was serialized
         * before its children.
         *
         * Note, such failures on DT should be seen only during
         * restore.
         */
        if (header.getType() == OpCode.create &&
                rc.err == Code.NODEEXISTS.intValue()) {
            logger.fine("Adjusting parent cversion for Txn: " + header.getType() +
                    " path:" + rc.path + " err: " + rc.err);
            // Parent path = everything before the last '/' of the created path.
            int lastSlash = rc.path.lastIndexOf('/');
            String parentName = rc.path.substring(0, lastSlash);
            CreateTxn cTxn = (CreateTxn)txn;
            try {
                setCversionPzxid(parentName, cTxn.getParentCVersion(),
                        header.getZxid());
            } catch (KeeperException.NoNodeException e) {
                logger.log(
                    Level.SEVERE,
                    "Failed to set parent cversion for: " + parentName,
                    e);
                rc.err = e.code().intValue();
            }
        } else if (rc.err != Code.OK.intValue()) {
            logger.fine("Ignoring processTxn failure hdr: " + header.getType() +
                  " : error: " + rc.err);
        }
        return rc;
    }

    /**
     * Deletes every ephemeral node registered for the given session.
     *
     * <p>The session's entry is removed from {@code ephemerals} first and the
     * recorded paths are then deleted one by one. A path that no longer
     * exists is reported with a warning and otherwise ignored, so one missing
     * node does not prevent the remaining ones from being deleted.
     *
     * @param session id of the session whose ephemeral nodes are removed
     * @param zxid    zxid of the transaction, forwarded to {@code deleteNode}
     */
    void killSession(long session, long zxid) {
        // the list is already removed from the ephemerals
        // so we do not have to worry about synchronizing on
        // the list. This is only called from FinalRequestProcessor
        // so there is no need for synchronization. The list is not
        // changed here. Only create and delete change the list which
        // are again called from FinalRequestProcessor in sequence.
        HashSet<String> list = ephemerals.remove(session);
        if (list == null) {
            // the session never owned an ephemeral node
            return;
        }
        for (String path : list) {
            try {
                deleteNode(path, zxid);
                // Guard with the level the message is actually logged at.
                // Checking FINEST here would hide the message from a logger
                // configured for FINE.
                if (logger.isLoggable(Level.FINE)) {
                    logger.fine("Deleting ephemeral node " + path
                                    + " for session 0x"
                                    + Long.toHexString(session));
                }
            } catch (NoNodeException e) {
                logger.warning("Ignoring NoNodeException for path " + path
                        + " while removing ephemeral for dead session 0x"
                        + Long.toHexString(session));
            }
        }
    }

    /**
     * Mutable accumulator used as an in/out argument by {@code getCounts}:
     * holds the running number of nodes and the running total of data bytes.
     */
    private static class Counts {
        /** number of nodes counted so far */
        int count = 0;
        /** sum of the data lengths of the counted nodes */
        long bytes = 0L;
    }

    /**
     * Adds the node at {@code path} and, recursively, all of its descendants
     * to the given accumulator. Does nothing when no node exists at
     * {@code path}.
     *
     * @param path
     *            path of the subtree root
     * @param counts
     *            accumulator; its node count is incremented by one per
     *            visited node and its byte total by that node's data length
     */
    private void getCounts(String path, Counts counts) {
        final DataNode current = getNode(path);
        if (current == null) {
            return;
        }
        final String[] childNames;
        final int dataLength;
        // take a consistent snapshot of the children and the data size
        synchronized (current) {
            final ChildSet childSet = current.getChildren();
            childNames = (childSet != null) ? childSet.toArray() : null;
            dataLength = (current.data != null) ? current.data.length : 0;
        }
        // account for this node itself
        counts.count++;
        counts.bytes += dataLength;
        if (childNames != null) {
            // descend outside the lock
            for (int i = 0; i < childNames.length; i++) {
                getCounts(path + "/" + childNames[i], counts);
            }
        }
    }

    /**
     * update the quota for the given path
     *
     * @param path
     *            the path to be used
     */
/*
    private void updateQuotaForPath(String path) {
        Counts c = new Counts();
        getCounts(path, c);
        StatsTrack strack = new StatsTrack();
        strack.setBytes(c.bytes);
        strack.setCount(c.count);
        String statPath = Quotas.quotaZookeeper + path + "/" + Quotas.statNode;
        DataNode node = getNode(statPath);
        // it should exist
        if (node == null) {
            LOG.warn("Missing quota stat node " + statPath);
            return;
        }
        synchronized (node) {
            node.data = strack.toString().getBytes();
        }
    }
*/
    /**
     * this method traverses the quota path and update the path trie and sets
     *
     * @param path
     */
/*
    private void traverseNode(String path) {
        DataNode node = getNode(path);
        String children[] = null;
        synchronized (node) {
            Set<String> childs = node.getChildren();
            if (childs != null) {
                children = childs.toArray(new String[childs.size()]);
            }
        }
        if (children == null || children.length == 0) {
            // this node does not have a child
            // is the leaf node
            // check if its the leaf node
            String endString = "/" + Quotas.limitNode;
            if (path.endsWith(endString)) {
                // ok this is the limit node
                // get the real node and update
                // the count and the bytes
                String realPath = path.substring(Quotas.quotaZookeeper
                        .length(), path.indexOf(endString));
                updateQuotaForPath(realPath);
                this.pTrie.addPath(realPath);
            }
            return;
        }
        for (String child : children) {
            traverseNode(path + "/" + child);
        }
    }
*/
    /**
     * this method sets up the path trie and sets up stats for quota nodes
     */
/*
    private void setupQuota() {
        String quotaPath = Quotas.quotaZookeeper;
        DataNode node = getNode(quotaPath);
        if (node == null) {
            return;
        }
        traverseNode(quotaPath);
    }
*/
    /**
     * Writes the node addressed by {@code path} to the archive, followed
     * depth-first by all of its descendants. A single {@link StringBuilder}
     * is reused to build the child paths instead of concatenating strings.
     * Nothing is written when no node is registered under the path.
     *
     * @param oa
     *            OutputArchive to write to.
     * @param path
     *            builder holding the path of the node to write; it is used
     *            as scratch space for the child paths and is modified by
     *            this call.
     * @param queueChild
     *            forwarded to {@code pathKey} when looking the node up.
     * @param buffer
     *            scratch builder handed to {@code DataNode.serialize}.
     * @throws IOException
     *             if writing to the archive fails.
     */
    private void serializeNode(OutputArchive oa, StringBuilder path, boolean queueChild, StringBuilder buffer) throws IOException {
        final String nodePath = path.toString();
        final DataNode node = nodes.get(pathKey(nodePath, queueChild));
        if (node == null) {
            return;
        }
        Iterator<String> childNames = null;
        // write the node and grab its child iterator under the node's lock
        synchronized (node) {
            oa.writeString(nodePath, "path");
            node.serialize(oa, "node", buffer);
            final ChildSet childSet = node.getChildren();
            if (childSet != null) {
                childNames = childSet.iterator();
            }
        }
        path.append('/');
        final int prefixLength = path.length();
        if (childNames == null) {
            return;
        }
        while (childNames.hasNext()) {
            final String childName = childNames.next();
            // the builder is shared across siblings: cut it back to
            // "<parent>/" before appending the next child name
            path.setLength(prefixLength);
            path.append(childName);
            serializeNode(oa, path, node.hasQueue, buffer);
        }
    }
/*
    private void deserializeList(Map<Long, List<ACL>> longKeyMap,
            InputArchive ia) throws IOException {
        int i = ia.readInt("map");
        while (i > 0) {
            Long val = ia.readLong("long");
            if (aclIndex < val) {
                aclIndex = val;
            }
            List<ACL> aclList = new ArrayList<ACL>();
            Index j = ia.startVector("acls");
            while (!j.done()) {
                ACL acl = new ACL();
                acl.deserialize(ia, "acl");
                aclList.add(acl);
                j.incr();
            }
            longKeyMap.put(val, aclList);
            aclKeyMap.put(aclList, val);
            i--;
        }
    }

    private synchronized void serializeList(Map<Long, List<ACL>> longKeyMap,
            OutputArchive oa) throws IOException {
        oa.writeInt(longKeyMap.size(), "map");
        Set<Map.Entry<Long, List<ACL>>> set = longKeyMap.entrySet();
        for (Map.Entry<Long, List<ACL>> val : set) {
            oa.writeLong(val.getKey(), "long");
            List<ACL> aclList = val.getValue();
            oa.startVector(aclList, "acls");
            for (ACL acl : aclList) {
                acl.serialize(oa, "acl");
            }
            oa.endVector(aclList, "acls");
        }
    }
*/
    /**
     * Writes the whole tree to the archive while holding {@code clearLock}.
     * The nodes are written starting from the empty path, followed by the
     * path {@code "/"} as end-of-stream marker.
     *
     * @param oa  archive to write to
     * @param tag not used by this implementation
     * @throws IOException if writing fails, or if {@code root} is
     *                     {@code null} once the nodes have been written
     *                     (the tree was cleared during the snapshot)
     */
    public void serialize(OutputArchive oa, String tag) throws IOException {
        synchronized (clearLock) {
            final StringBuilder scratch = new StringBuilder();
            final StringBuilder rootPath = new StringBuilder("");
            serializeNode(oa, rootPath, false, scratch);
            // we need to check if clear had been called in between the snapshot.
            if (root == null) {
                throw new IOException("DataTree has been cleared");
            }
            // "/" marks end of stream
            oa.writeString("/", "path");
        }
    }

    /**
     * Rebuilds this tree from an archive.
     *
     * <p>The purge thread is stopped, the node map, the queue map and the
     * hash index are cleared and both queue storage size counters are reset.
     * Then (path, node) pairs are read until the path {@code "/"} is seen,
     * which marks the end of the stream. Each node is registered, linked to
     * its parent, and recorded in {@code ephemerals} when it has an ephemeral
     * owner. Finally the nodes flagged with {@code hasQueue} are
     * post-processed, the purge thread is restarted and
     * {@code dataWatchManager} is enabled.
     *
     * <p>NOTE(review): {@code ephemerals} is not cleared here, unlike the
     * other indexes - confirm that this method only runs on a tree without
     * ephemeral entries.
     *
     * @param ia  archive to read from
     * @param tag not used by this implementation
     * @throws IOException if reading fails or a node's parent has not been
     *                     read before the node itself
     */
    public void deserialize(
        final InputArchive ia,
        final String tag) throws IOException
    {
        // Reset all derived state before loading.
        stopPurgeThread();
        nodes.clear();
        queues.clear();
        nodesHashes.clear();
        commitedQueueStorageSize.set(0);
        floatingQueueStorageSize.set(0);
        String path = ia.readString("path");
        // Nodes that own a queue; handled after the whole tree has been read.
        LinkedList<NodeAndPath> hasQueueNodes = new LinkedList<NodeAndPath>();
        while (!"/".equals(path)) {
            DataNode node = new DataNode();
            ia.readRecord(node, "node");
            PathKey key = pathKey(path, node.queueChild);
            nodes.put(key, node);
            if (node.hash != null) {
                nodesHashes.put(node.hash, key);
            }
            if (node.hasQueue) {
                NodeAndPath nap = new NodeAndPath(node, path);
                hasQueueNodes.add(nap);
                // Return values are discarded.
                // NOTE(review): presumably these calls make the queue storage
                // open or create its state for this path - confirm.
                snapLog.queueStorage().getQueue(path);
                snapLog.queueStorage().getOldQueue(path);
            }
            int lastSlash = path.lastIndexOf('/');
            if (lastSlash == -1) {
                // A path without any '/' (the empty path) is the root.
                root = node;
            } else {
                String parentPath = path.substring(0, lastSlash);
                // The parent is always looked up as a regular path.
                PathKey parentKey;
                parentKey = new RegularPath(parentPath);
                DataNode parent = nodes.get(parentKey);
                if (parent == null) {
                    throw new IOException("Invalid Datatree, unable to find " +
                            "parent " + parentPath + " of path " + path);
                }
                parent.addChild(
                    path.substring(lastSlash + 1),
                    node.size(),
                    node.queueChild);
                // Re-register ephemeral nodes under their owning session.
                long eowner = node.getEphemeralOwner();
                if (eowner != 0) {
                    HashSet<String> list = ephemerals.get(eowner);
                    if (list == null) {
                        list = new HashSet<String>();
                        ephemerals.put(eowner, list);
                    }
                    list.add(path);
                }
            }
            path = ia.readString("path");
//            System.err.println( "deserialize: <" + path + ">" );
        }
        // Also register the root under "/".
        nodesPut("/", root);
        for (NodeAndPath entry : hasQueueNodes) {
            //check and fix cversion
            // NOTE(review): assumes getChildren() of a hasQueue node is a
            // non-null QueueChildSet - confirm.
            QueueChildSet children = (QueueChildSet) entry.node.getChildren();
            if (children.size() > 0) {
                // The parent's cversion must be greater than the last child
                // position; repair it if it is not.
                if (entry.node.getCversion() <= children.lastPosition()) {
                    logger.severe("Queue parent: " + entry.path + " has cversion="
                        + entry.node.getCversion() + " which is "
                        + "less_then_or_equals the largest queue child: "
                        + children.lastPosition()
                        + ". Auto fixing.");
                    entry.node.setCversion(children.lastPosition() + 1);
                }
            }
            // Both size counters start from the size of the loaded queue node.
            commitedQueueStorageSize.addAndGet(entry.node.size());
            floatingQueueStorageSize.addAndGet(entry.node.size());
            queues.put(entry.path, new Queue(entry.node, entry.path));
        }
        startPurgeThread();
        dataWatchManager.enable();
        // we are done with deserializing the
        // the datatree
        // update the quotas - create path trie
        // and also update the stat nodes
//        setupQuota();
    }

    /**
     * Prints a summary of the data watches on the datatree, as rendered by
     * {@code dataWatches.toString()}. Child watches are not included.
     *
     * @param pwriter the output to write to
     */
    public synchronized void dumpWatchesSummary(PrintWriter pwriter) {
        final String summary = dataWatches.toString();
        pwriter.print(summary);
    }

    /**
     * Write a text dump of all the data watches on the datatree; child
     * watches are not dumped by this method.
     * Warning, this is expensive, use sparingly!
     *
     * @param pwriter the output to write to
     * @param byPath forwarded unchanged to {@code dataWatches.dumpWatches};
     *     presumably selects grouping by path instead of by watcher -
     *     TODO confirm against the watch manager
     */
    public synchronized void dumpWatches(PrintWriter pwriter, boolean byPath) {
        dataWatches.dumpWatches(pwriter, byPath);
    }

    /**
     * Write a text dump of all the ephemerals in the datatree: a header with
     * the number of sessions, then for each session its id in hex followed
     * by the paths of its ephemeral nodes, one per line.
     *
     * <p>A session whose entry disappears from the map while the dump is in
     * progress is listed without paths instead of failing the dump.
     *
     * @param pwriter the output to write to
     */
    public void dumpEphemerals(PrintWriter pwriter) {
        Set<Long> keys = ephemerals.keySet();
        pwriter.println("Sessions with Ephemerals ("
                + keys.size() + "):");
        for (long k : keys) {
            pwriter.print("0x" + Long.toHexString(k));
            pwriter.println(":");
            HashSet<String> tmp = ephemerals.get(k);
            if (tmp == null) {
                // The entry was removed (e.g. by killSession) after the key
                // set was obtained; synchronizing on null would throw.
                continue;
            }
            synchronized (tmp) {
                for (String path : tmp) {
                    pwriter.println("\t" + path);
                }
            }
        }
    }

    /**
     * Removes the given watcher from both the data watches and the child
     * watches of this tree.
     *
     * @param watcher the watcher to unregister
     */
    public void removeCnxn(Watcher watcher) {
        dataWatches.removeWatcher(watcher);
        childWatches.removeWatcher(watcher);
    }

    /**
     * Re-registers a client's watches relative to the last zxid it has seen.
     *
     * <p>For every path, the current node state is compared with
     * {@code relativeZxid}. If the event the watch is waiting for has
     * already happened, it is delivered to {@code watcher} immediately;
     * otherwise the watch is registered again.
     * <ul>
     * <li>data watches: missing node gives NodeDeleted; czxid newer gives
     * NodeCreated; mzxid newer gives NodeDataChanged; otherwise the watch is
     * added to the data watches.</li>
     * <li>exist watches: missing node means the watch is added to the data
     * watches; mzxid newer gives NodeDataChanged; any other existing node
     * gives NodeCreated.</li>
     * <li>child watches: missing node gives NodeDeleted; pzxid newer gives
     * NodeChildrenChanged; otherwise the watch is added to the child
     * watches.</li>
     * </ul>
     *
     * @param relativeZxid last zxid seen by the client
     * @param dataWatches  paths carrying data watches
     * @param existWatches paths carrying exists watches
     * @param childWatches paths carrying child watches
     * @param watcher      receiver of fired events and owner of the
     *                     registered watches
     */
    public void setWatches(long relativeZxid, List<String> dataWatches,
            List<String> existWatches, List<String> childWatches,
            Watcher watcher) {
        for (String path : dataWatches) {
            final DataNode node = getNode(path);
            final EventType missed;
            if (node == null) {
                missed = EventType.NodeDeleted;
            } else if (node.getCzxid() > relativeZxid) {
                missed = EventType.NodeCreated;
            } else if (node.getMzxid() > relativeZxid) {
                missed = EventType.NodeDataChanged;
            } else {
                missed = null;
            }
            if (missed != null) {
                watcher.process(new WatchedEvent(missed,
                        KeeperState.SyncConnected, path));
            } else {
                this.dataWatches.addWatch(path, watcher);
            }
        }

        for (String path : existWatches) {
            final DataNode node = getNode(path);
            if (node == null) {
                // still absent, as it was when the watch was registered
                this.dataWatches.addWatch(path, watcher);
                continue;
            }
            final EventType missed = (node.getMzxid() > relativeZxid)
                    ? EventType.NodeDataChanged
                    : EventType.NodeCreated;
            watcher.process(new WatchedEvent(missed,
                    KeeperState.SyncConnected, path));
        }

        for (String path : childWatches) {
            final DataNode node = getNode(path);
            if (node == null) {
                watcher.process(new WatchedEvent(EventType.NodeDeleted,
                        KeeperState.SyncConnected, path));
            } else if (node.getPzxid() > relativeZxid) {
                watcher.process(new WatchedEvent(EventType.NodeChildrenChanged,
                        KeeperState.SyncConnected, path));
            } else {
                this.childWatches.addWatch(path, watcher);
            }
        }
    }

     /**
      * Raises the Cversion of the node at {@code path} and sets its Pzxid.
      * Both values are written only when the requested Cversion is greater
      * than the node's current one; otherwise the node is left unchanged.
      *
      * @param path
      *     Full path to the znode whose Cversion needs to be modified.
      *     A single "/" at the end of the path is ignored.
      * @param newCversion
      *     Value to be assigned to Cversion; {@code -1} stands for
      *     "current Cversion plus one".
      * @param zxid
      *     Value to be assigned to Pzxid
      * @throws KeeperException.NoNodeException
      *     If znode not found.
      **/
    public void setCversionPzxid(String path, long newCversion, long zxid)
        throws KeeperException.NoNodeException {
        final String nodePath = path.endsWith("/")
                ? path.substring(0, path.length() - 1)
                : path;
        final DataNode node = nodes.get(pathKey(nodePath));
        if (node == null) {
            throw new KeeperException.NoNodeException(nodePath);
        }
        synchronized (node) {
            // resolve the -1 sentinel against the node's current value
            final long target = (newCversion == -1)
                    ? node.getCversion() + 1
                    : newCversion;
            if (target > node.getCversion()) {
                node.setCversion(target);
                node.setPzxid(zxid);
            }
        }
    }
}
