package org.apache.lucene.util;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SHIFT;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import static org.apache.lucene.util.ByteBlockPool.LARGE_BLOCK_SIZE;
import static org.apache.lucene.util.ByteBlockPool.LARGE_BLOCK_OFFSET;

import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.atomic.LongAdder;

import org.apache.lucene.util.ByteBlockPool.DirectAllocator;

import ru.yandex.collection.ChunkedIntList;

import ru.yandex.util.timesource.TimeSource;

/**
 * {@link BytesRefHash} is a special purpose hash-map like data-structure
 * optimized for {@link BytesRef} instances. BytesRefHash maintains mappings of
 * byte arrays to ordinals ({@code Map<BytesRef,int>}), storing the hashed bytes
 * efficiently in contiguous storage. The mapping to the ordinal is
 * encapsulated inside {@link BytesRefHash}, and the ordinal is guaranteed to
 * increase for each added {@link BytesRef}.
 * 
 * <p>
 * Note: a {@link BytesRef} instance passed to {@link #add(BytesRef)} must not
 * be longer than {@link ByteBlockPool#LARGE_BLOCK_SIZE}. Values that do not
 * fit into {@link ByteBlockPool#BYTE_BLOCK_SIZE}-2 bytes are stored in
 * separate large buffers of the pool.
 * The internal storage is limited to 2GB total byte storage.
 * </p>
 * 
 * @lucene.internal
 */
public final class BytesRefHash {
  // Shared adder returned by DirectBytesStartArray.bytesUsed(); every hash
  // backed by a DirectBytesStartArray accumulates into this single instance.
  public static final LongAdder FAKE_LONG_ADDER = new LongAdder();
  // Initial hash table size used by the BytesRefHash(ByteBlockPool) constructor.
  public static final int DEFAULT_CAPACITY = 16;
  // The next three constants are not referenced inside this class.
  // NOTE(review): presumably consumed by callers elsewhere - confirm before changing.
  public static final int MAX_BYTES_SIZE = 64 * 1024;
  public static final int BIG_BYTES_MASK = 0xFFFF;
  public static final int BIG_BYTES_SHIFT = 16;
  // finishDoc() installs a default comparator (and thereby enables sorting)
  // once more than this many ords were added since the last finishDoc().
  public static final int UNSORTED_THRESHOLD = 50000;

  // Compile-time switch for the System.err tracing in finishDoc()/mergeSlices().
  private static final boolean DEBUG = false;
  // Shared "no slices" marker; getSorted() tests sortedSlices.length == 0 against it.
  private static final int[][] EMPTY_SLICES = new int[0][];

  // the following fields are needed by comparator,
  // so package private to prevent access$-methods:
  final ByteBlockPool pool;
  // ord -> start offset in the pool (negative values address large buffers, see rehash()).
  ChunkedIntList bytesStart;

  // Scratch used by the private equals(int, BytesRef) during add().
  private final BytesRef scratch1 = new BytesRef();
  // Hash table geometry: size, half size (rehash trigger) and index mask (size - 1).
  private int hashSize;
  private int hashHalfSize;
  private int hashMask;
  // Number of ords handed out so far.
  private volatile int count;
  // Value of count at the end of the last finishDoc(); find() ignores ords >= this.
  private volatile int sortedCount = 0;
  // count as it was at the last compact()/clear(); clear() uses it to decide on shrinking.
  private int lastCount = -1;
  // Open-addressing hash table of ords; -1 marks an empty slot.
  private int[] ords;
  // Ords created by add() since the last finishDoc(), in insertion order.
  private int[] newOrds;
  private int newOrdsCount = 0;
  // Sorted runs of ords maintained by getSorted()/finishDoc(); only the first
  // sortedSlicesCount entries are valid.
  private int[][] sortedSlices = EMPTY_SLICES;
  private int sortedSlicesCount = 0;
  // True once finishDoc() has seen a non-null comparator; reset by clearSortedSlices().
  private boolean sortingEnabled = false;
  // Comparator the sorted slices are maintained with; null while sorting is off.
  private Comparator<BytesRef> comparator = null;
  private final BytesStartArray bytesStartArray;
  // Memory accounting sink obtained from bytesStartArray.bytesUsed().
  private LongAdder bytesUsed;

  /**
   * Creates a new {@link BytesRefHash} with a {@link ByteBlockPool} using a
   * {@link DirectAllocator}.
   */
  public BytesRefHash() { 
    this(new ByteBlockPool(new DirectAllocator()));
  }

  /**
   * Creates a new {@link BytesRefHash}
   */
  public BytesRefHash(ByteBlockPool pool) {
    this(pool, DEFAULT_CAPACITY, new DirectBytesStartArray(DEFAULT_CAPACITY));
  }

  /**
   * Creates a new {@link BytesRefHash}
   */
  public BytesRefHash(ByteBlockPool pool, int capacity,
      BytesStartArray bytesStartArray) {
    hashSize = capacity;
    hashHalfSize = hashSize >> 1;
    hashMask = hashSize - 1;
    this.pool = pool;
    ords = new int[hashSize];
    newOrds = new int[hashSize];
    Arrays.fill(ords, -1);
    this.bytesStartArray = bytesStartArray;
    bytesStart = bytesStartArray.init();
    bytesUsed = bytesStartArray.bytesUsed();
    bytesUsed.add(hashSize * RamUsageEstimator.NUM_BYTES_INT);
  }

  /**
   * Reports how many {@link BytesRef} values have been added so far.
   *
   * @return the current number of ords in this {@link BytesRefHash}
   */
  public int size() {
    return this.count;
  }

  /**
   * Releases the memory accounting held for the hash table and drops all
   * sorted slices (which also disables sorting).
   */
  public void close() {
    final int tableBytes = hashSize * RamUsageEstimator.NUM_BYTES_INT;
    bytesUsed.add(-tableBytes);
    clearSortedSlices();
  }

  /**
   * Drops every sorted slice, forgets the comparator, turns sorting off and
   * gives the slices' ints back to the memory accounting.
   */
  private void clearSortedSlices() {
    int releasedInts = 0;
    synchronized (this) {
      int idx = 0;
      while (idx < sortedSlicesCount) {
        releasedInts += sortedSlices[idx].length;
        idx++;
      }
      sortedSlicesCount = 0;
      sortedSlices = EMPTY_SLICES;
    }
    comparator = null;
    sortingEnabled = false;
    // widen before multiplying so the byte count cannot overflow an int
    final long releasedBytes = ((long) releasedInts) * RamUsageEstimator.NUM_BYTES_INT;
    bytesUsed.add(-releasedBytes);
  }

  /**
   * Points the given {@link BytesRef} at the bytes stored for an ord.
   * <p>
   * Note: the ord must be non-negative and less than the current size
   * ({@link #size()}).
   * </p>
   *
   * @param ord the ord to resolve
   * @param ref the {@link BytesRef} to populate
   *
   * @return whatever {@code pool.setBytesRef} returns for {@code ref} and the
   *         ord's start offset
   */
  public BytesRef get(int ord, BytesRef ref) {
    assert bytesStart != null : "bytesStart is null - not initialized";
    assert ord < bytesStart.size(): "ord exceeds byteStart len: " + bytesStart.size();
    final int startOffset = bytesStart.getInt(ord);
    return pool.setBytesRef(ref, startOffset);
  }

  /**
   * Packs all occupied hash slots to the front of the internal table and
   * returns that table. Valid ords then occupy indexes {@code 0} to
   * {@link #size()} - 1, in arbitrary order; the remaining slots hold -1.
   * <p>
   * Note: This is a destructive operation, the table can no longer be used
   * for lookups. {@link #clear()} must be called in order to reuse this
   * {@link BytesRefHash} instance.
   * </p>
   *
   * @return the internal table itself (not a copy)
   */
  public int[] compact() {
    assert bytesStart != null : "Bytesstart is null - not initialized";
    int write = 0;
    for (int read = 0; read < hashSize; read++) {
      final int ord = ords[read];
      if (ord == -1) {
        continue;
      }
      if (write < read) {
        // move the ord down and free its old slot
        ords[write] = ord;
        ords[read] = -1;
      }
      write++;
    }

    assert write == count;
    lastCount = count;
    return ords;
  }

  // Per-thread scratch objects reused by sort(Comparator) so that repeated
  // sorts on the same thread do not allocate new BytesRef instances.
  private static final ThreadLocal<BytesRef> pivotLocal = new ThreadLocal<BytesRef>();
  private static final ThreadLocal<BytesRef> scratch1Local = new ThreadLocal<BytesRef>();
  private static final ThreadLocal<BytesRef> scratch2Local = new ThreadLocal<BytesRef>();

  /**
   * Compacts the hash table (see {@link #compact()}) and sorts the resulting
   * ords by the bytes they reference.
   * <p>
   * Note: This is a destructive operation. {@link #clear()} must be called in
   * order to reuse this {@link BytesRefHash} instance.
   * </p>
   *
   * @param comp
   *          the {@link Comparator} used for sorting
   * @return the internal table with the first {@link #size()} entries sorted
   */
  public int[] sort(final Comparator<BytesRef> comp) {
    final BytesRef pivot = threadScratch(pivotLocal);
    final BytesRef left = threadScratch(scratch1Local);
    final BytesRef right = threadScratch(scratch2Local);
    final int[] compacted = compact();
    // externalSort runs the same quick sort over the first 'count' entries
    // and hands the very same array back
    return externalSort(comp, compacted, count, pivot, left, right);
  }

  /** Returns the calling thread's scratch from the holder, creating it on first use. */
  private static BytesRef threadScratch(final ThreadLocal<BytesRef> holder) {
    BytesRef ref = holder.get();
    if (ref == null) {
      ref = new BytesRef();
      holder.set(ref);
    }
    return ref;
  }

  /**
   * Alias of {@link #sort(Comparator)}; simply forwards the call.
   *
   * @param comp the {@link Comparator} used for sorting
   * @return the array returned by {@link #sort(Comparator)}
   */
  public int[] sortExternal(final Comparator<BytesRef> comp) {
    final int[] sortedOrds = sort(comp);
    return sortedOrds;
  }

    /**
     * Returns a snapshot of the sorted slices, creating the first slice from
     * the current table contents when none exists yet.
     * <p>
     * When the comparator differs from the remembered one, or sorting has not
     * been enabled yet, the given comparator is remembered. The ords are only
     * sorted if no slice exists at all: an existing slice set is never
     * replaced here, so sorting again would only produce an array that is
     * thrown away.
     * </p>
     * <p>
     * NOTE(review): if slices already exist and {@code comp} differs from the
     * comparator they were built with, the slices keep their old order while
     * later merges use the new comparator - confirm callers never switch
     * comparators without clearing first.
     * </p>
     *
     * @param comp the {@link Comparator} defining the slice order
     * @return a copy of the outer slice array, trimmed to the valid slices;
     *         the inner arrays are shared with this instance
     */
    public synchronized int[][] getSorted(final Comparator<BytesRef> comp) {
        if (comparator == null || comp != comparator || !sortingEnabled) {
            comparator = comp;
            if (sortedSlices.length == 0) {
                // gather the occupied slots of the table
                final int[] table = ords;
                final int[] live = new int[table.length];
                int upto = 0;
                for (int i = 0; i < table.length; i++) {
                    if (table[i] != -1) {
                        live[upto++] = table[i];
                    }
                }
                externalSort(comp, live, upto, new BytesRef(),
                    new BytesRef(), new BytesRef());
                final int[] sorted = Arrays.copyOf(live, upto);
                // clearSortedSlices() subtracts every slice from bytesUsed,
                // so the slice created here has to be accounted for as well
                bytesUsed.add(((long) upto) * RamUsageEstimator.NUM_BYTES_INT);
                sortedSlices = new int[32][];
                sortedSlicesCount = 1;
                sortedSlices[0] = sorted;
            }
        }
        return Arrays.copyOf(sortedSlices, sortedSlicesCount);
    }

  /**
   * Quick-sorts the first {@code ordsCount} entries of {@code ords} in place,
   * ordering them by the bytes each ord references in the pool.
   *
   * @param comp comparator applied to the referenced bytes
   * @param ords array of ords to sort; modified in place
   * @param ordsCount number of leading entries of {@code ords} to sort
   * @param sortPivot scratch that holds the pivot value during partitioning
   * @param s1 scratch for the left operand of a comparison
   * @param s2 scratch for the right operand of a comparison
   * @return {@code ords} itself
   */
  public int[] externalSort(final Comparator<BytesRef> comp, final int[] ords, 
    final int ordsCount, final BytesRef sortPivot, final BytesRef s1,
    final BytesRef s2)
  {
    assert bytesStart != null : "Bytesstart is null - not initialized";
    final SorterTemplate sorter = new SorterTemplate() {
      @Override
      protected void swap(int i, int j) {
        final int tmp = ords[i];
        ords[i] = ords[j];
        ords[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        final int left = ords[i];
        final int right = ords[j];
        assert bytesStart.size() > left && bytesStart.size() > right;
        return comp.compare(
            pool.setBytesRef(s1, bytesStart.getInt(left)),
            pool.setBytesRef(s2, bytesStart.getInt(right)));
      }

      @Override
      protected void setPivot(int i) {
        final int pivotOrd = ords[i];
        assert bytesStart.size() > pivotOrd;
        pool.setBytesRef(sortPivot, bytesStart.getInt(pivotOrd));
      }

      @Override
      protected int comparePivot(int j) {
        final int other = ords[j];
        assert bytesStart.size() > other;
        return comp.compare(
            sortPivot,
            pool.setBytesRef(s2, bytesStart.getInt(other)));
      }
    };
    sorter.quickSort(0, ordsCount - 1);
    return ords;
  }

  /**
   * Tells whether the bytes stored for {@code ord} equal {@code b}; uses the
   * instance-wide {@code scratch1} to read the stored bytes.
   */
  private boolean equals(int ord, BytesRef b) {
    final BytesRef stored = pool.setBytesRef(scratch1, bytesStart.getInt(ord));
    return stored.bytesEquals(b);
  }

  /**
   * Same comparison as above, but with a caller-supplied scratch and an upper
   * bound: ords at or beyond {@code maxOrd} never match and are not read.
   */
  private boolean equals(final int ord, final BytesRef b,
    final BytesRef scratch, final int maxOrd)
  {
    return ord < maxOrd
        && pool.setBytesRef(scratch, bytesStart.getInt(ord)).bytesEquals(b);
  }

  /**
   * Halves the hash table for as long as it has at least 8 slots and a
   * quarter of it is still larger than {@code targetSize}. When the size
   * changes, a fresh, empty table is installed.
   *
   * @param targetSize number of entries the table should stay sized for
   * @return {@code true} if a new (empty) table was installed
   */
  private boolean shrink(int targetSize) {
    // Cannot use ArrayUtil.shrink because we require power of 2:
    int candidate = hashSize;
    while (candidate >= 8 && (candidate >> 2) > targetSize) {
      candidate >>= 1;
    }
    if (candidate == hashSize) {
      return false;
    }
    // negative delta: hand the dropped slots back to the accounting
    bytesUsed.add(RamUsageEstimator.NUM_BYTES_INT * (candidate - hashSize));
    hashSize = candidate;
    ords = new int[hashSize];
    Arrays.fill(ords, -1);
    hashHalfSize = candidate >> 1;
    hashMask = candidate - 1;
    return true;
  }

  /**
   * Removes all entries from this {@link BytesRefHash}.
   * <p>
   * Besides emptying the hash table this drops the sorted slices (which
   * disables sorting), forgets the ords collected for the next
   * {@link #finishDoc()} and clears the ord-to-offset list through
   * {@link BytesStartArray#clear()}. Call {@link #reinit()} before adding
   * again, because the cleared list may be <code>null</code>.
   * </p>
   *
   * @param resetPool if <code>true</code> the underlying {@link ByteBlockPool}
   *          is reset as well
   */
  public void clear(boolean resetPool) {
    lastCount = count;
    count = 0;
    // Ords handed out before the clear are meaningless afterwards: without
    // these resets the pending ords would be sorted by the next finishDoc()
    // and find() would keep accepting ords below the old sortedCount.
    newOrdsCount = 0;
    sortedCount = 0;
    if (resetPool) {
      pool.reset();
    }
    if (lastCount == 0) {
      // nothing was stored, the table is already empty
      return;
    }
    clearSortedSlices();
    bytesStart = bytesStartArray.clear();
    if (lastCount != -1 && shrink(lastCount)) {
      // shrink clears the hash entries
      return;
    }
    Arrays.fill(ords, -1);
  }

  /**
   * Removes all entries and resets the underlying {@link ByteBlockPool};
   * equivalent to {@code clear(true)}.
   */
  public void clear() {
    clear(true);
  }

  // Scratch objects for the sort/merge performed by finishDoc() and
  // mergeSlices(); finishDoc() allocates them on first use.
  private BytesRef sortPivot = null; // pivot holder passed to externalSort
  private BytesRef s1 = null; // left-hand scratch for compare/merge
  private BytesRef s2 = null; // right-hand scratch for compare/merge

  /**
   * Debug aid: prints the UTF-8 text of the first {@code len} ords of the
   * given array to {@code System.err}, framed by begin/end markers.
   *
   * @param ords ords to print
   * @param len number of leading entries to print
   * @param name label used in the begin/end markers
   */
  private void dumpOrds(int[] ords, int len, String name) {
    final BytesRef text = new BytesRef();
    System.err.println("BeginDump: " + name);
    int idx = 0;
    while (idx < len) {
      final int offset = bytesStart.getInt(ords[idx]);
      pool.setBytesRef(text, offset);
      System.err.println(idx + ": " + text.utf8ToString());
      idx++;
    }
    System.err.println("EndDump: " + name);
  }

  /**
   * Marks the end of a document: folds the ords added since the previous
   * call into the sorted slices (when sorting is enabled) and advances
   * {@code sortedCount} to the current count.
   * <p>
   * If no comparator is set and more than {@link #UNSORTED_THRESHOLD} ords
   * are still unsorted, the UTF-8 comparator is installed. Any non-null
   * comparator enables sorting.
   * </p>
   */
  public void finishDoc() {
    if (count - sortedCount > UNSORTED_THRESHOLD && comparator == null) {
      comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    }
    if (comparator != null) {
      sortingEnabled = true;
    }
    if (sortingEnabled && newOrdsCount > 0) {
      sortAndPublishPendingOrds();
    }
    newOrdsCount = 0;
    sortedCount = count;
  }

  /**
   * Sorts the pending ords and merges them into, or appends them to, the
   * sorted slices. Only called from finishDoc() with at least one pending ord.
   */
  private void sortAndPublishPendingOrds() {
    final int pending = newOrdsCount;
    bytesUsed.add(pending * RamUsageEstimator.NUM_BYTES_INT);
    if (sortPivot == null) {
      sortPivot = new BytesRef();
    }
    if (s1 == null) {
      s1 = new BytesRef();
    }
    if (s2 == null) {
      s2 = new BytesRef();
    }

    final long startedAt = TimeSource.INSTANCE.currentTimeMillis();
    final int[] run = externalSort(comparator, newOrds, pending,
        sortPivot, s1, s2);
    final long elapsed = TimeSource.INSTANCE.currentTimeMillis() - startedAt;
    if (elapsed > 500) {
      System.err.println("BytesRefHash.finishDoc sort time too big: "
          + elapsed + ", newOrdsCount: " + pending
          + ", slices.length: " + sortedSlicesCount);
    }

    synchronized (this) {
      if (sortedSlicesCount == 0) {
        // first sorted run ever: it becomes slice 0
        sortedSlices = new int[32][];
        sortedSlices[0] = Arrays.copyOf(run, pending);
        sortedSlicesCount++;
        if (DEBUG) {
          System.err.println("BytesRefHash.finishDoc creating slices");
        }
        return;
      }

      final int lastIdx = sortedSlicesCount - 1;
      final int[] lastSlice = sortedSlices[lastIdx];
      if (lastSlice.length / pending < 3) {
        // the last slice is less than 3x the new run: merge the run into it
        if (DEBUG) {
          System.err.println("BytesRefHash.finishDoc merging slices: " + (sortedSlicesCount-1) + " + new");
        }
        sortedSlices[lastIdx] =
            mergeOrds(lastSlice, lastSlice.length, run, pending, s1, s2);
      } else {
        if (DEBUG) {
          System.err.println("BytesRefHash.finishDoc adding new slice: " + sortedSlicesCount);
        }
        // append the run as a slice of its own, growing the holder if full
        if (sortedSlices.length <= sortedSlicesCount) {
          sortedSlices = Arrays.copyOf(sortedSlices, sortedSlices.length << 1);
        }
        sortedSlices[sortedSlicesCount] = Arrays.copyOf(run, pending);
        sortedSlicesCount++;

        mergeSlices();
      }
    }
  }

    /**
     * Repeatedly merges neighbouring slices until every slice is at least
     * three times as long as its successor (by integer division). A merged
     * pair takes the place of its left member and the slices behind it move
     * one position forward.
     */
    private void mergeSlices() {
        boolean mergedAny = true;
        while (mergedAny) {
            mergedAny = false;
            for (int i = 0; i < sortedSlicesCount - 1; i++) {
                final int[] left = sortedSlices[i];
                final int[] right = sortedSlices[i + 1];
                if (left.length / right.length >= 3) {
                    continue;
                }
                if (DEBUG) {
                    System.err.println("BytesRefHash.mergeSlices: " + i + " + " + (i + 1));
                }
                sortedSlices[i] =
                    mergeOrds(left, left.length, right, right.length, s1, s2);
                // close the gap left by the consumed right-hand slice
                System.arraycopy(sortedSlices, i + 2, sortedSlices, i + 1,
                    sortedSlicesCount - i - 2);
                sortedSlicesCount--;
                mergedAny = true;
            }
        }
    }

  // Natural logarithm of 2, the divisor for converting ln(x) to log2(x).
  private static final double LN_OF_TWO = Math.log(2.0);

  /**
   * Computes the base-2 logarithm of {@code a} as {@code ln(a) / ln(2)}.
   *
   * @param a the value
   * @return the base-2 logarithm of {@code a}
   */
  public double log2(double a) {
    final double naturalLog = Math.log(a);
    return naturalLog / LN_OF_TWO;
  }

  /**
   * Binary search in the sorted ord range {@code [off, len - 1]} for the last
   * position whose bytes are not greater than {@code needle} according to
   * the current comparator.
   *
   * @param ords sorted array of ords
   * @param off first index of the range
   * @param len exclusive end index of the range
   * @param needle the value to position
   * @param pivot scratch that receives the bytes of the probed ords
   * @return the floor index; {@code off - 1} when {@code needle} sorts before
   *         the first element, {@code len - 1} when the range is empty
   */
  private final int findFloor(int[] ords, int off, int len, BytesRef needle, BytesRef pivot) {
    int lo = off;
    int hi = len - 1;
    if (lo > hi) {
      return hi;
    }
    // a needle below the first element has no floor inside the range
    pool.setBytesRef(pivot, bytesStart.getInt(ords[lo]));
    if (comparator.compare(needle, pivot) < 0) {
      return lo - 1;
    }
    // invariant: ords[lo] <= needle; the upper midpoint guarantees progress
    while (lo < hi) {
      final int mid = (lo + hi + 1) >>> 1;
      pool.setBytesRef(pivot, bytesStart.getInt(ords[mid]));
      if (comparator.compare(needle, pivot) < 0) {
        hi = mid - 1;
      } else {
        lo = mid;
      }
    }
    return hi;
  }

  /**
   * Merges two sorted ord arrays by binary-searching the position of every
   * element of the shorter input inside the longer one. Intended for inputs
   * of very different length, see {@link #mergeOrds}.
   * <p>
   * The longer input is copied in bulk between insertion points. The previous
   * implementation additionally resolved the bytes of every copied ord
   * without using them; that lookup is gone.
   * </p>
   *
   * @param a first sorted array of ords
   * @param alength number of valid entries in {@code a}
   * @param b second sorted array of ords
   * @param blength number of valid entries in {@code b}
   * @param aRef scratch that receives the bytes of the element being inserted
   * @param bRef scratch used by the binary search
   * @return a new array of length {@code alength + blength} holding the
   *         merged ords
   */
  public final int[] mergeOrdsBinary(int[] a, int alength, int[] b, int blength, BytesRef aRef, BytesRef bRef) {
    final int[] dst;
    final int[] src;
    final int dstLen;
    final int srcLen;
    // search inside the longer input, insert the elements of the shorter one
    if (alength > blength) {
      dst = a;
      dstLen = alength;
      src = b;
      srcLen = blength;
    } else {
      dst = b;
      dstLen = blength;
      src = a;
      srcLen = alength;
    }

    final int[] merged = new int[alength + blength];
    int m = 0;
    int d = 0;
    for (int i = 0; i < srcLen; i++) {
      pool.setBytesRef(aRef, bytesStart.getInt(src[i]));
      final int floor = findFloor(dst, d, dstLen, aRef, bRef);
      // everything in dst[d..floor] sorts at or before src[i]
      final int run = floor - d + 1;
      if (run > 0) {
        System.arraycopy(dst, d, merged, m, run);
        m += run;
        d += run;
      }
      merged[m++] = src[i];
    }
    // remainder of the longer input
    System.arraycopy(dst, d, merged, m, dstLen - d);
    return merged;
  }

  /**
   * Merges two sorted ord arrays into a new sorted array using the current
   * comparator. Switches to {@link #mergeOrdsBinary} when the estimate
   * {@code log2(total) * min(alength, blength)} is smaller than the total
   * length; otherwise performs a linear two-way merge.
   *
   * @param a first sorted array of ords
   * @param alength number of valid entries in {@code a}
   * @param b second sorted array of ords
   * @param blength number of valid entries in {@code b}
   * @param aRef scratch for the bytes of the current element of {@code a}
   * @param bRef scratch for the bytes of the current element of {@code b}
   * @return a new array of length {@code alength + blength}
   */
  public final int[] mergeOrds(int[] a, int alength, int[] b, int blength, BytesRef aRef, BytesRef bRef )
  {
    final int total = alength + blength;
    final int shorter = Math.min(alength, blength);
    final int estimatedBinaryCompares = (int) (log2(total) * shorter);
    if (estimatedBinaryCompares < total) {
      return mergeOrdsBinary(a, alength, b, blength, aRef, bRef);
    }

    final int[] out = new int[total];
    int ai = 0;
    int bi = 0;
    int oi = 0;
    while (ai < alength && bi < blength) {
      pool.setBytesRef(aRef, bytesStart.getInt(a[ai]));
      pool.setBytesRef(bRef, bytesStart.getInt(b[bi]));
      // on a tie the element of b is emitted first
      out[oi++] = comparator.compare(aRef, bRef) < 0 ? a[ai++] : b[bi++];
    }
    // at most one of the two inputs still has elements left
    final int restOfA = alength - ai;
    System.arraycopy(a, ai, out, oi, restOfA);
    oi += restOfA;
    System.arraycopy(b, bi, out, oi, blength - bi);
    return out;
  }

  /**
   * Adds a {@link BytesRef}, hashing it with {@link BytesRef#hashCode()}.
   * 
   * @param bytes
   *          the bytes to add
   * @return the new ord if the bytes were not present yet, otherwise
   *         <code>(-(ord)-1)</code> of the existing entry. The return value
   *         is therefore &gt;= 0 exactly when the bytes were newly added.
   * 
   * @throws MaxBytesLengthExceededException
   *           if the given bytes are longer than
   *           {@link ByteBlockPool#LARGE_BLOCK_SIZE}
   */
  public int add(BytesRef bytes) {
    final int code = bytes.hashCode();
    return add(bytes, code);
  }

    /**
     * Looks up the ord of the given bytes without modifying the hash.
     * <p>
     * Works on local snapshots of the table and of {@code sortedCount}, and
     * reads stored bytes through the caller-supplied scratch rather than the
     * instance scratch. Only ords below {@code sortedCount} (i.e. those
     * covered by the last {@link #finishDoc()}) can match.
     * </p>
     *
     * @param bytes the bytes to look up
     * @param scratch scratch used to read stored values
     * @return the ord of the bytes, or -1 if there is no matching ord below
     *         {@code sortedCount}
     */
    public int find(final BytesRef bytes, final BytesRef scratch) {
        final int limit = this.sortedCount;
        final int[] table = this.ords;
        final int mask = table.length - 1;
        int probe = bytes.hashCode();
        int ord = table[probe & mask];
        if (ord != -1 && !equals(ord, bytes, scratch, limit)) {
            // collision: walk the probe sequence with an odd step
            final int step = ((probe >> 8) + probe) | 1;
            while (true) {
                probe += step;
                ord = table[probe & mask];
                if (ord == -1 || equals(ord, bytes, scratch, limit)) {
                    break;
                }
            }
        }
        return ord >= limit ? -1 : ord;
    }

  /**
   * Adds a new {@link BytesRef} with a pre-calculated hash code.
   * 
   * @param bytes
   *          the bytes to hash
   * @param code
   *          the bytes hash code. It has to match what {@link #add(BytesRef)}
   *          passes ({@link BytesRef#hashCode()}), because growing the table
   *          recomputes the code from the stored bytes as:
   * 
   *          <pre>
   * int hash = 0;
   * for (int i = offset; i &lt; offset + length; i++) {
   *   hash = BytesRef.HASH_PRIME * hash + bytes[i];
   * }
   * </pre>
   * 
   * @return the ord the given bytes are hashed if there was no mapping for the
   *         given bytes, otherwise <code>(-(ord)-1)</code>. This guarantees
   *         that the return value will always be &gt;= 0 if the given bytes
   *         haven't been hashed before.
   * 
   * @throws MaxBytesLengthExceededException
   *           if the given bytes are not present yet and are longer than
   *           {@link ByteBlockPool#LARGE_BLOCK_SIZE}
   */
  public int add(BytesRef bytes, int code) {
    assert bytesStart != null : "Bytesstart is null - not initialized";
    final int length = bytes.length;
    // final position
    int hashPos = code & hashMask;
    int e = ords[hashPos];
    if (e != -1 && !equals(e, bytes)) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        hashPos = code & hashMask;
        e = ords[hashPos];
      } while (e != -1 && !equals(e, bytes));
    }

    if (e != -1) {
      // already present
      return -(e + 1);
    }

    // new entry: reject oversized values before any state is modified, and
    // report the limit that is actually enforced
    if (length > LARGE_BLOCK_SIZE) {
      throw new MaxBytesLengthExceededException("bytes can be at most "
            + LARGE_BLOCK_SIZE + " in length; got " + length);
    }

    if (count >= bytesStart.size()) {
      bytesStart = bytesStartArray.grow();
      assert count < bytesStart.size() + 1 : "count: " + count + " len: "
          + bytesStart.size();
    }

    final int len2 = 2 + length;
    if (len2 > BYTE_BLOCK_SIZE) {
      // does not fit into a regular block together with its length prefix:
      // store it in a large buffer of its own, addressed by a negative offset
      final byte[] buffer = pool.nextLargeBuffer(length);
      final int bufferUpto = pool.largeBufferUpto;
      e = count++;
      bytesStart.setInt(e, -bufferUpto - LARGE_BLOCK_OFFSET);
      System.arraycopy(bytes.bytes, bytes.offset, buffer, 0, length);
    } else {
      if (len2 + pool.byteUpto > BYTE_BLOCK_SIZE) {
        pool.nextBuffer();
      }

      e = count++;

      final byte[] buffer = pool.buffer;
      final int bufferUpto = pool.byteUpto;

      bytesStart.setInt(e, bufferUpto + pool.byteOffset);

      // We first encode the length, followed by the
      // bytes. Length is encoded as vInt, but will consume
      // 1 or 2 bytes at most (longer values went to a
      // large buffer, above).
      if (length < 128) {
        // 1 byte to store length
        buffer[bufferUpto] = (byte) length;
        pool.byteUpto += length + 1;
        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1,
            length);
      } else {
        // 2 byte to store length
        buffer[bufferUpto] = (byte) (0x80 | (length & 0x7f));
        buffer[bufferUpto + 1] = (byte) ((length >> 7) & 0xff);
        pool.byteUpto += length + 2;
        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2,
            length);
      }
    }
    assert ords[hashPos] == -1;
    ords[hashPos] = e;

    if (count == hashHalfSize) {
      // table is half full: double it
      rehash(hashSize << 1, true);
    }
    // remember the ord for the next finishDoc(); grow the list once it is full
    newOrds[newOrdsCount++] = e;
    if (newOrds.length == newOrdsCount) {
      newOrds = Arrays.copyOf(newOrds, newOrds.length << 1);
    }
    return e;
  }

  /**
   * Registers a value by its pool offset: the offset serves both as hash
   * code and as identity, no bytes are read or copied. Unlike
   * {@link #add(BytesRef, int)} the new ord is not recorded in the list of
   * pending ords for {@link #finishDoc()}.
   *
   * @param offset the pool offset to register
   * @return the new ord if the offset was not registered yet, otherwise
   *         <code>(-(ord)-1)</code> of the existing entry
   */
  public int addByPoolOffset(int offset) {
    assert bytesStart != null : "Bytesstart is null - not initialized";
    int probe = offset;
    int slot = probe & hashMask;
    int ord = ords[slot];
    if (ord != -1 && bytesStart.getInt(ord) != offset) {
      // collision: follow the probe sequence until a free slot or a match
      final int step = ((probe >> 8) + probe) | 1;
      do {
        probe += step;
        slot = probe & hashMask;
        ord = ords[slot];
      } while (ord != -1 && bytesStart.getInt(ord) != offset);
    }
    if (ord != -1) {
      // offset already registered
      return -(ord + 1);
    }

    if (count >= bytesStart.size()) {
      bytesStart = bytesStartArray.grow();
      assert count < bytesStart.size() + 1 : "count: " + count + " len: "
          + bytesStart.size();
    }
    final int newOrd = count++;
    bytesStart.setInt(newOrd, offset);
    assert ords[slot] == -1;
    ords[slot] = newOrd;

    if (count == hashHalfSize) {
      // offsets are their own hash code, so rehash without reading data
      rehash(hashSize << 1, false);
    }
    return newOrd;
  }

  /**
   * Moves every ord into a new table of {@code newSize} slots and installs
   * that table.
   *
   * @param newSize size of the new table; indexes are derived with the mask
   *          {@code newSize - 1}
   * @param hashOnData if {@code true} the hash code of each entry is
   *          recomputed from its stored bytes, otherwise the entry's pool
   *          offset is used as hash code
   */
  private void rehash(final int newSize, boolean hashOnData) {
    final int newMask = newSize - 1;
    final int[] target = new int[newSize];
    Arrays.fill(target, -1);

    for (int slot = 0; slot < hashSize; slot++) {
      final int ord = ords[slot];
      if (ord == -1) {
        continue;
      }
      final int start = bytesStart.getInt(ord);
      int code = hashOnData ? hashOfStoredBytes(start) : start;

      int pos = code & newMask;
      assert pos >= 0;
      if (target[pos] != -1) {
        // occupied: same probe sequence as used by add()
        final int step = ((code >> 8) + code) | 1;
        do {
          code += step;
          pos = code & newMask;
        } while (target[pos] != -1);
      }
      target[pos] = ord;
    }

    bytesUsed.add(RamUsageEstimator.NUM_BYTES_INT * (newSize - ords.length));

    hashMask = newMask;
    ords = target;
    hashSize = newSize;
    hashHalfSize = newSize >> 1;
  }

  /**
   * Recomputes the hash code of the value stored at the given start offset.
   * A negative offset addresses a large buffer whose whole content is the
   * value; otherwise the value sits in a regular block behind a 1 or 2 byte
   * length prefix.
   */
  private int hashOfStoredBytes(final int off) {
    final byte[] data;
    final int len;
    int pos;
    if (off < 0) {
      data = pool.largeBuffers[-(off + LARGE_BLOCK_OFFSET)];
      len = data.length;
      pos = 0;
    } else {
      final int start = off & BYTE_BLOCK_MASK;
      data = pool.buffers[off >> BYTE_BLOCK_SHIFT];
      if ((data[start] & 0x80) == 0) {
        // length is 1 byte
        len = data[start];
        pos = start + 1;
      } else {
        // length is 2 bytes: low 7 bits first, then the remaining bits
        len = (data[start] & 0x7f) + ((data[start + 1] & 0xff) << 7);
        pos = start + 2;
      }
    }

    int hash = 0;
    for (final int end = pos + len; pos < end; pos++) {
      hash = BytesRef.HASH_PRIME * hash + data[pos];
    }
    return hash;
  }

  /**
   * Re-creates the ord-to-offset list after a previous {@link #clear()}
   * left it <code>null</code>. Does nothing while the list is present.
   */
  public void reinit() {
    if (bytesStart != null) {
      return;
    }
    bytesStart = bytesStartArray.init();
  }

  /**
   * Looks up where the bytes of an ord begin inside the internally used
   * {@link ByteBlockPool}.
   * 
   * @param ord
   *          the ord to look up; must be in the range 0 to
   *          {@link #size()} - 1
   * @return the start offset recorded for the given ord
   */
  public int byteStart(int ord) {
    assert bytesStart != null : "Bytesstart is null - not initialized";
    assert ord >= 0 && ord < count : ord;
    final int startOffset = bytesStart.getInt(ord);
    return startOffset;
  }

  /**
   * Thrown by {@link BytesRefHash#add(BytesRef, int)} if a new
   * {@link BytesRef} is longer than {@link ByteBlockPool#LARGE_BLOCK_SIZE}.
   */
  @SuppressWarnings("serial")
  public static class MaxBytesLengthExceededException extends RuntimeException {
    // Package-private: only code in this package creates the exception.
    MaxBytesLengthExceededException(String message) {
      super(message);
    }
  }

  /**
   * Supplies and manages the list that maps each ord to the start offset of
   * its bytes, together with the adder used for memory accounting.
   */
  public abstract static class BytesStartArray {
    /**
     * Initializes the BytesStartArray. This call will allocate memory
     * 
     * @return the initialized bytes start array
     */
    public abstract ChunkedIntList init();

    /**
     * Grows the {@link BytesStartArray}
     * 
     * @return the grown array
     */
    public abstract ChunkedIntList grow();

    /**
     * clears the {@link BytesStartArray} and returns the cleared instance.
     * 
     * @return the cleared instance, this might be <code>null</code>
     */
    public abstract ChunkedIntList clear();

    /**
     * A {@link LongAdder} reference holding the number of bytes used by this
     * {@link BytesStartArray}. The {@link BytesRefHash} uses this reference to
     * track its memory usage
     * 
     * @return a {@link LongAdder} reference holding the number of bytes used
     *         by this {@link BytesStartArray}.
     */
    public abstract LongAdder bytesUsed();
  }

  /**
   * {@link BytesStartArray} backed by a plain {@link ChunkedIntList}. Memory
   * is not tracked per instance: {@link #bytesUsed()} returns the shared
   * {@link BytesRefHash#FAKE_LONG_ADDER}.
   */
  public static class DirectBytesStartArray extends BytesStartArray {

    protected final int initSize;
    private ChunkedIntList bytesStart;

    /**
     * @param initSize number of entries {@link #init()} sizes the list for
     *          (before oversizing)
     */
    public DirectBytesStartArray(int initSize) {
      this.initSize = initSize;
    }

    /** Drops the list; always returns <code>null</code>. */
    @Override
    public ChunkedIntList clear() {
      this.bytesStart = null;
      return null;
    }

    /** Extends the list by a single zero entry and returns it. */
    @Override
    public ChunkedIntList grow() {
      assert bytesStart != null;
      final ChunkedIntList list = bytesStart;
      list.addInt(0);
      return list;
    }

    /** Creates a new list resized to the oversized initial length. */
    @Override
    public ChunkedIntList init() {
      this.bytesStart = new ChunkedIntList();
      final int initialLength =
          ArrayUtil.oversize(initSize, RamUsageEstimator.NUM_BYTES_INT);
      this.bytesStart.resize(initialLength);
      return this.bytesStart;
    }

    /** Returns the shared adder; see the class comment. */
    @Override
    public LongAdder bytesUsed() {
      return FAKE_LONG_ADDER;
    }
  }
}
