package ru.yandex.msearch;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;

import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.EntityTemplate;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.HttpRequestHandler;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.ReusableStringReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;

import ru.yandex.collection.PatternMap;
import ru.yandex.http.server.sync.JsonContentProducerWriter;
import ru.yandex.http.server.sync.Utf8JsonContentProducer;
import ru.yandex.http.util.BadRequestException;
import ru.yandex.http.util.CharsetUtils;
import ru.yandex.http.util.request.RequestInfo;
import ru.yandex.http.util.server.LoggingServerConnection;
import ru.yandex.json.writer.JsonTypeExtractor;
import ru.yandex.json.writer.JsonValue;
import ru.yandex.json.writer.JsonWriterBase;
import ru.yandex.json.writer.Utf8JsonValue;
import ru.yandex.json.writer.Utf8JsonWriter;
import ru.yandex.msearch.FilteringIndexReader.FilteringReaderLeave;
import ru.yandex.msearch.collector.ClusteringCollector;
import ru.yandex.msearch.collector.CollectingFieldToIndex;
import ru.yandex.msearch.collector.FlushableCollector;
import ru.yandex.msearch.collector.cluster.Cluster;
import ru.yandex.msearch.collector.cluster.ClusterDoc;
import ru.yandex.msearch.collector.cluster.ClusterWithGroup;
import ru.yandex.msearch.collector.cluster.TruncatedCluster;
import ru.yandex.msearch.util.Compress;
import ru.yandex.msearch.util.IOStater;
import ru.yandex.parser.string.CollectionParser;
import ru.yandex.parser.string.NonEmptyValidator;
import ru.yandex.parser.uri.CgiParams;
import ru.yandex.search.prefix.Prefix;

public class ScanClusterHandler
    extends SearchHandlerBase
    implements HttpRequestHandler
{
    /**
     * Parser applied to the {@code get} request parameter. Parsed items are
     * collected into an {@link ArrayList}; each item is passed through
     * {@link NonEmptyValidator#INSTANCE} (presumably rejecting empty names —
     * confirm against the validator).
     */
    private static final CollectionParser<
        String,
        List<String>,
        Exception>
        GET_FIELDS_PARSER = new CollectionParser<>(
            NonEmptyValidator.INSTANCE,
            ArrayList::new);

    /**
     * Creates the handler; all arguments are forwarded unchanged to the
     * {@link SearchHandlerBase} constructor.
     *
     * @param dbManager database manager handed to the base handler
     * @param config configuration handed to the base handler
     * @param ioStaters per-request I/O staters handed to the base handler
     */
    public ScanClusterHandler(
        final DatabaseManager dbManager,
        final Config config,
        final PatternMap<RequestInfo, IOStater> ioStaters)
    {
        super(dbManager, config, ioStaters);
    }

    /**
     * Handles one scan request: parses the request into a
     * {@link ScanContext}, runs the scan against a searcher obtained from
     * the selected index and attaches a chunked JSON entity describing the
     * result to the response.
     *
     * <p>The UTF-8 producer is used when the accepted charset is UTF-8; any
     * other charset goes through the generic writer-based producer.
     *
     * <p>NOTE(review): the searcher is freed in the {@code finally} block,
     * while the entity is produced through an {@link EntityTemplate} and
     * the UTF-8 producer reads stored fields when it is written. If the
     * entity is written after this method returns, those reads happen after
     * the searcher was released — confirm this is safe.
     *
     * @param request incoming HTTP request
     * @param response response to populate with the JSON entity
     * @param context HTTP context of the current exchange
     * @throws HttpException if the request parameters are rejected
     * @throws IOException on search failure; a {@link ParseException}
     *     raised by the scan is rethrown wrapped into an IOException
     */
    @Override
    public void handle(
        final HttpRequest request,
        final HttpResponse response,
        final HttpContext context)
        throws HttpException, IOException
    {
        final HttpRequestContext requestContext =
            new HttpRequestContext(context);
        final ScanContext scanCtx = new ScanContext(request, requestContext);

        Compress.resetStats();
        final IOStater stater = ioStaterFor(context);
        Searcher acquired = null;
        try {
            acquired = scanCtx.index.getSearcher(scanCtx.prefix, false);
            try {
                scanCtx.scan(acquired);
            } catch (ParseException e) {
                throw new IOException(e);
            }
            // TODO: the hit count is currently not reported to the
            // LoggingServerConnection (setHitsCount is not called).
            final Charset charset = CharsetUtils.acceptedCharset(request);
            final EntityTemplate body;
            if (!charset.equals(StandardCharsets.UTF_8)) {
                body =
                    new EntityTemplate(
                        new JsonContentProducerWriter(
                            new ClusterProducer(scanCtx),
                            JsonTypeExtractor.NORMAL.extract(
                                scanCtx.params),
                            charset));
            } else {
                body =
                    new EntityTemplate(
                        new Utf8JsonContentProducer(
                            new Utf8ClusterProducer(scanCtx),
                            JsonTypeExtractor.NORMAL.extract(
                                scanCtx.params)));
            }
            body.setChunked(true);
            final String contentType =
                ContentType.APPLICATION_JSON.withCharset(charset).toString();
            body.setContentType(contentType);
            response.setEntity(body);
            response.setStatusCode(HttpStatus.SC_OK);
        } finally {
            if (acquired != null) {
                try {
                    acquired.free();
                } catch (Exception se) {
                    // Releasing is best effort: log and keep going so that
                    // the stats below are still accounted.
                    requestContext.logger().log(
                        Level.SEVERE,
                        "Searcher release fail",
                        se);
                }
            }
            accountStats(stater, requestContext.logger());
        }
    }

    /**
     * Per-request state of a scan: the parsed CGI parameters, the analyzer
     * and query parser used to build queries, and the collector that
     * accumulates the hits.
     *
     * <p>The instance carries mutable scan state ({@link #stringReader} and
     * {@link #termDedup}), so it is meant to serve a single request.
     */
    private class ScanContext {
        private final HttpRequestContext ctx;
        private final CgiParams params;
        private final Prefix prefix;
        private final String field;
        private final String startValue;
        private final int epsilon;
        private final int minPts;
        private final PrefixingAnalyzerWrapper analyzer;
        private final QueryParser parser;
        private final Query filterQuery;
        private final String text;
        private final List<String> get;
        private final ClusterCollector col;
        private final boolean deduplicate;
        // Lazily created and re-initialised for every tokenised value.
        private ReusableStringReader stringReader;
        // Terms already turned into query clauses by any earlier query of
        // this scan; a term is never queried twice.
        private final Set<String> termDedup = new HashSet<>();
        private final boolean fastScan;
        private final Index index;

        /**
         * Parses the request parameters and prepares the query machinery.
         *
         * <p>Parameters read: {@code text} (optional filter query),
         * {@code prefix}, {@code field}, {@code start}, {@code epsilon},
         * {@code min-pts} (default 3), {@code deduplicate} (default false),
         * {@code fast-scan} (default true) and {@code get} (default empty).
         *
         * @param request request whose CGI parameters are parsed
         * @param ctx request context providing the logger
         * @throws HttpException if parameters are rejected by the parsers,
         *     or, as {@link BadRequestException}, if {@code text} cannot be
         *     parsed as a query
         */
        ScanContext(
            final HttpRequest request,
            final HttpRequestContext ctx)
            throws HttpException
        {
            this.ctx = ctx;
            params = new CgiParams(request);
            text = params.getString("text", null);
            index = dbManager.indexOrException(params, SearchHandler.BRE_GEN);
            prefix = params.get("prefix", prefixParser(index.config()));
            field = params.getString("field").intern();
            startValue = params.getString("start");
            epsilon = params.getInt("epsilon");
            minPts = params.getInt("min-pts", 3);
            deduplicate = params.getBoolean("deduplicate", false);
            fastScan = params.getBoolean("fast-scan", true);

            List<String> requested = params.getAll(
                "get",
                Collections.emptyList(),
                GET_FIELDS_PARSER);
            this.get = new ArrayList<>(requested.size());
            for (String name: requested) {
                this.get.add(name.intern());
            }

            analyzer = index.searchAnalyzer(prefix);
            parser = createParser(index.config(), params, analyzer);

            //FIXME context should be final
            ProcessorRequestContext requestContext =
                new ProcessorRequestContext(
                    Collections.singleton(prefix),
                    index,
                    ctx,
                    new CollectingFieldToIndex(),
                    params);
            requestContext.queryParser(parser);

            Query parsedFilter = null;
            try {
                if (text != null) {
                    parsedFilter = parser.parse(text);
                    // XXX: Do not remove this log record!
                    // It exposes the bug in WildcardQuery
                    if (ctx.logger().isLoggable(Level.INFO)) {
                        ctx.logger().info("Request parsed: " + parsedFilter);
                    }
                }
            } catch (Exception e) {
                throw new BadRequestException(
                    "Failed to parse query '" + text + '\'', e);
            }
            this.filterQuery = parsedFilter;
            col = new ClusterCollector(field);
        }

        /**
         * Builds the query that looks up documents similar to {@code value}.
         *
         * @param value field value to search neighbours for
         * @return the query, additionally restricted by the {@code text}
         *     filter when one was supplied, or {@code null} when fast scan
         *     is on and the value produced no terms that were not queried
         *     before
         * @throws ParseException if the value cannot be tokenised
         */
        private Query createQuery(final String value) throws ParseException  {
            BooleanQuery main;
            if (fastScan) {
                main = minHashQuery(epsilon, field, value);
                if (main.clauses().isEmpty()) {
                    return null;
                }
            } else {
                main = minHashQuerySlow(epsilon, field, value);
            }
            if (filterQuery == null) {
                return main;
            }
            BooleanQuery filtered = new BooleanQuery();
            filtered.add(main, Occur.MUST);
            filtered.add(filterQuery, Occur.MUST);
            return filtered;
        }

        /**
         * Runs the scan: searches for the start value and then keeps
         * searching for the values of every document found so far, in
         * discovery order. The hits of a follow-up search are queued for
         * further expansion only when at least {@code minPts} of them were
         * accepted by the collector.
         *
         * <p>The scan ends early, leaving whatever the collector has
         * accepted so far, when the start value yields no query terms or
         * fewer than {@code minPts} documents.
         *
         * <p>NOTE(review): the distance filter of follow-up searches is
         * still evaluated against the start value, not against the value
         * being expanded — confirm this is intended.
         *
         * @param searcher searcher whose reader is scanned
         * @throws IOException on index access failure
         * @throws ParseException if a value cannot be tokenised
         */
        public void scan(final Searcher searcher)
            throws IOException, ParseException
        {
            Query query = createQuery(startValue);
            if (query == null) {
                // Fast scan produced no terms for the start value; the
                // original code failed with a NullPointerException here.
                ctx.logger().info("Start value produced no query terms");
                return;
            }
            ctx.logger().info("Query: " + query);
            IndexSearcher filterSearcher =
                new IndexSearcher(
                    new FilteringIndexReader(searcher.reader()));
            filterSearcher.search(query, col, false);
            List<Doc> newDocs = collectNewDocs();
            if (newDocs.size() < minPts) {
                ctx.logger().info("MinPTS is not reached");
                return;
            }
            // Copy before flushing: the collector may hand out its own
            // buffer, which flushNewFound() clears.
            List<Doc> neighbors = new ArrayList<>(newDocs);
            col.flushNewFound();
            Set<String> scanned = new HashSet<>();
            // neighbors grows while it is being iterated, hence the
            // index-based loop.
            for (int i = 0; i < neighbors.size(); i++) {
                if (i % 1000 == 0) {
                    ctx.logger().info("Scanning: " + i + " / "
                        + neighbors.size());
                }
                String nextValue = col.docValue(neighbors.get(i));
                // A document without a stored value cannot be tokenised;
                // a value seen before was already expanded.
                if (nextValue == null || !scanned.add(nextValue)) {
                    continue;
                }
                query = createQuery(nextValue);
                if (query == null) {
                    continue;
                }
                filterSearcher.search(query, col, false);
                newDocs = collectNewDocs();
                if (newDocs.size() >= minPts) {
                    neighbors.addAll(newDocs);
                }
                col.flushNewFound();
            }
        }

        /**
         * Asks the collector for the documents accepted from the last
         * search. The distance filter is only enabled for fast scan; the
         * slow path passes {@code -1} to disable it.
         */
        private List<Doc> collectNewDocs() throws IOException {
            int maxDistance;
            if (fastScan) {
                maxDistance = epsilon;
            } else {
                maxDistance = -1;
            }
            return col.newDocs(deduplicate, startValue, maxDistance);
        }

        /**
         * Builds {@code field@epsilon:(value)} and parses it with the given
         * parser; a leading {@code '-'} of the value is backslash-escaped.
         * Not called from within this class at the moment.
         */
        private Query minHashQuery1(
            final QueryParser parser,
            final int epsilon,
            final String field,
            final String value)
            throws ParseException
        {
            StringBuilder sb = new StringBuilder();
            sb.append(field);
            sb.append('@');
            sb.append(epsilon);
            sb.append(':');
            sb.append('(');
            // The emptiness check avoids StringIndexOutOfBoundsException.
            if (!value.isEmpty() && value.charAt(0) == '-') {
                sb.append('\\');
            }
            sb.append(value);
            sb.append(')');
            return parser.parse(sb.toString());
        }

        /**
         * Builds the fast-scan query: a disjunction over the not yet
         * queried terms that the analyzer produces for {@code value} on the
         * {@code <field>_perm<epsilon>} field. An epsilon of 3 is mapped to
         * the {@code _perm4} field (presumably there is no dedicated
         * {@code _perm3} field — confirm against the index schema).
         *
         * @return the query; it has no clauses when every term was already
         *     queried
         * @throws ParseException if tokenisation fails with an I/O error
         */
        private BooleanQuery minHashQuery(
            final int epsilon,
            final String field,
            final String value)
            throws ParseException
        {
            int permEpsilon = epsilon;
            if (epsilon == 3) {
                permEpsilon = 4;
            }
            return unseenTermsQuery(
                (field + "_perm" + permEpsilon).intern(),
                value);
        }

        /**
         * Builds the slow-scan query: a disjunction over the not yet
         * queried terms of {@code value} on {@code field} itself, requiring
         * all but {@code epsilon} of them to match.
         *
         * <p>The required number of matches is clamped at zero: because
         * previously queried terms are skipped, the number of clauses can
         * be smaller than {@code epsilon}, and a negative minimum must not
         * reach the scorer.
         *
         * @throws ParseException if tokenisation fails with an I/O error
         */
        private BooleanQuery minHashQuerySlow(
            final int epsilon,
            final String field,
            final String value)
            throws ParseException
        {
            BooleanQuery bq = unseenTermsQuery(field.intern(), value);
            // One clause is added per new term, so clauses().size() is the
            // number of terms contributed by this value.
            bq.setMinimumNumberShouldMatch(
                Math.max(0, bq.clauses().size() - epsilon));
            return bq;
        }

        /**
         * Tokenises {@code value} with the analyzer for {@code fieldName}
         * and returns a query with one SHOULD term clause per token that
         * was not used by any earlier query of this scan.
         */
        private BooleanQuery unseenTermsQuery(
            final String fieldName,
            final String value)
            throws ParseException
        {
            if (stringReader == null) {
                stringReader = new ReusableStringReader();
            }
            stringReader.init(value);
            try (TokenStream source =
                analyzer.reusableTokenStream(fieldName, stringReader))
            {
                source.reset();
                CharTermAttribute termAtt =
                    source.getAttribute(CharTermAttribute.class);
                BooleanQuery bq = new BooleanQuery();
                while (source.incrementToken()) {
                    String term = termAtt.toString();
                    if (termDedup.add(term)) {
                        bq.add(
                            new TermQuery(new Term(fieldName, term)),
                            Occur.SHOULD);
                    }
                }
                return bq;
            } catch (IOException ex) {
                throw (ParseException) new ParseException().initCause(ex);
            }
        }

        /**
         * Builds {@code field_perm<epsilon>:(value)} and parses it with the
         * given parser; a leading {@code '-'} of the value is
         * backslash-escaped. Not called from within this class at the
         * moment.
         */
        private Query minHashQuery2(
            final QueryParser parser,
            final int epsilon,
            final String field,
            final String value)
            throws ParseException
        {
            StringBuilder sb = new StringBuilder();
            sb.append(field);
            sb.append("_perm");
            sb.append(epsilon);
            sb.append(":(");
            // The emptiness check avoids StringIndexOutOfBoundsException.
            if (!value.isEmpty() && value.charAt(0) == '-') {
                sb.append('\\');
            }
            sb.append(value);
            sb.append(')');
            return parser.parse(sb.toString());
        }
    }


    /**
     * Returns a fixed wiki URL (presumably the page documenting this
     * handler — confirm that the link is still current).
     */
    @Override
    public String toString() {
        return "https://wiki.yandex-team.ru/ps/Documentation/Lucene/"
            + "SearchHandlers/clusterize";
    }

    /**
     * JSON producer used for non-UTF-8 responses.
     *
     * <p>NOTE(review): unlike {@code Utf8ClusterProducer}, this producer
     * never reads {@code scanContext}; it always writes an object with an
     * empty {@code hitsArray}, so scan results are not returned for
     * non-UTF-8 charsets. Looks like an unfinished stub — confirm.
     */
    private static class ClusterProducer implements JsonValue {
        // Kept for parity with the UTF-8 producer; not read by writeValue.
        private final ScanContext scanContext;

        ClusterProducer(final ScanContext scanContext) {
            this.scanContext = scanContext;
        }

        /**
         * Writes {@code {"hitsArray": []}} to the given writer.
         *
         * @param writer destination JSON writer
         * @throws IOException if the writer fails
         */
        @Override
        public void writeValue(final JsonWriterBase writer)
            throws IOException
        {
            writer.startObject();
            writer.key("hitsArray");
            writer.startArray();
            writer.endArray();
            writer.endObject();
        }
    }

    /**
     * UTF-8 JSON producer for the scan result. The output is an object
     * holding {@code hitsCount} (number of collected documents) and
     * {@code hitsArray} (one object per collected document, containing the
     * fields requested through the {@code get} parameter).
     */
    private static class Utf8ClusterProducer implements Utf8JsonValue {
        // Only HITS_COUNT and HITS_ARRAY are referenced by writeValue; the
        // remaining keys are not used by the current output format.
        private static final BytesRef SIZE = new BytesRef("size");
        private static final BytesRef GROUP = new BytesRef("group");
        private static final BytesRef MAX = new BytesRef("max");
        private static final BytesRef MIN = new BytesRef("min");
        private static final BytesRef COUNTERS = new BytesRef("counters");
        private static final BytesRef MERGED_DOCS =
            new BytesRef("merged_docs");
        private static final BytesRef HITS_COUNT =
            new BytesRef("hitsCount");
        private static final BytesRef HITS_ARRAY =
            new BytesRef("hitsArray");
        private final ScanContext scanContext;

        Utf8ClusterProducer(final ScanContext scanContext) {
            this.scanContext = scanContext;
        }

        /**
         * Writes the collected documents to the given writer.
         *
         * @param writer destination JSON writer
         * @throws IOException if the writer fails or a stored field cannot
         *     be read
         */
        @Override
        public void writeValue(final Utf8JsonWriter writer)
            throws IOException
        {
            final List<Doc> hits = scanContext.col.allFound;
            final List<String> fields = scanContext.get;
            writer.startObject();
            writer.key(HITS_COUNT);
            writer.value(hits.size());
            writer.key(HITS_ARRAY);
            writer.startArray();
            for (Doc hit: hits) {
                writeHit(writer, hit, fields);
            }
            writer.endArray();
            writer.endObject();
        }

        /** Writes one document as an object of the requested fields. */
        private void writeHit(
            final Utf8JsonWriter writer,
            final Doc hit,
            final List<String> fields)
            throws IOException
        {
            writer.startObject();
            for (String name: fields) {
                writer.key(name);
                writer.value(hit.get(name));
            }
            writer.endObject();
        }
    }

    /**
     * Collector that accumulates the hits of successive searches of one
     * scan. Every document is reported at most once: a bit set per reader
     * context remembers which doc ids were already collected.
     *
     * <p>{@link #newDocs} post-processes the hits of the latest search
     * (distance filter and optional de-duplication by field value) and
     * appends the accepted ones to {@code allFound};
     * {@link #flushNewFound} must be called before the next search.
     */
    private static class ClusterCollector extends FlushableCollector {
        private final Map<AtomicReaderContext, OpenBitSet> clusteredMap =
            new HashMap<>();
        // Hits of the search(es) since the last flushNewFound().
        private final List<Doc> newFound = new ArrayList<>();
        // Every document accepted by newDocs() so far, in acceptance order.
        private final List<Doc> allFound = new ArrayList<>();
        // Field values of the documents accepted in de-duplicating mode.
        private final Set<String> deduplicator = new HashSet<>();
        private final String field;
        // Scratch buffers for the distance computation.
        private final Ints i1 = new Ints(30);
        private final Ints i2 = new Ints(30);
        private OpenBitSet clusteredSet;
        private FilteringReaderLeave reader;
        private AtomicReaderContext currentContext;

        /**
         * @param field stored field whose value identifies a document for
         *     distance computation and de-duplication
         */
        ClusterCollector(final String field) {
            this.field = field;
        }

        /** Scores are not used by this collector. */
        @Override
        public void setScorer(final Scorer scorer) {
        }

        /**
         * Records a hit of the current reader. When the reader is a
         * {@link FilteringReaderLeave} the document is also deleted from it
         * (presumably so that later searches of the same scan skip it —
         * confirm against FilteringIndexReader).
         */
        @Override
        public void collect(final int docId) {
            if (reader != null) {
                reader.deleteDocument(docId);
            }
            if (!clusteredSet.get(docId)) {
                newFound.add(new Doc(docId, currentContext));
                clusteredSet.set(docId);
            }
        }

        /** Selects, creating it on first use, the bit set of the reader. */
        private void setBitSets(final AtomicReaderContext readerContext) {
            clusteredSet = clusteredMap.get(readerContext);
            if (clusteredSet == null) {
                clusteredSet = new OpenBitSet(readerContext.reader.maxDoc());
                clusteredMap.put(readerContext, clusteredSet);
            }
        }

        @Override
        public void setNextReader(final AtomicReaderContext readerContext) {
            currentContext = readerContext;
            if (readerContext.reader instanceof FilteringReaderLeave) {
                reader = (FilteringReaderLeave) readerContext.reader;
            } else {
                reader = null;
            }
            setBitSets(readerContext);
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        /** Forgets the hits that have not been flushed yet. */
        public void flushNewFound() {
            newFound.clear();
        }

        /**
         * Returns the value of the collector's field for the document,
         * loading it from the stored fields on first access and caching it
         * in the document.
         *
         * @return the value, or whatever {@link Doc#get} returns when the
         *     field is absent
         * @throws IOException if the stored document cannot be read
         */
        public String docValue(final Doc doc) throws IOException {
            String value = doc.value();
            if (value == null) {
                value = doc.get(field);
                doc.value(value);
            }
            return value;
        }

        /**
         * Parses a decimal integer with an optional leading {@code '-'}
         * from {@code str[off, off + len)}. An empty range yields 0.
         * Characters are not validated to be digits.
         *
         * <p>The number is accumulated as a negative value and negated at
         * the end, so that {@code Integer.MIN_VALUE} is parsed without
         * overflow.
         */
        private static int parseInt(
            final String str,
            final int off,
            final int len)
        {
            if (len <= 0) {
                // Empty token, e.g. produced by "1,,2" or a trailing comma.
                return 0;
            }
            int num  = 0;
            int sign = -1;
            final char ch  = str.charAt(off);
            if (ch == '-') {
                sign = 1;
            } else {
                num = '0' - ch;
            }
            final int end = off + len;
            for (int i = off + 1; i < end; i++) {
                num = num * 10 + '0' - str.charAt(i);
            }
            return sign * num;
        }

        /**
         * Replaces the content of {@code ints} with the integers of the
         * comma-separated list {@code str}.
         */
        private void parseInts(final Ints ints, final String str) {
            ints.reset();
            int start = 0;
            while (true) {
                // indexOf takes the character first and the start index
                // second; the arguments used to be swapped, so the string
                // was never split.
                final int sep = str.indexOf(',', start);
                final int end;
                if (sep == -1) {
                    end = str.length();
                } else {
                    end = sep;
                }
                ints.add(parseInt(str, start, end - start));
                if (sep == -1) {
                    break;
                }
                start = sep + 1;
            }
        }

        /**
         * Post-processes the hits collected since the last flush, in index
         * order, and returns those that were accepted. Accepted documents
         * are also appended to {@code allFound}.
         *
         * <p>A document is accepted when
         * <ul>
         * <li>{@code epsilon} is negative, or its value differs from
         * {@code startValue} in at most {@code epsilon} positions (see
         * {@link Ints#dist}); a document without a value is rejected when
         * the distance filter is active; and</li>
         * <li>{@code deduplicate} is off, or no document with the same
         * value was accepted before.</li>
         * </ul>
         *
         * @param deduplicate whether to accept only one document per value
         * @param startValue comma-separated integers to measure against
         * @param epsilon maximum accepted distance; negative disables the
         *     distance filter
         * @return a new list with the accepted documents; it is not
         *     affected by {@link #flushNewFound}
         * @throws IOException if a stored document cannot be read
         */
        public List<Doc> newDocs(
            final boolean deduplicate,
            final String startValue,
            final int epsilon)
            throws IOException
        {
            Collections.sort(newFound);
            final boolean filterByDistance = epsilon >= 0;
            if (filterByDistance) {
                // The start value is the same for every hit: parse it once.
                parseInts(i2, startValue);
            }
            final List<Doc> accepted = new ArrayList<>(newFound.size());
            for (Doc doc: newFound) {
                if (filterByDistance) {
                    final String value = docValue(doc);
                    if (value == null) {
                        continue;
                    }
                    parseInts(i1, value);
                    if (i1.dist(i2) > epsilon) {
                        continue;
                    }
                }
                if (deduplicate && !deduplicator.add(docValue(doc))) {
                    continue;
                }
                accepted.add(doc);
                allFound.add(doc);
            }
            return accepted;
        }

        /** Nothing is buffered outside of this object; no-op. */
        @Override
        public void flush() {
        }

        /** No resources are held; no-op. */
        @Override
        public void close() {
        }

        /** Minimal growable int buffer. */
        private static class Ints {
            private int[] array;
            private int count;

            Ints(final int len) {
                array = new int[len];
                count = 0;
            }

            /** Appends a value, doubling the backing array when full. */
            public void add(final int i) {
                if (count == array.length) {
                    array = Arrays.copyOf(
                        array,
                        Math.max(1, array.length << 1));
                }
                array[count++] = i;
            }

            /** Empties the buffer, keeping the backing array. */
            public void reset() {
                count = 0;
            }

            /**
             * Counts the positions at which the two buffers differ. Only
             * the common prefix is compared; the surplus elements of the
             * longer buffer are ignored.
             */
            public int dist(final Ints o) {
                final int len = Math.min(count, o.count);
                final int[] a1 = this.array;
                final int[] a2 = o.array;
                int dist = 0;
                for (int i = 0; i < len; i++) {
                    if (a1[i] != a2[i]) {
                        dist++;
                    }
                }
                return dist;
            }
        }
    }

    /**
     * A collected document: the doc id as reported to the collector plus
     * the reader context it belongs to, with an optional cached field
     * value. Documents are ordered by the reader's {@code docBase} and then
     * by doc id.
     */
    private static class Doc implements Comparable<Doc> {
        private final int docId;
        private final AtomicReaderContext readerContext;
        // Cache slot filled by the collector; null until a value is set.
        private String value;

        Doc(final int docId, final AtomicReaderContext readerContext) {
            this.readerContext = readerContext;
            this.docId = docId;
        }

        /** Returns the cached value, or {@code null} if none was set. */
        public String value() {
            return this.value;
        }

        /** Stores the value to be returned by {@link #value()}. */
        public void value(final String value) {
            this.value = value;
        }

        /**
         * Loads the stored document from the reader and returns the value
         * of the given field.
         *
         * @throws IOException if the stored document cannot be read
         */
        public String get(final String field) throws IOException {
            return readerContext.reader.document(docId).get(field);
        }

        @Override
        public int compareTo(final Doc other) {
            final int ownBase = readerContext.docBase;
            final int otherBase = other.readerContext.docBase;
            if (ownBase != otherBase) {
                return Integer.compare(ownBase, otherBase);
            }
            return Integer.compare(docId, other.docId);
        }
    }
}

