# coding: utf-8

import sys

import xapian

# Both positional arguments are required: the path to the Xapian database
# and a text file that holds one search word per line.
if len(sys.argv) < 3:
    print >> sys.stderr, "Usage: %s PATH_TO_DATABASE WORDS_FILE" % sys.argv[0]
    sys.exit(1)

# Load the word list.  The `with` block closes the file as soon as it has
# been read.  Blank lines are dropped: an empty string is not a usable term
# for the pairwise phrase queries this script builds from the list.
with open(sys.argv[2], 'r') as words_file:
    geowords = [word for word in (line.strip() for line in words_file) if word]


try:
    # Open the database for searching.
    database = xapian.Database(sys.argv[1])

    # Start an enquire session.
    enquire = xapian.Enquire(database)

    stemmer = xapian.Stem('russian')

    # Попавшиеся запросы
    docids = set()

    counter = 0

    qp = xapian.QueryParser()
    qp.set_stemmer(stemmer)
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)

    for i, word1 in enumerate(geowords):
        for word2 in geowords[i + 1:]:

            if counter % 1000 == 0:
                print >> sys.stderr, word1, word2
                print >> sys.stderr, "%d pairs, %d matches" % (counter, len(docids))

            counter += 1

            query = qp.parse_query('"%s" AND "%s"' % (word1, word2))

            enquire.set_query(query)
            # По второму запросу мы знаем, что результатов может быть не больше 249918
            matches = enquire.get_mset(0, 249918)

            for m in matches:
                docid = m.docid

                # Новый документ
                if docid not in docids:
                    # Выводим
                    print m.document.get_data()
                    sys.stdout.flush()

                docids.add(docid)

except Exception, e:
    print >> sys.stderr, "Exception: %s" % str(e)
    sys.exit(1)
