#!/usr/bin/env python
# coding: utf-8

"""Index each line of text."""

import sys
import xapian
import string


# Script body: read text from stdin and store every line as its own Xapian
# document, indexed with a stemmer for the requested language.
#
# Command line: PATH_TO_DATABASE LANGUAGE
#   PATH_TO_DATABASE -- database to open (created when it does not exist)
#   LANGUAGE         -- language name handed to xapian.Stem
#
# Exits with status 1 on wrong usage or on any exception while indexing.
# Only constructs valid on both Python 2.6+ and Python 3 are used.
if len(sys.argv) != 3:
    sys.stderr.write("Usage: %s PATH_TO_DATABASE LANGUAGE \n" % sys.argv[0])
    sys.exit(1)

try:
    # Open the database for update, creating a new database if necessary.
    database = xapian.WritableDatabase(sys.argv[1], xapian.DB_CREATE_OR_OPEN)

    indexer = xapian.TermGenerator()
    stemmer = xapian.Stem(sys.argv[2])
    indexer.set_stemmer(stemmer)

    # The first `last_id` input lines are skipped below.
    # NOTE(review): this looks like resume support -- it presumably assumes
    # the database was filled only by this script from the same input, one
    # document per line, with no deletions; confirm before relying on it.
    last_id = database.get_lastdocid()

    for i, line in enumerate(sys.stdin):
        # "\r" keeps the progress counter on a single terminal line.
        if i < last_id:
            sys.stderr.write("Skipping line %d \r" % (i + 1))
            continue

        sys.stderr.write("Indexing line %d \r" % (i + 1))

        # str.strip() replaces string.strip(), which no longer exists in
        # Python 3; the result is the same.
        line = line.strip()

        # Blank lines are stored as (empty) documents too, so every input
        # line still produces exactly one document.
        doc = xapian.Document()
        doc.set_data(line)

        indexer.set_document(doc)
        indexer.index_text(line)

        # Add the document to the database.
        database.add_document(doc)

except Exception as e:
    # Top-level boundary: report the failure and signal it via exit status.
    sys.stderr.write("Exception: %s\n" % str(e))
    sys.exit(1)
