Xapian(Python)のTermGeneratorの簡単な理解と使用例

1974 ワード

まず、次のようなインデックス(データベース)を作成します。
インデックス作成コード:
import posixpath

import xapian
# Absolute form of the current working directory, resolved when this runs.
base = posixpath.abspath(posixpath.curdir)
# The Xapian index is kept in an "index" entry directly under that directory.
xapian_database_path = posixpath.join(base, u'index')

def index():
    """Add two sample documents ('abc' and 'def') to the Xapian index.

    Opens the database at ``xapian_database_path`` with
    ``xapian.DB_CREATE_OR_OPEN`` and, for each sample string, stores the
    string as the document data and indexes it through a ``TermGenerator``
    configured with an English stemmer.

    Every call goes through ``add_document`` again, so running this more
    than once is expected to add further copies of the sample documents.
    """
    database = xapian.WritableDatabase(xapian_database_path, xapian.DB_CREATE_OR_OPEN)
    indexer = xapian.TermGenerator()
    stemmer = xapian.Stem(u'english')
    indexer.set_stemmer(stemmer)
    for s in ['abc', 'def']:
        doc = xapian.Document()
        doc.set_data(s)
        # Point the term generator at this document so that the terms it
        # produces from the text below are attached to it.
        indexer.set_document(doc)
        indexer.index_text(s)

        database.add_document(doc)

    # Flush once after the whole batch instead of once per document: the
    # stored result is the same, but the pending changes are written in a
    # single pass.
    database.flush()
 
  
 
  


 
   
  

検索コードは次のとおりです:

def search(query_string):
    """Run ``query_string`` against the Xapian index and print the top hits.

    The query is parsed by a ``QueryParser`` set up with an English stemmer
    and the ``STEM_SOME`` strategy, then the first 10 matches are fetched.
    The estimated match count, the number of matches returned, and one line
    per match (rank, percent, docid, document data) are printed.

    Returns ``None`` in all cases. If the database at
    ``xapian_database_path`` cannot be opened, nothing is printed.
    """
    try:
        database = xapian.Database(xapian_database_path)
    except xapian.DatabaseOpeningError:
        # Deliberate best-effort: with no index to open there is nothing to
        # search, so give up quietly.
        return
    enquire = xapian.Enquire(database)
    query_parser = xapian.QueryParser()
    stemmer = xapian.Stem('english')
    query_parser.set_stemmer(stemmer)
    query_parser.set_database(database)
    query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = query_parser.parse_query(query_string)

    enquire.set_query(query)
    matches = enquire.get_mset(0, 10)

    # Each print takes one pre-formatted string in parentheses, so the
    # output is the same whether ``print`` is the Python 2 statement or the
    # Python 3 function.
    print('%i results found.' % matches.get_matches_estimated())
    print('Results 1 - %i:' % matches.size())
    for match in matches:
        # NOTE(review): under the Python 3 bindings get_data() may return
        # bytes rather than str -- confirm and decode here if needed.
        print('%i: %i%% docid=%i [%s]' % (
            match.rank + 1,
            match.percent,
            match.docid,
            match.document.get_data(),
        ))

呼び出し:
search('abc')  # 'abc' is one of the strings index() adds, so a match is expected
search('def')  # 'def' is the other string index() adds, so a match is expected
search('XXXX')  # index() never adds this text; presumably reports 0 results