Sphinx: Python で xmlpipe2 データソースを使用してインデックスを生成する
3897 ワード
# Data source "testxml": an xmlpipe2 source. The indexer runs xmlpipe_command
# and reads the XML document stream that the command prints to stdout.
# NOTE(review): /tmp/testx.py is presumably the Python script listed further
# down in this article - confirm the path on the target machine.
source testxml
{
type = xmlpipe2
xmlpipe_command = python /tmp/testx.py
}
# Index "testxml": built from the "testxml" source; index files are written
# under the given path prefix.
index testxml
{
source = testxml
path = /data/sphinx/testxml
docinfo = extern
# No stemming/lemmatisation; single-character words are indexed.
morphology = none
min_word_len = 1
charset_type = utf-8
# Prefix indexing disabled.
min_prefix_len = 0
# Strip HTML markup from the incoming text before indexing.
html_strip = 1
# Word characters: digits, underscore, Latin letters (upper case folded to
# lower case) and Cyrillic U+410..U+44F (upper case folded to lower case).
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# Characters in U+3000..U+2FA1F are indexed as 1-grams (one character per
# token), which covers the CJK ranges that have no word separators.
ngram_len = 1
ngram_chars = U+3000..U+2FA1F
}
# Settings for the indexer program.
indexer
{
# Memory limit for indexing.
mem_limit = 128M
}
# Settings for the searchd daemon.
searchd
{
# TCP port the daemon listens on.
port = 4412
# Daemon log, per-query log and PID file all live in the same directory.
log = /data/log/sphinxsearch/searchd.log
query_log = /data/log/sphinxsearch/query.log
read_timeout = 5
max_children = 30
pid_file = /data/log/sphinxsearch/searchd.pid
# Upper bound on the number of matches kept per query.
max_matches = 1000
# Index rotation behaviour: rotate seamlessly, do not pre-open index files,
# and unlink the old index copies after a successful rotate.
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}
Python コードは以下の通りです。
# coding=utf-8
from loxun import XmlWriter
from StringIO import StringIO
import pymssql
# Number of rows requested from the database per page.
PAGE_SIZE = 1000

# Paged query, filled in with (inner_top, outer_top) via %-formatting:
# take the newest `outer_top` rows by ID, keep the `inner_top` oldest of those,
# and return them newest-first.  The text is kept exactly as the original
# script issued it.
PAGE_SQL = ('SELECT * FROM (select top %d* from (select top %d* from '
            'MapObjectInfo ORDER BY ID DESC)t1 ORDER BY ID)t2 ORDER BY ID DESC')


def page_windows(total, page_size=PAGE_SIZE):
    """Yield ``(inner_top, outer_top)`` pairs covering ``total`` rows.

    Each pair parameterises one execution of ``PAGE_SQL``.  ``outer_top``
    grows by ``page_size`` per page; ``inner_top`` is ``page_size`` for every
    full page and the remainder for the last, partial page.  Nothing is
    yielded when ``total`` is zero, and no trailing empty page is produced
    when ``total`` is an exact multiple of ``page_size``.

    Raises:
        ValueError: if ``page_size`` is smaller than 1 (the loop could never
            make progress).
    """
    if page_size < 1:
        raise ValueError("page_size must be >= 1, got %r" % (page_size,))
    covered = 0
    while covered < total:
        inner_top = min(page_size, total - covered)
        covered += page_size
        yield inner_top, covered


def write_schema(xml):
    """Write the ``sphinx:schema`` element: three text fields and one int attr."""
    xml.startTag("sphinx:schema")
    for field_name in ("myname", "myaddress", "mykeyword"):
        xml.tag("sphinx:field", {"name": field_name})
    xml.tag("sphinx:attr", {"name": "AID", "type": "int"})
    xml.endTag()


def write_document(xml, row):
    """Write one ``sphinx:document`` element for a ``MapObjectInfo`` row.

    ``row`` is a mapping with the keys ``ID``, ``Name``, ``Address`` and
    ``Keyword``.  The row ID is used both as the document id and as the
    value of the ``AID`` attribute.
    """
    xml.startTag("sphinx:document", {"id": row['ID']})
    for element, column in (("myname", 'Name'),
                            ("myaddress", 'Address'),
                            ("mykeyword", 'Keyword')):
        xml.startTag(element)
        value = row[column]
        # A NULL column arrives as None; emit an empty element instead of
        # handing None to the XML writer.
        if value is not None:
            xml.text(value)
        xml.endTag()
    xml.startTag("AID")
    xml.text(str(row['ID']))
    xml.endTag()
    xml.endTag()


def main():
    """Dump ``MapObjectInfo`` as an xmlpipe2 document stream on stdout.

    The whole document is assembled in memory and printed once the database
    has been read completely, so a failure part-way through prints nothing.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded exactly as in the original script; consider moving them out
    # of the source file.
    conn = pymssql.connect(host=r'MyServer2k', user='citymap', password='city@map@com', database='CitycomeMap',as_dict=True,charset='utf8')
    try:
        cur = conn.cursor()
        out = StringIO()
        xml = XmlWriter(out)
        xml.addNamespace("sphinx","http://www.beihai365.com")

        xml.startTag("sphinx:docset")
        write_schema(xml)

        # The row count is read once up front and drives the paging below.
        # NOTE(review): rows inserted or deleted while the export runs would
        # shift the page boundaries - confirm this is acceptable.
        cur.execute('SELECT COUNT(*) FROM MapObjectInfo')
        total = cur.fetchone()[0]

        for inner_top, outer_top in page_windows(total, PAGE_SIZE):
            cur.execute(PAGE_SQL % (inner_top, outer_top))
            row = cur.fetchone_asdict()
            while row:
                # Rows with ID 0 are skipped: Sphinx document ids must be
                # non-zero.
                if row['ID'] != 0:
                    write_document(xml, row)
                row = cur.fetchone_asdict()
    finally:
        # Release the connection even when a query or an XML write fails.
        conn.close()

    xml.endTag()  # closes sphinx:docset
    xml.close()
    print(out.getvalue())


if __name__ == "__main__":
    main()