Apache/2.4.7 (Ubuntu) Linux sman1baleendah 3.13.0-24-generic #46-Ubuntu SMP Thu Apr 10 19:11:08 UTC 2014 x86_64 uid=33(www-data) gid=33(www-data) groups=33(www-data) safemode : OFF MySQL: ON | Perl: ON | cURL: OFF | WGet: ON > / usr / share / doc / python-xapian / examples / | server ip : 104.21.89.46 your ip : 172.71.254.74 H O M E |
Filename | /usr/share/doc/python-xapian/examples/simpleindex.py |
Size | 2 kb |
Permission | rw-r--r-- |
Owner | root : root |
Create time | 27-Apr-2025 09:53 |
Last modified | 07-Jan-2014 02:13 |
Last accessed | 07-Jul-2025 07:03 |
Actions | edit | rename | delete | download (gzip) |
View | text | code | image |
#!/usr/bin/env python
#
# Index each paragraph of a text file as a Xapian document.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA
import sys
import xapian
import string
def iter_paragraphs(lines):
    """Yield each paragraph found in *lines* as a single string.

    A paragraph is a run of consecutive non-blank lines.  Every line is
    stripped of surrounding whitespace, and the lines of one paragraph are
    joined with a single space.  Blank (whitespace-only) lines only
    separate paragraphs and are never yielded themselves.

    The final paragraph is yielded even when the input does not end with a
    blank line; emitting paragraphs only on a blank line would lose it.
    """
    parts = []
    for line in lines:
        line = line.strip()
        if line:
            parts.append(line)
        elif parts:
            # A blank line closes the paragraph collected so far.
            yield ' '.join(parts)
            parts = []
    if parts:
        # Input ended in the middle of a paragraph: emit it as well.
        yield ' '.join(parts)


def main(argv=None):
    """Index each paragraph read from standard input as a Xapian document.

    *argv* defaults to ``sys.argv`` and must hold the program name followed
    by exactly one argument: the path of the database to update.

    Writes a message to stderr and exits with status 1 when the argument
    count is wrong or when any exception is raised while indexing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        sys.stderr.write("Usage: %s PATH_TO_DATABASE\n" % argv[0])
        sys.exit(1)

    try:
        # Open the database for update, creating a new database if necessary.
        database = xapian.WritableDatabase(argv[1], xapian.DB_CREATE_OR_OPEN)

        indexer = xapian.TermGenerator()
        stemmer = xapian.Stem("english")
        indexer.set_stemmer(stemmer)

        try:
            for para in iter_paragraphs(sys.stdin):
                # The paragraph text is stored as the document data and is
                # also the text handed to the term generator.
                doc = xapian.Document()
                doc.set_data(para)

                indexer.set_document(doc)
                indexer.index_text(para)

                # Add the document to the database.
                database.add_document(doc)
        except StopIteration:
            # Retained from the original script; the for loop already
            # handles the normal end of input by itself.
            pass
    except Exception as e:
        sys.stderr.write("Exception: %s\n" % str(e))
        sys.exit(1)


if __name__ == "__main__":
    main()
#
# Index each paragraph of a text file as a Xapian document.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA
import sys
import xapian
import string
def iter_paragraphs(lines):
    """Yield each paragraph found in *lines* as a single string.

    A paragraph is a run of consecutive non-blank lines.  Every line is
    stripped of surrounding whitespace, and the lines of one paragraph are
    joined with a single space.  Blank (whitespace-only) lines only
    separate paragraphs and are never yielded themselves.

    The final paragraph is yielded even when the input does not end with a
    blank line; emitting paragraphs only on a blank line would lose it.
    """
    parts = []
    for line in lines:
        line = line.strip()
        if line:
            parts.append(line)
        elif parts:
            # A blank line closes the paragraph collected so far.
            yield ' '.join(parts)
            parts = []
    if parts:
        # Input ended in the middle of a paragraph: emit it as well.
        yield ' '.join(parts)


def main(argv=None):
    """Index each paragraph read from standard input as a Xapian document.

    *argv* defaults to ``sys.argv`` and must hold the program name followed
    by exactly one argument: the path of the database to update.

    Writes a message to stderr and exits with status 1 when the argument
    count is wrong or when any exception is raised while indexing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        sys.stderr.write("Usage: %s PATH_TO_DATABASE\n" % argv[0])
        sys.exit(1)

    try:
        # Open the database for update, creating a new database if necessary.
        database = xapian.WritableDatabase(argv[1], xapian.DB_CREATE_OR_OPEN)

        indexer = xapian.TermGenerator()
        stemmer = xapian.Stem("english")
        indexer.set_stemmer(stemmer)

        try:
            for para in iter_paragraphs(sys.stdin):
                # The paragraph text is stored as the document data and is
                # also the text handed to the term generator.
                doc = xapian.Document()
                doc.set_data(para)

                indexer.set_document(doc)
                indexer.index_text(para)

                # Add the document to the database.
                database.add_document(doc)
        except StopIteration:
            # Retained from the original script; the for loop already
            # handles the normal end of input by itself.
            pass
    except Exception as e:
        sys.stderr.write("Exception: %s\n" % str(e))
        sys.exit(1)


if __name__ == "__main__":
    main()