You are not logged in Log in Join
You are here: Home » Members » dedalu » How-to create a Lexicon and ZCTextIndex using python

Log in
Name

Password

 

How-to create a Lexicon and ZCTextIndex using python

Notes

  • New things never come with good documentation... If I'm wrong, please contact me.
  • manage_addProduct is provided by ObjectManager, so all the methods below will work in any class derived from ObjectManager. In my case:
    from Acquisition import Implicit
    from Globals import Persistent
    from OFS.ObjectManager import ObjectManager
    from OFS.SimpleItem import Item
    
    def addOTBase(self, id, title='', REQUEST=None):
        """
        Add a new OTBase object to this container.

        id      -- id under which the new object is stored in self
        title   -- optional title passed to the OTBase constructor
        REQUEST -- when not None, the result of self.manage_main (called
                   with update_menu=1) is returned; otherwise the function
                   returns None
        """
        ob = OTBase(id, title)
        self._setObject(id, ob)
        # The catalog is built on the object as fetched back from the
        # container, not on the bare `ob` instance -- presumably so that it
        # is wrapped in the container's context (TODO confirm).
        self._getOb(id).addCatalog()
        if REQUEST is not None:
            return self.manage_main(self, REQUEST, update_menu=1)
    
    # The class body is elided in this how-to. addOTBase() calls addCatalog()
    # on the stored instance, so the class is expected to provide that method.
    class OTBase(Implicit, Persistent, ObjectManager, Item):
            ...
    
  • Empty is just an empty class: class Empty: pass.
  • You can find group and name strings in registerFactory calls in lib/python/Products/ZCTextIndex/*.py.

Create a catalog

        def addCatalog(self):
            """
            Add a default catalog with id 'Catalog' to this object.

            The catalog is created through the ZCatalog product factory,
            fetched back from self, and then handed to addLexicon,
            addIndexes and addMetas, in that order.
            """
            # Direct construction, left commented out by the original author:
            #   c = ZCatalog('Catalog', 'Default catalog')
            #   self._setObject('Catalog', c)
            self.manage_addProduct['ZCatalog'].manage_addZCatalog('Catalog', 'Default catalog')
            cat = self._getOb('Catalog')
            self.addLexicon(cat)
            self.addIndexes(cat)
            self.addMetas(cat)

Create a default Lexicon

	def addLexicon(self, cat):
        	"""
                It adds a default lexicon with 'Lexicon' id.
                """
                # Works?
                #l = PLexicon('Lexicon', '', HTMLWordSplitter(), CaseNormalizer(), StopWordRemover())
                #cat._setObject('Lexicon', l)

                elem = []
                wordSplitter = Empty()
                wordSplitter.group = 'Word Splitter'
                wordSplitter.name = 'HTML aware splitter'

                caseNormalizer = Empty()
                caseNormalizer.group = 'Case Normalizer'
                caseNormalizer.name = 'Case Normalizer'

                stopWords = Empty()
                stopWords.group = 'Stop Words'
                stopWords.name = 'Remove listed and single char words'

                elem.append(wordSplitter)
                elem.append(caseNormalizer)
                elem.append(stopWords)
                cat.manage_addProduct['ZCTextIndex'].manage_addLexicon('Lexicon', 'Default Lexicon', elem)

Create indexes (some are ZCTextIndexes) and metadata

	def addIndexes(self, cat):
        	"""
                It adds default indexes
                """
                cat.addIndex('id', 'FieldIndex')

                cat.addIndex('keywords', 'KeywordIndex')

                title_extras = Empty()
                title_extras.doc_attr = 'title'
                title_extras.index_type = 'Okapi BM25 Rank'
                title_extras.lexicon_id = 'Lexicon'
                cat.addIndex('title', 'ZCTextIndex', title_extras)

                principiaSearchSource_extras = Empty()
                principiaSearchSource_extras.doc_attr = 'PrincipiaSearchSource'
                principiaSearchSource_extras.index_type = 'Okapi BM25 Rank'
                principiaSearchSource_extras.lexicon_id = 'Lexicon'
                cat.addIndex('PrincipiaSearchSource', 'ZCTextIndex', principiaSearchSource_extras)

                cat.addIndex('bobobase_modification_time', 'DateIndex')

        def addMetas(self, cat):
            """
            Add the default metadata columns to the catalog.

            cat -- the catalog that receives the columns

            One column is added per name, in this order: 'id', 'title',
            'bobobase_modification_time'.
            """
            for column in ('id', 'title', 'bobobase_modification_time'):
                cat.manage_addColumn(column)