Code Search for Developers
 
 
  

normalizer.c from TextIndexNG at Krugle


Show normalizer.c syntax highlighted

/*
 TextIndexNG V 3                
 The next generation TextIndex for Zope

 This software is governed by a license. See
 LICENSE.txt for the terms of this license.
*/

#include "Python.h"

/*
 * Instance layout of a Normalizer object.
 *
 * table    - list of (key, value) tuples of unicode objects, built by
 *            CopyTranslationTable(); NormalizeWord() replaces every
 *            occurrence of key by value, entry after entry.
 * encoding - name of the codec used to decode byte strings (table entries
 *            and words to normalize) to unicode.
 */
typedef struct {
    PyObject_HEAD
    PyObject *table;
    char     *encoding;
} Normalizer;


/*
 * Destructor: drops the reference to the translation table and releases
 * the memory of the instance.
 *
 * self->encoding is only ever assigned by the constructor, never allocated
 * by this object, so it is not freed here.
 */
static void
Normalizer_dealloc(Normalizer *self)
{
    /* table is NULL when the list could not be allocated during
       construction, so the NULL-tolerant variant is required */
    Py_XDECREF(self->table);

    /* instances are created with PyObject_NEW and therefore have to be
       released with the matching object deallocator, not PyMem_DEL */
    PyObject_Del(self);
}

/*
 * getTable() -- return the translation table of this Normalizer as a new
 * reference.  The second argument is not used.
 */
static PyObject *getTable(Normalizer *self,PyObject *word)
{
    PyObject *tbl = self->table;

    Py_XINCREF(tbl);
    return tbl;
}

/*
 * Normalize a single word.
 *
 * Byte strings are decoded with self->encoding, everything else is
 * converted with PyUnicode_FromObject().  Afterwards every (key, value)
 * entry of self->table is applied in order, replacing all occurrences of
 * key by value.
 *
 * Returns a new reference to the resulting unicode object, or NULL with a
 * Python exception set.
 */
static PyObject *NormalizeWord(Normalizer *self,PyObject *word)
{
    int i;
    PyObject *temp;

    if (PyString_Check(word)) {
        if (! (temp = PyUnicode_FromEncodedObject(word,self->encoding,"strict"))) {
            PyErr_SetString(PyExc_UnicodeError,"unicode conversion failed");
            return NULL;
        }
    }  else  {
        /* the conversion fails for objects that are not string-like;
           the exception it raised is passed on unchanged */
        if (! (temp = PyUnicode_FromObject(word)))
            return NULL;
    }

    for (i=0; i<PyList_Size(self->table); i++) {
        PyObject *s, *item, *key, *value;

        /* The table is handed out by getTable() and may have been modified
           from Python, so use the checked accessors (all three return
           borrowed references) instead of trusting the layout. */
        if (! (item = PyList_GetItem(self->table, i)))
            goto fail;
        if (! (key = PyTuple_GetItem(item,0)))
            goto fail;
        if (! (value = PyTuple_GetItem(item,1)))
            goto fail;

        if (! (s = PyUnicode_Replace( temp, key, value, -1)))
            goto fail;

        /* the intermediate result is no longer needed */
        Py_DECREF(temp);
        temp = s;
    }

    return temp;

fail:
    /* do not leak the partially normalized word */
    Py_DECREF(temp);
    return NULL;
}

/*
 * normalize(data) -- normalize a string/unicode object or a list of them.
 *
 * For a list a new list with the normalized words is returned; for a single
 * string or unicode object the normalized unicode object is returned.  Any
 * other argument type raises TypeError.
 *
 * Returns a new reference, or NULL with a Python exception set.
 */
static PyObject *normalize(Normalizer *self, PyObject *args)
{
    int j;
    PyObject * data=NULL ;

    if (! (PyArg_ParseTuple(args,"O", &data)))
        return NULL;

    if (PyList_Check(data)) {
        PyObject *list;

        if (! (list = PyList_New(0)))
            return NULL;

        for (j=0; j<PyList_Size(data); j++) {
            PyObject *word, *item;
            int status;

            /* borrowed reference; hold it while the word is processed */
            if (! (item = PyList_GetItem(data, j))) {
                Py_DECREF(list);
                return NULL;
            }
            Py_INCREF(item);
            word = NormalizeWord(self, item);
            Py_DECREF(item);

            if (! word) {
                /* propagate the error instead of returning a short list */
                Py_DECREF(list);
                return NULL;
            }

            /* PyList_Append takes its own reference, so ours is released */
            status = PyList_Append(list, word);
            Py_DECREF(word);

            if (status < 0) {
                Py_DECREF(list);
                return NULL;
            }
        }

        return list;
    }

    if (PyUnicode_Check(data) || PyString_Check(data))
        return NormalizeWord(self,data);

    PyErr_SetString(PyExc_TypeError,"argument must be unicode or string");
    return NULL;
}

/*
 * Validate a translation table.
 *
 * o has to be a list or tuple whose elements are all 2-tuples holding
 * string or unicode objects.  Returns 1 if the table is acceptable,
 * otherwise sets TypeError and returns 0.
 */
int checkList(PyObject *o)
{
    int pos;

    if (!PyList_Check(o) && !PyTuple_Check(o)) {
        PyErr_SetString(PyExc_TypeError, "argument must be list or tuple of 2-tuples of strings");
        return 0;
    }

    for (pos = 0; pos < PySequence_Size(o); ++pos) {
        const char *complaint = NULL;
        PyObject *pair = PySequence_GetItem(o, pos);    /* new reference */

        if (!PyTuple_Check(pair)) {
            complaint = "nested arguments must be tuples";
        } else if (PyTuple_Size(pair) != 2) {
            complaint = "nested arguments must be 2-tuples of strings/unicode strings";
        } else {
            /* both lookups return borrowed references */
            PyObject *first  = PyTuple_GetItem(pair, 0);
            PyObject *second = PyTuple_GetItem(pair, 1);

            if (!PyString_Check(first) && !PyUnicode_Check(first))
                complaint = "arg 1 or 2-tuple must be string or unicode";
            else if (!PyString_Check(second) && !PyUnicode_Check(second))
                complaint = "arg 2 or 2-tuple must be string or unicode";
        }

        if (complaint != NULL) {
            PyErr_SetString(PyExc_TypeError, complaint);
            Py_DECREF(pair);
            return 0;
        }

        Py_DECREF(pair);
    }

    return 1;
}

/* Methods of Normalizer instances; searched by Normalizer_getattr(). */
static struct PyMethodDef Normalizer_methods[] = {
    {
        "getTable",
        (PyCFunction)getTable,
        METH_VARARGS,
        "getTable()-- return the normalization table"
    },
    {
        "normalize",
        (PyCFunction)normalize,
        METH_VARARGS,
        "normalize([string],[or list]) -- normalize a string or a list "
        "of strings/unicode strings"
    },
    { NULL, NULL }    /* sentinel */
};

/*
 * Attribute lookup for Normalizer instances: the requested name is
 * resolved against the Normalizer_methods table.
 */
static PyObject *
Normalizer_getattr(Normalizer *self, char *name)
{
    PyObject *owner = (PyObject *)self;

    return Py_FindMethod(Normalizer_methods, owner, name);
}

/* Text stored in the documentation slot of the type object below. */
static char NormalizerType__doc__[] = "Normalizer object";

/*
 * Type object for Normalizer instances.  Only the destructor and the
 * attribute lookup slot are filled in; all other slots are left empty.
 */
static PyTypeObject NormalizerType = {
    PyObject_HEAD_INIT(NULL)
    0,                                  /* ob_size */
    "Normalizer",                       /* tp_name */
    sizeof(Normalizer),                 /* tp_basicsize */
    0,                                  /* tp_itemsize */

    /* methods */
    (destructor)Normalizer_dealloc,     /* tp_dealloc */
    0,                                  /* tp_print */
    (getattrfunc)Normalizer_getattr,    /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */

    /* Space for future expansion */
    0, 0, 0, 0,

    NormalizerType__doc__               /* Documentation string */
};


/* Keyword names accepted by newNormalizer(), in positional order. */
static char *Normalizer_args[] = {
    "translation",
    "encoding",
    NULL            /* terminator */
};

/*
 * Build self->table from the user supplied translation table.
 *
 * table is expected to be a sequence of 2-tuples of string/unicode objects
 * (see checkList()).  Each pair is stored as a new (key, value) tuple in
 * which byte strings have been decoded to unicode with self->encoding.
 *
 * The function has no return value.  On failure a Python exception is left
 * set and self->table holds the entries converted so far, or is NULL if
 * the list itself could not be allocated; callers have to test
 * PyErr_Occurred() to detect this.
 */
void CopyTranslationTable(Normalizer *self, PyObject *table) {

    int i;
    PyObject *seq;

    if (! (self->table = PyList_New(0)))
        return;

    /* new reference, released at the end of the function */
    if (! (seq = PySequence_Fast(table, "argument must be sequence")))
        return;

    for (i=0; i<PyObject_Length(seq); i++) {
        PyObject *item, *key, *value, *tp;
        int status;

        item = PySequence_Fast_GET_ITEM(seq, i);

        /* borrowed references; NULL if item is not a tuple of length 2 */
        if (! (key = PyTuple_GetItem(item,0)))
            break;
        if (! (value = PyTuple_GetItem(item,1)))
            break;

        /* after this step key and value are owned references */
        if (PyString_Check(key))
            key = PyUnicode_FromEncodedObject(key, self->encoding,"strict");
        else
            Py_INCREF(key);

        if (! key)
            break;

        if (PyString_Check(value))
            value = PyUnicode_FromEncodedObject(value, self->encoding,"strict");
        else
            Py_INCREF(value);

        if (! value) {
            Py_DECREF(key);
            break;
        }

        /* "O" takes its own references, so ours can be dropped right away */
        tp = Py_BuildValue("(OO)",key,value);
        Py_DECREF(value);
        Py_DECREF(key);

        if (! tp)
            break;

        status = PyList_Append(self->table, tp);
        Py_DECREF(tp);

        if (status < 0)
            break;
    }

    Py_DECREF(seq);
}



/*
 * Normalizer(translation, encoding='latin1') -- module level constructor.
 *
 * translation has to be a list or tuple of 2-tuples of string/unicode
 * objects (enforced by checkList()); encoding names the codec used to
 * decode byte strings.
 *
 * Returns a new Normalizer instance, or NULL with a Python exception set.
 */
static PyObject *
newNormalizer(PyObject *modinfo, PyObject *args, PyObject *keywds)
{
    Normalizer *self=NULL;
    PyObject *table;
    PyObject *encoding_obj;
    char * encoding = "latin1";

    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|s",Normalizer_args,&table,&encoding)))
        return NULL;

    /* validate before allocating, so a rejected table cannot leak an
       instance */
    if (! checkList(table))
        return NULL;

    /* The pointer delivered by the "s" format belongs to the argument
       object and becomes invalid once the arguments are released.  The
       struct only stores a raw char pointer, so the name is copied into an
       interned string whose reference is intentionally never given back on
       success: it has to outlive every instance.  Interning keeps the cost
       at one string per distinct encoding name. */
    if (! (encoding_obj = PyString_InternFromString(encoding)))
        return NULL;

    if (! (self = PyObject_NEW(Normalizer, &NormalizerType))) {
        Py_DECREF(encoding_obj);
        return NULL;
    }

    /* PyObject_NEW does not initialize the fields after the object header */
    self->table = NULL;
    self->encoding = PyString_AS_STRING(encoding_obj);

    CopyTranslationTable(self,table);

    /* CopyTranslationTable() has no return value; failure shows up as a
       missing table or a pending exception */
    if (self->table == NULL || PyErr_Occurred()) {
        if (self->table == NULL) {
            if (! PyErr_Occurred())
                PyErr_NoMemory();
            /* give the destructor a valid object to release, it must
               never be handed a NULL table */
            Py_INCREF(Py_None);
            self->table = Py_None;
        }
        Py_DECREF(self);
        /* no instance refers to the string through this call anymore */
        Py_DECREF(encoding_obj);
        return NULL;
    }

    return (PyObject*) self;
}

/* Functions exported by the module itself. */
static struct PyMethodDef Normalizer_module_methods[] = {
    {
        "Normalizer",
        (PyCFunction) newNormalizer,
        METH_VARARGS | METH_KEYWORDS,
        "Normalizer(list, [encoding='latin1')-- Normalizer module - "
        "takes a list of 2-tuples of strings/unicode strings"
    },
    { NULL, NULL }    /* sentinel */
};


void
initnormalizer(void)
{
    Py_InitModule3("normalizer", Normalizer_module_methods,
                   "TextIndexNG Normalizer module");
}




See more files for this project here

TextIndexNG

The next generation fulltext index for the Zope Catalog. For details see http://www.zope.org/Members/ajung/TextIndexNG/wiki/TextIndexNG

Project homepage: http://sourceforge.net/projects/textindexng
Programming language(s): C,Python
License: other

  tests/
    __init__.py
    testNormalizer.py
    testNormalizer.pyc
  __init__.py
  normalizer.c