Back to index

citadel  8.12
ft_wordbreaker.h
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2005-2012 by the citadel.org team
00003  *
00004  *  This program is open source software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License version 3.
00006  *  
00007  *  
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  
00015  *  
00016  *  
00017  */
00018 
00019 
/*
 * This is an ID for the wordbreaker module.  If we do pluggable wordbreakers
 * later on, or even if we update this one, we can use a different ID so the
 * system knows it needs to throw away the existing index and rebuild it.
 *
 * In other words: bump this value whenever the tokenization rules change.
 */
#define FT_WORDBREAKER_ID	0x0021
00026 
/*
 * Minimum and maximum length of words to index.
 *
 * WB_MIN is inclusive: nothing with 3 or fewer chars is indexed.
 * NOTE(review): WB_MAX is presumably inclusive as well -- confirm against
 * the wordbreaker implementation.
 */
#define WB_MIN	4
#define WB_MAX	40
00032 
/*
 * Word breaker entry point.
 *
 * text        input string to examine; not modified through this pointer.
 * num_tokens  out-parameter (presumably receives the number of tokens
 *             produced -- confirm against the implementation).
 * tokens      out-parameter (presumably receives a pointer to an array of
 *             num_tokens ints; who frees it is not visible from this
 *             header -- TODO confirm).
 */
void wordbreaker(const char *text, int *num_tokens, int **tokens);

/*
 * Set up and tear down the noise word data.
 * NOTE(review): presumably these build and release the noise_word list --
 * verify in the implementation file.
 */
void initialize_noise_words(void);
void noise_word_cleanup(void);
00037 
00038 
/*
 * A noise word entry.  The struct refers to itself through `next`, so
 * entries can be chained one after another.
 *
 * This header has no file-level include guard, so the definition carries its
 * own: defining struct noise_word twice in one translation unit is a compile
 * error, which is what a second #include of this file would otherwise cause.
 */
#ifndef FT_NOISE_WORD_DEFINED
#define FT_NOISE_WORD_DEFINED

typedef struct noise_word noise_word;

struct noise_word {
	unsigned int len;	/* presumably the length of `word` -- confirm */
	char *word;		/* the word text */
	noise_word *next;	/* following entry in the chain */
};

#endif /* FT_NOISE_WORD_DEFINED */