Thomas Gleixner | 2874c5f | 2019-05-27 08:55:01 +0200 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 2 | /* |
| 3 | * lib/textsearch.c Generic text search interface |
| 4 | * |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 5 | * Authors: Thomas Graf <tgraf@suug.ch> |
Pablo Neira Ayuso | e03ba84 | 2007-12-01 00:03:52 +1100 | [diff] [blame] | 6 | * Pablo Neira Ayuso <pablo@netfilter.org> |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 7 | * |
| 8 | * ========================================================================== |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 9 | */ |
| 10 | |
| 11 | /** |
| 12 | * DOC: ts_intro |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 13 | * INTRODUCTION |
| 14 | * |
Jesper Dangaard Brouer | de0368d | 2011-01-24 02:41:37 +0000 | [diff] [blame] | 15 | * The textsearch infrastructure provides text searching facilities for |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 16 | * both linear and non-linear data. Individual search algorithms are |
| 17 | * implemented in modules and chosen by the user. |
| 18 | * |
| 19 | * ARCHITECTURE |
| 20 | * |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 21 | * .. code-block:: none |
| 22 | * |
| 23 | * User |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 24 | * +----------------+ |
| 25 | * | finish()|<--------------(6)-----------------+ |
| 26 | * |get_next_block()|<--------------(5)---------------+ | |
| 27 | * | | Algorithm | | |
| 28 | * | | +------------------------------+ |
| 29 | * | | | init() find() destroy() | |
| 30 | * | | +------------------------------+ |
| 31 | * | | Core API ^ ^ ^ |
| 32 | * | | +---------------+ (2) (4) (8) |
| 33 | * | (1)|----->| prepare() |---+ | | |
| 34 | * | (3)|----->| find()/next() |-----------+ | |
| 35 | * | (7)|----->| destroy() |----------------------+ |
| 36 | * +----------------+ +---------------+ |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 37 | * |
| 38 | * (1) User configures a search by calling textsearch_prepare() specifying |
| 39 | * the search parameters such as the pattern and algorithm name. |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 40 | * (2) Core requests the algorithm to allocate and initialize a search |
| 41 | * configuration according to the specified parameters. |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 42 | * (3) User starts the search(es) by calling textsearch_find() or |
| 43 | * textsearch_next() to fetch subsequent occurrences. A state variable |
| 44 | * is provided to the algorithm to store persistent variables. |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 45 | * (4) Core eventually resets the search offset and forwards the find() |
| 46 | * request to the algorithm. |
Jesper Dangaard Brouer | de0368d | 2011-01-24 02:41:37 +0000 | [diff] [blame] | 47 | * (5) Algorithm calls get_next_block() provided by the user continuously |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 48 | * to fetch the data to be searched in block by block. |
| 49 | * (6) Algorithm invokes finish() after the last call to get_next_block |
| 50 | * to clean up any leftovers from get_next_block. (Optional) |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 51 | * (7) User destroys the configuration by calling textsearch_destroy(). |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 52 | * (8) Core notifies the algorithm to destroy algorithm specific |
| 53 | * allocations. (Optional) |
| 54 | * |
| 55 | * USAGE |
| 56 | * |
| 57 | * Before a search can be performed, a configuration must be created |
Joonwoo Park | b9c7967 | 2008-07-08 02:37:31 -0700 | [diff] [blame] | 58 | * by calling textsearch_prepare() specifying the searching algorithm, |
| 59 | * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE |
| 60 | * to perform case insensitive matching. But it might slow down |
| 61 | * performance of algorithm, so you should use it at own your risk. |
Jesper Dangaard Brouer | de0368d | 2011-01-24 02:41:37 +0000 | [diff] [blame] | 62 | * The returned configuration may then be used for an arbitrary |
Joonwoo Park | b9c7967 | 2008-07-08 02:37:31 -0700 | [diff] [blame] | 63 | * amount of times and even in parallel as long as a separate struct |
| 64 | * ts_state variable is provided to every instance. |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 65 | * |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 66 | * The actual search is performed by either calling |
| 67 | * textsearch_find_continuous() for linear data or by providing |
| 68 | * an own get_next_block() implementation and |
| 69 | * calling textsearch_find(). Both functions return |
Jesper Dangaard Brouer | de0368d | 2011-01-24 02:41:37 +0000 | [diff] [blame] | 70 | * the position of the first occurrence of the pattern or UINT_MAX if |
| 71 | * no match was found. Subsequent occurrences can be found by calling |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 72 | * textsearch_next() regardless of the linearity of the data. |
| 73 | * |
| 74 | * Once you're done using a configuration it must be given back via |
| 75 | * textsearch_destroy. |
| 76 | * |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 77 | * EXAMPLE:: |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 78 | * |
| 79 | * int pos; |
| 80 | * struct ts_config *conf; |
| 81 | * struct ts_state state; |
| 82 | * const char *pattern = "chicken"; |
| 83 | * const char *example = "We dance the funky chicken"; |
| 84 | * |
| 85 | * conf = textsearch_prepare("kmp", pattern, strlen(pattern), |
| 86 | * GFP_KERNEL, TS_AUTOLOAD); |
| 87 | * if (IS_ERR(conf)) { |
| 88 | * err = PTR_ERR(conf); |
| 89 | * goto errout; |
| 90 | * } |
| 91 | * |
Randy Dunlap | 2105b52 | 2019-10-02 17:08:18 -0700 | [diff] [blame] | 92 | * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 93 | * if (pos != UINT_MAX) |
Randy Dunlap | 2105b52 | 2019-10-02 17:08:18 -0700 | [diff] [blame] | 94 | * panic("Oh my god, dancing chickens at %d\n", pos); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 95 | * |
| 96 | * textsearch_destroy(conf); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 97 | */ |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 98 | /* ========================================================================== */ |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 99 | |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 100 | #include <linux/module.h> |
| 101 | #include <linux/types.h> |
| 102 | #include <linux/string.h> |
| 103 | #include <linux/init.h> |
Franck Bui-Huu | 8252474 | 2008-05-12 21:21:05 +0200 | [diff] [blame] | 104 | #include <linux/rculist.h> |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 105 | #include <linux/rcupdate.h> |
| 106 | #include <linux/err.h> |
| 107 | #include <linux/textsearch.h> |
Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 108 | #include <linux/slab.h> |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 109 | |
| 110 | static LIST_HEAD(ts_ops); |
| 111 | static DEFINE_SPINLOCK(ts_mod_lock); |
| 112 | |
| 113 | static inline struct ts_ops *lookup_ts_algo(const char *name) |
| 114 | { |
| 115 | struct ts_ops *o; |
| 116 | |
| 117 | rcu_read_lock(); |
| 118 | list_for_each_entry_rcu(o, &ts_ops, list) { |
| 119 | if (!strcmp(name, o->name)) { |
| 120 | if (!try_module_get(o->owner)) |
| 121 | o = NULL; |
| 122 | rcu_read_unlock(); |
| 123 | return o; |
| 124 | } |
| 125 | } |
| 126 | rcu_read_unlock(); |
| 127 | |
| 128 | return NULL; |
| 129 | } |
| 130 | |
| 131 | /** |
| 132 | * textsearch_register - register a textsearch module |
| 133 | * @ops: operations lookup table |
| 134 | * |
| 135 | * This function must be called by textsearch modules to announce |
| 136 | * their presence. The specified &@ops must have %name set to a |
| 137 | * unique identifier and the callbacks find(), init(), get_pattern(), |
| 138 | * and get_pattern_len() must be implemented. |
| 139 | * |
| 140 | * Returns 0 or -EEXISTS if another module has already registered |
| 141 | * with same name. |
| 142 | */ |
| 143 | int textsearch_register(struct ts_ops *ops) |
| 144 | { |
| 145 | int err = -EEXIST; |
| 146 | struct ts_ops *o; |
| 147 | |
| 148 | if (ops->name == NULL || ops->find == NULL || ops->init == NULL || |
| 149 | ops->get_pattern == NULL || ops->get_pattern_len == NULL) |
| 150 | return -EINVAL; |
| 151 | |
| 152 | spin_lock(&ts_mod_lock); |
| 153 | list_for_each_entry(o, &ts_ops, list) { |
| 154 | if (!strcmp(ops->name, o->name)) |
| 155 | goto errout; |
| 156 | } |
| 157 | |
| 158 | list_add_tail_rcu(&ops->list, &ts_ops); |
| 159 | err = 0; |
| 160 | errout: |
| 161 | spin_unlock(&ts_mod_lock); |
| 162 | return err; |
| 163 | } |
Fabian Frederick | ce643a3 | 2014-06-04 16:11:58 -0700 | [diff] [blame] | 164 | EXPORT_SYMBOL(textsearch_register); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 165 | |
| 166 | /** |
| 167 | * textsearch_unregister - unregister a textsearch module |
| 168 | * @ops: operations lookup table |
| 169 | * |
| 170 | * This function must be called by textsearch modules to announce |
| 171 | * their disappearance for examples when the module gets unloaded. |
| 172 | * The &ops parameter must be the same as the one during the |
| 173 | * registration. |
| 174 | * |
| 175 | * Returns 0 on success or -ENOENT if no matching textsearch |
| 176 | * registration was found. |
| 177 | */ |
| 178 | int textsearch_unregister(struct ts_ops *ops) |
| 179 | { |
| 180 | int err = 0; |
| 181 | struct ts_ops *o; |
| 182 | |
| 183 | spin_lock(&ts_mod_lock); |
| 184 | list_for_each_entry(o, &ts_ops, list) { |
| 185 | if (o == ops) { |
| 186 | list_del_rcu(&o->list); |
| 187 | goto out; |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | err = -ENOENT; |
| 192 | out: |
| 193 | spin_unlock(&ts_mod_lock); |
| 194 | return err; |
| 195 | } |
Fabian Frederick | ce643a3 | 2014-06-04 16:11:58 -0700 | [diff] [blame] | 196 | EXPORT_SYMBOL(textsearch_unregister); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 197 | |
| 198 | struct ts_linear_state |
| 199 | { |
| 200 | unsigned int len; |
| 201 | const void *data; |
| 202 | }; |
| 203 | |
| 204 | static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, |
| 205 | struct ts_config *conf, |
| 206 | struct ts_state *state) |
| 207 | { |
| 208 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; |
| 209 | |
| 210 | if (likely(consumed < st->len)) { |
| 211 | *dst = st->data + consumed; |
| 212 | return st->len - consumed; |
| 213 | } |
| 214 | |
| 215 | return 0; |
| 216 | } |
| 217 | |
| 218 | /** |
| 219 | * textsearch_find_continuous - search a pattern in continuous/linear data |
| 220 | * @conf: search configuration |
| 221 | * @state: search state |
| 222 | * @data: data to search in |
| 223 | * @len: length of data |
| 224 | * |
| 225 | * A simplified version of textsearch_find() for continuous/linear data. |
| 226 | * Call textsearch_next() to retrieve subsequent matches. |
| 227 | * |
| 228 | * Returns the position of first occurrence of the pattern or |
Robert P. J. Day | 72fd4a3 | 2007-02-10 01:45:59 -0800 | [diff] [blame] | 229 | * %UINT_MAX if no occurrence was found. |
Randy Dunlap | 5968a70 | 2018-04-16 12:32:55 -0700 | [diff] [blame] | 230 | */ |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 231 | unsigned int textsearch_find_continuous(struct ts_config *conf, |
| 232 | struct ts_state *state, |
| 233 | const void *data, unsigned int len) |
| 234 | { |
| 235 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; |
| 236 | |
| 237 | conf->get_next_block = get_linear_data; |
| 238 | st->data = data; |
| 239 | st->len = len; |
| 240 | |
| 241 | return textsearch_find(conf, state); |
| 242 | } |
Fabian Frederick | ce643a3 | 2014-06-04 16:11:58 -0700 | [diff] [blame] | 243 | EXPORT_SYMBOL(textsearch_find_continuous); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 244 | |
| 245 | /** |
| 246 | * textsearch_prepare - Prepare a search |
| 247 | * @algo: name of search algorithm |
| 248 | * @pattern: pattern data |
| 249 | * @len: length of pattern |
| 250 | * @gfp_mask: allocation mask |
| 251 | * @flags: search flags |
| 252 | * |
| 253 | * Looks up the search algorithm module and creates a new textsearch |
Raphael Silva | fec2290 | 2014-10-13 15:51:34 -0700 | [diff] [blame] | 254 | * configuration for the specified pattern. |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 255 | * |
| 256 | * Note: The format of the pattern may not be compatible between |
| 257 | * the various search algorithms. |
| 258 | * |
| 259 | * Returns a new textsearch configuration according to the specified |
Pablo Neira Ayuso | e03ba84 | 2007-12-01 00:03:52 +1100 | [diff] [blame] | 260 | * parameters or a ERR_PTR(). If a zero length pattern is passed, this |
| 261 | * function returns EINVAL. |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 262 | */ |
| 263 | struct ts_config *textsearch_prepare(const char *algo, const void *pattern, |
Al Viro | fd4f2df | 2005-10-21 03:18:50 -0400 | [diff] [blame] | 264 | unsigned int len, gfp_t gfp_mask, int flags) |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 265 | { |
| 266 | int err = -ENOENT; |
| 267 | struct ts_config *conf; |
| 268 | struct ts_ops *ops; |
| 269 | |
Pablo Neira Ayuso | e03ba84 | 2007-12-01 00:03:52 +1100 | [diff] [blame] | 270 | if (len == 0) |
| 271 | return ERR_PTR(-EINVAL); |
| 272 | |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 273 | ops = lookup_ts_algo(algo); |
Johannes Berg | a00caa1 | 2008-07-08 19:00:24 +0200 | [diff] [blame] | 274 | #ifdef CONFIG_MODULES |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 275 | /* |
| 276 | * Why not always autoload you may ask. Some users are |
| 277 | * in a situation where requesting a module may deadlock, |
| 278 | * especially when the module is located on a NFS mount. |
| 279 | */ |
| 280 | if (ops == NULL && flags & TS_AUTOLOAD) { |
| 281 | request_module("ts_%s", algo); |
| 282 | ops = lookup_ts_algo(algo); |
| 283 | } |
| 284 | #endif |
| 285 | |
| 286 | if (ops == NULL) |
| 287 | goto errout; |
| 288 | |
Joonwoo Park | b9c7967 | 2008-07-08 02:37:31 -0700 | [diff] [blame] | 289 | conf = ops->init(pattern, len, gfp_mask, flags); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 290 | if (IS_ERR(conf)) { |
| 291 | err = PTR_ERR(conf); |
| 292 | goto errout; |
| 293 | } |
| 294 | |
| 295 | conf->ops = ops; |
| 296 | return conf; |
| 297 | |
| 298 | errout: |
| 299 | if (ops) |
| 300 | module_put(ops->owner); |
| 301 | |
| 302 | return ERR_PTR(err); |
| 303 | } |
Fabian Frederick | ce643a3 | 2014-06-04 16:11:58 -0700 | [diff] [blame] | 304 | EXPORT_SYMBOL(textsearch_prepare); |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 305 | |
| 306 | /** |
| 307 | * textsearch_destroy - destroy a search configuration |
| 308 | * @conf: search configuration |
| 309 | * |
| 310 | * Releases all references of the configuration and frees |
| 311 | * up the memory. |
| 312 | */ |
| 313 | void textsearch_destroy(struct ts_config *conf) |
| 314 | { |
| 315 | if (conf->ops) { |
| 316 | if (conf->ops->destroy) |
| 317 | conf->ops->destroy(conf); |
| 318 | module_put(conf->ops->owner); |
| 319 | } |
| 320 | |
| 321 | kfree(conf); |
| 322 | } |
Thomas Graf | 2de4ff7 | 2005-06-23 20:49:30 -0700 | [diff] [blame] | 323 | EXPORT_SYMBOL(textsearch_destroy); |