libidn 1.43
stringprep.c
Go to the documentation of this file.
1/* stringprep.c --- Core stringprep implementation.
2 Copyright (C) 2002-2025 Simon Josefsson
3
4 This file is part of GNU Libidn.
5
6 GNU Libidn is free software: you can redistribute it and/or
7 modify it under the terms of either:
8
9 * the GNU Lesser General Public License as published by the Free
10 Software Foundation; either version 3 of the License, or (at
11 your option) any later version.
12
13 or
14
15 * the GNU General Public License as published by the Free
16 Software Foundation; either version 2 of the License, or (at
17 your option) any later version.
18
19 or both in parallel, as here.
20
21 GNU Libidn is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received copies of the GNU General Public License and
27 the GNU Lesser General Public License along with this program. If
28 not, see <https://www.gnu.org/licenses/>. */
29
30#ifdef HAVE_CONFIG_H
31# include "config.h"
32#endif
33
34#include <stdlib.h>
35#include <string.h>
36
37#include "stringprep.h"
38
39static int
40_compare_table_element (const uint32_t *c, const Stringprep_table_element *e)
41{
42 if (*c < e->start)
43 return -1;
44 if (*c > e->end)
45 return 1;
46 return 0;
47}
48
49static ssize_t
50stringprep_find_character_in_table (uint32_t ucs4,
51 const Stringprep_table_element *table,
52 size_t table_size)
53{
54 /* This is where typical uses of Libidn spends very close to all CPU
55 time and causes most cache misses. One could easily do a binary
56 search instead. Before rewriting this, I want hard evidence this
57 slowness is at all relevant in typical applications. (I don't
58 dispute optimization may improve matters significantly, I'm
59 mostly interested in having someone give real-world benchmark on
60 the impact of libidn.)
61 *
62 * Answer (Tim Rühsen rockdaboot@gmx.de):
63 * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles.
64 * That is almost 20s on my Intel i3 3.1GHz !!!
65 * That even makes fuzzing almost useless, eating up CPU cycles for nothing.
66 *
67 * The bsearch() approach takes ~3 billion CPU cycles.
68 * Almost a factor of 20 faster (but still pretty slow).
69 * There are still ~2 million calls to bsearch() which make ~30% of CPU time used.
70 * Most time is spent in _g_utf8_normalize_wc().
71
72 ssize_t i;
73
74 for (i = 0; table[i].start || table[i].end; i++)
75 if (ucs4 >= table[i].start &&
76 ucs4 <= (table[i].end ? table[i].end : table[i].start))
77 return i;
78 */
79
81 bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element),
82 (int (*)(const void *, const void *)) _compare_table_element);
83
84 return p ? (p - table) : -1;
85}
86
87static ssize_t
88stringprep_find_string_in_table (uint32_t *ucs4,
89 size_t ucs4len,
90 size_t *tablepos,
91 const Stringprep_table_element *table,
92 size_t table_size)
93{
94 size_t j;
95 ssize_t pos;
96
97 for (j = 0; j < ucs4len; j++)
98 if ((pos =
99 stringprep_find_character_in_table (ucs4[j], table,
100 table_size)) != -1)
101 {
102 if (tablepos)
103 *tablepos = pos;
104 return j;
105 }
106
107 return -1;
108}
109
110static int
111stringprep_apply_table_to_string (uint32_t *ucs4,
112 size_t *ucs4len,
113 size_t maxucs4len,
114 const Stringprep_table_element *table,
115 size_t table_size)
116{
117 ssize_t pos;
118 size_t i, maplen;
119 uint32_t *src = ucs4; /* points to unprocessed data */
120 size_t srclen = *ucs4len; /* length of unprocessed data */
121
122 while ((pos = stringprep_find_string_in_table (src, srclen,
123 &i, table,
124 table_size)) != -1)
125 {
126 for (maplen = STRINGPREP_MAX_MAP_CHARS;
127 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
128 ;
129
130 if (*ucs4len - 1 + maplen >= maxucs4len)
132
133 memmove (src + pos + maplen, src + pos + 1,
134 sizeof (uint32_t) * (srclen - pos - 1));
135 memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen);
136 *ucs4len = *ucs4len - 1 + maplen;
137 src += pos + maplen;
138 srclen -= pos + 1;
139 }
140
141 return STRINGPREP_OK;
142}
143
144#define INVERTED(x) ((x) & ((~0UL) >> 1))
145#define UNAPPLICAPLEFLAGS(flags, profileflags) \
146 ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
147 ( INVERTED(profileflags) && (profileflags & flags)))
148
180int
181stringprep_4i (uint32_t *ucs4, size_t *len, size_t maxucs4len,
183 const Stringprep_profile *profile)
184{
185 size_t i, j;
186 ssize_t k;
187 size_t ucs4len = *len;
188 int rc;
189
190 for (i = 0; profile[i].operation; i++)
191 {
192 switch (profile[i].operation)
193 {
194 case STRINGPREP_NFKC:
195 {
196 uint32_t *q = 0;
197
198 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
199 break;
200
201 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
202 /* Profile requires NFKC, but callee asked for no NFKC. */
204
205 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
206 if (!q)
208
209 for (ucs4len = 0; q[ucs4len]; ucs4len++)
210 ;
211
212 if (ucs4len >= maxucs4len)
213 {
214 free (q);
216 }
217
218 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
219
220 free (q);
221 }
222 break;
223
225 k = stringprep_find_string_in_table (ucs4, ucs4len,
226 NULL, profile[i].table,
227 profile[i].table_size);
228 if (k != -1)
230 break;
231
233 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
234 break;
235 if (flags & STRINGPREP_NO_UNASSIGNED)
236 {
237 k = stringprep_find_string_in_table
238 (ucs4, ucs4len, NULL, profile[i].table,
239 profile[i].table_size);
240 if (k != -1)
242 }
243 break;
244
246 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
247 break;
248 rc = stringprep_apply_table_to_string
249 (ucs4, &ucs4len, maxucs4len, profile[i].table,
250 profile[i].table_size);
251 if (rc != STRINGPREP_OK)
252 return rc;
253 break;
254
258 break;
259
260 case STRINGPREP_BIDI:
261 {
262 int done_prohibited = 0;
263 int done_ral = 0;
264 int done_l = 0;
265 size_t contains_ral = SIZE_MAX;
266 size_t contains_l = SIZE_MAX;
267
268 for (j = 0; profile[j].operation; j++)
269 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
270 {
271 done_prohibited = 1;
272 k = stringprep_find_string_in_table (ucs4, ucs4len,
273 NULL,
274 profile[j].table,
275 profile[j].table_size);
276 if (k != -1)
278 }
279 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
280 {
281 done_ral = 1;
282 if (stringprep_find_string_in_table
283 (ucs4, ucs4len, NULL, profile[j].table,
284 profile[j].table_size) != -1)
285 contains_ral = j;
286 }
287 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
288 {
289 done_l = 1;
290 if (stringprep_find_string_in_table
291 (ucs4, ucs4len, NULL, profile[j].table,
292 profile[j].table_size) != -1)
293 contains_l = j;
294 }
295
296 if (!done_prohibited || !done_ral || !done_l)
298
299 if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
301
302 if (contains_ral != SIZE_MAX)
303 {
304 if (!(stringprep_find_character_in_table
305 (ucs4[0], profile[contains_ral].table,
306 profile[contains_ral].table_size) != -1
307 &&
308 stringprep_find_character_in_table (ucs4[ucs4len - 1],
309 profile
310 [contains_ral].table,
311 profile
312 [contains_ral].table_size)
313 != -1))
315 }
316 }
317 break;
318
319 default:
321 break;
322 }
323 }
324
325 *len = ucs4len;
326
327 return STRINGPREP_OK;
328}
329
330static int
331stringprep_4zi_1 (uint32_t *ucs4, size_t ucs4len, size_t maxucs4len,
333 const Stringprep_profile *profile)
334{
335 int rc;
336
337 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
338 if (rc != STRINGPREP_OK)
339 return rc;
340
341 if (ucs4len >= maxucs4len)
343
344 ucs4[ucs4len] = 0;
345
346 return STRINGPREP_OK;
347}
348
373int
374stringprep_4zi (uint32_t *ucs4, size_t maxucs4len,
376 const Stringprep_profile *profile)
377{
378 size_t ucs4len;
379
380 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
381 ;
382
383 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
384}
385
413int
414stringprep (char *in,
415 size_t maxlen,
416 Stringprep_profile_flags flags, const Stringprep_profile *profile)
417{
418 int rc;
419 char *utf8 = NULL;
420 uint32_t *ucs4 = NULL;
421 size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1;
422
423 do
424 {
425 uint32_t *newp;
426
427 free (ucs4);
428 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
429 if (ucs4 == NULL)
431 maxucs4len = ucs4len + adducs4len;
432 newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
433 if (!newp)
434 {
435 free (ucs4);
437 }
438 ucs4 = newp;
439
440 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
441 adducs4len *= 2;
442 }
443 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
444 if (rc != STRINGPREP_OK)
445 {
446 free (ucs4);
447 return rc;
448 }
449
450 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
451 free (ucs4);
452 if (!utf8)
454
455 if (strlen (utf8) >= maxlen)
456 {
457 free (utf8);
459 }
460
461 strcpy (in, utf8); /* flawfinder: ignore */
462
463 free (utf8);
464
465 return STRINGPREP_OK;
466}
467
492int
493stringprep_profile (const char *in,
494 char **out,
495 const char *profile, Stringprep_profile_flags flags)
496{
497 const Stringprep_profiles *p;
498 char *str = NULL;
499 size_t len = strlen (in) + 1, addlen = len / 10 + 1;
500 int rc;
501
502 for (p = &stringprep_profiles[0]; p->name; p++)
503 if (strcmp (p->name, profile) == 0)
504 break;
505
506 if (!p || !p->name || !p->tables)
508
509 do
510 {
511 free (str);
512 str = (char *) malloc (len);
513 if (str == NULL)
515
516 strcpy (str, in);
517
518 rc = stringprep (str, len, flags, p->tables);
519 len += addlen;
520 addlen *= 2;
521 }
522 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
523
524 if (rc == STRINGPREP_OK)
525 *out = str;
526 else
527 free (str);
528
529 return rc;
530}
531
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
Definition nfkc.c:1019
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
Definition nfkc.c:1076
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition nfkc.c:986
const Stringprep_profiles stringprep_profiles[]
Definition profiles.c:34
int stringprep_4zi(uint32_t *ucs4, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition stringprep.c:374
int stringprep_4i(uint32_t *ucs4, size_t *len, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition stringprep.c:181
int stringprep_profile(const char *in, char **out, const char *profile, Stringprep_profile_flags flags)
Definition stringprep.c:493
#define UNAPPLICAPLEFLAGS(flags, profileflags)
Definition stringprep.c:145
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition stringprep.c:414
@ STRINGPREP_BIDI_PROHIBIT_TABLE
Definition stringprep.h:101
@ STRINGPREP_BIDI_RAL_TABLE
Definition stringprep.h:102
@ STRINGPREP_NFKC
Definition stringprep.h:96
@ STRINGPREP_PROHIBIT_TABLE
Definition stringprep.h:100
@ STRINGPREP_UNASSIGNED_TABLE
Definition stringprep.h:99
@ STRINGPREP_BIDI
Definition stringprep.h:97
@ STRINGPREP_MAP_TABLE
Definition stringprep.h:98
@ STRINGPREP_BIDI_L_TABLE
Definition stringprep.h:103
Stringprep_profile_flags
Definition stringprep.h:87
@ STRINGPREP_NO_NFKC
Definition stringprep.h:88
@ STRINGPREP_NO_UNASSIGNED
Definition stringprep.h:90
@ STRINGPREP_UNKNOWN_PROFILE
Definition stringprep.h:78
@ STRINGPREP_NFKC_FAILED
Definition stringprep.h:81
@ STRINGPREP_TOO_SMALL_BUFFER
Definition stringprep.h:75
@ STRINGPREP_ICONV_ERROR
Definition stringprep.h:79
@ STRINGPREP_MALLOC_ERROR
Definition stringprep.h:82
@ STRINGPREP_FLAG_ERROR
Definition stringprep.h:77
@ STRINGPREP_OK
Definition stringprep.h:67
@ STRINGPREP_CONTAINS_UNASSIGNED
Definition stringprep.h:69
@ STRINGPREP_CONTAINS_PROHIBITED
Definition stringprep.h:70
@ STRINGPREP_BIDI_CONTAINS_PROHIBITED
Definition stringprep.h:73
@ STRINGPREP_BIDI_BOTH_L_AND_RAL
Definition stringprep.h:71
@ STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
Definition stringprep.h:72
@ STRINGPREP_PROFILE_ERROR
Definition stringprep.h:76
#define STRINGPREP_MAX_MAP_CHARS
Definition stringprep.h:106
const char * name
Definition stringprep.h:171
const Stringprep_profile * tables
Definition stringprep.h:172
uint32_t map[STRINGPREP_MAX_MAP_CHARS]
Definition stringprep.h:135
Stringprep_profile_steps operation
Definition stringprep.h:150