libidn 1.43
tld.c
Go to the documentation of this file.
1/* tld.c --- Declarations for TLD restriction checking.
2 Copyright (C) 2004-2025 Simon Josefsson.
3 Copyright (C) 2003-2025 Free Software Foundation, Inc.
4
5 Author: Thomas Jacob, Internet24.de
6
7 This file is part of GNU Libidn.
8
9 GNU Libidn is free software: you can redistribute it and/or
10 modify it under the terms of either:
11
12 * the GNU Lesser General Public License as published by the Free
13 Software Foundation; either version 3 of the License, or (at
14 your option) any later version.
15
16 or
17
18 * the GNU General Public License as published by the Free
19 Software Foundation; either version 2 of the License, or (at
20 your option) any later version.
21
22 or both in parallel, as here.
23
24 GNU Libidn is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 General Public License for more details.
28
29 You should have received copies of the GNU General Public License and
30 the GNU Lesser General Public License along with this program. If
31 not, see <https://www.gnu.org/licenses/>. */
32
33#include <config.h>
34
35/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
36#include <stringprep.h>
37
38/* Get strcmp(). */
39#include <string.h>
40
41/* Get specifications. */
42#include <tld.h>
43
44/* Array of built-in domain restriction structures. See tlds.c. */
45extern const Tld_table *_tld_tables[];
46
59const Tld_table *
60tld_get_table (const char *tld, const Tld_table **tables)
61{
62 const Tld_table **tldtable = NULL;
63
64 if (!tld || !tables)
65 return NULL;
66
67 for (tldtable = tables; *tldtable; tldtable++)
68 if (!strcmp ((*tldtable)->name, tld))
69 return *tldtable;
70
71 return NULL;
72}
73
88const Tld_table *
89tld_default_table (const char *tld, const Tld_table **overrides)
90{
91 const Tld_table *tldtable = NULL;
92
93 if (!tld)
94 return NULL;
95
96 if (overrides)
97 tldtable = tld_get_table (tld, overrides);
98
99 if (!tldtable)
100 tldtable = tld_get_table (tld, _tld_tables);
101
102 return tldtable;
103}
104
/* DOTP(c): non-zero when code point c is one of the four dot
   characters U+002E, U+3002, U+FF0E or U+FF61.  The argument may be
   evaluated up to four times, so it must not have side effects. */
105#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
106 (c) == 0xFF0E || (c) == 0xFF61)
107
121int
122tld_get_4 (const uint32_t *in, size_t inlen, char **out)
123{
124 const uint32_t *ipos;
125 size_t olen;
126
127 *out = NULL;
128 if (!in || inlen == 0)
129 return TLD_NODATA;
130
131 ipos = &in[inlen - 1];
132 olen = 0;
133 /* Scan backwards for non(latin)letters. */
134 while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
135 (*ipos >= 0x61 && *ipos <= 0x7A)))
136 ipos--, olen++;
137
138 if (olen > 0 && ipos >= in && DOTP (*ipos))
139 {
140 /* Found something that appears a TLD. */
141 char *out_s = malloc (sizeof (char) * (olen + 1));
142 char *opos = out_s;
143
144 if (!opos)
145 return TLD_MALLOC_ERROR;
146
147 ipos++;
148 /* Transcribe to lowercase ascii string. */
149 for (; ipos < &in[inlen]; ipos++, opos++)
150 *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
151 *opos = 0;
152 *out = out_s;
153 return TLD_SUCCESS;
154 }
155
156 return TLD_NO_TLD;
157}
158
170int
171tld_get_4z (const uint32_t *in, char **out)
172{
173 const uint32_t *ipos = in;
174
175 if (!in)
176 return TLD_NODATA;
177
178 while (*ipos)
179 ipos++;
180
181 return tld_get_4 (in, ipos - in, out);
182}
183
196int
197tld_get_z (const char *in, char **out)
198{
199 uint32_t *iucs;
200 size_t i, ilen;
201 int rc;
202
203 ilen = strlen (in);
204 iucs = calloc (ilen, sizeof (*iucs));
205
206 if (!iucs)
207 return TLD_MALLOC_ERROR;
208
209 for (i = 0; i < ilen; i++)
210 iucs[i] = in[i];
211
212 rc = tld_get_4 (iucs, ilen, out);
213
214 free (iucs);
215
216 return rc;
217}
218
219/*
220 * tld_checkchar - verify that character is permitted
221 * @ch: 32 bit unicode character to check.
222 * @tld: A #Tld_table data structure to check @ch against.
223 *
224 * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
225 * character in @tld.
226 *
227 * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
228 * valid character for the TLD @tld or if @tld is %NULL,
229 * %TLD_INVALID if @ch is invalid as defined by @tld.
230 */
231static int
232_tld_checkchar (uint32_t ch, const Tld_table *tld)
233{
234 const Tld_table_element *s, *e, *m;
235
236 if (!tld)
237 return TLD_SUCCESS;
238
239 /* Check for [-a-z0-9.]. */
240 if ((ch >= 0x61 && ch <= 0x7A) ||
241 (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
242 return TLD_SUCCESS;
243
244 s = tld->valid;
245 e = s + tld->nvalid;
246 while (s < e)
247 {
248 m = s + ((e - s) >> 1);
249 if (ch < m->start)
250 e = m;
251 else if (ch > m->end)
252 s = m + 1;
253 else
254 return TLD_SUCCESS;
255 }
256
257 return TLD_INVALID;
258}
259
279int
280tld_check_4t (const uint32_t *in, size_t inlen, size_t *errpos,
281 const Tld_table *tld)
282{
283 const uint32_t *ipos;
284 int rc;
285
286 if (!tld) /* No data for TLD so everything is valid. */
287 return TLD_SUCCESS;
288
289 ipos = in;
290 while (ipos < &in[inlen])
291 {
292 rc = _tld_checkchar (*ipos, tld);
293 if (rc != TLD_SUCCESS)
294 {
295 if (errpos)
296 *errpos = ipos - in;
297 return rc;
298 }
299 ipos++;
300 }
301 return TLD_SUCCESS;
302}
303
321int
322tld_check_4tz (const uint32_t *in, size_t *errpos, const Tld_table *tld)
323{
324 const uint32_t *ipos = in;
325
326 if (!ipos)
327 return TLD_NODATA;
328
329 while (*ipos)
330 ipos++;
331
332 return tld_check_4t (in, ipos - in, errpos, tld);
333}
334
358int
359tld_check_4 (const uint32_t *in, size_t inlen, size_t *errpos,
360 const Tld_table **overrides)
361{
362 const Tld_table *tld;
363 char *domain;
364 int rc;
365
366 if (errpos)
367 *errpos = 0;
368
369 /* Get TLD name. */
370 rc = tld_get_4 (in, inlen, &domain);
371
372 if (rc != TLD_SUCCESS)
373 {
374 if (rc == TLD_NO_TLD) /* No TLD, say OK */
375 return TLD_SUCCESS;
376 else
377 return rc;
378 }
379
380 /* Retrieve appropriate data structure. */
381 tld = tld_default_table (domain, overrides);
382 free (domain);
383
384 return tld_check_4t (in, inlen, errpos, tld);
385}
386
408int
409tld_check_4z (const uint32_t *in, size_t *errpos, const Tld_table **overrides)
410{
411 const uint32_t *ipos = in;
412
413 if (!ipos)
414 return TLD_NODATA;
415
416 while (*ipos)
417 ipos++;
418
419 return tld_check_4 (in, ipos - in, errpos, overrides);
420}
421
445int
446tld_check_8z (const char *in, size_t *errpos, const Tld_table **overrides)
447{
448 uint32_t *iucs;
449 size_t ilen;
450 int rc;
451
452 if (!in)
453 return TLD_NODATA;
454
455 iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
456
457 if (!iucs)
458 return TLD_MALLOC_ERROR;
459
460 rc = tld_check_4 (iucs, ilen, errpos, overrides);
461
462 free (iucs);
463
464 return rc;
465}
466
490int
491tld_check_lz (const char *in, size_t *errpos, const Tld_table **overrides)
492{
493 char *utf8;
494 int rc;
495
496 if (!in)
497 return TLD_NODATA;
498
499 utf8 = stringprep_locale_to_utf8 (in);
500 if (!utf8)
501 return TLD_ICONV_ERROR;
502
503
504 rc = tld_check_8z (utf8, errpos, overrides);
505
506 free (utf8);
507
508 return rc;
509}
510
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition nfkc.c:986
IDNAPI char * stringprep_locale_to_utf8(const char *str)
Definition toutf8.c:145
uint32_t end
Definition tld.h:81
const Tld_table_element * valid
Definition tld.h:99
size_t nvalid
Definition tld.h:98
int tld_check_8z(const char *in, size_t *errpos, const Tld_table **overrides)
Definition tld.c:446
int tld_check_4z(const uint32_t *in, size_t *errpos, const Tld_table **overrides)
Definition tld.c:409
#define DOTP(c)
Definition tld.c:105
int tld_check_4(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table **overrides)
Definition tld.c:359
int tld_check_lz(const char *in, size_t *errpos, const Tld_table **overrides)
Definition tld.c:491
const Tld_table * tld_get_table(const char *tld, const Tld_table **tables)
Definition tld.c:60
const Tld_table * _tld_tables[]
Definition tlds.c:61
int tld_check_4tz(const uint32_t *in, size_t *errpos, const Tld_table *tld)
Definition tld.c:322
int tld_check_4t(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table *tld)
Definition tld.c:280
const Tld_table * tld_default_table(const char *tld, const Tld_table **overrides)
Definition tld.c:89
int tld_get_4z(const uint32_t *in, char **out)
Definition tld.c:171
int tld_get_4(const uint32_t *in, size_t inlen, char **out)
Definition tld.c:122
int tld_get_z(const char *in, char **out)
Definition tld.c:197
@ TLD_ICONV_ERROR
Definition tld.h:111
@ TLD_MALLOC_ERROR
Definition tld.h:110
@ TLD_SUCCESS
Definition tld.h:107
@ TLD_NODATA
Definition tld.h:109
@ TLD_NO_TLD
Definition tld.h:112
@ TLD_INVALID
Definition tld.h:108