/*
 * GLib-style names mapped onto plain C / <stdint.h> types so that code
 * written against the GLib spellings compiles without GLib itself.
 * (Stray numeric prefixes that had been fused onto each directive were
 * removed; "42#define" is not a valid preprocessing directive.)
 */
#define guchar unsigned char
#define guint unsigned int
#define gushort unsigned short
#define guint16 uint16_t
#define gunichar uint32_t

/* Allocation maps directly onto malloc(), so it can return NULL. */
#define g_malloc malloc
53#define g_return_val_if_fail(expr,val) { \
/*
 * Number of elements in ARR.  Only meaningful when ARR is a real array;
 * applied to a pointer it silently yields sizeof(pointer)/sizeof(element).
 */
#define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))

/* Expands to EXPR unchanged; no compiler hint is attached here. */
#define G_UNLIKELY(expr) (expr)

/*
 * Pointer to the position P advanced by the g_utf8_skip[] entry selected
 * by the byte at P.  The byte is read as unsigned so that values >= 0x80
 * index the table correctly even where plain char is signed.
 */
#define g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)])
137#define UTF8_COMPUTE(Char, Mask, Len) \
143 else if ((Char & 0xe0) == 0xc0) \
148 else if ((Char & 0xf0) == 0xe0) \
153 else if ((Char & 0xf8) == 0xf0) \
158 else if ((Char & 0xfc) == 0xf8) \
163 else if ((Char & 0xfe) == 0xfc) \
/*
 * Number of bytes (1..6) used to encode the code point CHAR with the
 * thresholds below: < 0x80 -> 1, < 0x800 -> 2, < 0x10000 -> 3,
 * < 0x200000 -> 4, < 0x4000000 -> 5, anything larger -> 6.
 *
 * CHAR is evaluated more than once, so it must not have side effects.
 * (Stray numeric prefixes fused onto the directive and its continuation
 * lines were removed so the macro is valid C again.)
 */
#define UTF8_LENGTH(Char)              \
  ((Char) < 0x80 ? 1 :                 \
   ((Char) < 0x800 ? 2 :               \
    ((Char) < 0x10000 ? 3 :            \
     ((Char) < 0x200000 ? 4 :          \
      ((Char) < 0x4000000 ? 5 : 6)))))
178#define UTF8_GET(Result, Chars, Count, Mask, Len) \
179 (Result) = (Chars)[0] & (Mask); \
180 for ((Count) = 1; (Count) < (Len); ++(Count)) \
182 if (((Chars)[(Count)] & 0xc0) != 0x80) \
188 (Result) |= ((Chars)[(Count)] & 0x3f); \
191static const gchar utf8_skip_data[256] = {
192 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
194 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
196 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
198 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
200 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
204 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
206 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
210static const gchar *
const g_utf8_skip = utf8_skip_data;
226g_utf8_strlen (
const gchar *p)
254g_utf8_get_char (
const gchar *p)
256 int i, mask = 0, len;
258 unsigned char c = (
unsigned char) *p;
297 else if (c < 0x10000)
302 else if (c < 0x200000)
307 else if (c < 0x4000000)
320 for (i = len - 1; i > 0; --i)
322 outbuf[i] = (c & 0x3f) | 0x80;
325 outbuf[0] = c | first;
369 while (p < str + len && *p)
381 for (i = 0; i < n_chars; i++)
406 wc |= (
guchar) (*p++) & 0x3f;
409 while ((wc & mask) != 0);
452 gchar *result = NULL;
457 for (i = 0; i < len; i++)
462 if (str[i] >= 0x80000000)
468 result =
g_malloc (result_length + 1);
474 while (p < result + result_length)
475 p += g_unichar_to_utf8 (str[i++], p);
480 *items_written = p - result;
/*
 * Two-level table lookup for part 1.
 *
 * combining_class_table_part1[Page] is either
 *   - >= G_UNICODE_MAX_TABLE_INDEX: the result for the whole page is stored
 *     inline as (entry - G_UNICODE_MAX_TABLE_INDEX), or
 *   - a row index into cclass_data, which is then indexed by Char.
 *
 * (Stray numeric prefixes fused onto the directive and its continuation
 * lines were removed so the macro is valid C again.)
 */
#define CC_PART1(Page, Char) \
  ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part1[Page]][Char]))
/*
 * Two-level table lookup for part 2; same scheme as part 1 but driven by
 * combining_class_table_part2.
 *
 * combining_class_table_part2[Page] is either
 *   - >= G_UNICODE_MAX_TABLE_INDEX: the result for the whole page is stored
 *     inline as (entry - G_UNICODE_MAX_TABLE_INDEX), or
 *   - a row index into cclass_data, which is then indexed by Char.
 *
 * (Stray numeric prefixes fused onto the directive and its continuation
 * lines were removed so the macro is valid C again.)
 */
#define CC_PART2(Page, Char) \
  ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part2[Page]][Char]))
520#define COMBINING_CLASS(Char) \
521 (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
522 ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
523 : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
524 ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
/*
 * Derived counts built from LCount, VCount and TCount, which are defined
 * elsewhere in the file (presumably the Hangul leading/vowel/trailing jamo
 * counts -- confirm against those definitions).
 *
 *   NCount = VCount * TCount
 *   SCount = LCount * NCount
 *
 * (Stray numeric prefixes fused onto the directives were removed.)
 */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)
559 for (i = 0; i < len - 1; ++i)
562 if (next != 0 && last > next)
566 for (j = i + 1; j > 0; --j)
572 string[j] =
string[j - 1];
604 r[2] =
TBase + TIndex;
618 if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
622 int half = (start + end) / 2;
623 if (ch == decomp_table[half].ch)
629 offset = decomp_table[half].compat_offset;
631 offset = decomp_table[half].canon_offset;
635 offset = decomp_table[half].canon_offset;
640 return &(decomp_expansion_string[offset]);
642 else if (half == start)
644 else if (ch > decomp_table[half].ch)
671 if ((SIndex %
TCount) == 0)
675 *result = a + TIndex;
/*
 * Two-level lookup into the composition index tables.
 *
 * compose_table[Page] is either
 *   - >= G_UNICODE_MAX_TABLE_INDEX: the index for the whole page is stored
 *     inline as (entry - G_UNICODE_MAX_TABLE_INDEX), or
 *   - a row index into compose_data, which is then indexed by Char.
 */
#define CI(Page, Char) \
  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[Page]][Char]))

/*
 * Composition index for code point CHAR, or 0 when its page (CHAR >> 8)
 * lies beyond COMPOSE_TABLE_LAST.
 *
 * Every use of the argument is parenthesised: the page test previously
 * expanded to "Char >> 8" bare, so an argument such as "a | b" was parsed
 * as "a | (b >> 8)" and selected the wrong branch.  CHAR is still evaluated
 * several times and must be free of side effects.
 */
#define COMPOSE_INDEX(Char) \
  ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
696 if (combine_hangul (a, b, result))
758 while ((max_len < 0 || p < str + max_len) && *p)
766 decompose_hangul (wc, NULL, &result_len);
771 decomp = find_decomposition (wc, do_compat);
774 n_wc += g_utf8_strlen (decomp);
789 while ((max_len < 0 || p < str + max_len) && *p)
794 gsize old_n_wc = n_wc;
799 decompose_hangul (wc, wc_buffer + n_wc, &result_len);
804 decomp = find_decomposition (wc, do_compat);
810 wc_buffer[n_wc++] = g_utf8_get_char (pd);
813 wc_buffer[n_wc++] = wc;
822 g_unicode_canonical_ordering (wc_buffer + last_start,
824 last_start = old_n_wc;
833 g_unicode_canonical_ordering (wc_buffer + last_start,
842 if (do_compose && n_wc > 0)
848 for (i = 0; i < n_wc; i++)
853 (last_cc == 0 || last_cc != cc) &&
854 combine (wc_buffer[last_start], wc_buffer[i],
855 &wc_buffer[last_start]))
857 for (j = i + 1; j < n_wc; j++)
858 wc_buffer[j - 1] = wc_buffer[j];
921 gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
922 gchar *result = NULL;
925 result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
947 return g_utf8_get_char (p);
964 return g_unichar_to_utf8 (c, outbuf);
995 if (u8_check ((
const uint8_t *) str, n))
998 return g_utf8_to_ucs4_fast (str, len, items_written);
1020 size_t *items_read,
size_t *items_written)
1022 return g_ucs4_to_utf8 (str, len, items_read, items_written);
1057 if (u8_check ((
const uint8_t *) str, n))
1079 uint32_t *result_wc;
#define COMPOSE_SECOND_SINGLE_START
#define COMPOSE_SECOND_START
#define COMPOSE_FIRST_START
#define COMPOSE_FIRST_SINGLE_START
#define G_UNICODE_NOT_PRESENT_OFFSET
#define g_return_val_if_fail(expr, val)
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
#define UTF8_COMPUTE(Char, Mask, Len)
int stringprep_unichar_to_utf8(uint32_t c, char *outbuf)
#define COMPOSE_INDEX(Char)
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
#define G_N_ELEMENTS(arr)
#define UTF8_LENGTH(Char)
char * stringprep_utf8_nfkc_normalize(const char *str, ssize_t len)
#define g_utf8_next_char(p)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
uint32_t stringprep_utf8_to_unichar(const char *p)
#define COMBINING_CLASS(Char)
@ G_NORMALIZE_DEFAULT_COMPOSE
@ G_NORMALIZE_ALL_COMPOSE
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)