46 return g_utf8_collate(da, db);
61 #define UTF8_COMPUTE(Char, Mask, Len) \ 67 else if ((Char & 0xe0) == 0xc0) \ 72 else if ((Char & 0xf0) == 0xe0) \ 77 else if ((Char & 0xf8) == 0xf0) \ 82 else if ((Char & 0xfc) == 0xf8) \ 87 else if ((Char & 0xfe) == 0xfc) \ 95 #define UTF8_LENGTH(Char) \ 96 ((Char) < 0x80 ? 1 : \ 97 ((Char) < 0x800 ? 2 : \ 98 ((Char) < 0x10000 ? 3 : \ 99 ((Char) < 0x200000 ? 4 : \ 100 ((Char) < 0x4000000 ? 5 : 6))))) 103 #define UTF8_GET(Result, Chars, Count, Mask, Len) \ 104 (Result) = (Chars)[0] & (Mask); \ 105 for ((Count) = 1; (Count) < (Len); ++(Count)) \ 107 if (((Chars)[(Count)] & 0xc0) != 0x80) \ 113 (Result) |= ((Chars)[(Count)] & 0x3f); \ 116 #define UNICODE_VALID(Char) \ 117 ((Char) < 0x110000 && \ 118 (((Char) & 0xFFFFF800) != 0xD800) && \ 119 ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ 120 ((Char) >= 0x20 || (Char) == 0x09 || (Char) == 0x0A || (Char) == 0x0D) && \ 121 ((Char) & 0xFFFE) != 0xFFFE) 131 g_return_val_if_fail (str != NULL, FALSE);
138 while ((max_len < 0 || (p - str) < max_len) && *p)
140 int i, mask = 0, len;
142 unsigned char c = (
unsigned char) * p;
144 UTF8_COMPUTE (c, mask, len);
151 ((max_len - (p - str)) < len))
154 UTF8_GET (result, p, i, mask, len);
156 if (UTF8_LENGTH (result) != len)
159 if (result == (gunichar) - 1)
162 if (!UNICODE_VALID (result))
175 p != (str + max_len))
177 else if (max_len < 0 &&
190 g_return_if_fail(str);
195 g_warning(
"Invalid utf8 string: %s", str);
199 memmove(end, end + 1, len);
207 gchar *result = g_strdup (str);
216 const gchar *controls =
"\b\f\n\r\t\v";
217 g_return_if_fail (str != NULL && strlen (str) > 0);
219 for(c = str + strlen (str) - 1; c != str; --c)
221 gboolean line_control = ((
unsigned char)(*c) < 0x20);
222 if (line_control || strchr(controls, *c) != NULL)
231 gsize bytes_written = 0;
235 locale_str = g_locale_from_utf8(str, -1, NULL, &bytes_written, &err);
238 g_warning(
"g_locale_from_utf8 failed: %s", err->message);
249 gsize bytes_written = 0;
253 utf8_str = g_locale_to_utf8(str, -1, NULL, &bytes_written, &err);
256 g_warning(
"g_locale_to_utf8 failed: %s", err->message);
267 for (; list != NULL; list = list->next)
269 rtn = g_list_prepend (rtn, (*fn)(list->data, user_data));
271 return g_list_reverse (rtn);
277 if (list == NULL || *list == NULL)
281 if (cut_point->prev == NULL)
287 cut_point->prev->next = NULL;
288 cut_point->prev = NULL;
292 utf8_strstr(
char **needle,
char *haystack)
294 char *tmp = g_utf8_normalize (*needle, -1, G_NORMALIZE_NFC);
295 if (haystack && *haystack)
297 char *place = strstr(haystack, tmp);
309 gnc_g_list_stringjoin_internal (GList *list_of_strings,
const gchar *sep,
bool testdups)
311 gint seplen = sep ? strlen(sep) : 0;
312 gint length = -seplen;
315 for (GList *n = list_of_strings; n; n = n->next)
317 gchar *str = n->data;
319 length += strlen (str) + seplen;
325 p = retval = (gchar*) g_malloc0 (length *
sizeof (gchar) + 1);
326 for (GList *n = list_of_strings; n; n = n->next)
328 gchar *str = n->data;
331 if (!testdups || utf8_strstr (&str, retval))
333 if (sep && (p != retval))
334 p = g_stpcpy (p, sep);
335 p = g_stpcpy (p, str);
347 return gnc_g_list_stringjoin_internal (list_of_strings, sep,
false);
353 return gnc_g_list_stringjoin_internal (list_of_strings, sep,
true);
359 for (GList *lst = (GList*) list;; lst = g_list_next (lst), len--)
361 if (!lst)
return (len ? -1 : 0);
GList * gnc_g_list_map(GList *list, GncGMapFunc fn, gpointer user_data)
gchar * gnc_g_list_stringjoin(GList *list_of_strings, const gchar *sep)
Return a string joining a GList whose elements are gchar* strings.
int safe_utf8_collate(const char *da, const char *db)
Collate two UTF-8 strings.
void gnc_utf8_strip_invalid_and_controls(gchar *str)
Strip any non-UTF-8 characters and any control characters (everything < 0x20, \b, \f, \n, \r, \t, \v) from a string.
gchar * gnc_locale_from_utf8(const gchar *str)
Converts a string from UTF-8 to the encoding used for strings in the current locale.
gboolean gnc_utf8_validate(const gchar *str, gssize max_len, const gchar **end)
Validates UTF-8 encoded text for use in GnuCash.
void gnc_g_list_cut(GList **list, GList *cut_point)
Cut a GList into two parts; the cut_point is the beginning of the new list; list may need to be modified.
void gnc_utf8_strip_invalid(gchar *str)
Strip any non-UTF-8 characters from a string.
gchar * gnc_g_list_stringjoin_nodups(GList *list_of_strings, const gchar *sep)
Like stringjoin but ensures that the string to be added isn't already part of the return string...
gchar * gnc_locale_to_utf8(const gchar *str)
Converts a string to UTF-8 from the encoding used for strings in the current locale.
gchar * gnc_utf8_strip_invalid_strdup(const gchar *str)
Returns a newly allocated copy of the given string but with any non-UTF-8 character stripped from it...
gint gnc_list_length_cmp(const GList *list, size_t len)
Scans the GList elements the minimum number of iterations required to test it against a specified size.