GnuCash  5.6-150-g038405b370+
gnc-glib-utils.c
1 /********************************************************************\
2  * gnc-glib-utils.c -- utility functions based on glib functions *
3  * Copyright (C) 2006 David Hampton <hampton@employees.org> *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License*
16  * along with this program; if not, contact: *
17  * *
18  * Free Software Foundation Voice: +1-617-542-5942 *
19  * 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
20  * Boston, MA 02110-1301, USA gnu@gnu.org *
21  * *
22 \********************************************************************/
23 
24 #include <config.h>
25 #include <errno.h>
26 #include <stdio.h>
27 #include <signal.h>
28 #include <string.h>
29 #include <stdbool.h>
30 
31 #include "gnc-glib-utils.h"
32 
33 #ifdef G_OS_WIN32
34 #include <windows.h>
35 #endif
36 
/**
 * Collate two UTF-8 strings, treating NULL and the empty string alike.
 *
 * @param da First string; may be NULL or empty.
 * @param db Second string; may be NULL or empty.
 * @return The result of g_utf8_collate() when both strings are non-empty.
 *         Otherwise 1 if only @a da has content, -1 if only @a db has
 *         content, and 0 if neither does.
 */
int
safe_utf8_collate (const char * da, const char * db)
{
    const int a_has_text = (da != NULL) && (*da != '\0');
    const int b_has_text = (db != NULL) && (*db != '\0');

    if (a_has_text && b_has_text)
        return g_utf8_collate (da, db);

    /* A string with content sorts after one without:
     * (1,0) -> 1, (0,1) -> -1, (0,0) -> 0. */
    return a_has_text - b_has_text;
}
53 
/********************************************************************
 * The following definitions are adapted from gutf8.c, for use by
 * gnc_utf8_validate().  UTF8_LENGTH() is unchanged; UTF8_COMPUTE() and
 * UTF8_GET() differ from the gutf8.c versions only in macro hygiene
 * (parenthesized arguments and a do/while (0) wrapper, so each expands
 * to exactly one statement); UNICODE_VALID() has been modified to look
 * for the strict subset of UTF-8 that is valid XML text.
 */

/* Classify the lead byte Char of a UTF-8 sequence.
 *
 * Sets Len to the sequence length (1..6) implied by the lead byte and
 * Mask to the bit mask that extracts the payload bits from that byte.
 * Sets Len to -1 (leaving Mask untouched) when Char cannot start a
 * sequence.  Char is evaluated more than once.
 */
#define UTF8_COMPUTE(Char, Mask, Len) \
    do \
    { \
        if ((Char) < 128) \
        { \
            (Len) = 1; \
            (Mask) = 0x7f; \
        } \
        else if (((Char) & 0xe0) == 0xc0) \
        { \
            (Len) = 2; \
            (Mask) = 0x1f; \
        } \
        else if (((Char) & 0xf0) == 0xe0) \
        { \
            (Len) = 3; \
            (Mask) = 0x0f; \
        } \
        else if (((Char) & 0xf8) == 0xf0) \
        { \
            (Len) = 4; \
            (Mask) = 0x07; \
        } \
        else if (((Char) & 0xfc) == 0xf8) \
        { \
            (Len) = 5; \
            (Mask) = 0x03; \
        } \
        else if (((Char) & 0xfe) == 0xfc) \
        { \
            (Len) = 6; \
            (Mask) = 0x01; \
        } \
        else \
            (Len) = -1; \
    } \
    while (0)

/* Number of bytes the shortest UTF-8 encoding of code point Char takes. */
#define UTF8_LENGTH(Char) \
    ((Char) < 0x80 ? 1 : \
     ((Char) < 0x800 ? 2 : \
      ((Char) < 0x10000 ? 3 : \
       ((Char) < 0x200000 ? 4 : \
        ((Char) < 0x4000000 ? 5 : 6)))))

/* Decode the Len-byte sequence starting at Chars into Result.
 *
 * Mask is the lead-byte payload mask produced by UTF8_COMPUTE().  Count
 * is a caller-supplied scratch loop variable.  Result is set to -1 as
 * soon as a byte that is not a continuation byte (10xxxxxx) is found.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len) \
    do \
    { \
        (Result) = (Chars)[0] & (Mask); \
        for ((Count) = 1; (Count) < (Len); ++(Count)) \
        { \
            if (((Chars)[(Count)] & 0xc0) != 0x80) \
            { \
                (Result) = -1; \
                break; \
            } \
            (Result) <<= 6; \
            (Result) |= ((Chars)[(Count)] & 0x3f); \
        } \
    } \
    while (0)

/* True when Char is below 0x110000, is not a surrogate (0xD800-0xDFFF),
 * is not in 0xFDD0-0xFDEF, is not a control character other than tab,
 * line feed or carriage return, and does not end in 0xFFFE/0xFFFF.
 */
#define UNICODE_VALID(Char) \
    ((Char) < 0x110000 && \
     (((Char) & 0xFFFFF800) != 0xD800) && \
     ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
     ((Char) >= 0x20 || (Char) == 0x09 || (Char) == 0x0A || (Char) == 0x0D) && \
     (((Char) & 0xFFFE) != 0xFFFE))
122 
123 gboolean
124 gnc_utf8_validate(const gchar *str,
125  gssize max_len,
126  const gchar **end)
127 {
128 
129  const gchar *p;
130 
131  g_return_val_if_fail (str != NULL, FALSE);
132 
133  if (end)
134  *end = str;
135 
136  p = str;
137 
138  while ((max_len < 0 || (p - str) < max_len) && *p)
139  {
140  int i, mask = 0, len;
141  gunichar result;
142  unsigned char c = (unsigned char) * p;
143 
144  UTF8_COMPUTE (c, mask, len);
145 
146  if (len == -1)
147  break;
148 
149  /* check that the expected number of bytes exists in str */
150  if (max_len >= 0 &&
151  ((max_len - (p - str)) < len))
152  break;
153 
154  UTF8_GET (result, p, i, mask, len);
155 
156  if (UTF8_LENGTH (result) != len) /* Check for overlong UTF-8 */
157  break;
158 
159  if (result == (gunichar) - 1)
160  break;
161 
162  if (!UNICODE_VALID (result))
163  break;
164 
165  p += len;
166  }
167 
168  if (end)
169  *end = p;
170 
171  /* See that we covered the entire length if a length was
172  * passed in, or that we ended on a nul if not
173  */
174  if (max_len >= 0 &&
175  p != (str + max_len))
176  return FALSE;
177  else if (max_len < 0 &&
178  *p != '\0')
179  return FALSE;
180  else
181  return TRUE;
182 }
183 
184 void
186 {
187  gchar *end;
188  gint len;
189 
190  g_return_if_fail(str);
191 
192  if (gnc_utf8_validate(str, -1, (const gchar **)&end))
193  return;
194 
195  g_warning("Invalid utf8 string: %s", str);
196  do
197  {
198  len = strlen(end);
199  memmove(end, end + 1, len); /* shuffle the remainder one byte */
200  }
201  while (!gnc_utf8_validate(str, -1, (const gchar **)&end));
202 }
203 
204 gchar *
206 {
207  gchar *result = g_strdup (str);
208  gnc_utf8_strip_invalid (result);
209  return result;
210 }
211 
212 void
214 {
215  gchar *c = NULL;
216  const gchar *controls = "\b\f\n\r\t\v";
217  g_return_if_fail (str != NULL && strlen (str) > 0);
218  gnc_utf8_strip_invalid (str); /* First fix the UTF-8 */
219  for(c = str + strlen (str) - 1; c != str; --c)
220  {
221  gboolean line_control = ((unsigned char)(*c) < 0x20);
222  if (line_control || strchr(controls, *c) != NULL)
223  *c = ' '; /*replace controls with a single space. */
224  }
225 }
226 
227 gchar *
228 gnc_locale_from_utf8(const gchar* str)
229 {
230  gchar * locale_str;
231  gsize bytes_written = 0;
232  GError * err = NULL;
233 
234  /* Convert from UTF-8 to the encoding used in the current locale. */
235  locale_str = g_locale_from_utf8(str, -1, NULL, &bytes_written, &err);
236  if (err)
237  {
238  g_warning("g_locale_from_utf8 failed: %s", err->message);
239  g_error_free(err);
240  }
241 
242  return locale_str;
243 }
244 
245 gchar *
246 gnc_locale_to_utf8(const gchar* str)
247 {
248  gchar * utf8_str;
249  gsize bytes_written = 0;
250  GError * err = NULL;
251 
252  /* Convert to UTF-8 from the encoding used in the current locale. */
253  utf8_str = g_locale_to_utf8(str, -1, NULL, &bytes_written, &err);
254  if (err)
255  {
256  g_warning("g_locale_to_utf8 failed: %s", err->message);
257  g_error_free(err);
258  }
259 
260  return utf8_str;
261 }
262 
263 GList*
264 gnc_g_list_map(GList* list, GncGMapFunc fn, gpointer user_data)
265 {
266  GList *rtn = NULL;
267  for (; list != NULL; list = list->next)
268  {
269  rtn = g_list_prepend (rtn, (*fn)(list->data, user_data));
270  }
271  return g_list_reverse (rtn);
272 }
273 
274 void
275 gnc_g_list_cut(GList **list, GList *cut_point)
276 {
277  if (list == NULL || *list == NULL)
278  return;
279 
280  // if it's the first element.
281  if (cut_point->prev == NULL)
282  {
283  *list = NULL;
284  return;
285  }
286 
287  cut_point->prev->next = NULL;
288  cut_point->prev = NULL;
289 }
290 
291 static bool
292 utf8_strstr(char **needle, char *haystack)
293 {
294  char *tmp = g_utf8_normalize (*needle, -1, G_NORMALIZE_NFC);
295  if (haystack && *haystack)
296  {
297  char *place = strstr(haystack, tmp);
298  if (place)
299  {
300  g_free (tmp);
301  return false;
302  }
303  }
304  *needle = tmp; //so that haystack is already normalized
305  return true;
306 }
307 
308 static gchar *
309 gnc_g_list_stringjoin_internal (GList *list_of_strings, const gchar *sep, bool testdups)
310 {
311  gint seplen = sep ? strlen(sep) : 0;
312  gint length = -seplen;
313  gchar *retval, *p;
314 
315  for (GList *n = list_of_strings; n; n = n->next)
316  {
317  gchar *str = n->data;
318  if (str && *str)
319  length += strlen (str) + seplen;
320  }
321 
322  if (length <= 0)
323  return NULL;
324 
325  p = retval = (gchar*) g_malloc0 (length * sizeof (gchar) + 1);
326  for (GList *n = list_of_strings; n; n = n->next)
327  {
328  gchar *str = n->data;
329  if (!str || !str[0])
330  continue;
331  if (!testdups || utf8_strstr (&str, retval))
332  {
333  if (sep && (p != retval))
334  p = g_stpcpy (p, sep);
335  p = g_stpcpy (p, str);
336  if (testdups)
337  g_free (str);
338  }
339  }
340 
341  return retval;
342 }
343 
344 gchar *
345 gnc_g_list_stringjoin (GList *list_of_strings, const gchar *sep)
346 {
347  return gnc_g_list_stringjoin_internal (list_of_strings, sep, false);
348 }
349 
350 gchar *
351 gnc_g_list_stringjoin_nodups (GList *list_of_strings, const gchar *sep)
352 {
353  return gnc_g_list_stringjoin_internal (list_of_strings, sep, true);
354 }
355 
356 gint
357 gnc_list_length_cmp (const GList *list, size_t len)
358 {
359  for (GList *lst = (GList*) list;; lst = g_list_next (lst), len--)
360  {
361  if (!lst) return (len ? -1 : 0);
362  if (!len) return 1;
363  }
364 }
GList * gnc_g_list_map(GList *list, GncGMapFunc fn, gpointer user_data)
gchar * gnc_g_list_stringjoin(GList *list_of_strings, const gchar *sep)
Return a string joining a GList whose elements are gchar* strings.
int safe_utf8_collate(const char *da, const char *db)
Collate two UTF-8 strings.
void gnc_utf8_strip_invalid_and_controls(gchar *str)
Strip any non-UTF-8 characters and any control characters (everything < 0x20, \b, \f, \n, \r, \t, \v, ...
gchar * gnc_locale_from_utf8(const gchar *str)
Converts a string from UTF-8 to the encoding used for strings in the current locale.
gboolean gnc_utf8_validate(const gchar *str, gssize max_len, const gchar **end)
Validates UTF-8 encoded text for use in GnuCash.
void gnc_g_list_cut(GList **list, GList *cut_point)
Cut a GList into two parts; the cut_point is the beginning of the new list; list may need to be modif...
void gnc_utf8_strip_invalid(gchar *str)
Strip any non-UTF-8 characters from a string.
GLib helper routines.
gchar * gnc_g_list_stringjoin_nodups(GList *list_of_strings, const gchar *sep)
Like stringjoin but ensures that the string to be added isn't already part of the return string...
gchar * gnc_locale_to_utf8(const gchar *str)
Converts a string to UTF-8 from the encoding used for strings in the current locale.
gchar * gnc_utf8_strip_invalid_strdup(const gchar *str)
Returns a newly allocated copy of the given string but with any non-UTF-8 character stripped from it...
gint gnc_list_length_cmp(const GList *list, size_t len)
Scans the GList elements the minimum number of iterations required to test it against a specified siz...