docs/STABLE/gnc-glib-utils_8c_source.html

 /********************************************************************\
  * gnc-glib-utils.c -- utility functions based on glib functions    *
  * Copyright (C) 2006 David Hampton <hampton@employees.org>         *
  *                                                                  *
  * This program is free software; you can redistribute it and/or    *
  * modify it under the terms of the GNU General Public License as   *
  * published by the Free Software Foundation; either version 2 of   *
  * the License, or (at your option) any later version.              *
  *                                                                  *
  * This program is distributed in the hope that it will be useful,  *
  * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
  * GNU General Public License for more details.                     *
  *                                                                  *
  * You should have received a copy of the GNU General Public License*
  * along with this program; if not, contact:                        *
  *                                                                  *
  * Free Software Foundation           Voice:  +1-617-542-5942       *
  * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
  * Boston, MA  02110-1301,  USA       gnu@gnu.org                   *
  *                                                                  *
 \********************************************************************/

 #include <config.h>
 #include <errno.h>
 #include <stdio.h>
 #include <signal.h>
 #include <string.h>
 #include <stdbool.h>

 #include "gnc-glib-utils.h"

 #ifdef G_OS_WIN32
 #include <windows.h>
 #endif

 /* Collate two UTF-8 strings, treating NULL and "" as the same "absent"
  * value.
  *
  * Returns the result of g_utf8_collate() when both strings are non-empty.
  * Otherwise an absent string sorts before a present one: 1 if only da is
  * present, -1 if only db is present, 0 if neither is.
  */
 int
 safe_utf8_collate (const char * da, const char * db)
 {
     const int have_a = (da != NULL && *da != '\0');
     const int have_b = (db != NULL && *db != '\0');

     if (have_a && have_b)
         return g_utf8_collate (da, db);

     /* At most one side is present here. */
     return have_a - have_b;
 }

 /********************************************************************
  * The following definitions are from gutf8.c, for use by
  * gnc_utf8_validate().  These are all verbatim copies, except for
  * UNICODE_VALID() which has been modified to look for the strict
  * subset of UTF-8 that is valid XML text.
  */

 /* Classify the lead byte Char of a UTF-8 sequence.
  *
  * Sets Len to the sequence length (1 to 6) implied by the high bits of
  * Char and Mask to the bit mask selecting the payload bits of that lead
  * byte.  If Char matches none of the lead-byte patterns, Len is set to -1
  * and Mask is left untouched.
  *
  * Note: this expands to a bare if/else chain (not wrapped in
  * do { } while (0)) and evaluates Char several times, so pass a plain
  * variable and use it only as a full statement.
  */
 #define UTF8_COMPUTE(Char, Mask, Len)                         \
   if (Char < 128)                                 \
     {                                         \
       Len = 1;                                    \
       Mask = 0x7f;                                \
     }                                         \
   else if ((Char & 0xe0) == 0xc0)                         \
     {                                         \
       Len = 2;                                    \
       Mask = 0x1f;                                \
     }                                         \
   else if ((Char & 0xf0) == 0xe0)                         \
     {                                         \
       Len = 3;                                    \
       Mask = 0x0f;                                \
     }                                         \
   else if ((Char & 0xf8) == 0xf0)                         \
     {                                         \
       Len = 4;                                    \
       Mask = 0x07;                                \
     }                                         \
   else if ((Char & 0xfc) == 0xf8)                         \
     {                                         \
       Len = 5;                                    \
       Mask = 0x03;                                \
     }                                         \
   else if ((Char & 0xfe) == 0xfc)                         \
     {                                         \
       Len = 6;                                    \
       Mask = 0x01;                                \
     }                                         \
   else                                        \
     Len = -1;

 /* Number of bytes (1 to 6) that the shortest UTF-8 style encoding of the
  * code point Char occupies.  Evaluates Char more than once.
  */
 #define UTF8_LENGTH(Char)                  \
   ((Char) >= 0x4000000 ? 6 :               \
    (Char) >= 0x200000  ? 5 :               \
    (Char) >= 0x10000   ? 4 :               \
    (Char) >= 0x800     ? 3 :               \
    (Char) >= 0x80      ? 2 : 1)


 /* Decode the Len-byte sequence starting at Chars into Result.
  *
  * The first byte is masked with Mask; each following byte must have the
  * form 10xxxxxx and contributes its low six bits.  Count is used as the
  * loop index.  If a following byte does not have the continuation form,
  * Result is set to -1 and decoding stops at that byte.
  *
  * Note: this expands to several statements (an assignment followed by a
  * for loop) and is not wrapped in do { } while (0).
  */
 #define UTF8_GET(Result, Chars, Count, Mask, Len)                 \
   (Result) = (Chars)[0] & (Mask);                         \
   for ((Count) = 1; (Count) < (Len); ++(Count))                   \
     {                                         \
       if (((Chars)[(Count)] & 0xc0) != 0x80)                      \
     {                                     \
       (Result) = -1;                              \
       break;                                  \
     }                                     \
       (Result) <<= 6;                                 \
       (Result) |= ((Chars)[(Count)] & 0x3f);                      \
     }

 /* Non-zero when Char is a code point accepted by gnc_utf8_validate():
  *   - tab, line feed, carriage return, or anything from 0x20 upwards,
  *   - below 0x110000,
  *   - not in the surrogate block 0xD800..0xDFFF,
  *   - not in 0xFDD0..0xFDEF,
  *   - low 16 bits are neither 0xFFFE nor 0xFFFF.
  * Evaluates Char more than once.
  */
 #define UNICODE_VALID(Char)                                          \
     (((Char) == 0x09 || (Char) == 0x0A || (Char) == 0x0D ||          \
       (Char) >= 0x20) &&                                             \
      (Char) < 0x110000 &&                                            \
      (((Char) & 0xFFFFF800) != 0xD800) &&                            \
      !((Char) >= 0xFDD0 && (Char) <= 0xFDEF) &&                      \
      (((Char) & 0xFFFE) != 0xFFFE))

 /* Validate UTF-8 text against the strict subset accepted by
  * UNICODE_VALID().
  *
  * str     - text to check; must not be NULL (FALSE is returned otherwise).
  * max_len - number of bytes to examine, or a negative value to scan up to
  *           the terminating nul.
  * end     - optional; if non-NULL it receives a pointer to the first byte
  *           that was not accepted (or to the end of the examined text).
  *
  * Returns TRUE when the scan consumed exactly max_len bytes (max_len >= 0)
  * or stopped on the terminating nul (max_len < 0); FALSE otherwise.
  */
 gboolean
 gnc_utf8_validate(const gchar  *str,
                   gssize        max_len,
                   const gchar **end)
 {
     const gchar *cursor;

     g_return_val_if_fail (str != NULL, FALSE);

     if (end)
         *end = str;

     for (cursor = str;
          (max_len < 0 || (cursor - str) < max_len) && *cursor;
          cursor += 0)
     {
         int idx, mask = 0, seq_len;
         gunichar code_point;
         unsigned char lead = (unsigned char) * cursor;

         UTF8_COMPUTE (lead, mask, seq_len);

         /* Not a valid lead byte. */
         if (seq_len == -1)
             break;

         /* The sequence would run past the caller-supplied length. */
         if (max_len >= 0 && (max_len - (cursor - str)) < seq_len)
             break;

         UTF8_GET (code_point, cursor, idx, mask, seq_len);

         /* Reject overlong encodings, broken continuation bytes and
          * code points outside the permitted set, in that order.
          */
         if (UTF8_LENGTH (code_point) != seq_len ||
                 code_point == (gunichar) - 1 ||
                 !UNICODE_VALID (code_point))
             break;

         cursor += seq_len;
     }

     if (end)
         *end = cursor;

     /* With an explicit length the whole span must have been consumed;
      * without one the scan must have stopped on the nul terminator.
      */
     if (max_len >= 0)
         return (cursor == (str + max_len)) ? TRUE : FALSE;

     return (*cursor == '\0') ? TRUE : FALSE;
 }

 /* Remove, in place, every byte of str that gnc_utf8_validate() rejects.
  *
  * A valid string is left untouched.  Otherwise a warning is logged once
  * and the first offending byte is deleted repeatedly (by shifting the
  * tail, including its nul terminator, one byte to the left) until the
  * whole string validates.
  */
 void
 gnc_utf8_strip_invalid (gchar *str)
 {
     gchar *bad;
     gint tail_len;

     g_return_if_fail(str);

     if (gnc_utf8_validate(str, -1, (const gchar **)&bad))
         return;

     g_warning("Invalid utf8 string: %s", str);
     for (;;)
     {
         /* bad points at the first rejected byte; drop it. */
         tail_len = strlen(bad);
         memmove(bad, bad + 1, tail_len);

         if (gnc_utf8_validate(str, -1, (const gchar **)&bad))
             break;
     }
 }

 /* Duplicate str with g_strdup() and run gnc_utf8_strip_invalid() on the
  * copy.  The input is not modified; the copy is returned to the caller.
  */
 gchar *
 gnc_utf8_strip_invalid_strdup(const gchar* str)
 {
     gchar *cleaned;

     cleaned = g_strdup (str);
     gnc_utf8_strip_invalid (cleaned);

     return cleaned;
 }

 /* Strip invalid UTF-8 from str in place, then replace every remaining
  * control character (any byte below 0x20, which includes \b, \f, \n, \r,
  * \t and \v) with a single space.
  *
  * str must be a non-NULL, non-empty string; otherwise the function
  * returns without touching it.
  */
 void
 gnc_utf8_strip_invalid_and_controls (gchar *str)
 {
     gchar *c;

     g_return_if_fail (str != NULL && strlen (str) > 0);
     gnc_utf8_strip_invalid (str); /* First fix the UTF-8 */

     /* Walk forward to the terminator so that the first byte is examined
      * too, and so that a string emptied by the stripping step above is
      * handled without stepping outside the buffer.
      */
     for (c = str; *c != '\0'; ++c)
     {
         if ((unsigned char)(*c) < 0x20)
             *c = ' '; /* replace controls with a single space. */
     }
 }

 gchar *
 gnc_locale_from_utf8(const gchar* str)
 {
     gchar *   locale_str;
     gsize     bytes_written = 0;
     GError *  err = NULL;

     /* Convert from UTF-8 to the encoding used in the current locale. */
     locale_str = g_locale_from_utf8(str, -1, NULL, &bytes_written, &err);
     if (err)
     {
         g_warning("g_locale_from_utf8 failed: %s", err->message);
         g_error_free(err);
     }

     return locale_str;
 }

 gchar *
 gnc_locale_to_utf8(const gchar* str)
 {
     gchar *   utf8_str;
     gsize     bytes_written = 0;
     GError *  err = NULL;

     /* Convert to UTF-8 from the encoding used in the current locale. */
     utf8_str = g_locale_to_utf8(str, -1, NULL, &bytes_written, &err);
     if (err)
     {
         g_warning("g_locale_to_utf8 failed: %s", err->message);
         g_error_free(err);
     }

     return utf8_str;
 }

 /* Build a new list whose elements are fn(element, user_data) for each
  * element of list, in the same order.  The input list is not modified.
  */
 GList*
 gnc_g_list_map(GList* list, GncGMapFunc fn, gpointer user_data)
 {
     GList *mapped = NULL;
     GList *node = list;

     while (node != NULL)
     {
         gpointer value = fn (node->data, user_data);

         /* Prepend now, reverse once at the end. */
         mapped = g_list_prepend (mapped, value);
         node = node->next;
     }

     return g_list_reverse (mapped);
 }

 /* Cut a list in two at cut_point.
  *
  * After the call cut_point is the head of its own list and the node that
  * preceded it has become the tail of *list.  If cut_point has no
  * predecessor (it is the first element), *list is set to NULL.
  *
  * Nothing happens when list, *list or cut_point is NULL.
  */
 void
 gnc_g_list_cut(GList **list, GList *cut_point)
 {
     /* cut_point is dereferenced below, so it must be checked as well. */
     if (list == NULL || *list == NULL || cut_point == NULL)
         return;

     // if it's the first element.
     if (cut_point->prev == NULL)
     {
         *list = NULL;
         return;
     }

     /* Detach the two halves from each other. */
     cut_point->prev->next = NULL;
     cut_point->prev = NULL;
 }

 /* Test whether *needle, once NFC-normalized, is absent from haystack.
  *
  * needle   - in/out; on entry points at the string to look for.  When the
  *            function returns true, *needle is replaced by a newly
  *            allocated string that the caller must g_free(); when it
  *            returns false, *needle is left unchanged.
  * haystack - text to search with strstr(); NULL or "" never matches.
  *
  * Returns true if the normalized needle does not occur in haystack,
  * false if it does (or if needle / *needle is NULL).
  */
 static bool
 utf8_strstr(char **needle, char *haystack)
 {
     char *tmp;

     g_return_val_if_fail (needle != NULL && *needle != NULL, false);

     tmp = g_utf8_normalize (*needle, -1, G_NORMALIZE_NFC);

     /* g_utf8_normalize() returns NULL when its input is not valid UTF-8.
      * Fall back to an unmodified copy so that strstr() below and the
      * caller always receive a real, owned string.
      */
     if (tmp == NULL)
         tmp = g_strdup (*needle);

     if (haystack && *haystack && strstr (haystack, tmp) != NULL)
     {
         g_free (tmp);
         return false;
     }

     *needle = tmp; //so that haystack is already normalized
     return true;
 }

 /* Join the non-empty strings of a list, separated by sep.
  *
  * list_of_strings - list whose data pointers are nul-terminated strings;
  *                   NULL and empty entries are skipped.
  * sep             - separator inserted between entries; may be NULL.
  * testdups        - when true each entry is passed through utf8_strstr()
  *                   and is only appended (in the form utf8_strstr() hands
  *                   back) if it does not already occur in the text built
  *                   so far.
  *
  * Returns a newly allocated string to be released with g_free(), or NULL
  * when the list contributes no text.
  *
  * The result is accumulated in a GString rather than in a buffer sized
  * up front from the input lengths: with testdups the text that gets
  * copied is the normalized form, which can be longer than the input.
  */
 static gchar *
 gnc_g_list_stringjoin_internal (GList *list_of_strings, const gchar *sep, bool testdups)
 {
     GString *joined = g_string_new (NULL);

     for (GList *n = list_of_strings; n; n = n->next)
     {
         gchar *str = n->data;

         if (!str || !str[0])
             continue;

         /* On success utf8_strstr() replaces str with an owned string. */
         if (testdups && !utf8_strstr (&str, joined->str))
             continue;

         /* Defensive: never append a missing string. */
         if (!str)
             continue;

         if (sep && joined->len > 0)
             g_string_append (joined, sep);
         g_string_append (joined, str);

         if (testdups)
             g_free (str);
     }

     if (joined->len == 0)
     {
         g_string_free (joined, TRUE);
         return NULL;
     }

     /* Hand the character data to the caller, dropping only the wrapper. */
     return g_string_free (joined, FALSE);
 }

 /* Join the strings of list_of_strings with sep between them, keeping
  * duplicates.  Thin wrapper around gnc_g_list_stringjoin_internal().
  */
 gchar *
 gnc_g_list_stringjoin (GList *list_of_strings, const gchar *sep)
 {
     const bool skip_duplicates = false;

     return gnc_g_list_stringjoin_internal (list_of_strings, sep,
                                            skip_duplicates);
 }

 /* Join the strings of list_of_strings with sep between them, with the
  * duplicate test of gnc_g_list_stringjoin_internal() switched on.
  */
 gchar *
 gnc_g_list_stringjoin_nodups (GList *list_of_strings, const gchar *sep)
 {
     const bool skip_duplicates = true;

     return gnc_g_list_stringjoin_internal (list_of_strings, sep,
                                            skip_duplicates);
 }

 gint
 gnc_list_length_cmp (const GList *list, size_t len)
 {
     for (GList *lst = (GList*) list;; lst = g_list_next (lst), len--)
     {
         if (!lst) return (len ? -1 : 0);
         if (!len) return 1;
     }
 }
gnc_g_list_map
GList * gnc_g_list_map(GList *list, GncGMapFunc fn, gpointer user_data)
Definition: gnc-glib-utils.c:264

gnc_g_list_stringjoin
gchar * gnc_g_list_stringjoin(GList *list_of_strings, const gchar *sep)
Return a string joining a GList whose elements are gchar* strings.
Definition: gnc-glib-utils.c:345

safe_utf8_collate
int safe_utf8_collate(const char *da, const char *db)
Collate two UTF-8 strings.
Definition: gnc-glib-utils.c:38

gnc_utf8_strip_invalid_and_controls
void gnc_utf8_strip_invalid_and_controls(gchar *str)
Strip any non-utf8 characters and any control characters (everything < 0x20, \b, \f, \n, \r, \t, and \v) from a string.
Definition: gnc-glib-utils.c:213

gnc_locale_from_utf8
gchar * gnc_locale_from_utf8(const gchar *str)
Converts a string from UTF-8 to the encoding used for strings in the current locale.
Definition: gnc-glib-utils.c:228

gnc_utf8_validate
gboolean gnc_utf8_validate(const gchar *str, gssize max_len, const gchar **end)
Validates UTF-8 encoded text for use in GnuCash.
Definition: gnc-glib-utils.c:124

gnc_g_list_cut
void gnc_g_list_cut(GList **list, GList *cut_point)
Cut a GList into two parts; the cut_point is the beginning of the new list; list may need to be modif...
Definition: gnc-glib-utils.c:275

gnc_utf8_strip_invalid
void gnc_utf8_strip_invalid(gchar *str)
Strip any non-UTF-8 characters from a string.
Definition: gnc-glib-utils.c:185

gnc-glib-utils.h
GLib helper routines.

gnc_g_list_stringjoin_nodups
gchar * gnc_g_list_stringjoin_nodups(GList *list_of_strings, const gchar *sep)
Like stringjoin but ensures that the string to be added isn't already part of the return string...
Definition: gnc-glib-utils.c:351

gnc_locale_to_utf8
gchar * gnc_locale_to_utf8(const gchar *str)
Converts a string to UTF-8 from the encoding used for strings in the current locale.
Definition: gnc-glib-utils.c:246

gnc_utf8_strip_invalid_strdup
gchar * gnc_utf8_strip_invalid_strdup(const gchar *str)
Returns a newly allocated copy of the given string but with any non-UTF-8 character stripped from it...
Definition: gnc-glib-utils.c:205

gnc_list_length_cmp
gint gnc_list_length_cmp(const GList *list, size_t len)
Scans the GList elements the minimum number of iterations required to test it against a specified siz...
Definition: gnc-glib-utils.c:357