00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <string.h>
00021 #include <libaudcore/audstrings.h>
00022
00023 #include "audconfig.h"
00024 #include "chardet.h"
00025 #include "config.h"
00026 #include "i18n.h"
00027 #include "main.h"
00028 #include "debug.h"
00029
00030 #ifdef USE_CHARDET
00031 # include <libguess.h>
00032 #endif
00033
00034 gchar *
00035 cd_str_to_utf8(const gchar * str)
00036 {
00037 gchar *out_str;
00038
00039 if (str == NULL)
00040 return NULL;
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065 #ifdef USE_CHARDET
00066 if (libguess_validate_utf8(str, strlen(str)))
00067 return g_strdup(str);
00068 #else
00069 if (g_utf8_validate(str, strlen(str), NULL))
00070 return g_strdup(str);
00071 #endif
00072
00073
00074 if ((out_str = cd_chardet_to_utf8(str, strlen(str), NULL, NULL, NULL)) != NULL)
00075 return out_str;
00076
00077
00078 return str_to_utf8_fallback(str);
00079 }
00080
00081 gchar *
00082 cd_chardet_to_utf8(const gchar * str, gssize len, gsize * arg_bytes_read,
00083 gsize * arg_bytes_write, GError ** error)
00084 {
00085 if (error)
00086 * error = NULL;
00087
00088 #ifdef USE_CHARDET
00089 gchar *det = NULL, *encoding = NULL;
00090 #endif
00091 gchar *ret = NULL;
00092 gsize *bytes_read, *bytes_write;
00093 gsize my_bytes_read, my_bytes_write;
00094
00095 bytes_read = arg_bytes_read != NULL ? arg_bytes_read : &my_bytes_read;
00096 bytes_write = arg_bytes_write != NULL ? arg_bytes_write : &my_bytes_write;
00097
00098 g_return_val_if_fail(str != NULL, NULL);
00099
00100 #ifdef USE_CHARDET
00101 if (libguess_validate_utf8(str, len))
00102 #else
00103 if (g_utf8_validate(str, len, NULL))
00104 #endif
00105 {
00106 if (len < 0)
00107 len = strlen (str);
00108
00109 ret = g_malloc (len + 1);
00110 memcpy (ret, str, len);
00111 ret[len] = 0;
00112
00113 if (arg_bytes_read != NULL)
00114 * arg_bytes_read = len;
00115 if (arg_bytes_write != NULL)
00116 * arg_bytes_write = len;
00117
00118 return ret;
00119 }
00120 #ifdef USE_CHARDET
00121 if (cfg.chardet_detector)
00122 det = cfg.chardet_detector;
00123
00124 if (det)
00125 {
00126 AUDDBG("guess encoding (%s) %s\n", det, str);
00127 encoding = (gchar *) libguess_determine_encoding(str, len, det);
00128 AUDDBG("encoding = %s\n", encoding);
00129 if (encoding == NULL)
00130 goto fallback;
00131
00132 ret = g_convert (str, len, "UTF-8", encoding, bytes_read, bytes_write,
00133 (error && * error) ? NULL : error);
00134 }
00135
00136 fallback:
00137 #endif
00138
00139
00140 if (ret == NULL && cfg.chardet_fallback_s != NULL)
00141 {
00142 gchar **enc;
00143 for (enc = cfg.chardet_fallback_s; *enc != NULL; enc++)
00144 {
00145 ret = g_convert (str, len, "UTF-8", * enc, bytes_read, bytes_write,
00146 (error && * error) ? NULL : error);
00147 if (len == *bytes_read)
00148 break;
00149 else {
00150 g_free(ret);
00151 ret = NULL;
00152 }
00153 }
00154 }
00155
00156
00157 if (ret == NULL)
00158 ret = g_locale_to_utf8 (str, len, bytes_read, bytes_write,
00159 (error && * error) ? NULL : error);
00160
00161
00162 if (ret == NULL)
00163 ret = g_convert (str, len, "UTF-8", "ISO-8859-1", bytes_read,
00164 bytes_write, (error && * error) ? NULL : error);
00165
00166 if (ret != NULL)
00167 {
00168 if (g_utf8_validate(ret, -1, NULL))
00169 return ret;
00170 else
00171 {
00172 g_warning("g_utf8_validate() failed for converted string in cd_chardet_to_utf8: '%s'", ret);
00173 g_free(ret);
00174 return NULL;
00175 }
00176 }
00177
00178 return NULL;
00179 }
00180
00181
00182 void chardet_init(void)
00183 {
00184 str_to_utf8 = cd_str_to_utf8;
00185 chardet_to_utf8 = cd_chardet_to_utf8;
00186 }