blocxx
IConv.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2 * Copyright (C) 2005 Novell, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * - Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 *
10 * - Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * - Neither the name of Novell, Inc., nor the names of its
15 * contributors may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL Novell, Inc., OR THE
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************/
34 #include "blocxx/IConv.hpp"
35 
36 #if defined(BLOCXX_HAVE_ICONV_SUPPORT)
37 #include "blocxx/Assertion.hpp"
38 #include "blocxx/Format.hpp"
39 #include "blocxx/Exec.hpp"
40 
41 #include <cwchar>
42 #include <cwctype>
43 
44 #include <errno.h>
45 
46 namespace BLOCXX_NAMESPACE
47 {
48 
49 // -------------------------------------------------------------------
50 IConv_t::IConv_t()
51  : m_iconv(iconv_t(-1))
52 {
53 }
54 
55 
56 // -------------------------------------------------------------------
57 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
58 {
59  m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
60  if( m_iconv == iconv_t(-1))
61  {
62  BLOCXX_THROW(StringConversionException,
63  Format("Unable to convert from \"%1\" to \"%2\"",
64  fromEncoding, toEncoding).c_str());
65  }
66 }
67 
68 
69 // -------------------------------------------------------------------
70 IConv_t::~IConv_t()
71 {
72  close();
73 }
74 
75 
76 // -------------------------------------------------------------------
77 bool
78 IConv_t::open(const String &fromEncoding, const String &toEncoding)
79 {
80  close();
81  m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
82  return ( m_iconv != iconv_t(-1));
83 }
84 
85 
86 // -------------------------------------------------------------------
87 size_t
88 IConv_t::convert(char **istr, size_t *ibytesleft,
89  char **ostr, size_t *obytesleft)
90 {
91 #if defined(BLOCXX_ICONV_INBUF_CONST)
92  BLOCXX_ASSERT(istr != NULL);
93  const char *ptr = *istr;
94  int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft);
95  *istr = const_cast<char*>(ptr);
96  return ret;
97 #else
98  return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
99 #endif
100 }
101 
102 
103 // -------------------------------------------------------------------
104 bool
106 {
107  bool ret = true;
108  int err = errno;
109 
110  if( m_iconv != iconv_t(-1))
111  {
112  if( ::iconv_close(m_iconv) == -1)
113  ret = false;
114  m_iconv = iconv_t(-1);
115  }
116 
117  errno = err;
118  return ret;
119 }
120 
121 
122 // *******************************************************************
123 namespace IConv
124 {
125 
126 // -------------------------------------------------------------------
127 static inline void
128 mayThrowStringConversionException()
129 {
130  switch( errno)
131  {
132  case E2BIG:
133  break;
134 
135  case EILSEQ:
136  BLOCXX_THROW(StringConversionException,
137  "Invalid character or multibyte sequence in the input");
138  break;
139 
140  case EINVAL:
141  default:
142  BLOCXX_THROW(StringConversionException,
143  "Incomplete multibyte sequence in the input");
144  break;
145  }
146 }
147 
148 // -------------------------------------------------------------------
149 String
150 fromByteString(const String &enc, const char *str, size_t len)
151 {
152  if( !str || len == 0)
153  return String();
154 
155  IConv_t iconv(enc, "UTF-8"); // throws error
156  String out;
157  char obuf[4097];
158  char *optr;
159  size_t olen;
160 
161  char *sptr = (char *)str;
162  size_t slen = len;
163 
164  while( slen > 0)
165  {
166  obuf[0] = '\0';
167  optr = (char *)obuf;
168  olen = sizeof(obuf) - sizeof(obuf[0]);
169 
170  size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
171  if( ret == size_t(-1))
172  {
173  mayThrowStringConversionException();
174  }
175  *optr = '\0';
176  out += obuf;
177  }
178 
179  return out;
180 }
181 
182 
183 // -------------------------------------------------------------------
184 String
185 fromByteString(const String &enc, const std::string &str)
186 {
187  return fromByteString(enc, str.c_str(), str.length());
188 }
189 
190 
191 #ifdef BLOCXX_HAVE_STD_WSTRING
192 // -------------------------------------------------------------------
193 String
194 fromWideString(const String &enc, const std::wstring &str)
195 {
196  if( str.empty())
197  return String();
198 
199  IConv_t iconv(enc, "UTF-8"); // throws error
200  String out;
201  char obuf[4097];
202  char *optr;
203  size_t olen;
204 
205  char *sptr = (char *)str.c_str();
206  size_t slen = str.length() * sizeof(wchar_t);
207 
208  while( slen > 0)
209  {
210  obuf[0] = '\0';
211  optr = (char *)obuf;
212  olen = sizeof(obuf) - sizeof(obuf[0]);
213 
214  size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
215  if( ret == size_t(-1))
216  {
217  mayThrowStringConversionException();
218  }
219  *optr = '\0';
220  out += obuf;
221  }
222 
223  return out;
224 }
225 #endif
226 
227 // -------------------------------------------------------------------
228 std::string
229 toByteString(const String &enc, const String &utf8)
230 {
231  if( utf8.empty())
232  return std::string();
233 
234  IConv_t iconv("UTF-8", enc); // throws error
235  std::string out;
236  char obuf[4097];
237  char *optr;
238  size_t olen;
239 
240  char *sptr = (char *)utf8.c_str();
241  size_t slen = utf8.length();
242 
243  while( slen > 0)
244  {
245  obuf[0] = '\0';
246  optr = (char *)obuf;
247  olen = sizeof(obuf) - sizeof(obuf[0]);
248 
249  size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
250  if( ret == size_t(-1))
251  {
252  mayThrowStringConversionException();
253  }
254  *optr = '\0';
255  out += obuf;
256  }
257 
258  return out;
259 }
260 
261 #ifdef BLOCXX_HAVE_STD_WSTRING
262 // -------------------------------------------------------------------
263 std::wstring
264 toWideString(const String &enc, const String &utf8)
265 {
266  if( utf8.empty())
267  return std::wstring();
268 
269  IConv_t iconv("UTF-8", enc); // throws error
270  std::wstring out;
271  wchar_t obuf[1025];
272  char *optr;
273  size_t olen;
274 
275  char *sptr = (char *)utf8.c_str();
276  size_t slen = utf8.length();
277 
278  while( slen > 0)
279  {
280  obuf[0] = L'\0';
281  optr = (char *)obuf;
282  olen = sizeof(obuf) - sizeof(obuf[0]);
283 
284  size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
285  if( ret == size_t(-1))
286  {
287  mayThrowStringConversionException();
288  }
289  *((wchar_t *)optr) = L'\0';
290  out += obuf;
291  }
292 
293  return out;
294 }
295 #endif
296 
297 
298 #if 0
299 // -------------------------------------------------------------------
301 encodings()
302 {
303  StringArray command;
304  String output;
305  int status = -1;
306 
307  command.push_back("/usr/bin/iconv");
308  command.push_back("--list");
309 
310  try
311  {
312  Exec::executeProcessAndGatherOutput(command, output, status);
313  }
314  catch(...)
315  {
316  }
317 
318  if(status == 0)
319  {
320  return output.tokenize("\r\n");
321  }
322  return StringArray();
323 }
324 #endif
325 
326 
327 } // End of IConv namespace
328 } // End of BLOCXX_NAMESPACE
329 
330 #endif // BLOCXX_HAVE_ICONV_SUPPORT
331 
332 /* vim: set ts=8 sts=8 sw=8 ai noet: */
333