Skip to content

Commit 2ee0b7c

Browse files
committed
Add utf_selector and is_utf_same.
1 parent 3e423a4 commit 2ee0b7c

File tree

3 files changed

+190
-92
lines changed

3 files changed

+190
-92
lines changed

README.md

+38-34
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,20 @@ Tested on following compilers:
2929

3030
```cpp
3131
// यूनिकोड
32-
static char const u8_orig[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1";
33-
using namespace ww898;
32+
static char const u8s[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1";
33+
using namespace ww898::utf;
3434
std::u16string u16;
35-
utf::convz<utf::utf8, utf::utf16>(u8_orig, std::back_inserter(u16));
35+
convz<utf_selector_t<decltype(*u8s)>, utf16>(u8s, std::back_inserter(u16));
3636
std::u32string u32;
37-
utf::conv<utf::utf16, utf::utf32>(u16.begin(), u16.end(), std::back_inserter(u32));
37+
conv<utf16, utf_selector_t<decltype(u32)::value_type>>(u16.begin(), u16.end(), std::back_inserter(u32));
3838
std::vector<char> u8;
39-
utf::convz<utf::utf32, utf::utf8>(u32.begin(), std::back_inserter(u8));
40-
std::wstring wstr;
41-
utf::convz<utf::utf8, utf::utfw>(u8.begin(), std::back_inserter(wstr));
39+
convz<utf32, utf8>(u32.data(), std::back_inserter(u8));
40+
std::wstring uw;
41+
conv<utf8, utfw>(u8s, u8s + sizeof(u8s) - 1, std::back_inserter(uw));
42+
static_assert(is_utf_same<decltype(*u8s), decltype(u8)::value_type>::value, "Fail");
43+
static_assert(1 ==
44+
(is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value ? 1 : 0) +
45+
(is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value ? 1 : 0), "Fail");
4246
```
4347
4448
## Performance
@@ -237,33 +241,33 @@ UTF8 ==> UTFW : 0.340384930s (+196.02%)
237241

238242
#### MacOS High Sierra v10.13.6 (Clang v6.0.0)
239243
```cpp
240-
Running 489 test cases...
241-
sizeof wchar_t: 4
242-
UTFW: UTF32
243-
Resolution: 2793647583
244-
UTF8 ==> UTF8 : 0.111039205s
245-
UTF8 ==> UTF16: 0.143631552s
246-
UTF8 ==> UTF32: 0.105463425s
247-
UTF8 ==> UTFW : 0.105106640s
248-
UTF16 ==> UTF8 : 0.158074631s
249-
UTF16 ==> UTF16: 0.055528284s
250-
UTF16 ==> UTF32: 0.063203264s
251-
UTF16 ==> UTFW : 0.063167823s
252-
UTF32 ==> UTF8 : 0.123977591s
253-
UTF32 ==> UTF16: 0.061630976s
254-
UTF32 ==> UTF32: 0.027633560s
255-
UTF32 ==> UTFW : 0.029324893s
256-
UTFW ==> UTF8 : 0.123948012s
257-
UTFW ==> UTF16: 0.064873256s
258-
UTFW ==> UTF32: 0.030606730s
259-
UTFW ==> UTFW : 0.027596372s
260-
codecvt_utf8_utf16<char16_t>:
261-
UTF16 ==> UTF8 : 0.151798551s (-3.97%)
262-
UTF8 ==> UTF16: 0.256203078s (+78.38%)
263-
codecvt_utf8<wchar_t>:
264-
UTFW ==> UTF8 : 0.137034385s (+10.56%)
265-
UTF8 ==> UTFW : 0.360953804s (+243.42%)
266-
244+
Running 489 test cases...
245+
sizeof wchar_t: 4
246+
UTFW: UTF32
247+
Resolution: 2793647583
248+
UTF8 ==> UTF8 : 0.111039205s
249+
UTF8 ==> UTF16: 0.143631552s
250+
UTF8 ==> UTF32: 0.105463425s
251+
UTF8 ==> UTFW : 0.105106640s
252+
UTF16 ==> UTF8 : 0.158074631s
253+
UTF16 ==> UTF16: 0.055528284s
254+
UTF16 ==> UTF32: 0.063203264s
255+
UTF16 ==> UTFW : 0.063167823s
256+
UTF32 ==> UTF8 : 0.123977591s
257+
UTF32 ==> UTF16: 0.061630976s
258+
UTF32 ==> UTF32: 0.027633560s
259+
UTF32 ==> UTFW : 0.029324893s
260+
UTFW ==> UTF8 : 0.123948012s
261+
UTFW ==> UTF16: 0.064873256s
262+
UTFW ==> UTF32: 0.030606730s
263+
UTFW ==> UTFW : 0.027596372s
264+
codecvt_utf8_utf16<char16_t>:
265+
UTF16 ==> UTF8 : 0.151798551s (-3.97%)
266+
UTF8 ==> UTF16: 0.256203078s (+78.38%)
267+
codecvt_utf8<wchar_t>:
268+
UTFW ==> UTF8 : 0.137034385s (+10.56%)
269+
UTF8 ==> UTFW : 0.360953804s (+243.42%)
270+
267271
*** No errors detected
268272
```
269273

include/ww898/utf_converters.hpp

+34-5
Original file line numberDiff line numberDiff line change
@@ -456,8 +456,9 @@ template<
456456
typename Utf,
457457
typename Outf,
458458
typename It,
459+
typename Eit,
459460
typename Oit>
460-
Oit conv(It && it, It && eit, Oit && oit)
461+
Oit conv(It && it, Eit && eit, Oit && oit)
461462
{
462463
return detail::conv_strategy<Utf, Outf,
463464
typename std::decay<It>::type,
@@ -468,7 +469,7 @@ Oit conv(It && it, It && eit, Oit && oit)
468469
? detail::conv_impl::random_interator
469470
: detail::conv_impl::normal>()(
470471
std::forward<It>(it),
471-
std::forward<It>(eit),
472+
std::forward<Eit>(eit),
472473
std::forward<Oit>(oit));
473474
}
474475

@@ -478,11 +479,39 @@ template<
478479
size_t wchar_size>
479480
struct wchar_selector {};
480481

481-
template<> struct wchar_selector<2> { typedef utf16 utfw_type; };
482-
template<> struct wchar_selector<4> { typedef utf32 utfw_type; };
482+
template<> struct wchar_selector<2> { typedef utf16 type; };
483+
template<> struct wchar_selector<4> { typedef utf32 type; };
483484

484485
}
485486

486-
typedef detail::wchar_selector<sizeof(wchar_t)>::utfw_type utfw;
487+
typedef detail::wchar_selector<sizeof(wchar_t)>::type utfw;
488+
489+
namespace detail {
490+
491+
template<
492+
typename Ch>
493+
struct utf_selector {};
494+
495+
template<> struct utf_selector< char> { typedef utf8 type; };
496+
template<> struct utf_selector<unsigned char> { typedef utf8 type; };
497+
template<> struct utf_selector<signed char> { typedef utf8 type; };
498+
template<> struct utf_selector<char16_t > { typedef utf16 type; };
499+
template<> struct utf_selector<char32_t > { typedef utf32 type; };
500+
template<> struct utf_selector<wchar_t > { typedef utfw type; };
501+
502+
}
503+
504+
template<
505+
typename Ch>
506+
using utf_selector = detail::utf_selector<typename std::decay<Ch>::type>;
507+
508+
template<
509+
typename Ch>
510+
using utf_selector_t = typename utf_selector<Ch>::type;
511+
512+
template<
513+
typename Ch1,
514+
typename Ch2>
515+
using is_utf_same = std::is_same<utf_selector_t<Ch1>, utf_selector_t<Ch2>>;
487516

488517
}}

0 commit comments

Comments
 (0)