Skip to content

Commit

Permalink
Use Boost.Locale for UTF8 conversions so that the fix builds on MinGW…
Browse files Browse the repository at this point in the history
… as well
  • Loading branch information
arhag committed May 25, 2016
1 parent 3d446a1 commit 88297e5
Showing 1 changed file with 18 additions and 24 deletions.
42 changes: 18 additions & 24 deletions libraries/chain/steem_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,20 @@

#ifndef IS_LOW_MEM
#include <diff_match_patch.h>
#include <locale>
#include <codecvt>

// Hack to workaround diff-match-patch-cpp-stl's apparently broken UTF8 handling
// Long term I think the best solution is to just fix the diff_match_patch code.
template <> struct diff_match_patch_traits<char16_t> : diff_match_patch_utf32_from_utf16<char16_t> {
static bool is_alnum(char16_t c) { return std::isalnum(c)? true : false; }
static bool is_digit(char16_t c) { return std::isdigit(c)? true : false; }
static bool is_space(char16_t c) { return std::isspace(c)? true : false; }
static int to_int(const char16_t* s) {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>,char16_t> codecvt;
std::string utf8 = codecvt.to_bytes(std::u16string(s));
return static_cast<int>(std::stol(utf8, NULL, 10)); }
static char16_t from_wchar(wchar_t c) { return c; }
static wchar_t to_wchar(char16_t c) { return c; }
static const std::u16string cs(const wchar_t* s) { return std::u16string(s, s + wcslen(s)); }
static const char16_t eol = u'\n';
static const char16_t tab = u'\t';
};
#include <boost/locale/encoding_utf.hpp>

using boost::locale::conv::utf_to_utf;

std::wstring utf8_to_wstring(const std::string& str)
{
return utf_to_utf<wchar_t>(str.c_str(), str.c_str() + str.size());
}

std::string wstring_to_utf8(const std::wstring& str)
{
return utf_to_utf<char>(str.c_str(), str.c_str() + str.size());
}

#endif

#include <fc/uint128.hpp>
Expand Down Expand Up @@ -290,12 +285,11 @@ void comment_evaluator::do_apply( const comment_operation& o )

if( o.body.size() ) {
try {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>,char16_t> codecvt;
diff_match_patch<std::u16string> dmp;
auto patch = dmp.patch_fromText( codecvt.from_bytes(o.body) );
diff_match_patch<std::wstring> dmp;
auto patch = dmp.patch_fromText( utf8_to_wstring(o.body) );
if( patch.size() ) {
auto result = dmp.patch_apply( patch, codecvt.from_bytes(com.body) );
auto patched_body = codecvt.to_bytes(result.first);
auto result = dmp.patch_apply( patch, utf8_to_wstring(com.body) );
auto patched_body = wstring_to_utf8(result.first);
if( !fc::is_utf8( patched_body ) ) {
idump(("invalid utf8")(patched_body));
com.body = fc::prune_invalid_utf8(patched_body);
Expand Down

0 comments on commit 88297e5

Please sign in to comment.