| 1 | See https://github.com/hunspell/hunspell/pull/521 |
| 2 | |
| 3 | Fix chenc and its usages |
| 4 | chenc was modifying the buffer it was passed. This caused hard-to-track, |
| 5 | repeated conversions of token and possibly other variables. |
| 6 | |
| 7 | Change it to only return the converted string. |
| 8 | |
| 9 | Additionally, the logic extending the dest buffer implicitly assumed that 0 |
| 10 | bytes are left in the dest buffer. That is not necessarily the case when |
| 11 | converting to UTF-8: if the result would be a 2-byte sequence and only 1 |
| 12 | byte remains, iconv reports E2BIG as well. |
| 13 | |
| 14 | This fixes the pipe interface (-a) case in UTF-8 with a KOI8-R dictionary. |
| 15 | |
| 16 | diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx |
| 17 | --- hunspell-1.6.1/src/tools/hunspell.cxx 2017-03-25 22:20:45.000000000 +0100 |
| 18 | +++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx 2017-08-14 23:22:16.246966174 +0200 |
| 19 | @@ -243,7 +243,7 @@ |
| 20 | #endif |
| 21 | |
| 22 | /* change character encoding */ |
| 23 | -std::string& chenc(std::string& st, const char* enc1, const char* enc2) { |
| 24 | +std::string chenc(const std::string& st, const char* enc1, const char* enc2) { |
| 25 | #ifndef HAVE_ICONV |
| 26 | (void)enc1; |
| 27 | (void)enc2; |
| 28 | @@ -258,7 +258,7 @@ |
| 29 | std::string out(st.size(), std::string::value_type()); |
| 30 | size_t c1(st.size()); |
| 31 | size_t c2(out.size()); |
| 32 | - ICONV_CONST char* source = &st[0]; |
| 33 | + ICONV_CONST char* source = (ICONV_CONST char*) &st[0]; |
| 34 | char* dest = &out[0]; |
| 35 | iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1)); |
| 36 | if (conv == (iconv_t)-1) { |
| 37 | @@ -267,9 +267,10 @@ |
| 38 | size_t res; |
| 39 | while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) { |
| 40 | if (errno == E2BIG) { |
| 41 | + ssize_t destoff = dest - const_cast<char*>(&out[0]); |
| 42 | out.resize(out.size() + (c2 += c1)); |
| 43 | |
| 44 | - dest = const_cast<char*>(&out[0]) + out.size() - c2; |
| 45 | + dest = const_cast<char*>(&out[0]) + destoff; |
| 46 | } else |
| 47 | break; |
| 48 | } |
| 49 | @@ -278,7 +279,7 @@ |
| 50 | } |
| 51 | iconv_close(conv); |
| 52 | out.resize(dest - &out[0]); |
| 53 | - st = out; |
| 54 | + return out; |
| 55 | } |
| 56 | |
| 57 | return st; |
| 58 | @@ -507,8 +508,7 @@ |
| 59 | #endif |
| 60 | |
| 61 | int putdic(const std::string& in_word, Hunspell* pMS) { |
| 62 | - std::string word(in_word); |
| 63 | - chenc(word, ui_enc, dic_enc[0]); |
| 64 | + std::string word = chenc(in_word, ui_enc, dic_enc[0]); |
| 65 | |
| 66 | std::string buf; |
| 67 | pMS->input_conv(word.c_str(), buf); |
| 68 | @@ -565,7 +565,7 @@ |
| 69 | if (!dic) |
| 70 | return 0; |
| 71 | for (size_t i = 0; i < w.size(); ++i) { |
| 72 | - chenc(w[i], io_enc, ui_enc); |
| 73 | + w[i] = chenc(w[i], io_enc, ui_enc); |
| 74 | fprintf(dic, "%s\n", w[i].c_str()); |
| 75 | } |
| 76 | fclose(dic); |
| 77 | @@ -595,8 +595,7 @@ |
| 78 | // check words in the dictionaries (and set first checked dictionary) |
| 79 | bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) { |
| 80 | for (int i = 0; i < dmax; ++i) { |
| 81 | - std::string buf(token); |
| 82 | - chenc(buf, io_enc, dic_enc[*d]); |
| 83 | + std::string buf = chenc(token, io_enc, dic_enc[*d]); |
| 84 | mystrrep(buf, ENTITY_APOS, "'"); |
| 85 | if (checkapos && buf.find('\'') != std::string::npos) |
| 86 | return false; |
| 87 | @@ -937,7 +936,7 @@ |
| 88 | fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str()); |
| 89 | } |
| 90 | for (size_t j = 1; j < wlst.size(); ++j) { |
| 91 | - fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); |
| 92 | + fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); |
| 93 | } |
| 94 | fprintf(stdout, "\n"); |
| 95 | fflush(stdout); |
| 96 | @@ -1194,8 +1193,7 @@ |
| 97 | } |
| 98 | |
| 99 | std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) { |
| 100 | - std::string utf8str(token); |
| 101 | - chenc(utf8str, ioenc, "UTF-8"); |
| 102 | + std::string utf8str = chenc(token, ioenc, "UTF-8"); |
| 103 | std::vector<w_char> u; |
| 104 | u8_u16(u, utf8str); |
| 105 | if (!u.empty()) { |
| 106 | @@ -1206,8 +1204,7 @@ |
| 107 | } |
| 108 | std::string scratch; |
| 109 | u16_u8(scratch, u); |
| 110 | - chenc(scratch, "UTF-8", ioenc); |
| 111 | - return scratch; |
| 112 | + return chenc(scratch, "UTF-8", ioenc); |
| 113 | } |
| 114 | |
| 115 | // for terminal interface |
| 116 | @@ -1532,13 +1529,13 @@ |
| 117 | std::vector<std::string> wlst; |
| 118 | dialogscreen(parser, token, filename, info, wlst); // preview |
| 119 | refresh(); |
| 120 | - std::string buf(token); |
| 121 | - wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str()); |
| 122 | + std::string dicbuf = chenc(token, io_enc, dic_enc[d]); |
| 123 | + wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str()); |
| 124 | if (wlst.empty()) { |
| 125 | dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); |
| 126 | } else { |
| 127 | for (size_t j = 0; j < wlst.size(); ++j) { |
| 128 | - chenc(wlst[j], dic_enc[d], io_enc); |
| 129 | + wlst[j] = chenc(wlst[j], dic_enc[d], io_enc); |
| 130 | } |
| 131 | dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); |
| 132 | } |