Commit | Line | Data |
---|---|---|
2d3744e1 VS |
1 | Fix chenc and its usages |
2 | chenc was modifying the buffer it was passed. This caused repeated, | |
3 | hard-to-trace conversions of token and possibly other variables. | |
4 | ||
5 | Change it to only return the converted string. | |
6 | ||
7 | Additionally, the logic extending the dest buffer implicitly assumed | |
8 | that 0 bytes are left in the dest buffer. That is not necessarily the | |
9 | case when converting to UTF-8: if the next result is a 2-byte sequence | |
10 | and only 1 byte remains, iconv reports E2BIG as well. | |
11 | ||
12 | This fixes the case of pipe mode (-a) in UTF-8 with a KOI8-R dictionary. | |
13 | ||
14 | diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx | |
15 | --- hunspell-1.6.1/src/tools/hunspell.cxx 2017-03-25 22:20:45.000000000 +0100 | |
16 | +++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx 2017-08-14 23:22:16.246966174 +0200 | |
17 | @@ -243,7 +243,7 @@ | |
18 | #endif | |
19 | ||
20 | /* change character encoding */ | |
21 | -std::string& chenc(std::string& st, const char* enc1, const char* enc2) { | |
22 | +std::string chenc(const std::string& st, const char* enc1, const char* enc2) { | |
23 | #ifndef HAVE_ICONV | |
24 | (void)enc1; | |
25 | (void)enc2; | |
26 | @@ -258,7 +258,7 @@ | |
27 | std::string out(st.size(), std::string::value_type()); | |
28 | size_t c1(st.size()); | |
29 | size_t c2(out.size()); | |
30 | - ICONV_CONST char* source = &st[0]; | |
31 | + ICONV_CONST char* source = (ICONV_CONST char*) &st[0]; | |
32 | char* dest = &out[0]; | |
33 | iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1)); | |
34 | if (conv == (iconv_t)-1) { | |
35 | @@ -267,9 +267,10 @@ | |
36 | size_t res; | |
37 | while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) { | |
38 | if (errno == E2BIG) { | |
39 | + ssize_t destoff = dest - const_cast<char*>(&out[0]); | |
40 | out.resize(out.size() + (c2 += c1)); | |
41 | ||
42 | - dest = const_cast<char*>(&out[0]) + out.size() - c2; | |
43 | + dest = const_cast<char*>(&out[0]) + destoff; | |
44 | } else | |
45 | break; | |
46 | } | |
47 | @@ -278,7 +279,7 @@ | |
48 | } | |
49 | iconv_close(conv); | |
50 | out.resize(dest - &out[0]); | |
51 | - st = out; | |
52 | + return out; | |
53 | } | |
54 | ||
55 | return st; | |
56 | @@ -507,8 +508,7 @@ | |
57 | #endif | |
58 | ||
59 | int putdic(const std::string& in_word, Hunspell* pMS) { | |
60 | - std::string word(in_word); | |
61 | - chenc(word, ui_enc, dic_enc[0]); | |
62 | + std::string word = chenc(in_word, ui_enc, dic_enc[0]); | |
63 | ||
64 | std::string buf; | |
65 | pMS->input_conv(word.c_str(), buf); | |
66 | @@ -565,7 +565,7 @@ | |
67 | if (!dic) | |
68 | return 0; | |
69 | for (size_t i = 0; i < w.size(); ++i) { | |
70 | - chenc(w[i], io_enc, ui_enc); | |
71 | + w[i] = chenc(w[i], io_enc, ui_enc); | |
72 | fprintf(dic, "%s\n", w[i].c_str()); | |
73 | } | |
74 | fclose(dic); | |
75 | @@ -595,8 +595,7 @@ | |
76 | // check words in the dictionaries (and set first checked dictionary) | |
77 | bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) { | |
78 | for (int i = 0; i < dmax; ++i) { | |
79 | - std::string buf(token); | |
80 | - chenc(buf, io_enc, dic_enc[*d]); | |
81 | + std::string buf = chenc(token, io_enc, dic_enc[*d]); | |
82 | mystrrep(buf, ENTITY_APOS, "'"); | |
83 | if (checkapos && buf.find('\'') != std::string::npos) | |
84 | return false; | |
85 | @@ -937,7 +936,7 @@ | |
86 | fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str()); | |
87 | } | |
88 | for (size_t j = 1; j < wlst.size(); ++j) { | |
89 | - fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); | |
90 | + fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); | |
91 | } | |
92 | fprintf(stdout, "\n"); | |
93 | fflush(stdout); | |
94 | @@ -1194,8 +1193,7 @@ | |
95 | } | |
96 | ||
97 | std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) { | |
98 | - std::string utf8str(token); | |
99 | - chenc(utf8str, ioenc, "UTF-8"); | |
100 | + std::string utf8str = chenc(token, ioenc, "UTF-8"); | |
101 | std::vector<w_char> u; | |
102 | u8_u16(u, utf8str); | |
103 | if (!u.empty()) { | |
104 | @@ -1206,8 +1204,7 @@ | |
105 | } | |
106 | std::string scratch; | |
107 | u16_u8(scratch, u); | |
108 | - chenc(scratch, "UTF-8", ioenc); | |
109 | - return scratch; | |
110 | + return chenc(scratch, "UTF-8", ioenc); | |
111 | } | |
112 | ||
113 | // for terminal interface | |
114 | @@ -1532,13 +1529,13 @@ | |
115 | std::vector<std::string> wlst; | |
116 | dialogscreen(parser, token, filename, info, wlst); // preview | |
117 | refresh(); | |
118 | - std::string buf(token); | |
119 | - wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str()); | |
120 | + std::string dicbuf = chenc(token, io_enc, dic_enc[d]); | |
121 | + wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str()); | |
122 | if (wlst.empty()) { | |
123 | dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); | |
124 | } else { | |
125 | for (size_t j = 0; j < wlst.size(); ++j) { | |
126 | - chenc(wlst[j], dic_enc[d], io_enc); | |
127 | + wlst[j] = chenc(wlst[j], dic_enc[d], io_enc); | |
128 | } | |
129 | dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); | |
130 | } |