[termux-packages] / packages / hunspell / hunspell-chenc.patch

See https://github.com/hunspell/hunspell/pull/521

Fix chenc and its usages
chenc was modifying the buffer that was passed to it. This caused
hard-to-track repeated conversions of token and possibly other variables.

Change it to only return the converted string.

Additionally, the logic extending the dest buffer implicitly assumed that
0 bytes are left in the dest buffer. That is not necessarily the case when
converting to UTF-8: if the next result is a 2-byte sequence and only
1 byte remains, you get E2BIG as well.

This fixes the case of pipe mode (-a) in UTF-8 with a KOI8-R dictionary.

diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx
--- hunspell-1.6.1/src/tools/hunspell.cxx	2017-03-25 22:20:45.000000000 +0100
+++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx	2017-08-14 23:22:16.246966174 +0200
@@ -243,7 +243,7 @@
 #endif
 
 /* change character encoding */
-std::string& chenc(std::string& st, const char* enc1, const char* enc2) {
+std::string chenc(const std::string& st, const char* enc1, const char* enc2) {
 #ifndef HAVE_ICONV
   (void)enc1;
   (void)enc2;
@@ -258,7 +258,7 @@
   std::string out(st.size(), std::string::value_type());
   size_t c1(st.size());
   size_t c2(out.size());
-  ICONV_CONST char* source = &st[0];
+  ICONV_CONST char* source = (ICONV_CONST char*) &st[0];
   char* dest = &out[0];
   iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
   if (conv == (iconv_t)-1) {
@@ -267,9 +267,10 @@
     size_t res;
     while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) {
       if (errno == E2BIG) {
+        ssize_t destoff = dest - const_cast<char*>(&out[0]);
         out.resize(out.size() + (c2 += c1));
 
-        dest = const_cast<char*>(&out[0]) + out.size() - c2;
+        dest = const_cast<char*>(&out[0]) + destoff;
       } else
         break;
     }
@@ -278,7 +279,7 @@
     }
     iconv_close(conv);
     out.resize(dest - &out[0]);
-    st = out;
+    return out;
   }
 
   return st;
@@ -507,8 +508,7 @@
 #endif
 
 int putdic(const std::string& in_word, Hunspell* pMS) {
-  std::string word(in_word);
-  chenc(word, ui_enc, dic_enc[0]);
+  std::string word = chenc(in_word, ui_enc, dic_enc[0]);
 
   std::string buf;
   pMS->input_conv(word.c_str(), buf);
@@ -565,7 +565,7 @@
   if (!dic)
     return 0;
   for (size_t i = 0; i < w.size(); ++i) {
-    chenc(w[i], io_enc, ui_enc);
+    w[i] = chenc(w[i], io_enc, ui_enc);
     fprintf(dic, "%s\n", w[i].c_str());
   }
   fclose(dic);
@@ -595,8 +595,7 @@
 // check words in the dictionaries (and set first checked dictionary)
 bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) {
   for (int i = 0; i < dmax; ++i) {
-    std::string buf(token);
-    chenc(buf, io_enc, dic_enc[*d]);
+    std::string buf = chenc(token, io_enc, dic_enc[*d]);
     mystrrep(buf, ENTITY_APOS, "'");
     if (checkapos && buf.find('\'') != std::string::npos)
       return false;
@@ -937,7 +936,7 @@
                 fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str());
               }
               for (size_t j = 1; j < wlst.size(); ++j) {
-                fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
+                  fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
               }
               fprintf(stdout, "\n");
               fflush(stdout);
@@ -1194,8 +1193,7 @@
 }
 
 std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
-  std::string utf8str(token);
-  chenc(utf8str, ioenc, "UTF-8");
+  std::string utf8str = chenc(token, ioenc, "UTF-8");
   std::vector<w_char> u;
   u8_u16(u, utf8str);
   if (!u.empty()) {
@@ -1206,8 +1204,7 @@
   }
   std::string scratch;
   u16_u8(scratch, u);
-  chenc(scratch, "UTF-8", ioenc);
-  return scratch;
+  return chenc(scratch, "UTF-8", ioenc);
 }
 
 // for terminal interface
@@ -1532,13 +1529,13 @@
       std::vector<std::string> wlst;
       dialogscreen(parser, token, filename, info, wlst);  // preview
       refresh();
-      std::string buf(token);
-      wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str());
+      std::string dicbuf = chenc(token, io_enc, dic_enc[d]);
+      wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str());
       if (wlst.empty()) {
         dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
       } else {
         for (size_t j = 0; j < wlst.size(); ++j) {
-          chenc(wlst[j], dic_enc[d], io_enc);
+          wlst[j] = chenc(wlst[j], dic_enc[d], io_enc);
         }
         dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
       }
Commit	Line	Data
982ce490 FF	1	See https://github.com/hunspell/hunspell/pull/521
982ce490 FF	2
2d3744e1 VS	3	Fix chenc and its usages
	4	chenc was changing buffer it was passed to. This caused untrackable
	5	multiple conversions of token and possibly other variables.
	6
	7	Change it to only return converted string
	8
	9	Additionally logic extending dest buffer implicitly assumed that 0
	10	bytes are left in dest buffer. It's not necessarily the case when
	11	converting to UTF-8 as if result would be 2-byte sequence and only 1 byte
	12	is remaining you get E2BIG as well.
	13
	14	This fixes the case of pipe (-a) in UTF-8 with KOI8-R dictionary.
	15
	16	diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx
	17	--- hunspell-1.6.1/src/tools/hunspell.cxx 2017-03-25 22:20:45.000000000 +0100
	18	+++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx 2017-08-14 23:22:16.246966174 +0200
	19	@@ -243,7 +243,7 @@
	20	#endif
	21
	22	/* change character encoding */
	23	-std::string& chenc(std::string& st, const char* enc1, const char* enc2) {
	24	+std::string chenc(const std::string& st, const char* enc1, const char* enc2) {
	25	#ifndef HAVE_ICONV
	26	(void)enc1;
	27	(void)enc2;
	28	@@ -258,7 +258,7 @@
	29	std::string out(st.size(), std::string::value_type());
	30	size_t c1(st.size());
	31	size_t c2(out.size());
	32	- ICONV_CONST char* source = &st[0];
	33	+ ICONV_CONST char* source = (ICONV_CONST char*) &st[0];
	34	char* dest = &out[0];
	35	iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
	36	if (conv == (iconv_t)-1) {
	37	@@ -267,9 +267,10 @@
	38	size_t res;
	39	while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) {
	40	if (errno == E2BIG) {
	41	+ ssize_t destoff = dest - const_cast<char*>(&out[0]);
	42	out.resize(out.size() + (c2 += c1));
	43
	44	- dest = const_cast<char*>(&out[0]) + out.size() - c2;
	45	+ dest = const_cast<char*>(&out[0]) + destoff;
	46	} else
	47	break;
	48	}
	49	@@ -278,7 +279,7 @@
	50	}
	51	iconv_close(conv);
	52	out.resize(dest - &out[0]);
	53	- st = out;
	54	+ return out;
	55	}
	56
	57	return st;
	58	@@ -507,8 +508,7 @@
	59	#endif
	60
	61	int putdic(const std::string& in_word, Hunspell* pMS) {
	62	- std::string word(in_word);
	63	- chenc(word, ui_enc, dic_enc[0]);
	64	+ std::string word = chenc(in_word, ui_enc, dic_enc[0]);
	65
	66	std::string buf;
67	pMS->input_conv(word.c_str(), buf);
68	@@ -565,7 +565,7 @@
69	if (!dic)
70	return 0;
71	for (size_t i = 0; i < w.size(); ++i) {
72	- chenc(w[i], io_enc, ui_enc);
73	+ w[i] = chenc(w[i], io_enc, ui_enc);
74	fprintf(dic, "%s\n", w[i].c_str());
75	}
76	fclose(dic);
77	@@ -595,8 +595,7 @@
78	// check words in the dictionaries (and set first checked dictionary)
79	bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) {
80	for (int i = 0; i < dmax; ++i) {
81	- std::string buf(token);
82	- chenc(buf, io_enc, dic_enc[*d]);
83	+ std::string buf = chenc(token, io_enc, dic_enc[*d]);
84	mystrrep(buf, ENTITY_APOS, "'");
85	if (checkapos && buf.find('\'') != std::string::npos)
86	return false;
87	@@ -937,7 +936,7 @@
88	fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str());
89	}
90	for (size_t j = 1; j < wlst.size(); ++j) {
91	- fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
92	+ fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
93	}
94	fprintf(stdout, "\n");
95	fflush(stdout);
96	@@ -1194,8 +1193,7 @@
97	}
98
99	std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
100	- std::string utf8str(token);
101	- chenc(utf8str, ioenc, "UTF-8");
102	+ std::string utf8str = chenc(token, ioenc, "UTF-8");
103	std::vector<w_char> u;
104	u8_u16(u, utf8str);
105	if (!u.empty()) {
106	@@ -1206,8 +1204,7 @@
107	}
108	std::string scratch;
109	u16_u8(scratch, u);
110	- chenc(scratch, "UTF-8", ioenc);
111	- return scratch;
112	+ return chenc(scratch, "UTF-8", ioenc);
113	}
114
115	// for terminal interface
116	@@ -1532,13 +1529,13 @@
117	std::vector<std::string> wlst;
118	dialogscreen(parser, token, filename, info, wlst); // preview
119	refresh();
120	- std::string buf(token);
121	- wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str());
122	+ std::string dicbuf = chenc(token, io_enc, dic_enc[d]);
123	+ wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str());
124	if (wlst.empty()) {
125	dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
126	} else {
127	for (size_t j = 0; j < wlst.size(); ++j) {
128	- chenc(wlst[j], dic_enc[d], io_enc);
129	+ wlst[j] = chenc(wlst[j], dic_enc[d], io_enc);
130	}
131	dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
132	}