Commit | Line | Data |
---|---|---|
b6b9d458 | 1 | .\" -*-nroff-*- |
c4ccbbf9 MW |
2 | .\" |
3 | .\" Manual for dynamic strings | |
4 | .\" | |
5 | .\" (c) 1999--2003, 2005, 2007, 2009, 2013, 2014, 2023, 2024 | |
6 | .\" Straylight/Edgeware | |
7 | .\" | |
8 | . | |
9 | .\"----- Licensing notice --------------------------------------------------- | |
10 | .\" | |
11 | .\" This file is part of the mLib utilities library. | |
12 | .\" | |
13 | .\" mLib is free software: you can redistribute it and/or modify it under | |
14 | .\" the terms of the GNU Library General Public License as published by | |
15 | .\" the Free Software Foundation; either version 2 of the License, or (at | |
16 | .\" your option) any later version. | |
17 | .\" | |
18 | .\" mLib is distributed in the hope that it will be useful, but WITHOUT | |
19 | .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
20 | .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
21 | .\" License for more details. | |
22 | .\" | |
23 | .\" You should have received a copy of the GNU Library General Public | |
24 | .\" License along with mLib. If not, write to the Free Software | |
25 | .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
26 | .\" USA. | |
27 | . | |
28 | .\"-------------------------------------------------------------------------- | |
29 | .so ../defs.man \" @@@PRE@@@ | |
30 | . | |
31 | .\"-------------------------------------------------------------------------- | |
32 | .TH dstr 3mLib "8 May 1999" "Straylight/Edgeware" "mLib utilities library" | |
08da152e | 33 | .\" @dstr_create |
34 | .\" @dstr_destroy | |
35 | .\" @dstr_reset | |
36 | .\" @dstr_ensure | |
37 | .\" @dstr_tidy | |
c4ccbbf9 | 38 | . |
08da152e | 39 | .\" @dstr_putc |
40 | .\" @dstr_putz | |
41 | .\" @dstr_puts | |
42 | .\" @dstr_putf | |
c4ccbbf9 MW |
43 | .\" @dstr_vputf |
44 | .\" @dstr_printops | |
08da152e | 45 | .\" @dstr_putd |
46 | .\" @dstr_putm | |
47 | .\" @dstr_putline | |
48 | .\" @dstr_write | |
c4ccbbf9 | 49 | . |
e49a7995 | 50 | .\" @DSTR_INIT |
08da152e | 51 | .\" @DCREATE |
52 | .\" @DDESTROY | |
53 | .\" @DRESET | |
54 | .\" @DENSURE | |
55 | .\" @DPUTC | |
56 | .\" @DPUTZ | |
57 | .\" @DPUTS | |
58 | .\" @DPUTD | |
59 | .\" @DPUTM | |
60 | .\" @DWRITE | |
c4ccbbf9 MW |
61 | . |
62 | .\"-------------------------------------------------------------------------- | |
63 | .SH NAME | |
64 | dstr \- a simple dynamic string type | |
65 | . | |
66 | .\"-------------------------------------------------------------------------- | |
b6b9d458 | 67 | .SH SYNOPSIS |
c4ccbbf9 | 68 | . |
b6b9d458 | 69 | .nf |
70 | .B "#include <mLib/dstr.h>" | |
d056fbdf | 71 | .PP |
4729aa69 MW |
72 | .B "typedef struct { ...\& } dstr;" |
73 | .B "#define DSTR_INIT ..." | |
d056fbdf | 74 | .PP |
b6b9d458 | 75 | .BI "void dstr_create(dstr *" d ); |
76 | .BI "void dstr_destroy(dstr *" d ); | |
77 | .BI "void dstr_reset(dstr *" d ); | |
d056fbdf | 78 | .PP |
b6b9d458 | 79 | .BI "void dstr_ensure(dstr *" d ", size_t " sz ); |
80 | .BI "void dstr_tidy(dstr *" d ); | |
d056fbdf | 81 | .PP |
2be33c7c | 82 | .BI "void dstr_putc(dstr *" d ", int " ch ); |
b6b9d458 | 83 | .BI "void dstr_putz(dstr *" d ); |
84 | .BI "void dstr_puts(dstr *" d ", const char *" s ); | |
5a18a126 | 85 | .BI "int dstr_vputf(dstr *" d ", const char *" p ", va_list *" ap ); |
d2a91066 | 86 | .BI "int dstr_putf(dstr *" d ", const char *" p ", ...);" |
b6b9d458 | 87 | .BI "void dstr_putd(dstr *" d ", const dstr *" p ); |
88 | .BI "void dstr_putm(dstr *" d ", const void *" p ", size_t " sz ); | |
89 | .BI "int dstr_putline(dstr *" d ", FILE *" fp ); | |
90 | .BI "size_t dstr_write(const dstr *" d ", FILE *" fp ); | |
d056fbdf | 91 | .PP |
b6b9d458 | 92 | .BI "void DCREATE(dstr *" d ); |
93 | .BI "void DDESTROY(dstr *" d ); | |
94 | .BI "void DRESET(dstr *" d ); | |
95 | .BI "void DENSURE(dstr *" d ", size_t " sz ); | |
08da152e | 96 | .BI "void DPUTC(dstr *" d ", char " ch ); |
b6b9d458 | 97 | .BI "void DPUTZ(dstr *" d ); |
98 | .BI "void DPUTS(dstr *" d ", const char *" s ); | |
99 | .BI "void DPUTD(dstr *" d ", const dstr *" p ); | |
100 | .BI "void DPUTM(dstr *" d ", const void *" p ", size_t " sz ); | |
101 | .BI "size_t DWRITE(const dstr *" d ", FILE *" fp ); | |
102 | .fi | |
c4ccbbf9 MW |
103 | . |
104 | .\"-------------------------------------------------------------------------- | |
750e4b6c | 105 | .SH DESCRIPTION |
c4ccbbf9 | 106 | . |
b6b9d458 | 107 | The header |
108 | .B dstr.h | |
109 | declares a type for representing dynamically extending strings, and a | |
110 | small collection of useful operations on them. None of the operations | |
111 | returns a failure result on an out-of-memory condition; instead, the | |
112 | exception | |
113 | .B EXC_NOMEM | |
114 | is raised. | |
115 | .PP | |
116 | Many of the functions which act on dynamic strings have macro | |
117 | equivalents. These equivalent macros may evaluate their arguments | |
118 | multiple times. | |
c4ccbbf9 | 119 | . |
750e4b6c | 120 | .SS "Underlying type" |
b6b9d458 | 121 | A |
122 | .B dstr | |
4729aa69 | 123 | object is a small structure with the following members. |
b6b9d458 | 124 | The |
125 | .B buf | |
126 | member points to the actual character data in the string. The data may | |
127 | or may not be null terminated, depending on what operations have | |
128 | recently been performed on it. None of the | |
129 | .B dstr | |
130 | functions depend on the string being null-terminated; indeed, all of | |
131 | them work fine on strings containing arbitrary binary data. You can | |
132 | force null-termination by calling the | |
133 | .B dstr_putz | |
134 | function, or the | |
135 | .B DPUTZ | |
136 | macro. | |
137 | .PP | |
138 | The | |
139 | .B sz | |
140 | member describes the current size of the buffer. This reflects the | |
141 | maximum possible length of string that can be represented in | |
142 | .B buf | |
143 | without allocating a new buffer. | |
144 | .PP | |
145 | The | |
146 | .B len | |
147 | member describes the current length of the string. It is the number of | |
148 | bytes in the string which are actually interesting. The length does | |
149 | .I not | |
150 | include a null-terminating byte, if there is one. | |
151 | .PP | |
152 | The following invariants are maintained by | |
153 | .B dstr | |
154 | and must hold when any function is called: | |
08da152e | 155 | .hP \*o |
d4efbcd9 | 156 | If |
b6b9d458 | 157 | .B sz |
158 | is nonzero, then | |
159 | .B buf | |
160 | points to a block of memory of length | |
161 | .BR sz . | |
162 | If | |
163 | .B sz | |
164 | is zero, then | |
165 | .B buf | |
166 | is a null pointer. | |
08da152e | 167 | .hP \*o |
b6b9d458 | 168 | At all times, |
7527ed0b | 169 | .BR sz " \(>= " len . |
b6b9d458 | 170 | .PP |
d2a91066 | 171 | Note that there is no equivalent of the standard C distinction between |
b6b9d458 | 172 | the empty string (a pointer to an array of characters whose first |
d2a91066 | 173 | element is zero) and the nonexistent string (a null pointer). Any |
b6b9d458 | 174 | .B dstr |
175 | whose | |
176 | .B len | |
177 | is zero is an empty string. | |
cededfbe | 178 | .PP |
179 | The | |
180 | .B a | |
181 | member refers to the arena from which the string's buffer has been | |
182 | allocated. Immediately after creation, this is set to be | |
183 | .BR arena_stdlib (3); | |
184 | you can set it to point to any other arena of your choice before the | |
185 | buffer is allocated. | |
c4ccbbf9 | 186 | . |
750e4b6c | 187 | .SS "Creation and destruction" |
b6b9d458 | 188 | The caller is responsible for allocating the |
189 | .B dstr | |
528c8b4d | 190 | structure. It can be initialized: |
08da152e | 191 | .hP \*o |
528c8b4d | 192 | using the macro |
b6b9d458 | 193 | .B DSTR_INIT |
528c8b4d | 194 | as an initializer in the declaration of the object, |
08da152e | 195 | .hP \*o |
528c8b4d | 196 | passing its address to the |
b6b9d458 | 197 | .B dstr_create |
528c8b4d | 198 | function, or |
08da152e | 199 | .hP \*o |
528c8b4d | 200 | passing its address to the (equivalent) |
b6b9d458 | 201 | .B DCREATE |
202 | macro. | |
203 | .PP | |
204 | The initial value of a | |
205 | .B dstr | |
206 | is the empty string. | |
207 | .PP | |
208 | The additional storage space for a string's contents may be reclaimed by | |
209 | passing it to the | |
210 | .B dstr_destroy | |
211 | function, or the | |
212 | .B DDESTROY | |
213 | macro. After destruction, a string's value is reset to the empty | |
214 | string: | |
215 | .I "it's still a valid" | |
216 | .BR dstr . | |
217 | However, once a string has been destroyed, it's safe to deallocate the | |
218 | underlying | |
219 | .B dstr | |
220 | object. | |
221 | .PP | |
222 | The | |
223 | .B dstr_reset | |
224 | function empties a string | |
225 | .I without | |
226 | deallocating any memory. Therefore appending more characters is quick, | |
d2a91066 | 227 | because the old buffer is still there and doesn't need to be allocated. |
b6b9d458 | 228 | Calling |
229 | .VS | |
230 | dstr_reset(d); | |
231 | .VE | |
d2a91066 | 232 | is equivalent to directly assigning |
b6b9d458 | 233 | .VS |
234 | d->len = 0; | |
235 | .VE | |
236 | There's also a macro | |
237 | .B DRESET | |
238 | which does the same job as the | |
239 | .B dstr_reset | |
240 | function. | |
c4ccbbf9 | 241 | . |
750e4b6c | 242 | .SS "Extending a string" |
b6b9d458 | 243 | All memory allocation for strings is done by the function |
244 | .BR dstr_ensure . | |
d4efbcd9 | 245 | Given a pointer |
b6b9d458 | 246 | .I d |
247 | to a | |
248 | .B dstr | |
249 | and a size | |
250 | .IR sz , | |
251 | the function ensures that there are at least | |
252 | .I sz | |
253 | unused bytes in the string's buffer. The current algorithm for | |
254 | extending the buffer is fairly unsophisticated, but seems to work | |
255 | relatively well \- see the source if you really want to know what it's | |
256 | doing. | |
257 | .PP | |
258 | Extending a string never returns a failure result. Instead, if there | |
259 | isn't enough memory for a longer string, the exception | |
260 | .B EXC_NOMEM | |
261 | is raised. See | |
08da152e | 262 | .BR exc (3) |
d4efbcd9 | 263 | for more information about |
b6b9d458 | 264 | .BR mLib 's |
265 | exception handling system. | |
266 | .PP | |
267 | Note that if an ensure operation needs to reallocate a string buffer, | |
268 | any pointers you've taken into the string become invalid. | |
269 | .PP | |
270 | There's a macro | |
271 | .B DENSURE | |
272 | which does a quick inline check to see whether there's enough space in | |
273 | a string's buffer. This saves a procedure call when no reallocation | |
274 | needs to be done. The | |
275 | .B DENSURE | |
276 | macro is called in the same way as the | |
277 | .B dstr_ensure | |
278 | function. | |
279 | .PP | |
280 | The function | |
281 | .B dstr_tidy | |
282 | `trims' a string's buffer so that it's just large enough for the string | |
283 | contents and a null terminating byte. This might raise an exception due | |
284 | to lack of memory. (There are two possible ways this might happen. | |
d2a91066 | 285 | Firstly, the underlying allocator might just be brain-damaged enough to |
b6b9d458 | 286 | fail on reducing a block's size. Secondly, tidying an empty string with no |
287 | buffer allocated for it causes allocation of a buffer large enough for | |
288 | the terminating null byte.) | |
c4ccbbf9 | 289 | . |
750e4b6c | 290 | .SS "Contributing data to a string" |
b6b9d458 | 291 | There is a collection of functions which add data to a string. All of |
292 | these functions add their new data to the | |
293 | .I end | |
294 | of the string. This is good, because programs usually build strings | |
295 | left-to-right. If you want to do something more clever, that's up to | |
296 | you. | |
297 | .PP | |
298 | Several of these functions have equivalent macros which do the main work | |
299 | inline. (There still might need to be a function call if the buffer | |
300 | needs to be extended.) | |
301 | .PP | |
302 | Any of these functions might extend the string, causing pointers into | |
303 | the string buffer to be invalidated. If you don't want that to happen, | |
304 | pre-ensure enough space before you start. | |
305 | .PP | |
306 | The simplest function is | |
307 | .B dstr_putc | |
308 | which appends a single character | |
309 | .I ch | |
310 | to the end of the string. It has a macro equivalent called | |
311 | .BR DPUTC . | |
312 | .PP | |
313 | The function | |
314 | .B dstr_putz | |
315 | places a zero byte at the end of the string. It does | |
316 | .I not | |
317 | affect the string's length, so any other data added to the string will | |
318 | overwrite the null terminator. This is useful if you want to pass your | |
319 | string to one of the standard C library string-handling functions. The | |
320 | macro | |
321 | .B DPUTZ | |
322 | does the same thing. | |
323 | .PP | |
324 | The function | |
325 | .B dstr_puts | |
326 | writes a C-style null-terminated string to the end of a dynamic string. | |
327 | A terminating zero byte is also written, as if | |
328 | .B dstr_putz | |
329 | were called. The macro | |
330 | .B DPUTS | |
331 | does the same job. | |
332 | .PP | |
333 | The function | |
334 | .B dstr_putf | |
335 | works similarly to the standard | |
336 | .BR sprintf (3) | |
337 | function. It accepts a | |
338 | .BR printf (3)-style | |
339 | format string and an arbitrary number of arguments to format and writes | |
340 | the resulting text to the end of a dynamic string, returning the number | |
341 | of characters so written. A terminating zero byte is also appended. | |
c4ccbbf9 | 342 | There is no macro equivalent of |
b6b9d458 | 343 | .BR dstr_putf . |
344 | .PP | |
345 | The function | |
346 | .B dstr_vputf | |
347 | provides access to the `guts' of | |
348 | .BR dstr_putf : | |
5a18a126 | 349 | given a format string and a pointer to a |
350 | .BR va_list , | |
351 | it will format the arguments according to the format string, just as | |
b6b9d458 | 352 | .B dstr_putf |
5a18a126 | 353 | does. (Note: that's a |
354 | .BR "va_list *" , | |
355 | not a plain | |
356 | .BR va_list , | |
357 | so that it gets updated properly on exit.) | |
b6b9d458 | 358 | .PP |
c4ccbbf9 MW |
359 | The |
360 | .B dstr_putf | |
361 | and | |
362 | .B dstr_vputf | |
363 | functions are implemented using | |
364 | .BR gprintf (3). | |
365 | The output operations table is exposed as | |
366 | .BR dstr_printops ; | |
367 | the functions expect the output pointer to be the address of the output | |
368 | .BR dstr . | |
369 | .PP | |
b6b9d458 | 370 | The function |
371 | .B dstr_putd | |
372 | appends the contents of one dynamic string to another. A null | |
373 | terminator is also appended. The macro | |
374 | .B DPUTD | |
375 | does the same thing. | |
376 | .PP | |
377 | The function | |
378 | .B dstr_putm | |
379 | puts an arbitrary block of memory, addressed by | |
380 | .IR p , | |
381 | with length | |
382 | .I sz | |
383 | bytes, at the end of a dynamic string. No terminating null is appended: | |
384 | it's assumed that if you're playing with arbitrary chunks of memory then | |
385 | you're probably not going to be using the resulting data as a normal | |
386 | text string. The macro | |
387 | .B DPUTM | |
388 | works the same way. | |
389 | .PP | |
390 | The function | |
391 | .B dstr_putline | |
392 | reads a line from an input stream | |
393 | .I fp | |
394 | and appends it to a string. If an error occurs, or end-of-file is | |
395 | encountered, before any characters have been read, then | |
396 | .B dstr_putline | |
397 | returns the value | |
750e4b6c | 398 | .B EOF |
399 | and does not extend the string. Otherwise, it reads until it encounters | |
400 | a newline character, an error, or end-of-file, and returns the number of | |
401 | characters read. If reading was terminated by a newline character, the | |
402 | newline character is | |
b6b9d458 | 403 | .I not |
404 | inserted in the buffer. A terminating null is appended, as by | |
405 | .BR dstr_putz . | |
c4ccbbf9 | 406 | . |
750e4b6c | 407 | .SS "Other functions" |
b6b9d458 | 408 | The |
409 | .B dstr_write | |
410 | function writes a string to an output stream | |
411 | .IR fp . | |
412 | It returns the number of characters written, or | |
413 | .B 0 | |
414 | if an error occurred before the first write. No newline character is | |
415 | written to the stream, unless the string actually contains one already. | |
416 | The macro | |
417 | .B DWRITE | |
418 | is equivalent. | |
c4ccbbf9 MW |
419 | . |
420 | .\"-------------------------------------------------------------------------- | |
b6b9d458 | 421 | .SH "SECURITY CONSIDERATIONS" |
c4ccbbf9 | 422 | . |
d2a91066 | 423 | The implementation of the |
b6b9d458 | 424 | .B dstr |
425 | functions is designed to do string handling in security-critical | |
426 | programs. However, there may be bugs in the code somewhere. In | |
427 | particular, the | |
428 | .B dstr_putf | |
f1583053 | 429 | functions are quite complicated, and could do with some checking by |
b6b9d458 | 430 | independent people who know what they're doing. |
c4ccbbf9 MW |
431 | . |
432 | .\"-------------------------------------------------------------------------- | |
08da152e | 433 | .SH "SEE ALSO" |
c4ccbbf9 | 434 | . |
08da152e | 435 | .BR exc (3), |
c4ccbbf9 | 436 | .BR gprintf (3), |
08da152e | 437 | .BR mLib (3). |
c4ccbbf9 MW |
438 | . |
439 | .\"-------------------------------------------------------------------------- | |
b6b9d458 | 440 | .SH AUTHOR |
c4ccbbf9 | 441 | . |
9b5ac6ff | 442 | Mark Wooding, <mdw@distorted.org.uk> |
c4ccbbf9 MW |
443 | . |
444 | .\"----- That's all, folks -------------------------------------------------- |