Commit | Line | Data |
---|---|---|
b6b9d458 | 1 | .\" -*-nroff-*- |
2 | .de VS | |
3 | .sp 1 | |
d66d7727 | 4 | .RS |
b6b9d458 | 5 | .nf |
6 | .ft B | |
7 | .. | |
8 | .de VE | |
9 | .ft R | |
10 | .fi | |
11 | .RE | |
12 | .sp 1 | |
13 | .. | |
08da152e | 14 | .de hP |
b6b9d458 | 15 | .IP |
16 | .ft B | |
17 | \h'-\w'\\$1\ 'u'\\$1\ \c | |
18 | .ft P | |
19 | .. | |
20 | .ie t .ds o \(bu | |
21 | .el .ds o o | |
fbf20b5b | 22 | .TH dstr 3 "8 May 1999" "Straylight/Edgeware" "mLib utilities library" |
7527ed0b | 23 | .SH NAME |
b6b9d458 | 24 | dstr \- a simple dynamic string type |
08da152e | 25 | .\" @dstr_create |
26 | .\" @dstr_destroy | |
27 | .\" @dstr_reset | |
28 | .\" @dstr_ensure | |
29 | .\" @dstr_tidy | |
30 | .\" | |
31 | .\" @dstr_putc | |
32 | .\" @dstr_putz | |
33 | .\" @dstr_puts | |
34 | .\" @dstr_putf | |
35 | .\" @dstr_putd | |
36 | .\" @dstr_putm | |
37 | .\" @dstr_putline | |
38 | .\" @dstr_write | |
39 | .\" | |
e49a7995 | 40 | .\" @DSTR_INIT |
08da152e | 41 | .\" @DCREATE |
42 | .\" @DDESTROY | |
43 | .\" @DRESET | |
44 | .\" @DENSURE | |
45 | .\" @DPUTC | |
46 | .\" @DPUTZ | |
47 | .\" @DPUTS | |
48 | .\" @DPUTD | |
49 | .\" @DPUTM | |
50 | .\" @DWRITE | |
51 | .\" | |
b6b9d458 | 52 | .SH SYNOPSIS |
53 | .nf | |
54 | .B "#include <mLib/dstr.h>" | |
55 | ||
4729aa69 MW |
56 | .B "typedef struct { ...\& } dstr;" |
57 | .B "#define DSTR_INIT ..." | |
58 | ||
b6b9d458 | 59 | .BI "void dstr_create(dstr *" d ); |
60 | .BI "void dstr_destroy(dstr *" d ); | |
61 | .BI "void dstr_reset(dstr *" d ); | |
62 | ||
63 | .BI "void dstr_ensure(dstr *" d ", size_t " sz ); | |
64 | .BI "void dstr_tidy(dstr *" d ); | |
65 | ||
2be33c7c | 66 | .BI "void dstr_putc(dstr *" d ", int " ch ); |
b6b9d458 | 67 | .BI "void dstr_putz(dstr *" d ); |
68 | .BI "void dstr_puts(dstr *" d ", const char *" s ); | |
5a18a126 | 69 | .BI "int dstr_vputf(dstr *" d ", const char *" fmt ", va_list *" ap );
d2a91066 | 70 | .BI "int dstr_putf(dstr *" d ", const char *" fmt ", ...);"
b6b9d458 | 71 | .BI "void dstr_putd(dstr *" d ", const dstr *" p ); |
72 | .BI "void dstr_putm(dstr *" d ", const void *" p ", size_t " sz ); | |
73 | .BI "int dstr_putline(dstr *" d ", FILE *" fp ); | |
74 | .BI "size_t dstr_write(const dstr *" d ", FILE *" fp ); | |
75 | ||
76 | .BI "void DCREATE(dstr *" d ); | |
77 | .BI "void DDESTROY(dstr *" d ); | |
78 | .BI "void DRESET(dstr *" d ); | |
79 | .BI "void DENSURE(dstr *" d ", size_t " sz ); | |
08da152e | 80 | .BI "void DPUTC(dstr *" d ", char " ch );
b6b9d458 | 81 | .BI "void DPUTZ(dstr *" d ); |
82 | .BI "void DPUTS(dstr *" d ", const char *" s ); | |
83 | .BI "void DPUTD(dstr *" d ", const dstr *" p ); | |
84 | .BI "void DPUTM(dstr *" d ", const void *" p ", size_t " sz ); | |
85 | .BI "size_t DWRITE(const dstr *" d ", FILE *" fp ); | |
86 | .fi | |
750e4b6c | 87 | .SH DESCRIPTION |
b6b9d458 | 88 | The header |
89 | .B dstr.h | |
90 | declares a type for representing dynamically extending strings, and a | |
91 | small collection of useful operations on them. None of the operations | |
92 | returns a failure result on an out-of-memory condition; instead, the | |
93 | exception | |
94 | .B EXC_NOMEM | |
95 | is raised. | |
96 | .PP | |
97 | Many of the functions which act on dynamic strings have macro | |
98 | equivalents. These equivalent macros may evaluate their arguments | |
99 | multiple times. | |
750e4b6c | 100 | .SS "Underlying type" |
b6b9d458 | 101 | A |
102 | .B dstr | |
4729aa69 | 103 | object is a small structure with the following members. |
b6b9d458 | 104 | The |
105 | .B buf | |
106 | member points to the actual character data in the string. The data may | |
107 | or may not be null terminated, depending on what operations have | |
108 | recently been performed on it. None of the | |
109 | .B dstr | |
110 | functions depend on the string being null-terminated; indeed, all of | |
111 | them work fine on strings containing arbitrary binary data. You can | |
112 | force null-termination by calling the | |
113 | .B dstr_putz | |
114 | function, or the | |
115 | .B DPUTZ | |
116 | macro. | |
117 | .PP | |
118 | The | |
119 | .B sz | |
120 | member describes the current size of the buffer. This reflects the | |
121 | maximum possible length of string that can be represented in | |
122 | .B buf | |
123 | without allocating a new buffer. | |
124 | .PP | |
125 | The | |
126 | .B len | |
127 | member describes the current length of the string. It is the number of | |
128 | bytes in the string which are actually interesting. The length does | |
129 | .I not | |
130 | include a null-terminating byte, if there is one. | |
131 | .PP | |
132 | The following invariants are maintained by | |
133 | .B dstr | |
134 | and must hold when any function is called: | |
08da152e | 135 | .hP \*o |
d4efbcd9 | 136 | If |
b6b9d458 | 137 | .B sz |
138 | is nonzero, then | |
139 | .B buf | |
140 | points to a block of memory of length | |
141 | .BR sz . | |
142 | If | |
143 | .B sz | |
144 | is zero, then | |
145 | .B buf | |
146 | is a null pointer. | |
08da152e | 147 | .hP \*o |
b6b9d458 | 148 | At all times, |
7527ed0b | 149 | .BR sz " \(>= " len .
b6b9d458 | 150 | .PP |
d2a91066 | 151 | Note that there is no equivalent of the standard C distinction between |
b6b9d458 | 152 | the empty string (a pointer to an array of characters whose first |
d2a91066 | 153 | element is zero) and the nonexistent string (a null pointer). Any |
b6b9d458 | 154 | .B dstr |
155 | whose | |
156 | .B len | |
157 | is zero is an empty string. | |
cededfbe | 158 | .PP |
159 | The | |
160 | .B a | |
161 | member refers to the arena from which the string's buffer has been | |
162 | allocated. Immediately after creation, this is set to be | |
163 | .BR arena_stdlib (3); | |
164 | you can set it to point to any other arena of your choice before the | |
165 | buffer is allocated. | |
750e4b6c | 166 | .SS "Creation and destruction" |
b6b9d458 | 167 | The caller is responsible for allocating the |
168 | .B dstr | |
528c8b4d | 169 | structure. It can be initialized: |
08da152e | 170 | .hP \*o |
528c8b4d | 171 | using the macro |
b6b9d458 | 172 | .B DSTR_INIT |
528c8b4d | 173 | as an initializer in the declaration of the object, |
08da152e | 174 | .hP \*o |
528c8b4d | 175 | passing its address to the |
b6b9d458 | 176 | .B dstr_create |
528c8b4d | 177 | function, or |
08da152e | 178 | .hP \*o |
528c8b4d | 179 | passing its address to the (equivalent) |
b6b9d458 | 180 | .B DCREATE |
181 | macro. | |
182 | .PP | |
183 | The initial value of a | |
184 | .B dstr | |
185 | is the empty string. | |
186 | .PP | |
187 | The additional storage space for a string's contents may be reclaimed by | |
188 | passing it to the | |
189 | .B dstr_destroy | |
190 | function, or the | |
191 | .B DDESTROY | |
192 | macro. After destruction, a string's value is reset to the empty | |
193 | string: | |
194 | .I "it's still a valid" | |
195 | .BR dstr . | |
196 | However, once a string has been destroyed, it's safe to deallocate the | |
197 | underlying | |
198 | .B dstr | |
199 | object. | |
200 | .PP | |
201 | The | |
202 | .B dstr_reset | |
203 | function empties a string | |
204 | .I without | |
205 | deallocating any memory. Therefore appending more characters is quick, | |
d2a91066 | 206 | because the old buffer is still there and doesn't need to be allocated. |
b6b9d458 | 207 | Calling |
208 | .VS | |
209 | dstr_reset(d); | |
210 | .VE | |
d2a91066 | 211 | is equivalent to directly assigning |
b6b9d458 | 212 | .VS |
213 | d->len = 0; | |
214 | .VE | |
215 | There's also a macro | |
216 | .B DRESET | |
217 | which does the same job as the | |
218 | .B dstr_reset | |
219 | function. | |
750e4b6c | 220 | .SS "Extending a string" |
b6b9d458 | 221 | All memory allocation for strings is done by the function |
222 | .BR dstr_ensure . | |
d4efbcd9 | 223 | Given a pointer |
b6b9d458 | 224 | .I d |
225 | to a | |
226 | .B dstr | |
227 | and a size | |
228 | .IR sz , | |
229 | the function ensures that there are at least | |
230 | .I sz | |
231 | unused bytes in the string's buffer. The current algorithm for | |
232 | extending the buffer is fairly unsophisticated, but seems to work | |
233 | relatively well \- see the source if you really want to know what it's | |
234 | doing. | |
235 | .PP | |
236 | Extending a string never returns a failure result. Instead, if there | |
237 | isn't enough memory for a longer string, the exception | |
238 | .B EXC_NOMEM | |
239 | is raised. See | |
08da152e | 240 | .BR exc (3) |
d4efbcd9 | 241 | for more information about |
b6b9d458 | 242 | .BR mLib 's |
243 | exception handling system. | |
244 | .PP | |
245 | Note that if an ensure operation needs to reallocate a string buffer, | |
246 | any pointers you've taken into the string become invalid. | |
247 | .PP | |
248 | There's a macro | |
249 | .B DENSURE | |
250 | which does a quick inline check to see whether there's enough space in | |
251 | a string's buffer. This saves a procedure call when no reallocation | |
252 | needs to be done. The | |
253 | .B DENSURE | |
254 | macro is called in the same way as the | |
255 | .B dstr_ensure | |
256 | function. | |
257 | .PP | |
258 | The function | |
259 | .B dstr_tidy | |
260 | `trims' a string's buffer so that it's just large enough for the string | |
261 | contents and a null terminating byte. This might raise an exception due | |
262 | to lack of memory. (There are two possible ways this might happen. | |
d2a91066 | 263 | Firstly, the underlying allocator might just be brain-damaged enough to |
b6b9d458 | 264 | fail on reducing a block's size. Secondly, tidying an empty string with no |
265 | buffer allocated for it causes allocation of a buffer large enough for | |
266 | the terminating null byte.) | |
750e4b6c | 267 | .SS "Contributing data to a string" |
b6b9d458 | 268 | There is a collection of functions which add data to a string. All of
269 | these functions add their new data to the | |
270 | .I end | |
271 | of the string. This is good, because programs usually build strings | |
272 | left-to-right. If you want to do something more clever, that's up to | |
273 | you. | |
274 | .PP | |
275 | Several of these functions have equivalent macros which do the main work | |
276 | inline. (There still might need to be a function call if the buffer | |
277 | needs to be extended.) | |
278 | .PP | |
279 | Any of these functions might extend the string, causing pointers into | |
280 | the string buffer to be invalidated. If you don't want that to happen, | |
281 | pre-ensure enough space before you start. | |
282 | .PP | |
283 | The simplest function is | |
284 | .B dstr_putc | |
285 | which appends a single character | |
286 | .I ch | |
287 | to the end of the string. It has a macro equivalent called | |
288 | .BR DPUTC . | |
289 | .PP | |
290 | The function | |
291 | .B dstr_putz | |
292 | places a zero byte at the end of the string. It does | |
293 | .I not | |
294 | affect the string's length, so any other data added to the string will | |
295 | overwrite the null terminator. This is useful if you want to pass your | |
296 | string to one of the standard C library string-handling functions. The | |
297 | macro | |
298 | .B DPUTZ | |
299 | does the same thing. | |
300 | .PP | |
301 | The function | |
302 | .B dstr_puts | |
303 | writes a C-style null-terminated string to the end of a dynamic string. | |
304 | A terminating zero byte is also written, as if | |
305 | .B dstr_putz | |
306 | were called. The macro | |
307 | .B DPUTS | |
308 | does the same job. | |
309 | .PP | |
310 | The function | |
311 | .B dstr_putf | |
312 | works similarly to the standard | |
313 | .BR sprintf (3) | |
314 | function. It accepts a | |
315 | .BR printf (3)-style | |
316 | format string and an arbitrary number of arguments to format and writes | |
317 | the resulting text to the end of a dynamic string, returning the number | |
318 | of characters so written. A terminating zero byte is also appended. | |
319 | The formatting is intended to be convenient and safe rather than | |
320 | efficient, so don't expect blistering performance. Similarly, there may | |
321 | be differences between the formatting done by | |
322 | .B dstr_putf | |
323 | and | |
324 | .BR sprintf (3) | |
325 | because the former has to do most of its work itself. In particular, | |
326 | .B dstr_putf | |
eff136f6 | 327 | understands the POSIX |
b6b9d458 | 328 | .RB ` n$ ' |
eff136f6 MW |
329 | positional parameter notation accepted by many Unix C libraries, even if |
330 | the underlying C library does not. There is no macro equivalent of | |
b6b9d458 | 331 | .BR dstr_putf . |
332 | .PP | |
333 | The function | |
334 | .B dstr_vputf | |
335 | provides access to the `guts' of | |
336 | .BR dstr_putf : | |
5a18a126 | 337 | given a format string and a pointer to a |
338 | .BR va_list , | |
339 | it will format the arguments according to the format string, just as | |
b6b9d458 | 340 | .B dstr_putf |
5a18a126 | 341 | does. (Note: that's a |
342 | .BR "va_list *" , | |
343 | not a plain | |
344 | .BR va_list , | |
345 | so that it gets updated properly on exit.) | |
b6b9d458 | 346 | .PP |
347 | The function | |
348 | .B dstr_putd | |
349 | appends the contents of one dynamic string to another. A null | |
350 | terminator is also appended. The macro | |
351 | .B DPUTD | |
352 | does the same thing. | |
353 | .PP | |
354 | The function | |
355 | .B dstr_putm | |
356 | puts an arbitrary block of memory, addressed by | |
357 | .IR p , | |
358 | with length | |
359 | .I sz | |
360 | bytes, at the end of a dynamic string. No terminating null is appended: | |
361 | it's assumed that if you're playing with arbitrary chunks of memory then | |
362 | you're probably not going to be using the resulting data as a normal | |
363 | text string. The macro | |
364 | .B DPUTM | |
365 | works the same way. | |
366 | .PP | |
367 | The function | |
368 | .B dstr_putline | |
369 | reads a line from an input stream | |
370 | .I fp | |
371 | and appends it to a string. If an error occurs, or end-of-file is | |
372 | encountered, before any characters have been read, then | |
373 | .B dstr_putline | |
374 | returns the value | |
750e4b6c | 375 | .B EOF |
376 | and does not extend the string. Otherwise, it reads until it encounters | |
377 | a newline character, an error, or end-of-file, and returns the number of | |
378 | characters read. If reading was terminated by a newline character, the | |
379 | newline character is | |
b6b9d458 | 380 | .I not |
381 | inserted in the buffer. A terminating null is appended, as by | |
382 | .BR dstr_putz . | |
750e4b6c | 383 | .SS "Other functions" |
b6b9d458 | 384 | The |
385 | .B dstr_write | |
386 | function writes a string to an output stream | |
387 | .IR fp . | |
388 | It returns the number of characters written, or | |
389 | .B 0 | |
390 | if an error occurred before the first write. No newline character is | |
391 | written to the stream, unless the string actually contains one already. | |
392 | The macro | |
393 | .B DWRITE | |
394 | is equivalent. | |
395 | .SH "SECURITY CONSIDERATIONS" | |
d2a91066 | 396 | The implementation of the |
b6b9d458 | 397 | .B dstr |
398 | functions is designed to do string handling in security-critical | |
399 | programs. However, there may be bugs in the code somewhere. In | |
400 | particular, the | |
401 | .B dstr_putf | |
f1583053 | 402 | functions are quite complicated, and could do with some checking by |
b6b9d458 | 403 | independent people who know what they're doing. |
08da152e | 404 | .SH "SEE ALSO" |
405 | .BR exc (3), | |
406 | .BR mLib (3). | |
b6b9d458 | 407 | .SH AUTHOR |
9b5ac6ff | 408 | Mark Wooding, <mdw@distorted.org.uk> |