b6b9d458 |
1 | .\" -*-nroff-*- |
2 | .de VS |
3 | .sp 1 |
d66d7727 |
4 | .RS |
b6b9d458 |
5 | .nf |
6 | .ft B |
7 | .. |
8 | .de VE |
9 | .ft R |
10 | .fi |
11 | .RE |
12 | .sp 1 |
13 | .. |
08da152e |
14 | .de hP |
b6b9d458 |
15 | .IP |
16 | .ft B |
17 | \h'-\w'\\$1\ 'u'\\$1\ \c |
18 | .ft P |
19 | .. |
20 | .ie t .ds o \(bu |
21 | .el .ds o o |
fbf20b5b |
22 | .TH dstr 3 "8 May 1999" "Straylight/Edgeware" "mLib utilities library" |
7527ed0b |
23 | .SH NAME |
b6b9d458 |
24 | dstr \- a simple dynamic string type |
08da152e |
25 | .\" @dstr_create |
26 | .\" @dstr_destroy |
27 | .\" @dstr_reset |
28 | .\" @dstr_ensure |
29 | .\" @dstr_tidy |
30 | .\" |
31 | .\" @dstr_putc |
32 | .\" @dstr_putz |
33 | .\" @dstr_puts |
34 | .\" @dstr_putf |
35 | .\" @dstr_putd |
36 | .\" @dstr_putm |
37 | .\" @dstr_putline |
38 | .\" @dstr_write |
39 | .\" |
e49a7995 |
40 | .\" @DSTR_INIT |
08da152e |
41 | .\" @DCREATE |
42 | .\" @DDESTROY |
43 | .\" @DRESET |
44 | .\" @DENSURE |
45 | .\" @DPUTC |
46 | .\" @DPUTZ |
47 | .\" @DPUTS |
48 | .\" @DPUTD |
49 | .\" @DPUTM |
50 | .\" @DWRITE |
51 | .\" |
b6b9d458 |
52 | .SH SYNOPSIS |
53 | .nf |
54 | .B "#include <mLib/dstr.h>" |
55 | |
56 | .BI "void dstr_create(dstr *" d ); |
57 | .BI "void dstr_destroy(dstr *" d ); |
58 | .BI "void dstr_reset(dstr *" d ); |
59 | |
60 | .BI "void dstr_ensure(dstr *" d ", size_t " sz ); |
61 | .BI "void dstr_tidy(dstr *" d ); |
62 | |
63 | .BI "void dstr_putc(dstr *" d ", char " ch ); |
64 | .BI "void dstr_putz(dstr *" d ); |
65 | .BI "void dstr_puts(dstr *" d ", const char *" s ); |
5a18a126 |
66 | .BI "int dstr_vputf(dstr *" d ", const char *" p ", va_list *" ap ); |
d2a91066 |
67 | .BI "int dstr_putf(dstr *" d ", const char *" p ", ...);" |
b6b9d458 |
68 | .BI "void dstr_putd(dstr *" d ", const dstr *" p ); |
69 | .BI "void dstr_putm(dstr *" d ", const void *" p ", size_t " sz ); |
70 | .BI "int dstr_putline(dstr *" d ", FILE *" fp ); |
71 | .BI "size_t dstr_write(const dstr *" d ", FILE *" fp ); |
72 | |
e49a7995 |
73 | .BI "dstr " d " = DSTR_INIT;" |
b6b9d458 |
74 | .BI "void DCREATE(dstr *" d ); |
75 | .BI "void DDESTROY(dstr *" d ); |
76 | .BI "void DRESET(dstr *" d ); |
77 | .BI "void DENSURE(dstr *" d ", size_t " sz ); |
08da152e |
78 | .BI "void DPUTC(dstr *" d ", char " ch ); |
b6b9d458 |
79 | .BI "void DPUTZ(dstr *" d ); |
80 | .BI "void DPUTS(dstr *" d ", const char *" s ); |
81 | .BI "void DPUTD(dstr *" d ", const dstr *" p ); |
82 | .BI "void DPUTM(dstr *" d ", const void *" p ", size_t " sz ); |
83 | .BI "size_t DWRITE(const dstr *" d ", FILE *" fp ); |
84 | .fi |
750e4b6c |
85 | .SH DESCRIPTION |
b6b9d458 |
86 | The header |
87 | .B dstr.h |
88 | declares a type for representing dynamically extending strings, and a |
89 | small collection of useful operations on them. None of the operations |
90 | returns a failure result on an out-of-memory condition; instead, the |
91 | exception |
92 | .B EXC_NOMEM |
93 | is raised. |
94 | .PP |
95 | Many of the functions which act on dynamic strings have macro |
96 | equivalents. These equivalent macros may evaluate their arguments |
97 | multiple times. |
750e4b6c |
98 | .SS "Underlying type" |
b6b9d458 |
99 | A |
100 | .B dstr |
101 | object is a small structure with the following members: |
102 | .VS |
103 | typedef struct dstr { |
104 | char *buf; /* Pointer to string buffer */ |
105 | size_t sz; /* Size of the buffer */ |
106 | size_t len; /* Length of the string */ |
cededfbe |
107 | arena *a; /* Pointer to arena */ |
b6b9d458 |
108 | } dstr; |
109 | .VE |
110 | The |
111 | .B buf |
112 | member points to the actual character data in the string. The data may |
113 | or may not be null terminated, depending on what operations have |
114 | recently been performed on it. None of the |
115 | .B dstr |
116 | functions depend on the string being null-terminated; indeed, all of |
117 | them work fine on strings containing arbitrary binary data. You can |
118 | force null-termination by calling the |
119 | .B dstr_putz |
120 | function, or the |
121 | .B DPUTZ |
122 | macro. |
123 | .PP |
124 | The |
125 | .B sz |
126 | member describes the current size of the buffer. This reflects the |
127 | maximum possible length of string that can be represented in |
128 | .B buf |
129 | without allocating a new buffer. |
130 | .PP |
131 | The |
132 | .B len |
133 | member describes the current length of the string. It is the number of |
134 | bytes in the string which are actually interesting. The length does |
135 | .I not |
136 | include a null-terminating byte, if there is one. |
137 | .PP |
138 | The following invariants are maintained by |
139 | .B dstr |
140 | and must hold when any function is called: |
08da152e |
141 | .hP \*o |
b6b9d458 |
142 | If |
143 | .B sz |
144 | is nonzero, then |
145 | .B buf |
146 | points to a block of memory of length |
147 | .BR sz . |
148 | If |
149 | .B sz |
150 | is zero, then |
151 | .B buf |
152 | is a null pointer. |
08da152e |
153 | .hP \*o |
b6b9d458 |
154 | At all times, |
7527ed0b |
155 | .BR sz " \(>= " len . |
b6b9d458 |
156 | .PP |
d2a91066 |
157 | Note that there is no equivalent of the standard C distinction between |
b6b9d458 |
158 | the empty string (a pointer to an array of characters whose first |
d2a91066 |
159 | element is zero) and the nonexistent string (a null pointer). Any |
b6b9d458 |
160 | .B dstr |
161 | whose |
162 | .B len |
163 | is zero is an empty string. |
cededfbe |
164 | .PP |
165 | The |
166 | .B a |
167 | member refers to the arena from which the string's buffer has been |
168 | allocated. Immediately after creation, this is set to be |
169 | .BR arena_stdlib (3); |
170 | you can set it to point to any other arena of your choice before the |
171 | buffer is allocated. |
750e4b6c |
172 | .SS "Creation and destruction" |
b6b9d458 |
173 | The caller is responsible for allocating the |
174 | .B dstr |
528c8b4d |
175 | structure. It can be initialized: |
08da152e |
176 | .hP \*o |
528c8b4d |
177 | using the macro |
b6b9d458 |
178 | .B DSTR_INIT |
528c8b4d |
179 | as an initializer in the declaration of the object, |
08da152e |
180 | .hP \*o |
528c8b4d |
181 | passing its address to the |
b6b9d458 |
182 | .B dstr_create |
528c8b4d |
183 | function, or |
08da152e |
184 | .hP \*o |
528c8b4d |
185 | passing its address to the (equivalent) |
b6b9d458 |
186 | .B DCREATE |
187 | macro. |
188 | .PP |
189 | The initial value of a |
190 | .B dstr |
191 | is the empty string. |
192 | .PP |
193 | The additional storage space for a string's contents may be reclaimed by |
194 | passing it to the |
195 | .B dstr_destroy |
196 | function, or the |
197 | .B DDESTROY |
198 | macro. After destruction, a string's value is reset to the empty |
199 | string: |
200 | .I "it's still a valid" |
201 | .BR dstr . |
202 | However, once a string has been destroyed, it's safe to deallocate the |
203 | underlying |
204 | .B dstr |
205 | object. |
206 | .PP |
207 | The |
208 | .B dstr_reset |
209 | function empties a string |
210 | .I without |
211 | deallocating any memory. Therefore appending more characters is quick, |
d2a91066 |
212 | because the old buffer is still there and doesn't need to be allocated. |
b6b9d458 |
213 | Calling |
214 | .VS |
215 | dstr_reset(d); |
216 | .VE |
d2a91066 |
217 | is equivalent to directly assigning |
b6b9d458 |
218 | .VS |
219 | d->len = 0; |
220 | .VE |
221 | There's also a macro |
222 | .B DRESET |
223 | which does the same job as the |
224 | .B dstr_reset |
225 | function. |
750e4b6c |
226 | .SS "Extending a string" |
b6b9d458 |
227 | All memory allocation for strings is done by the function |
228 | .BR dstr_ensure . |
229 | Given a pointer |
230 | .I d |
231 | to a |
232 | .B dstr |
233 | and a size |
234 | .IR sz , |
235 | the function ensures that there are at least |
236 | .I sz |
237 | unused bytes in the string's buffer. The current algorithm for |
238 | extending the buffer is fairly unsophisticated, but seems to work |
239 | relatively well \- see the source if you really want to know what it's |
240 | doing. |
241 | .PP |
242 | Extending a string never returns a failure result. Instead, if there |
243 | isn't enough memory for a longer string, the exception |
244 | .B EXC_NOMEM |
245 | is raised. See |
08da152e |
246 | .BR exc (3) |
b6b9d458 |
247 | for more information about |
248 | .BR mLib 's |
249 | exception handling system. |
250 | .PP |
251 | Note that if an ensure operation needs to reallocate a string buffer, |
252 | any pointers you've taken into the string become invalid. |
253 | .PP |
254 | There's a macro |
255 | .B DENSURE |
256 | which does a quick inline check to see whether there's enough space in |
257 | a string's buffer. This saves a procedure call when no reallocation |
258 | needs to be done. The |
259 | .B DENSURE |
260 | macro is called in the same way as the |
261 | .B dstr_ensure |
262 | function. |
263 | .PP |
264 | The function |
265 | .B dstr_tidy |
266 | `trims' a string's buffer so that it's just large enough for the string |
267 | contents and a null terminating byte. This might raise an exception due |
268 | to lack of memory. (There are two possible ways this might happen. |
d2a91066 |
269 | Firstly, the underlying allocator might just be brain-damaged enough to |
b6b9d458 |
270 | fail on reducing a block's size. Secondly, tidying an empty string with no |
271 | buffer allocated for it causes allocation of a buffer large enough for |
272 | the terminating null byte.) |
750e4b6c |
273 | .SS "Contributing data to a string" |
b6b9d458 |
274 | There is a collection of functions which add data to a string. All of |
275 | these functions add their new data to the |
276 | .I end |
277 | of the string. This is good, because programs usually build strings |
278 | left-to-right. If you want to do something more clever, that's up to |
279 | you. |
280 | .PP |
281 | Several of these functions have equivalent macros which do the main work |
282 | inline. (There still might need to be a function call if the buffer |
283 | needs to be extended.) |
284 | .PP |
285 | Any of these functions might extend the string, causing pointers into |
286 | the string buffer to be invalidated. If you don't want that to happen, |
287 | pre-ensure enough space before you start. |
288 | .PP |
289 | The simplest function is |
290 | .B dstr_putc |
291 | which appends a single character |
292 | .I ch |
293 | to the end of the string. It has a macro equivalent called |
294 | .BR DPUTC . |
295 | .PP |
296 | The function |
297 | .B dstr_putz |
298 | places a zero byte at the end of the string. It does |
299 | .I not |
300 | affect the string's length, so any other data added to the string will |
301 | overwrite the null terminator. This is useful if you want to pass your |
302 | string to one of the standard C library string-handling functions. The |
303 | macro |
304 | .B DPUTZ |
305 | does the same thing. |
306 | .PP |
307 | The function |
308 | .B dstr_puts |
309 | writes a C-style null-terminated string to the end of a dynamic string. |
310 | A terminating zero byte is also written, as if |
311 | .B dstr_putz |
312 | were called. The macro |
313 | .B DPUTS |
314 | does the same job. |
315 | .PP |
316 | The function |
317 | .B dstr_putf |
318 | works similarly to the standard |
319 | .BR sprintf (3) |
320 | function. It accepts a |
321 | .BR printf (3)-style |
322 | format string and an arbitrary number of arguments to format and writes |
323 | the resulting text to the end of a dynamic string, returning the number |
324 | of characters so written. A terminating zero byte is also appended. |
325 | The formatting is intended to be convenient and safe rather than |
326 | efficient, so don't expect blistering performance. Similarly, there may |
327 | be differences between the formatting done by |
328 | .B dstr_putf |
329 | and |
330 | .BR sprintf (3) |
331 | because the former has to do most of its work itself. In particular, |
332 | .B dstr_putf |
333 | doesn't (and probably never will) understand the |
334 | .RB ` n$ ' |
d2a91066 |
335 | positional parameter notation accepted by many Unix C libraries. There |
b6b9d458 |
336 | is no macro equivalent of |
337 | .BR dstr_putf . |
338 | .PP |
339 | The function |
340 | .B dstr_vputf |
341 | provides access to the `guts' of |
342 | .BR dstr_putf : |
5a18a126 |
343 | given a format string and a pointer to a |
344 | .BR va_list , |
345 | it will format the arguments according to the format string, just as |
b6b9d458 |
346 | .B dstr_putf |
5a18a126 |
347 | does. (Note: that's a |
348 | .BR "va_list *" , |
349 | not a plain |
350 | .BR va_list , |
351 | so that it gets updated properly on exit.) |
b6b9d458 |
352 | .PP |
353 | The function |
354 | .B dstr_putd |
355 | appends the contents of one dynamic string to another. A null |
356 | terminator is also appended. The macro |
357 | .B DPUTD |
358 | does the same thing. |
359 | .PP |
360 | The function |
361 | .B dstr_putm |
362 | puts an arbitrary block of memory, addressed by |
363 | .IR p , |
364 | with length |
365 | .I sz |
366 | bytes, at the end of a dynamic string. No terminating null is appended: |
367 | it's assumed that if you're playing with arbitrary chunks of memory then |
368 | you're probably not going to be using the resulting data as a normal |
369 | text string. The macro |
370 | .B DPUTM |
371 | works the same way. |
372 | .PP |
373 | The function |
374 | .B dstr_putline |
375 | reads a line from an input stream |
376 | .I fp |
377 | and appends it to a string. If an error occurs, or end-of-file is |
378 | encountered, before any characters have been read, then |
379 | .B dstr_putline |
380 | returns the value |
750e4b6c |
381 | .B EOF |
382 | and does not extend the string. Otherwise, it reads until it encounters |
383 | a newline character, an error, or end-of-file, and returns the number of |
384 | characters read. If reading was terminated by a newline character, the |
385 | newline character is |
b6b9d458 |
386 | .I not |
387 | inserted in the buffer. A terminating null is appended, as by |
388 | .BR dstr_putz . |
750e4b6c |
389 | .SS "Other functions" |
b6b9d458 |
390 | The |
391 | .B dstr_write |
392 | function writes a string to an output stream |
393 | .IR fp . |
394 | It returns the number of characters written, or |
395 | .B 0 |
396 | if an error occurred before the first write. No newline character is |
397 | written to the stream, unless the string actually contains one already. |
398 | The macro |
399 | .B DWRITE |
400 | is equivalent. |
401 | .SH "SECURITY CONSIDERATIONS" |
d2a91066 |
402 | The implementation of the |
b6b9d458 |
403 | .B dstr |
404 | functions is designed to do string handling in security-critical |
405 | programs. However, there may be bugs in the code somewhere. In |
406 | particular, the |
407 | .B dstr_putf |
f1583053 |
408 | functions are quite complicated, and could do with some checking by |
b6b9d458 |
409 | independent people who know what they're doing. |
08da152e |
410 | .SH "SEE ALSO" |
411 | .BR exc (3), |
412 | .BR mLib (3). |
b6b9d458 |
413 | .SH AUTHOR |
414 | Mark Wooding, <mdw@nsict.org> |