@@@ misc mess
[mLib] / struct / dstr.3.in
1 .\" -*-nroff-*-
2 .\"
3 .\" Manual for dynamic strings
4 .\"
5 .\" (c) 1999--2003, 2005, 2007, 2009, 2013, 2014, 2023, 2024
6 .\" Straylight/Edgeware
7 .\"
8 .
9 .\"----- Licensing notice ---------------------------------------------------
10 .\"
11 .\" This file is part of the mLib utilities library.
12 .\"
13 .\" mLib is free software: you can redistribute it and/or modify it under
14 .\" the terms of the GNU Library General Public License as published by
15 .\" the Free Software Foundation; either version 2 of the License, or (at
16 .\" your option) any later version.
17 .\"
18 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
19 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
21 .\" License for more details.
22 .\"
23 .\" You should have received a copy of the GNU Library General Public
24 .\" License along with mLib. If not, write to the Free Software
25 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
26 .\" USA.
27 .
28 .\"--------------------------------------------------------------------------
29 .so ../defs.man \" @@@PRE@@@
30 .
31 .\"--------------------------------------------------------------------------
32 .TH dstr 3mLib "8 May 1999" "Straylight/Edgeware" "mLib utilities library"
33 .\" @dstr_create
34 .\" @dstr_destroy
35 .\" @dstr_reset
36 .\" @dstr_ensure
37 .\" @dstr_tidy
38 .
39 .\" @dstr_putc
40 .\" @dstr_putz
41 .\" @dstr_puts
42 .\" @dstr_putf
43 .\" @dstr_vputf
44 .\" @dstr_printops
45 .\" @dstr_putd
46 .\" @dstr_putm
47 .\" @dstr_putline
48 .\" @dstr_write
49 .
50 .\" @DSTR_INIT
51 .\" @DCREATE
52 .\" @DDESTROY
53 .\" @DRESET
54 .\" @DENSURE
55 .\" @DPUTC
56 .\" @DPUTZ
57 .\" @DPUTS
58 .\" @DPUTD
59 .\" @DPUTM
60 .\" @DWRITE
61 .
62 .\"--------------------------------------------------------------------------
63 .SH NAME
64 dstr \- a simple dynamic string type
65 .
66 .\"--------------------------------------------------------------------------
67 .SH SYNOPSIS
68 .
69 .nf
70 .B "#include <mLib/dstr.h>"
71 .PP
72 .B "typedef struct { ...\& } dstr;"
73 .B "#define DSTR_INIT ..."
74 .PP
75 .BI "void dstr_create(dstr *" d );
76 .BI "void dstr_destroy(dstr *" d );
77 .BI "void dstr_reset(dstr *" d );
78 .PP
79 .BI "void dstr_ensure(dstr *" d ", size_t " sz );
80 .BI "void dstr_tidy(dstr *" d );
81 .PP
82 .BI "void dstr_putc(dstr *" d ", int " ch );
83 .BI "void dstr_putz(dstr *" d );
84 .BI "void dstr_puts(dstr *" d ", const char *" s );
85 .BI "int dstr_vputf(dstr *" d ", const char *" p ", va_list *" ap );
86 .BI "int dstr_putf(dstr *" d ", const char *" p ", ...);"
87 .BI "void dstr_putd(dstr *" d ", const dstr *" p );
88 .BI "void dstr_putm(dstr *" d ", const void *" p ", size_t " sz );
89 .BI "int dstr_putline(dstr *" d ", FILE *" fp );
90 .BI "size_t dstr_write(const dstr *" d ", FILE *" fp );
91 .PP
92 .BI "void DCREATE(dstr *" d );
93 .BI "void DDESTROY(dstr *" d );
94 .BI "void DRESET(dstr *" d );
95 .BI "void DENSURE(dstr *" d ", size_t " sz );
96 .BI "void DPUTC(dstr *" d ", char " ch );
97 .BI "void DPUTZ(dstr *" d );
98 .BI "void DPUTS(dstr *" d ", const char *" s );
99 .BI "void DPUTD(dstr *" d ", const dstr *" p );
100 .BI "void DPUTM(dstr *" d ", const void *" p ", size_t " sz );
101 .BI "size_t DWRITE(const dstr *" d ", FILE *" fp );
102 .fi
103 .
104 .\"--------------------------------------------------------------------------
105 .SH DESCRIPTION
106 .
107 The header
108 .B dstr.h
109 declares a type for representing dynamically extending strings, and a
110 small collection of useful operations on them. None of the operations
111 returns a failure result on an out-of-memory condition; instead, the
112 exception
113 .B EXC_NOMEM
114 is raised.
115 .PP
116 Many of the functions which act on dynamic strings have macro
117 equivalents. These equivalent macros may evaluate their arguments
118 multiple times.
119 .
120 .SS "Underlying type"
121 A
122 .B dstr
123 object is a small structure with the following members.
124 The
125 .B buf
126 member points to the actual character data in the string. The data may
127 or may not be null terminated, depending on what operations have
128 recently been performed on it. None of the
129 .B dstr
130 functions depend on the string being null-terminated; indeed, all of
131 them work fine on strings containing arbitrary binary data. You can
132 force null-termination by calling the
133 .B dstr_putz
134 function, or the
135 .B DPUTZ
136 macro.
137 .PP
138 The
139 .B sz
140 member describes the current size of the buffer. This reflects the
141 maximum possible length of string that can be represented in
142 .B buf
143 without allocating a new buffer.
144 .PP
145 The
146 .B len
147 member describes the current length of the string. It is the number of
148 bytes in the string which are actually interesting. The length does
149 .I not
150 include a null-terminating byte, if there is one.
151 .PP
152 The following invariants are maintained by
153 .B dstr
154 and must hold when any function is called:
155 .hP \*o
156 If
157 .B sz
158 is nonzero, then
159 .B buf
160 points to a block of memory of length
161 .BR sz .
162 If
163 .B sz
164 is zero, then
165 .B buf
166 is a null pointer.
167 .hP \*o
168 At all times,
169 .BR sz " \(>= " len .
170 .PP
171 Note that there is no equivalent of the standard C distinction between
172 the empty string (a pointer to an array of characters whose first
173 element is zero) and the nonexistent string (a null pointer). Any
174 .B dstr
175 whose
176 .B len
177 is zero is an empty string.
178 .PP
179 The
180 .B a
181 member refers to the arena from which the string's buffer has been
182 allocated. Immediately after creation, this is set to be
183 .BR arena_stdlib (3);
184 you can set it to point to any other arena of your choice before the
185 buffer is allocated.
186 .
187 .SS "Creation and destruction"
188 The caller is responsible for allocating the
189 .B dstr
190 structure. It can be initialized:
191 .hP \*o
192 using the macro
193 .B DSTR_INIT
194 as an initializer in the declaration of the object,
195 .hP \*o
196 passing its address to the
197 .B dstr_create
198 function, or
199 .hP \*o
200 passing its address to the (equivalent)
201 .B DCREATE
202 macro.
203 .PP
204 The initial value of a
205 .B dstr
206 is the empty string.
207 .PP
208 The additional storage space for a string's contents may be reclaimed by
209 passing the string to the
210 .B dstr_destroy
211 function, or the
212 .B DDESTROY
213 macro. After destruction, a string's value is reset to the empty
214 string:
215 .I "it's still a valid"
216 .BR dstr .
217 However, once a string has been destroyed, it's safe to deallocate the
218 underlying
219 .B dstr
220 object.
221 .PP
222 The
223 .B dstr_reset
224 function empties a string
225 .I without
226 deallocating any memory. Therefore appending more characters is quick,
227 because the old buffer is still there and doesn't need to be allocated.
228 Calling
229 .VS
230 dstr_reset(d);
231 .VE
232 is equivalent to directly assigning
233 .VS
234 d->len = 0;
235 .VE
236 There's also a macro
237 .B DRESET
238 which does the same job as the
239 .B dstr_reset
240 function.
241 .
242 .SS "Extending a string"
243 All memory allocation for strings is done by the function
244 .BR dstr_ensure .
245 Given a pointer
246 .I d
247 to a
248 .B dstr
249 and a size
250 .IR sz ,
251 the function ensures that there are at least
252 .I sz
253 unused bytes in the string's buffer. The current algorithm for
254 extending the buffer is fairly unsophisticated, but seems to work
255 relatively well \- see the source if you really want to know what it's
256 doing.
257 .PP
258 Extending a string never returns a failure result. Instead, if there
259 isn't enough memory for a longer string, the exception
260 .B EXC_NOMEM
261 is raised. See
262 .BR exc (3)
263 for more information about
264 .BR mLib 's
265 exception handling system.
266 .PP
267 Note that if an ensure operation needs to reallocate a string buffer,
268 any pointers you've taken into the string become invalid.
269 .PP
270 There's a macro
271 .B DENSURE
272 which does a quick inline check to see whether there's enough space in
273 a string's buffer. This saves a procedure call when no reallocation
274 needs to be done. The
275 .B DENSURE
276 macro is called in the same way as the
277 .B dstr_ensure
278 function.
279 .PP
280 The function
281 .B dstr_tidy
282 `trims' a string's buffer so that it's just large enough for the string
283 contents and a null terminating byte. This might raise an exception due
284 to lack of memory. (There are two possible ways this might happen.
285 Firstly, the underlying allocator might just be brain-damaged enough to
286 fail on reducing a block's size. Secondly, tidying an empty string with no
287 buffer allocated for it causes allocation of a buffer large enough for
288 the terminating null byte.)
289 .
290 .SS "Contributing data to a string"
291 There is a collection of functions which add data to a string. All of
292 these functions add their new data to the
293 .I end
294 of the string. This is good, because programs usually build strings
295 left-to-right. If you want to do something more clever, that's up to
296 you.
297 .PP
298 Several of these functions have equivalent macros which do the main work
299 inline. (There still might need to be a function call if the buffer
300 needs to be extended.)
301 .PP
302 Any of these functions might extend the string, causing pointers into
303 the string buffer to be invalidated. If you don't want that to happen,
304 pre-ensure enough space before you start.
305 .PP
306 The simplest function is
307 .B dstr_putc
308 which appends a single character
309 .I ch
310 to the end of the string. It has a macro equivalent called
311 .BR DPUTC .
312 .PP
313 The function
314 .B dstr_putz
315 places a zero byte at the end of the string. It does
316 .I not
317 affect the string's length, so any other data added to the string will
318 overwrite the null terminator. This is useful if you want to pass your
319 string to one of the standard C library string-handling functions. The
320 macro
321 .B DPUTZ
322 does the same thing.
323 .PP
324 The function
325 .B dstr_puts
326 writes a C-style null-terminated string to the end of a dynamic string.
327 A terminating zero byte is also written, as if
328 .B dstr_putz
329 were called. The macro
330 .B DPUTS
331 does the same job.
332 .PP
333 The function
334 .B dstr_putf
335 works similarly to the standard
336 .BR printf (3)-style
337 function. It accepts a
338 .BR print (3)-style
339 format string and an arbitrary number of arguments to format and writes
340 the resulting text to the end of a dynamic string, returning the number
341 of characters so written. A terminating zero byte is also appended.
342 There is no macro equivalent of
343 .BR dstr_putf .
344 .PP
345 The function
346 .B dstr_vputf
347 provides access to the `guts' of
348 .BR dstr_putf :
349 given a format string and a pointer to a
350 .BR va_list ,
351 it will format the arguments according to the format string, just as
352 .B dstr_putf
353 does. (Note: that's a
354 .BR "va_list *" ,
355 not a plain
356 .BR va_list ,
357 so that it gets updated properly on exit.)
358 .PP
359 The
360 .B dstr_putf
361 and
362 .B dstr_vputf
363 functions are implemented using
364 .BR gprintf (3).
365 The output operations table is exposed as
366 .BR dstr_printops ;
367 the functions expect the output pointer to be the address of the output
368 .BR dstr .
369 .PP
370 The function
371 .B dstr_putd
372 appends the contents of one dynamic string to another. A null
373 terminator is also appended. The macro
374 .B DPUTD
375 does the same thing.
376 .PP
377 The function
378 .B dstr_putm
379 puts an arbitrary block of memory, addressed by
380 .IR p ,
381 with length
382 .I sz
383 bytes, at the end of a dynamic string. No terminating null is appended:
384 it's assumed that if you're playing with arbitrary chunks of memory then
385 you're probably not going to be using the resulting data as a normal
386 text string. The macro
387 .B DPUTM
388 works the same way.
389 .PP
390 The function
391 .B dstr_putline
392 reads a line from an input stream
393 .I fp
394 and appends it to a string. If an error occurs, or end-of-file is
395 encountered, before any characters have been read, then
396 .B dstr_putline
397 returns the value
398 .B EOF
399 and does not extend the string. Otherwise, it reads until it encounters
400 a newline character, an error, or end-of-file, and returns the number of
401 characters read. If reading was terminated by a newline character, the
402 newline character is
403 .I not
404 inserted in the buffer. A terminating null is appended, as by
405 .BR dstr_putz .
406 .
407 .SS "Other functions"
408 The
409 .B dstr_write
410 function writes a string to an output stream
411 .IR fp .
412 It returns the number of characters written, or
413 .B 0
414 if an error occurred before the first write. No newline character is
415 written to the stream, unless the string actually contains one already.
416 The macro
417 .B DWRITE
418 is equivalent.
419 .
420 .\"--------------------------------------------------------------------------
421 .SH "SECURITY CONSIDERATIONS"
422 .
423 The implementation of the
424 .B dstr
425 functions is designed to do string handling in security-critical
426 programs. However, there may be bugs in the code somewhere. In
427 particular, the
428 .B dstr_putf
429 functions are quite complicated, and could do with some checking by
430 independent people who know what they're doing.
431 .
432 .\"--------------------------------------------------------------------------
433 .SH "SEE ALSO"
434 .
435 .BR exc (3),
436 .BR gprintf (3),
437 .BR mLib (3).
438 .
439 .\"--------------------------------------------------------------------------
440 .SH AUTHOR
441 .
442 Mark Wooding, <mdw@distorted.org.uk>
443 .
444 .\"----- That's all, folks --------------------------------------------------