| 1 | .\" -*-nroff-*- |
| 2 | .de VS |
| 3 | .sp 1 |
| 4 | .RS |
| 5 | .nf |
| 6 | .ft B |
| 7 | .. |
| 8 | .de VE |
| 9 | .ft R |
| 10 | .fi |
| 11 | .RE |
| 12 | .sp 1 |
| 13 | .. |
| 14 | .de hP |
| 15 | .IP |
| 16 | .ft B |
| 17 | \h'-\w'\\$1\ 'u'\\$1\ \c |
| 18 | .ft P |
| 19 | .. |
| 20 | .ie t .ds o \(bu |
| 21 | .el .ds o o |
| 22 | .TH dstr 3 "8 May 1999" "Straylight/Edgeware" "mLib utilities library" |
| 23 | .SH NAME |
| 24 | dstr \- a simple dynamic string type |
| 25 | .\" @dstr_create |
| 26 | .\" @dstr_destroy |
| 27 | .\" @dstr_reset |
| 28 | .\" @dstr_ensure |
| 29 | .\" @dstr_tidy |
| 30 | .\" |
| 31 | .\" @dstr_putc |
| 32 | .\" @dstr_putz |
| 33 | .\" @dstr_puts |
| 34 | .\" @dstr_putf |
| 35 | .\" @dstr_putd |
| 36 | .\" @dstr_putm |
| 37 | .\" @dstr_putline |
| 38 | .\" @dstr_write |
| 39 | .\" |
| 40 | .\" @DSTR_INIT |
| 41 | .\" @DCREATE |
| 42 | .\" @DDESTROY |
| 43 | .\" @DRESET |
| 44 | .\" @DENSURE |
| 45 | .\" @DPUTC |
| 46 | .\" @DPUTZ |
| 47 | .\" @DPUTS |
| 48 | .\" @DPUTD |
| 49 | .\" @DPUTM |
| 50 | .\" @DWRITE |
| 51 | .\" |
| 52 | .SH SYNOPSIS |
| 53 | .nf |
| 54 | .B "#include <mLib/dstr.h>" |
| 55 | |
| 56 | .BI "void dstr_create(dstr *" d ); |
| 57 | .BI "void dstr_destroy(dstr *" d ); |
| 58 | .BI "void dstr_reset(dstr *" d ); |
| 59 | |
| 60 | .BI "void dstr_ensure(dstr *" d ", size_t " sz ); |
| 61 | .BI "void dstr_tidy(dstr *" d ); |
| 62 | |
| 63 | .BI "void dstr_putc(dstr *" d ", int " ch ); |
| 64 | .BI "void dstr_putz(dstr *" d ); |
| 65 | .BI "void dstr_puts(dstr *" d ", const char *" s ); |
| 66 | .BI "int dstr_vputf(dstr *" d ", const char *" p ", va_list *" ap ); |
| 67 | .BI "int dstr_putf(dstr *" d ", const char *" p ", ...);" |
| 68 | .BI "void dstr_putd(dstr *" d ", const dstr *" p ); |
| 69 | .BI "void dstr_putm(dstr *" d ", const void *" p ", size_t " sz ); |
| 70 | .BI "int dstr_putline(dstr *" d ", FILE *" fp ); |
| 71 | .BI "size_t dstr_write(const dstr *" d ", FILE *" fp ); |
| 72 | |
| 73 | .BI "dstr " d " = DSTR_INIT;" |
| 74 | .BI "void DCREATE(dstr *" d ); |
| 75 | .BI "void DDESTROY(dstr *" d ); |
| 76 | .BI "void DRESET(dstr *" d ); |
| 77 | .BI "void DENSURE(dstr *" d ", size_t " sz ); |
| 78 | .BI "void DPUTC(dstr *" d ", char " ch ); |
| 79 | .BI "void DPUTZ(dstr *" d ); |
| 80 | .BI "void DPUTS(dstr *" d ", const char *" s ); |
| 81 | .BI "void DPUTD(dstr *" d ", const dstr *" p ); |
| 82 | .BI "void DPUTM(dstr *" d ", const void *" p ", size_t " sz ); |
| 83 | .BI "size_t DWRITE(const dstr *" d ", FILE *" fp ); |
| 84 | .fi |
| 85 | .SH DESCRIPTION |
| 86 | The header |
| 87 | .B dstr.h |
| 88 | declares a type for representing dynamically extending strings, and a |
| 89 | small collection of useful operations on them. None of the operations |
| 90 | returns a failure result on an out-of-memory condition; instead, the |
| 91 | exception |
| 92 | .B EXC_NOMEM |
| 93 | is raised. |
| 94 | .PP |
| 95 | Many of the functions which act on dynamic strings have macro |
| 96 | equivalents. These equivalent macros may evaluate their arguments |
| 97 | multiple times. |
| 98 | .SS "Underlying type" |
| 99 | A |
| 100 | .B dstr |
| 101 | object is a small structure with the following members: |
| 102 | .VS |
| 103 | typedef struct dstr { |
| 104 | char *buf; /* Pointer to string buffer */ |
| 105 | size_t sz; /* Size of the buffer */ |
| 106 | size_t len; /* Length of the string */ |
| 107 | arena *a; /* Pointer to arena */ |
| 108 | } dstr; |
| 109 | .VE |
| 110 | The |
| 111 | .B buf |
| 112 | member points to the actual character data in the string. The data may |
| 113 | or may not be null terminated, depending on what operations have |
| 114 | recently been performed on it. None of the |
| 115 | .B dstr |
| 116 | functions depend on the string being null-terminated; indeed, all of |
| 117 | them work fine on strings containing arbitrary binary data. You can |
| 118 | force null-termination by calling the |
| 119 | .B dstr_putz |
| 120 | function, or the |
| 121 | .B DPUTZ |
| 122 | macro. |
| 123 | .PP |
| 124 | The |
| 125 | .B sz |
| 126 | member describes the current size of the buffer. This reflects the |
| 127 | maximum possible length of string that can be represented in |
| 128 | .B buf |
| 129 | without allocating a new buffer. |
| 130 | .PP |
| 131 | The |
| 132 | .B len |
| 133 | member describes the current length of the string. It is the number of |
| 134 | bytes in the string which are actually interesting. The length does |
| 135 | .I not |
| 136 | include a null-terminating byte, if there is one. |
| 137 | .PP |
| 138 | The following invariants are maintained by |
| 139 | .B dstr |
| 140 | and must hold when any function is called: |
| 141 | .hP \*o |
| 142 | If |
| 143 | .B sz |
| 144 | is nonzero, then |
| 145 | .B buf |
| 146 | points to a block of memory of length |
| 147 | .BR sz . |
| 148 | If |
| 149 | .B sz |
| 150 | is zero, then |
| 151 | .B buf |
| 152 | is a null pointer. |
| 153 | .hP \*o |
| 154 | At all times, |
| 155 | .BR sz " \(>= " len . |
| 156 | .PP |
| 157 | Note that there is no equivalent of the standard C distinction between |
| 158 | the empty string (a pointer to an array of characters whose first |
| 159 | element is zero) and the nonexistent string (a null pointer). Any |
| 160 | .B dstr |
| 161 | whose |
| 162 | .B len |
| 163 | is zero is an empty string. |
| 164 | .PP |
| 165 | The |
| 166 | .B a |
| 167 | member refers to the arena from which the string's buffer has been |
| 168 | allocated. Immediately after creation, this is set to be |
| 169 | .BR arena_stdlib (3); |
| 170 | you can set it to point to any other arena of your choice before the |
| 171 | buffer is allocated. |
| 172 | .SS "Creation and destruction" |
| 173 | The caller is responsible for allocating the |
| 174 | .B dstr |
| 175 | structure. It can be initialized: |
| 176 | .hP \*o |
| 177 | using the macro |
| 178 | .B DSTR_INIT |
| 179 | as an initializer in the declaration of the object, |
| 180 | .hP \*o |
| 181 | passing its address to the |
| 182 | .B dstr_create |
| 183 | function, or |
| 184 | .hP \*o |
| 185 | passing its address to the (equivalent) |
| 186 | .B DCREATE |
| 187 | macro. |
| 188 | .PP |
| 189 | The initial value of a |
| 190 | .B dstr |
| 191 | is the empty string. |
| 192 | .PP |
| 193 | The additional storage space for a string's contents may be reclaimed by |
| 194 | passing it to the |
| 195 | .B dstr_destroy |
| 196 | function, or the |
| 197 | .B DDESTROY |
| 198 | macro. After destruction, a string's value is reset to the empty |
| 199 | string: |
| 200 | .I "it's still a valid" |
| 201 | .BR dstr . |
| 202 | However, once a string has been destroyed, it's safe to deallocate the |
| 203 | underlying |
| 204 | .B dstr |
| 205 | object. |
| 206 | .PP |
| 207 | The |
| 208 | .B dstr_reset |
| 209 | function empties a string |
| 210 | .I without |
| 211 | deallocating any memory. Therefore appending more characters is quick, |
| 212 | because the old buffer is still there and doesn't need to be allocated. |
| 213 | Calling |
| 214 | .VS |
| 215 | dstr_reset(d); |
| 216 | .VE |
| 217 | is equivalent to directly assigning |
| 218 | .VS |
| 219 | d->len = 0; |
| 220 | .VE |
| 221 | There's also a macro |
| 222 | .B DRESET |
| 223 | which does the same job as the |
| 224 | .B dstr_reset |
| 225 | function. |
| 226 | .SS "Extending a string" |
| 227 | All memory allocation for strings is done by the function |
| 228 | .BR dstr_ensure . |
| 229 | Given a pointer |
| 230 | .I d |
| 231 | to a |
| 232 | .B dstr |
| 233 | and a size |
| 234 | .IR sz , |
| 235 | the function ensures that there are at least |
| 236 | .I sz |
| 237 | unused bytes in the string's buffer. The current algorithm for |
| 238 | extending the buffer is fairly unsophisticated, but seems to work |
| 239 | relatively well \- see the source if you really want to know what it's |
| 240 | doing. |
| 241 | .PP |
| 242 | Extending a string never returns a failure result. Instead, if there |
| 243 | isn't enough memory for a longer string, the exception |
| 244 | .B EXC_NOMEM |
| 245 | is raised. See |
| 246 | .BR exc (3) |
| 247 | for more information about |
| 248 | .BR mLib 's |
| 249 | exception handling system. |
| 250 | .PP |
| 251 | Note that if an ensure operation needs to reallocate a string buffer, |
| 252 | any pointers you've taken into the string become invalid. |
| 253 | .PP |
| 254 | There's a macro |
| 255 | .B DENSURE |
| 256 | which does a quick inline check to see whether there's enough space in |
| 257 | a string's buffer. This saves a procedure call when no reallocation |
| 258 | needs to be done. The |
| 259 | .B DENSURE |
| 260 | macro is called in the same way as the |
| 261 | .B dstr_ensure |
| 262 | function. |
| 263 | .PP |
| 264 | The function |
| 265 | .B dstr_tidy |
| 266 | `trims' a string's buffer so that it's just large enough for the string |
| 267 | contents and a null terminating byte. This might raise an exception due |
| 268 | to lack of memory. (There are two possible ways this might happen. |
| 269 | Firstly, the underlying allocator might just be brain-damaged enough to |
| 270 | fail on reducing a block's size. Secondly, tidying an empty string with no |
| 271 | buffer allocated for it causes allocation of a buffer large enough for |
| 272 | the terminating null byte.) |
| 273 | .SS "Contributing data to a string" |
| 274 | There is a collection of functions which add data to a string. All of |
| 275 | these functions add their new data to the |
| 276 | .I end |
| 277 | of the string. This is good, because programs usually build strings |
| 278 | left-to-right. If you want to do something more clever, that's up to |
| 279 | you. |
| 280 | .PP |
| 281 | Several of these functions have equivalent macros which do the main work |
| 282 | inline. (There still might need to be a function call if the buffer |
| 283 | needs to be extended.) |
| 284 | .PP |
| 285 | Any of these functions might extend the string, causing pointers into |
| 286 | the string buffer to be invalidated. If you don't want that to happen, |
| 287 | pre-ensure enough space before you start. |
| 288 | .PP |
| 289 | The simplest function is |
| 290 | .B dstr_putc |
| 291 | which appends a single character |
| 292 | .I ch |
| 293 | to the end of the string. It has a macro equivalent called |
| 294 | .BR DPUTC . |
| 295 | .PP |
| 296 | The function |
| 297 | .B dstr_putz |
| 298 | places a zero byte at the end of the string. It does |
| 299 | .I not |
| 300 | affect the string's length, so any other data added to the string will |
| 301 | overwrite the null terminator. This is useful if you want to pass your |
| 302 | string to one of the standard C library string-handling functions. The |
| 303 | macro |
| 304 | .B DPUTZ |
| 305 | does the same thing. |
| 306 | .PP |
| 307 | The function |
| 308 | .B dstr_puts |
| 309 | writes a C-style null-terminated string to the end of a dynamic string. |
| 310 | A terminating zero byte is also written, as if |
| 311 | .B dstr_putz |
| 312 | were called. The macro |
| 313 | .B DPUTS |
| 314 | does the same job. |
| 315 | .PP |
| 316 | The function |
| 317 | .B dstr_putf |
| 318 | works similarly to the standard |
| 319 | .BR sprintf (3) |
| 320 | function. It accepts a |
| 321 | .BR printf (3)-style |
| 322 | format string and an arbitrary number of arguments to format and writes |
| 323 | the resulting text to the end of a dynamic string, returning the number |
| 324 | of characters so written. A terminating zero byte is also appended. |
| 325 | The formatting is intended to be convenient and safe rather than |
| 326 | efficient, so don't expect blistering performance. Similarly, there may |
| 327 | be differences between the formatting done by |
| 328 | .B dstr_putf |
| 329 | and |
| 330 | .BR sprintf (3) |
| 331 | because the former has to do most of its work itself. In particular, |
| 332 | .B dstr_putf |
| 333 | understands the POSIX |
| 334 | .RB ` n$ ' |
| 335 | positional parameter notation accepted by many Unix C libraries, even if |
| 336 | the underlying C library does not. There is no macro equivalent of |
| 337 | .BR dstr_putf . |
| 338 | .PP |
| 339 | The function |
| 340 | .B dstr_vputf |
| 341 | provides access to the `guts' of |
| 342 | .BR dstr_putf : |
| 343 | given a format string and a pointer to a |
| 344 | .BR va_list , |
| 345 | it will format the arguments according to the format string, just as |
| 346 | .B dstr_putf |
| 347 | does. (Note: that's a |
| 348 | .BR "va_list *" , |
| 349 | not a plain |
| 350 | .BR va_list , |
| 351 | so that it gets updated properly on exit.) |
| 352 | .PP |
| 353 | The function |
| 354 | .B dstr_putd |
| 355 | appends the contents of one dynamic string to another. A null |
| 356 | terminator is also appended. The macro |
| 357 | .B DPUTD |
| 358 | does the same thing. |
| 359 | .PP |
| 360 | The function |
| 361 | .B dstr_putm |
| 362 | puts an arbitrary block of memory, addressed by |
| 363 | .IR p , |
| 364 | with length |
| 365 | .I sz |
| 366 | bytes, at the end of a dynamic string. No terminating null is appended: |
| 367 | it's assumed that if you're playing with arbitrary chunks of memory then |
| 368 | you're probably not going to be using the resulting data as a normal |
| 369 | text string. The macro |
| 370 | .B DPUTM |
| 371 | works the same way. |
| 372 | .PP |
| 373 | The function |
| 374 | .B dstr_putline |
| 375 | reads a line from an input stream |
| 376 | .I fp |
| 377 | and appends it to a string. If an error occurs, or end-of-file is |
| 378 | encountered, before any characters have been read, then |
| 379 | .B dstr_putline |
| 380 | returns the value |
| 381 | .B EOF |
| 382 | and does not extend the string. Otherwise, it reads until it encounters |
| 383 | a newline character, an error, or end-of-file, and returns the number of |
| 384 | characters read. If reading was terminated by a newline character, the |
| 385 | newline character is |
| 386 | .I not |
| 387 | inserted in the buffer. A terminating null is appended, as by |
| 388 | .BR dstr_putz . |
| 389 | .SS "Other functions" |
| 390 | The |
| 391 | .B dstr_write |
| 392 | function writes a string to an output stream |
| 393 | .IR fp . |
| 394 | It returns the number of characters written, or |
| 395 | .B 0 |
| 396 | if an error occurred before the first write. No newline character is |
| 397 | written to the stream, unless the string actually contains one already. |
| 398 | The macro |
| 399 | .B DWRITE |
| 400 | is equivalent. |
| 401 | .SH "SECURITY CONSIDERATIONS" |
| 402 | The implementation of the |
| 403 | .B dstr |
| 404 | functions is designed to do string handling in security-critical |
| 405 | programs. However, there may be bugs in the code somewhere. In |
| 406 | particular, the |
| 407 | .BR dstr_putf " and " dstr_vputf |
| 408 | functions are quite complicated, and could do with some checking by |
| 409 | independent people who know what they're doing. |
| 410 | .SH "SEE ALSO" |
| 411 | .BR exc (3), |
| 412 | .BR mLib (3). |
| 413 | .SH AUTHOR |
| 414 | Mark Wooding, <mdw@distorted.org.uk> |