Commit | Line | Data |
---|---|---|
236f657b | 1 | .\" -*-nroff-*- |
c4ccbbf9 MW |
2 | .\" |
3 | .\" Manual for new-fangled binary encoding and decoding | |
4 | .\" | |
5 | .\" (c) 2009, 2014, 2015, 2019, 2023, 2024 Straylight/Edgeware | |
6 | .\" | |
7 | . | |
8 | .\"----- Licensing notice --------------------------------------------------- | |
9 | .\" | |
10 | .\" This file is part of the mLib utilities library. | |
11 | .\" | |
12 | .\" mLib is free software: you can redistribute it and/or modify it under | |
13 | .\" the terms of the GNU Library General Public License as published by | |
14 | .\" the Free Software Foundation; either version 2 of the License, or (at | |
15 | .\" your option) any later version. | |
16 | .\" | |
17 | .\" mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | .\" License for more details. | |
21 | .\" | |
22 | .\" You should have received a copy of the GNU Library General Public | |
23 | .\" License along with mLib. If not, write to the Free Software | |
24 | .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | .\" USA. | |
26 | . | |
27 | .\"-------------------------------------------------------------------------- | |
28 | .so ../defs.man \" @@@PRE@@@ | |
29 | . | |
30 | .\"-------------------------------------------------------------------------- | |
31 | .TH codec 3mLib "9 January 2009" "Straylight/Edgeware" "mLib utilities library" | |
236f657b MW |
32 | .\" @codec_class |
33 | .\" @codec_strerror | |
34 | .\" @null_codec_class | |
35 | .\" @base64_class | |
36 | .\" @file64_class | |
37 | .\" @base64url_class | |
38 | .\" @base32_class | |
39 | .\" @base32hex_class | |
40 | .\" @hex_class | |
c4ccbbf9 MW |
41 | . |
42 | .\"-------------------------------------------------------------------------- | |
43 | .SH NAME | |
44 | codec \- binary encoding and decoding | |
45 | . | |
46 | .\"-------------------------------------------------------------------------- | |
236f657b | 47 | .SH SYNOPSIS |
c4ccbbf9 | 48 | . |
236f657b MW |
49 | .nf |
50 | .B "#include <mLib/codec.h>" | |
51 | .B "#include <mLib/base64.h>" | |
52 | .B "#include <mLib/base32.h>" | |
53 | .B "#include <mLib/hex.h>" | |
d056fbdf | 54 | .PP |
4729aa69 MW |
55 | .B "#define CDCF_LOWERC ..." |
56 | .B "#define CDCF_IGNCASE ..." | |
57 | .B "#define CDCF_NOEQPAD ..." | |
58 | .B "#define CDCF_IGNEQPAD ..." | |
59 | .B "#define CDCF_IGNEQMID ..." | |
60 | .B "#define CDCF_IGNZPAD ..." | |
61 | .B "#define CDCF_IGNNEWL ..." | |
62 | .B "#define CDCF_IGNINVCH ..." | |
63 | .B "#define CDCF_IGNSPC ..." | |
64 | .B "#define CDCF_IGNJUNK ..." | |
d056fbdf | 65 | .PP |
adec5584 | 66 | .ta 2n |
4729aa69 | 67 | .B "enum {" |
adec5584 MW |
68 | .B " CDCERR_OK = ...," |
69 | .B " CDCERR_INVCH = ...," | |
70 | .B " CDCERR_INVEQPAD = ...," | |
71 | .B " CDCERR_INVZPAD = ..." | |
4729aa69 | 72 | .B "};" |
d056fbdf | 73 | .PP |
4729aa69 | 74 | .B "typedef struct {" |
adec5584 MW |
75 | .B " const char *name;" |
76 | .ta 2n +\w'\fBcodec *(*encoder)('u | |
77 | .BI " codec *(*encoder)(unsigned " flags , | |
78 | .BI " const char *" indent ", unsigned " maxline ); | |
79 | .BI " codec *(*decoder)(unsigned " flags ); | |
80 | .B " ...\&" | |
4729aa69 | 81 | .B "} codec_class;" |
d056fbdf | 82 | .PP |
4729aa69 | 83 | .B "typedef struct {" |
adec5584 | 84 | .B " const codec_ops *ops;" |
4729aa69 | 85 | .B "} codec;" |
d056fbdf | 86 | .PP |
4729aa69 | 87 | .B "typedef struct {" |
adec5584 MW |
88 | .B " const codec_class *c;" |
89 | .BI " int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ); | |
90 | .BI " void (*destroy)(codec *" c ); | |
4729aa69 | 91 | .B "} codec_ops;" |
d056fbdf | 92 | .PP |
236f657b MW |
93 | .B "codec_class null_codec_class;" |
94 | .B "codec_class base64_class, file64_class, base64url_class;" | |
95 | .B "codec_class base32_class, base32hex_class;" | |
96 | .B "codec_class hex_class;" | |
d056fbdf | 97 | .PP |
236f657b MW |
98 | .BI "const char *codec_strerror(int " err ");" |
99 | .fi | |
c4ccbbf9 MW |
100 | . |
101 | .\"-------------------------------------------------------------------------- | |
236f657b | 102 | .SH DESCRIPTION |
c4ccbbf9 | 103 | . |
236f657b MW |
104 | The |
105 | .B codec | |
106 | system provides an object-based interface to functions which encode | |
107 | binary data as plain text and decode the result to recover the original | |
108 | binary data. The interface makes it easy to support multiple encodings | |
109 | and select an appropriate one at runtime. | |
c4ccbbf9 | 110 | . |
236f657b MW |
111 | .SS "The codec_class structure" |
112 | The | |
113 | .B codec_class | |
114 | structure represents a particular encoding format. The structure has | |
115 | the following members. | |
116 | .TP | |
117 | .B "const char *name" | |
118 | The name of the class, as a null-terminated string. The name should not | |
119 | contain whitespace characters. | |
120 | .TP | |
121 | .BI "codec *(*encoder)(unsigned " flags ", const char *" indent ", unsigned " maxline ")" | |
122 | Pointer to a function which constructs a new encoder object, of type | |
123 | .BR codec . | |
124 | The | |
125 | .I flags | |
126 | configure the behaviour of the object; the | |
127 | .I indent | |
128 | string is written to separate lines of output; the integer | |
129 | .I maxline | |
130 | is the maximum length of line to be produced, or zero to forbid line | |
131 | breaking. | |
132 | .TP | |
133 | .BI "codec *(*decoder)(unsigned " flags ")" | |
134 | Pointer to a function which constructs a new decoder object, also of | |
135 | type | |
136 | .BR codec . | |
137 | The | |
138 | .I flags | |
139 | configure the behaviour of the object. | |
140 | .PP | |
141 | The | |
142 | .I flags | |
143 | to the | |
144 | .B encoder | |
145 | and | |
146 | .B decoder | |
147 | functions have the following meanings. | |
148 | .TP | |
149 | .B CDCF_LOWERC | |
150 | For codecs which produce output using a single alphabetic case (e.g., | |
151 | .BR base32 , | |
152 | .BR hex ), | |
153 | emit and accept only lower case; the default is to emit and accept only | |
154 | upper case, for compatibility with RFC4648. If the codec usually | |
155 | produces mixed-case output, then this flag is ignored. | |
156 | .TP | |
157 | .B CDCF_IGNCASE | |
158 | For codecs which produce output using a single alphabetic case, ignore | |
159 | the case of the input when decoding. If the codec usually produces | |
160 | mixed-case output, then this flag is ignored. | |
161 | .TP | |
162 | .B CDCF_NOEQPAD | |
163 | For codecs which usually pad their output (e.g., | |
164 | .BR base64 , | |
165 | .BR base32 ), | |
166 | do not emit or accept padding characters. If the codec does not usually | |
167 | produce padding, or the padding is not redundant, then this flag is | |
168 | ignored. | |
169 | .TP | |
170 | .B CDCF_IGNEQPAD | |
171 | For codecs which usually pad their output, do not treat incorrect (e.g., | |
172 | missing or excessive) padding as an error when decoding. If the codec | |
173 | does not usually produce padding, or the padding is required for | |
174 | unambiguous decoding, then this flag is ignored. | |
175 | .TP | |
176 | .B CDCF_IGNEQMID | |
177 | For codecs which usually pad their output, ignore padding characters | |
178 | wherever they may appear when decoding. Usually padding characters | |
179 | indicate the end of the input, and further input characters are | |
180 | considered erroneous. If the codec does not usually produce padding, or | |
181 | it is impossible to resume decoding correctly having seen padding | |
182 | characters, then this flag is ignored. | |
183 | .TP | |
184 | .B CDCF_IGNZPAD | |
185 | For codecs which need to pad their input, ignore unusual padding bits | |
186 | when decoding. (This is not at all the same thing as the padding | |
187 | characters controlled by the flags above: they deal with padding the | |
188 | length of the encoding | |
189 | .I output | |
190 | up to a suitable multiple of characters; this option deals with padding | |
191 | of the | |
192 | .I input | |
193 | prior to encoding.) If the codec does not add padding bits, or specific | |
194 | values are required for unambiguous decoding, then this flag is ignored. | |
195 | .TP | |
196 | .B CDCF_IGNNEWL | |
197 | Ignore newline (and carriage-return) characters when decoding: the | |
198 | default for RFC4648 codecs is to reject newline characters. If these | |
199 | characters are significant in the encoding, then this flag is ignored. | |
200 | .TP | |
09fbf4d0 MW |
201 | .B CDCF_IGNSPC |
202 | Ignore whitespace characters (other than newlines) when decoding: the | |
203 | default for RFC4648 codecs is to reject whitespace characters. If these | |
204 | characters are significant in the encoding, then this flag is ignored. | |
205 | .TP | |
236f657b MW |
206 | .B CDCF_IGNINVCH |
207 | Ignore any other invalid characters appearing in the input when | |
208 | decoding. | |
209 | .TP | |
210 | .B CDCF_IGNJUNK | |
211 | Ignore all `junk' in the input. This should suppress almost all | |
212 | decoding errors. | |
213 | .PP | |
214 | If you do not set any of the | |
c3dd6b29 | 215 | .BR CDCF_IGN ...\& |
236f657b MW |
216 | flags, a decoder should only accept the exact encoding that the |
217 | corresponding encoder would produce (with | |
218 | .I maxline | |
219 | = 0 to inhibit line-breaking). | |
c4ccbbf9 | 220 | . |
236f657b MW |
221 | .SS "The codec and codec_ops structures" |
222 | The | |
223 | .B codec | |
224 | structure, which represents the state of an encoder or decoder, as returned by | |
225 | the | |
226 | .B encoder | |
227 | and | |
228 | .B decoder | |
229 | functions described above, contains a single member. | |
230 | .TP | |
231 | .B "const codec_ops *ops" | |
232 | Pointer to a | |
233 | .B codec_ops | |
234 | structure which contains operations and metadata for use with the | |
235 | encoder or decoder. | |
236 | .PP | |
237 | The | |
238 | .B codec_ops | |
239 | structure contains the following members. | |
240 | .TP | |
241 | .B "const codec_class *c" | |
242 | Pointer back to the | |
243 | .B codec_class | |
244 | which was used to construct the | |
245 | .B codec | |
246 | object. | |
247 | .TP | |
248 | .BI "int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ")" | |
249 | Encode or decode, using the codec | |
63ba7202 | 250 | .IR c , |
236f657b MW |
251 | the data in the buffer at address |
252 | .I p | |
253 | and continuing for | |
254 | .I sz | |
255 | bytes, appending the output to the dynamic string | |
256 | .I d | |
257 | (see | |
258 | .BR dstr (3)). | |
259 | If the operation was successful, the function returns zero; otherwise it | |
260 | returns a nonzero error code, as described below. | |
261 | .TP | |
262 | .BI "void (*destroy)(codec *" c ")" | |
263 | Destroy the codec object | |
264 | .IR c , | |
265 | freeing any resources it may hold. | |
266 | .PP | |
267 | A codec may buffer its input (e.g., if it needs to see more in order to | |
268 | decide what output to produce next); it may also need to take special | |
269 | action at the end of the input (e.g., flushing buffers, and applying | |
270 | padding). To signal the codec that there is no more input, call the | |
271 | .B code | |
272 | function with a null | |
273 | .I p | |
274 | pointer. It will then write any final output to | |
275 | .IR d . | |
276 | .PP | |
277 | The following error conditions may be reported. | |
278 | .TP | |
279 | .B CDCERR_INVCH | |
280 | An invalid character was encountered while decoding. This includes | |
281 | encountering padding characters if padding is disabled using the | |
282 | .B CDCF_NOEQPAD | |
283 | flag. | |
284 | .TP | |
285 | .B CDCERR_INVEQPAD | |
286 | Invalid padding characters (e.g., wrong characters, or too few, too | |
287 | many, or none at all) were found during decoding. This may also | |
288 | indicate that the input is truncated, even if the codec does not usually | |
289 | perform output padding. | |
290 | .TP | |
291 | .B CDCERR_INVZPAD | |
292 | Invalid padding bits were found during decoding. | |
293 | .PP | |
294 | The | |
295 | .B codec_strerror | |
296 | function converts these error codes to brief, (moderately) | |
297 | human-readable strings. | |
c4ccbbf9 | 298 | . |
236f657b MW |
299 | .SS "Provided codecs" |
300 | The library provides a number of standard codecs. | |
301 | .TP | |
302 | .B base64 | |
303 | Implements Base64 encoding, as defined by RFC4648. Output is | |
304 | mixed-case, so the | |
305 | .B CDCF_LOWERC | |
306 | and | |
307 | .B CDCF_IGNCASE | |
308 | flags are ignored. | |
309 | .TP | |
310 | .B file64 | |
311 | Implements a variant of the Base64 encoding which uses | |
312 | .RB ` % ' | |
313 | in place of | |
314 | .RB ` / ', | |
315 | so that its output is suitable for use as a Unix filename. | |
316 | .TP | |
317 | .B base64url | |
318 | Implements the filename- and URL-safe variant of Base64 encoding, as | |
319 | defined by RFC4648. | |
320 | .TP | |
321 | .B base32 | |
322 | Implements Base32 encoding, as defined by RFC4648. Output is in upper | |
323 | case by default. | |
324 | .TP | |
325 | .B base32hex | |
326 | Implements the extended-hex variant of Base32, as defined by RFC4648. | |
327 | This encoding has the property that it preserves the ordering | |
328 | of messages if padding is suppressed. | |
329 | .TP | |
330 | .B hex | |
331 | Implements hex encoding, defined by RFC4648 under the name Base16. For | |
332 | compatibility with that specification, output is in upper case by | |
333 | default. | |
c4ccbbf9 MW |
334 | . |
335 | .\"-------------------------------------------------------------------------- | |
236f657b | 336 | .SH "SEE ALSO" |
c4ccbbf9 | 337 | . |
236f657b MW |
338 | .BR bincode (1), |
339 | .BR dstr (3), | |
340 | .BR mLib (3). | |
c4ccbbf9 MW |
341 | . |
342 | .\"-------------------------------------------------------------------------- | |
236f657b | 343 | .SH AUTHOR |
c4ccbbf9 | 344 | . |
236f657b | 345 | Mark Wooding, <mdw@distorted.org.uk> |
c4ccbbf9 MW |
346 | . |
347 | .\"----- That's all, folks -------------------------------------------------- |