@@@ much mess, mostly manpages
[mLib] / codec / codec.3.in
CommitLineData
236f657b 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for new-fangled binary encoding and decoding
4.\"
5.\" (c) 2009, 2014, 2015, 2019, 2023, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH codec 3mLib "9 January 2009" "Straylight/Edgeware" "mLib utilities library"
236f657b
MW
32.\" @codec_class
33.\" @codec_strerror
34.\" @null_codec_class
35.\" @base64_class
36.\" @file64_class
37.\" @base64url_class
38.\" @base32_class
39.\" @base32hex_class
40.\" @hex_class
c4ccbbf9
MW
41.
42.\"--------------------------------------------------------------------------
43.SH NAME
44codec \- binary encoding and decoding
45.
46.\"--------------------------------------------------------------------------
236f657b 47.SH SYNOPSIS
c4ccbbf9 48.
236f657b
MW
49.nf
50.B "#include <mLib/codec.h>"
51.B "#include <mLib/base64.h>"
52.B "#include <mLib/base32.h>"
53.B "#include <mLib/hex.h>"
d056fbdf 54.PP
4729aa69
MW
55.B "#define CDCF_LOWERC ..."
56.B "#define CDCF_IGNCASE ..."
57.B "#define CDCF_NOEQPAD ..."
58.B "#define CDCF_IGNEQPAD ..."
59.B "#define CDCF_IGNEQMID ..."
60.B "#define CDCF_IGNZPAD ..."
61.B "#define CDCF_IGNNEWL ..."
62.B "#define CDCF_IGNINVCH ..."
63.B "#define CDCF_IGNSPC ..."
64.B "#define CDCF_IGNJUNK ..."
d056fbdf 65.PP
adec5584 66.ta 2n
4729aa69 67.B "enum {"
adec5584
MW
68.B " CDCERR_OK = ...,"
69.B " CDCERR_INVCH = ...,"
70.B " CDCERR_INVEQPAD = ...,"
71.B " CDCERR_INVZPAD = ..."
4729aa69 72.B "};"
d056fbdf 73.PP
4729aa69 74.B "typedef struct {"
adec5584
MW
75.B " const char *name;"
76.ta 2n +\w'\fBcodec *(*encoder)('u
77.BI " codec *(*encoder)(unsigned " flags ,
78.BI " const char *" indent ", unsigned " maxline );
79.BI " codec *(*decoder)(unsigned " flags );
80.B " ...\&"
4729aa69 81.B "} codec_class;"
d056fbdf 82.PP
4729aa69 83.B "typedef struct {"
adec5584 84.B " const codec_ops *ops;"
4729aa69 85.B "} codec;"
d056fbdf 86.PP
4729aa69 87.B "typedef struct {"
adec5584
MW
88.B " const codec_class *c;"
89.BI " int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d );
90.BI " void (*destroy)(codec *" c );
4729aa69 91.B "} codec_ops;"
d056fbdf 92.PP
236f657b
MW
93.B "codec_class null_codec_class;"
94.B "codec_class base64_class, file64_class, base64url_class;"
95.B "codec_class base32_class, base32hex_class;"
96.B "codec_class hex_class;"
d056fbdf 97.PP
236f657b
MW
98.BI "const char *codec_strerror(int " err ");"
99.fi
c4ccbbf9
MW
100.
101.\"--------------------------------------------------------------------------
236f657b 102.SH DESCRIPTION
c4ccbbf9 103.
236f657b
MW
104The
105.B codec
106system provides an object-based interface to functions which encode
107binary data as plain text and decode the result to recover the original
108binary data. The interface makes it easy to support multiple encodings
109and select an appropriate one at runtime.
c4ccbbf9 110.
236f657b
MW
111.SS "The codec_class structure"
112The
113.B codec_class
114structure represents a particular encoding format. The structure has
115the following members.
116.TP
117.B "const char *name"
118The name of the class, as a null-terminated string. The name should not
119contain whitespace characters.
120.TP
121.BI "codec *(*encoder)(unsigned " flags ", const char *" indent ", unsigned " maxline ")"
122Pointer to a function which constructs a new encoder object, of type
123.BR codec .
124The
125.I flags
126configure the behaviour of the object; the
127.I indent
128string is written to separate lines of output; the integer
129.I maxline
130is the maximum length of line to be produced, or zero to forbid line
131breaking.
132.TP
133.BI "codec *(*decoder)(unsigned " flags ")"
134Pointer to a function which constructs a new decoder object, also of
135type
136.BR codec .
137The
138.I flags
139configure the behaviour of the object.
140.PP
141The
142.I flags
143to the
144.B encoder
145and
146.B decoder
147functions have the following meanings.
148.TP
149.B CDCF_LOWERC
150For codecs which produce output using a single alphabetic case (e.g.,
151.BR base32 ,
152.BR hex ),
153emit and accept only lower case; the default is to emit and accept only
154upper case, for compatibility with RFC4648. If the codec usually
155produces mixed-case output, then this flag is ignored.
156.TP
157.B CDCF_IGNCASE
158For codecs which produce output using a single alphabetic case, ignore
159the case of the input when decoding. If the codec usually produces
160mixed-case output, then this flag is ignored.
161.TP
162.B CDCF_NOEQPAD
163For codecs which usually pad their output (e.g.,
164.BR base64 ,
165.BR base32 ),
166do not emit or accept padding characters. If the codec does not usually
167produce padding, or the padding is not redundant, then this flag is
168ignored.
169.TP
170.B CDCF_IGNEQPAD
171For codecs which usually pad their output, do not treat incorrect (e.g.,
172missing or excessive) padding as an error when decoding. If the codec
173does not usually produce padding, or the padding is required for
174unambiguous decoding, then this flag is ignored.
175.TP
176.B CDCF_IGNEQMID
177For codecs which usually pad their output, ignore padding characters
178wherever they may appear when decoding. Usually padding characters
179indicate the end of the input, and further input characters are
180considered erroneous. If the codec does not usually produce padding, or
181it is impossible to resume decoding correctly having seen padding
182characters, then this flag is ignored.
183.TP
184.B CDCF_IGNZPAD
185For codecs which need to pad their input, ignore unusual padding bits
186when decoding. (This is not at all the same thing as the padding
187characters controlled by the flags above: they deal with padding the
188length of the encoding
189.I output
190up to a suitable multiple of characters; this option deals with padding
191of the
192.I input
193prior to encoding.) If the codec does not add padding bits, or specific
194values are required for unambiguous decoding, then this flag is ignored.
195.TP
196.B CDCF_IGNNEWL
197Ignore newline (and carriage-return) characters when decoding: the
198default for RFC4648 codecs is to reject newline characters. If these
199characters are significant in the encoding, then this flag is ignored.
200.TP
09fbf4d0
MW
201.B CDCF_IGNSPC
202Ignore whitespace characters (other than newlines) when decoding: the
203default for RFC4648 codecs is to reject whitespace characters. If these
204characters are significant in the encoding, then this flag is ignored.
205.TP
236f657b
MW
206.B CDCF_IGNINVCH
207Ignore any other invalid characters appearing in the input when
208decoding.
209.TP
210.B CDCF_IGNJUNK
211Ignore all `junk' in the input. This should suppress almost all
212decoding errors.
213.PP
214If you do not set any of the
c3dd6b29 215.BR CDCF_IGN ...\&
236f657b
MW
216flags, a decoder should only accept the exact encoding that the
217corresponding encoder would produce (with
218.I maxline
219= 0 to inhibit line-breaking).
c4ccbbf9 220.
236f657b
MW
221.SS "The codec and codec_ops structures"
222The
223.B codec
224structure represents the state of an encoder or decoder, as returned by
225the
226.B encoder
227and
228.B decoder
229functions described above, and contains a single member.
230.TP
231.B "const codec_ops *ops"
232Pointer to a
233.B codec_ops
234structure which contains operations and metadata for use with the
235encoder or decoder.
236.PP
237The
238.B codec_ops
239structure contains the following members.
240.TP
241.B "const codec_class *c"
242Pointer back to the
243.B codec_class
244which was used to construct the
245.B codec
246object.
247.TP
248.BI "int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ")"
249Encode or decode, using the codec
63ba7202 250.IR c ,
236f657b
MW
251the data in the buffer at address
252.I p
253and continuing for
254.I sz
255bytes, appending the output to the dynamic string
256.I d
257(see
258.BR dstr (3)).
259If the operation was successful, the function returns zero; otherwise it
260returns a nonzero error code, as described below.
261.TP
262.BI "void (*destroy)(codec *" c ")"
263Destroy the codec object
264.IR c ,
265freeing any resources it may hold.
266.PP
267A codec may buffer its input (e.g., if it needs to see more in order to
268decide what output to produce next); it may also need to take special
269action at the end of the input (e.g., flushing buffers, and applying
270padding). To signal the codec that there is no more input, call the
271.B code
272function with a null
273.I p
274pointer. It will then write any final output to
275.IR d .
276.PP
277The following error conditions may be reported.
278.TP
279.B CDCERR_INVCH
280An invalid character was encountered while decoding. This includes
281encountering padding characters if padding is disabled using the
282.B CDCF_NOEQPAD
283flag.
284.TP
285.B CDCERR_INVEQPAD
286Invalid padding characters (e.g., wrong characters, or too few, too
287many, or none at all) were found during decoding. This may also
288indicate that the input is truncated, even if the codec does not usually
289perform output padding.
290.TP
291.B CDCERR_INVZPAD
292Invalid padding bits were found during decoding.
293.PP
294The
295.B codec_strerror
296function converts these error codes to brief, (moderately)
297human-readable strings.
c4ccbbf9 298.
236f657b
MW
299.SS "Provided codecs"
300The library provides a number of standard codecs.
301.TP
302.B base64
303Implements Base64 encoding, as defined by RFC4648. Output is
304mixed-case, so the
305.B CDCF_LOWERC
306and
307.B CDCF_IGNCASE
308flags are ignored.
309.TP
310.B file64
311Implements a variant of the Base64 encoding which uses
312.RB ` % '
313in place of
314.RB ` / ',
315so that its output is suitable for use as a Unix filename.
316.TP
317.B base64url
318Implements the filename- and URL-safe variant of Base64 encoding, as
319defined by RFC4648.
320.TP
321.B base32
322Implements Base32 encoding, as defined by RFC4648. Output is in upper
323case by default.
324.TP
325.B base32hex
326Implements the extended-hex variant of Base32, as defined by RFC4648.
327This encoding has the property that the encoding preserves the ordering
328of messages if padding is suppressed.
329.TP
330.B hex
331Implements hex encoding, defined by RFC4648 under the name Base16. For
332compatibility with that specification, output is in upper case by
333default.
c4ccbbf9
MW
334.
335.\"--------------------------------------------------------------------------
236f657b 336.SH "SEE ALSO"
c4ccbbf9 337.
236f657b
MW
338.BR bincode (1),
339.BR dstr (3),
340.BR mLib (3).
c4ccbbf9
MW
341.
342.\"--------------------------------------------------------------------------
236f657b 343.SH AUTHOR
c4ccbbf9 344.
236f657b 345Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
346.
347.\"----- That's all, folks --------------------------------------------------