@@@ much mess, mostly manpages
[mLib] / codec / url.3.in
CommitLineData
b6b9d458 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for form-urlencoding
4.\"
5.\" (c) 1999, 2001, 2005--2007, 2009, 2023, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH url 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
08da152e 32.\" @url_initenc
33.\" @url_enc
34.\" @url_initdec
35.\" @url_dec
c4ccbbf9
MW
36.
37.\"--------------------------------------------------------------------------
38.SH NAME
39url \- manipulation of form-urlencoded strings
40.
41.\"--------------------------------------------------------------------------
b6b9d458 42.SH SYNOPSIS
c4ccbbf9 43.
b6b9d458 44.nf
d2a91066 45.B "#include <mLib/url.h>"
d056fbdf 46.PP
adec5584 47.ta 2n
4729aa69 48.B "typedef struct {"
adec5584
MW
49.B " unsigned f;"
50.B " ..."
4729aa69 51.B "} url_ectx;"
d056fbdf 52.PP
4729aa69 53.B "typedef struct {"
adec5584
MW
54.B " unsigned f;"
55.B " ..."
4729aa69 56.B "} url_dctx;"
d056fbdf 57.PP
4729aa69
MW
58.B "#define URLF_STRICT ..."
59.B "#define URLF_LAX ..."
60.B "#define URLF_SEMI ..."
d056fbdf 61.PP
b6b9d458 62.BI "void url_initenc(url_ectx *" ctx );
adec5584
MW
63.ta \w'\fBvoid url_enc('u
64.BI "void url_enc(url_ectx *" ctx ", dstr *" d ,
65.BI " const char *" name ", const char *" value );
d056fbdf 66.PP
b6b9d458 67.BI "void url_initdec(url_dctx *" ctx ", const char *" p );
68.BI "int url_dec(url_dctx *" ctx ", dstr *" n ", dstr *" v );
69.fi
c4ccbbf9
MW
70.
71.\"--------------------------------------------------------------------------
b6b9d458 72.SH DESCRIPTION
c4ccbbf9 73.
b6b9d458 74The functions in
75.B <mLib/url.h>
76read and write `form-urlencoded' data, as specified in RFC1866. The
77encoding represents a sequence of name/value pairs where both the name
78and value are arbitrary binary strings (although the format is optimized
79for textual data). An encoded string contains no nonprintable
80characters or whitespace. This interface is capable of decoding any
81urlencoded string; however, it can currently only
82.I encode
83names and values which do not contain null bytes, because the encoding
84interface uses standard C strings.
85.PP
86Encoding a sequence of name/value pairs is achieved using the
87.B url_enc
88function. It requires as input an
89.IR "encoding context" ,
90represented as an object of type
91.BR url_ectx .
92This must be initialized before use by passing it to the function
93.BR url_initenc .
94Each call to
95.B url_enc
96encodes one name/value pair, appending the encoded output to a dynamic
97string (see
08da152e 98.BR dstr (3)
b6b9d458 99for details).
100.PP
7e4708e4
MW
101You can set flags in the encoding context's
102.B f
103member:
104.TP
105.B URLF_STRICT
106Be strict about escaping non-alphanumeric characters. Without this,
d4efbcd9 107potentially unsafe characters such as
7e4708e4
MW
108.RB ` / '
109and
110.RB ` ~ '
111will be left unescaped, which makes encoded filenames (for example) more
112readable.
113.TP
114.B URLF_LAX
115Be very lax about non-alphanumeric characters. Everything except
116obviously-unsafe characters like
117.RB ` & '
d4efbcd9 118and
7e4708e4
MW
119.RB ` = '
120are left unescaped.
73f6fe8e
MW
121.TP
122.B URLF_SEMI
123Use a semicolon
124.RB ` ; '
125to separate name/value pairs, rather than the ampersand
126.RB ` & '.
7e4708e4 127.PP
b6b9d458 128Decoding a sequence of name/value pairs is performed using the
129.B url_dec
130function. It requires as input a
131.IR "decoding context" ,
132represented as an object of type
133.BR url_dctx .
134This must be initialized before use by passing it to the function
135.BR url_initdec ,
136along with the address of the urlencoded string to decode. The string
137is not modified during decoding. Each call to
138.B url_dec
139extracts a name/value pair. The name and value are appended to the
140dynamic strings
141.I n
142and
143.IR v ,
144so you probably want to reset them before each call. If there are no
145more name/value pairs to read,
146.B url_dec
147returns zero; otherwise it returns a nonzero value.
73f6fe8e
MW
148.PP
149You can set flags in the decoding context's
150.B f
151member:
152.TP
153.B URLF_SEMI
154Allow a semicolon
155.RB ` ; '
156to separate name/value pairs,
157.I in addition to
158the ampersand
159.RB ` & '.
160Without this flag, the semicolon is considered an `ordinary' character
161which can appear unescaped as part of names and values. (Note the
162difference from the same flag's meaning when encoding. When encoding,
163it
164.I forces
165the use of the semicolon, and when decoding, it
166.I permits
167its use.)
c4ccbbf9
MW
168.
169.\"--------------------------------------------------------------------------
b6b9d458 170.SH EXAMPLE
c4ccbbf9 171.
b6b9d458 172The example code below demonstrates converting between a symbol table
173and a urlencoded representation. The code is untested.
174.VS
adec5584 175.ta 2n +2n
b6b9d458 176#include <stdlib.h>
177#include <mLib/alloc.h>
178#include <mLib/dstr.h>
179#include <mLib/sym.h>
180#include <mLib/url.h>
d056fbdf 181.VP
b6b9d458 182typedef struct {
adec5584
MW
183 sym_base _b;
184 char *v;
b6b9d458 185} val;
d056fbdf 186.VP
b6b9d458 187void decode(sym_table *t, const char *p)
188{
adec5584
MW
189 url_dctx c;
190 dstr n = DSTR_INIT, v = DSTR_INIT;
191 val *vv;
192 unsigned f;
d056fbdf 193.VP
adec5584
MW
194 for (url_initdec(&c, p); url_dec(&c, &n, &v); ) {
195 vv = sym_find(t, n.buf, -1, sizeof(*vv), &f);
196 if (f) free(vv->v);
197 vv->v = xstrdup(v.buf);
198 DRESET(&n);
199 DRESET(&v);
200 }
201 dstr_destroy(&n); dstr_destroy(&v);
b6b9d458 202}
d056fbdf 203.VP
b6b9d458 204void encode(sym_table *t, dstr *d)
205{
adec5584
MW
206 sym_iter i;
207 url_ectx c;
208 val *v;
d056fbdf 209.VP
adec5584
MW
210 url_initenc(&c);
211 for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; )
212 url_enc(&c, d, SYM_NAME(v), v->v);
b6b9d458 213}
214.VE
c4ccbbf9
MW
215.
216.\"--------------------------------------------------------------------------
08da152e 217.SH "SEE ALSO"
c4ccbbf9 218.
08da152e 219.BR mLib (3).
c4ccbbf9
MW
220.
221.\"--------------------------------------------------------------------------
b6b9d458 222.SH AUTHOR
c4ccbbf9 223.
9b5ac6ff 224Mark Wooding, <mdw@distorted.org.uk>.
c4ccbbf9
MW
225.
226.\"----- That's all, folks --------------------------------------------------