@@@ misc wip
[mLib] / codec / url.3.in
1 .\" -*-nroff-*-
2 .\"
3 .\" Manual for form-urlencoding
4 .\"
5 .\" (c) 1999, 2001, 2005--2007, 2009, 2023, 2024 Straylight/Edgeware
6 .\"
7 .
8 .\"----- Licensing notice ---------------------------------------------------
9 .\"
10 .\" This file is part of the mLib utilities library.
11 .\"
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
16 .\"
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
21 .\"
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 .\" USA.
26 .
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
29 .
30 .\"--------------------------------------------------------------------------
31 .TH url 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
32 .\" @url_initenc
33 .\" @url_enc
34 .\" @url_initdec
35 .\" @url_dec
36 .
37 .\"--------------------------------------------------------------------------
38 .SH NAME
39 url \- manipulation of form-urlencoded strings
40 .
41 .\"--------------------------------------------------------------------------
42 .SH SYNOPSIS
43 .
44 .nf
45 .B "#include <mLib/url.h>"
46 .PP
47 .ta 2n
48 .B "typedef struct {"
49 .B " unsigned f;"
50 .B " ..."
51 .B "} url_ectx;"
52 .PP
53 .B "typedef struct {"
54 .B " unsigned f;"
55 .B " ..."
56 .B "} url_dctx;"
57 .PP
58 .B "#define URLF_STRICT ..."
59 .B "#define URLF_LAX ..."
60 .B "#define URLF_SEMI ..."
61 .PP
62 .BI "void url_initenc(url_ectx *" ctx );
63 .ta \w'\fBvoid url_enc('u
64 .BI "void url_enc(url_ectx *" ctx ", dstr *" d ,
65 .BI " const char *" name ", const char *" value );
66 .PP
67 .BI "void url_initdec(url_dctx *" ctx ", const char *" p );
68 .BI "int url_dec(url_dctx *" ctx ", dstr *" n ", dstr *" v );
69 .fi
70 .
71 .\"--------------------------------------------------------------------------
72 .SH DESCRIPTION
73 .
74 The functions in
75 .B <mLib/url.h>
76 read and write `form-urlencoded' data, as specified in RFC1866. The
77 encoding represents a sequence of name/value pairs where both the name
78 and value are arbitrary binary strings (although the format is optimized
79 for textual data). An encoded string contains no nonprintable
80 characters or whitespace. This interface is capable of decoding any
81 urlencoded string; however, it can currently only
82 .I encode
83 names and values which do not contain null bytes, because the encoding
84 interface uses standard C strings.
85 .PP
86 Encoding a sequence of name/value pairs is achieved using the
87 .B url_enc
88 function. It requires as input an
89 .IR "encoding context" ,
90 represented as an object of type
91 .BR url_ectx .
92 This must be initialized before use by passing it to the function
93 .BR url_initenc .
94 Each call to
95 .B url_enc
96 encodes one name/value pair, appending the encoded output to a dynamic
97 string (see
98 .BR dstr (3)
99 for details).
100 .PP
101 You can set flags in the encoding context's
102 .B f
103 member:
104 .TP
105 .B URLF_STRICT
106 Be strict about escaping non-alphanumeric characters. Without this,
107 potentially unsafe characters such as
108 .RB ` / '
109 and
110 .RB ` ~ '
111 will be left unescaped, which makes encoded filenames (for example) more
112 readable.
113 .TP
114 .B URLF_LAX
115 Be very lax about non-alphanumeric characters. Everything except
116 obviously-unsafe characters like
117 .RB ` & '
118 and
119 .RB ` = '
120 is left unescaped.
121 .TP
122 .B URLF_SEMI
123 Use a semicolon
124 .RB ` ; '
125 to separate name/value pairs, rather than the ampersand
126 .RB ` & '.
127 .PP
128 Decoding a sequence of name/value pairs is performed using the
129 .B url_dec
130 function. It requires as input a
131 .IR "decoding context" ,
132 represented as an object of type
133 .BR url_dctx .
134 This must be initialized before use by passing it to the function
135 .BR url_initdec ,
136 along with the address of the urlencoded string to decode. The string
137 is not modified during decoding. Each call to
138 .B url_dec
139 extracts a name/value pair. The name and value are appended to the
140 dynamic strings
141 .I n
142 and
143 .IR v ,
144 so you probably want to reset them before each call. If there are no
145 more name/value pairs to read,
146 .B url_dec
147 returns zero; otherwise it returns a nonzero value.
148 .PP
149 You can set flags in the decoding context's
150 .B f
151 member:
152 .TP
153 .B URLF_SEMI
154 Allow a semicolon
155 .RB ` ; '
156 to separate name/value pairs,
157 .I in addition to
158 the ampersand
159 .RB ` & '.
160 Without this flag, the semicolon is considered an `ordinary' character
161 which can appear unescaped as part of names and values. (Note the
162 difference from the same flag's meaning when encoding. When encoding,
163 it
164 .I forces
165 the use of the semicolon, and when decoding, it
166 .I permits
167 its use.)
168 .
169 .\"--------------------------------------------------------------------------
170 .SH EXAMPLE
171 .
172 The example code below demonstrates converting between a symbol table
173 and a urlencoded representation. The code is untested.
174 .VS
175 .ta 2n +2n
176 #include <stdlib.h>
177 #include <mLib/alloc.h>
178 #include <mLib/dstr.h>
179 #include <mLib/sym.h>
180 #include <mLib/url.h>
181 .VP
182 typedef struct {
183 sym_base _b;
184 char *v;
185 } val;
186 .VP
187 void decode(sym_table *t, const char *p)
188 {
189 url_dctx c;
190 dstr n = DSTR_INIT, v = DSTR_INIT;
191 val *vv;
192 unsigned f;
193 .VP
194 for (url_initdec(&c, p); url_dec(&c, &n, &v); ) {
195 vv = sym_find(t, n.buf, -1, sizeof(*vv), &f);
196 if (f) free(vv->v);
197 vv->v = xstrdup(v.buf);
198 DRESET(&n);
199 DRESET(&v);
200 }
201 dstr_destroy(&n); dstr_destroy(&v);
202 }
203 .VP
204 void encode(sym_table *t, dstr *d)
205 {
206 sym_iter i;
207 url_ectx c;
208 val *v;
209 .VP
210 url_initenc(&c);
211 for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; )
212 url_enc(&c, d, SYM_NAME(v), v->v);
213 }
214 .VE
215 .
216 .\"--------------------------------------------------------------------------
217 .SH "SEE ALSO"
218 .
219 .BR mLib (3).
220 .
221 .\"--------------------------------------------------------------------------
222 .SH AUTHOR
223 .
224 Mark Wooding, <mdw@distorted.org.uk>.
225 .
226 .\"----- That's all, folks --------------------------------------------------