struct/hash.3: Zap spurious space before pointer argument name.
[mLib] / codec / url.3
1 .\" -*-nroff-*-
2 .de VS
3 .sp 1
4 .in +5n
5 .ft B
6 .nf
7 ..
8 .de VE
9 .ft R
10 .in -5n
11 .sp 1
12 .fi
13 ..
14 .TH url 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
15 .SH NAME
16 url \- manipulation of form-urlencoded strings
17 .\" @url_initenc
18 .\" @url_enc
19 .\" @url_initdec
20 .\" @url_dec
21 .SH SYNOPSIS
22 .nf
23 .B "#include <mLib/url.h>"
24
25 .BI "void url_initenc(url_ectx *" ctx );
26 .ds mT \fBvoid url_enc(
27 .BI "\*(mTurl_ectx *" ctx ", dstr *" d ,
28 .BI "\h'\w'\*(mT'u'const char *" name ", const char *" value );
29
30 .BI "void url_initdec(url_dctx *" ctx ", const char *" p );
31 .BI "int url_dec(url_dctx *" ctx ", dstr *" n ", dstr *" v );
32 .fi
33 .SH DESCRIPTION
34 The functions in
35 .B <mLib/url.h>
36 read and write `form-urlencoded' data, as specified in RFC1866. The
37 encoding represents a sequence of name/value pairs where both the name
38 and value are arbitrary binary strings (although the format is optimized
39 for textual data). An encoded string contains no nonprintable
40 characters or whitespace. This interface is capable of decoding any
41 urlencoded string; however, it can currently only
42 .I encode
43 names and values which do not contain null bytes, because the encoding
44 interface uses standard C strings.
45 .PP
46 Encoding a sequence of name/value pairs is achieved using the
47 .B url_enc
48 function. It requires as input an
49 .IR "encoding context" ,
50 represented as an object of type
51 .BR url_ectx .
52 This must be initialized before use by passing it to the function
53 .BR url_initenc .
54 Each call to
55 .B url_enc
56 encodes one name/value pair, appending the encoded output to a dynamic
57 string (see
58 .BR dstr (3)
59 for details).
60 .PP
61 You can set flags in the encoding context's
62 .B f
63 member:
64 .TP
65 .B URLF_STRICT
66 Be strict about escaping non-alphanumeric characters. Without this,
67 potentially unsafe characters such as
68 .RB ` / '
69 and
70 .RB ` ~ '
71 will be left unescaped, which makes encoded filenames (for example) more
72 readable.
73 .TP
74 .B URLF_LAX
75 Be very lax about non-alphanumeric characters. Everything except
76 obviously-unsafe characters like
77 .RB ` & '
78 and
79 .RB ` = '
80 is left unescaped.
81 .TP
82 .B URLF_SEMI
83 Use a semicolon
84 .RB ` ; '
85 to separate name/value pairs, rather than the ampersand
86 .RB ` & '.
87 .PP
88 Decoding a sequence of name/value pairs is performed using the
89 .B url_dec
90 function. It requires as input a
91 .IR "decoding context" ,
92 represented as an object of type
93 .BR url_dctx .
94 This must be initialized before use by passing it to the function
95 .BR url_initdec ,
96 along with the address of the urlencoded string to decode. The string
97 is not modified during decoding. Each call to
98 .B url_dec
99 extracts a name/value pair. The name and value are appended to the
100 dynamic strings
101 .I n
102 and
103 .IR v ,
104 so you probably want to reset them before each call. If there are no
105 more name/value pairs to read,
106 .B url_dec
107 returns zero; otherwise it returns a nonzero value.
108 .PP
109 You can set flags in the decoding context's
110 .B f
111 member:
112 .TP
113 .B URLF_SEMI
114 Allow a semicolon
115 .RB ` ; '
116 to separate name/value pairs,
117 .I in addition to
118 the ampersand
119 .RB ` & '.
120 Without this flag, the semicolon is considered an `ordinary' character
121 which can appear unescaped as part of names and values. (Note the
122 difference from the same flag's meaning when encoding. When encoding,
123 it
124 .I forces
125 the use of the semicolon, and when decoding, it
126 .I permits
127 its use.)
128 .SH EXAMPLE
129 The example code below demonstrates converting between a symbol table
130 and a urlencoded representation. The code is untested.
131 .VS
132 #include <stdlib.h>
133 #include <mLib/alloc.h>
134 #include <mLib/dstr.h>
135 #include <mLib/sym.h>
136 #include <mLib/url.h>
137
138 typedef struct {
139 sym_base _b;
140 char *v;
141 } val;
142
143 void decode(sym_table *t, const char *p)
144 {
145 url_dctx c;
146 dstr n = DSTR_INIT, v = DSTR_INIT;
147
148 for (url_initdec(&c, p); url_dec(&c, &n, &v); ) {
149 unsigned f;
150 val *vv = sym_find(t, n.buf, -1, sizeof(*vv), &f);
151 if (f)
152 free(vv->v);
153 vv->v = xstrdup(v.buf);
154 DRESET(&n);
155 DRESET(&v);
156 }
157 dstr_destroy(&n);
158 dstr_destroy(&v);
159 }
160
161 void encode(sym_table *t, dstr *d)
162 {
163 sym_iter i;
164 url_ectx c;
165 val *v;
166
167 url_initenc(&c);
168 for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; )
169 url_enc(&c, d, SYM_NAME(v), v->v);
170 }
171 .VE
172 .SH "SEE ALSO"
173 .BR mLib (3).
174 .SH AUTHOR
175 Mark Wooding, <mdw@distorted.org.uk>.