Commit | Line | Data |
---|---|---|
b6b9d458 | 1 | .\" -*-nroff-*- |
2 | .de VS | |
3 | .sp 1 | |
4 | .in +5n | |
5 | .ft B | |
6 | .nf | |
7 | .. | |
8 | .de VE | |
9 | .ft R | |
10 | .in -5n | |
11 | .sp 1 | |
12 | .fi | |
13 | .. | |
fbf20b5b | 14 | .TH url 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library" |
b6b9d458 | 15 | .SH NAME |
16 | url \- manipulation of form-urlencoded strings | |
08da152e | 17 | .\" @url_initenc |
18 | .\" @url_enc | |
19 | .\" @url_initdec | |
20 | .\" @url_dec | |
b6b9d458 | 21 | .SH SYNOPSIS |
22 | .nf | |
d2a91066 | 23 | .B "#include <mLib/url.h>" |
b6b9d458 | 24 | |
25 | .BI "void url_initenc(url_ectx *" ctx ); | |
26 | .BI "void url_enc(url_ectx *" ctx ", dstr *" d , | |
27 | .BI " const char *" name ", const char *" value ); | |
28 | ||
29 | .BI "void url_initdec(url_dctx *" ctx ", const char *" p ); | |
30 | .BI "int url_dec(url_dctx *" ctx ", dstr *" n ", dstr *" v ); | |
31 | .fi | |
32 | .SH DESCRIPTION | |
33 | The functions in | |
34 | .B <mLib/url.h> | |
35 | read and write `form-urlencoded' data, as specified in RFC1866. The | |
36 | encoding represents a sequence of name/value pairs where both the name | |
37 | and value are arbitrary binary strings (although the format is optimized | |
38 | for textual data). An encoded string contains no nonprintable | |
39 | characters or whitespace. This interface is capable of decoding any | |
40 | urlencoded string; however, it can currently only | |
41 | .I encode | |
42 | names and values which do not contain null bytes, because the encoding | |
43 | interface uses standard C strings. | |
44 | .PP | |
45 | Encoding a sequence of name/value pairs is achieved using the | |
46 | .B url_enc | |
47 | function. It requires as input an | |
48 | .IR "encoding context" , | |
49 | represented as an object of type | |
50 | .BR url_ectx . | |
51 | This must be initialized before use by passing it to the function | |
52 | .BR url_initenc . | |
53 | Each call to | |
54 | .B url_enc | |
55 | encodes one name/value pair, appending the encoded output to a dynamic | |
56 | string (see | |
08da152e | 57 | .BR dstr (3) |
b6b9d458 | 58 | for details). |
59 | .PP | |
7e4708e4 MW |
60 | You can set flags in the encoding context's |
61 | .B f | |
62 | member: | |
63 | .TP | |
64 | .B URLF_STRICT | |
65 | Be strict about escaping non-alphanumeric characters. Without this, | |
d4efbcd9 | 66 | potentially unsafe characters such as |
7e4708e4 MW |
67 | .RB ` / ' |
68 | and | |
69 | .RB ` ~ ' | |
70 | will be left unescaped, which makes encoded filenames (for example) more | |
71 | readable. | |
72 | .TP | |
73 | .B URLF_LAX | |
74 | Be very lax about non-alphanumeric characters. Everything except | |
75 | obviously-unsafe characters like | |
76 | .RB ` & ' | |
d4efbcd9 | 77 | and |
7e4708e4 MW |
78 | .RB ` = ' |
79 | is left unescaped. | |
73f6fe8e MW |
80 | .TP |
81 | .B URLF_SEMI | |
82 | Use a semicolon | |
83 | .RB ` ; ' | |
84 | to separate name/value pairs, rather than the ampersand | |
85 | .RB ` & '. | |
7e4708e4 | 86 | .PP |
b6b9d458 | 87 | Decoding a sequence of name/value pairs is performed using the |
88 | .B url_dec | |
89 | function. It requires as input a | |
90 | .IR "decoding context" , | |
91 | represented as an object of type | |
92 | .BR url_dctx . | |
93 | This must be initialized before use by passing it to the function | |
94 | .BR url_initdec , | |
95 | along with the address of the urlencoded string to decode. The string | |
96 | is not modified during decoding. Each call to | |
97 | .B url_dec | |
98 | extracts a name/value pair. The name and value are appended to the | |
99 | dynamic strings | |
100 | .I n | |
101 | and | |
102 | .IR v , | |
103 | so you probably want to reset them before each call. If there are no | |
104 | more name/value pairs to read, | |
105 | .B url_dec | |
106 | returns zero; otherwise it returns a nonzero value. | |
73f6fe8e MW |
107 | .PP |
108 | You can set flags in the decoding context's | |
109 | .B f | |
110 | member: | |
111 | .TP | |
112 | .B URLF_SEMI | |
113 | Allow a semicolon | |
114 | .RB ` ; ' | |
115 | to separate name/value pairs, | |
116 | .I in addition to | |
117 | the ampersand | |
118 | .RB ` & '. | |
119 | Without this flag, the semicolon is considered an `ordinary' character | |
120 | which can appear unescaped as part of names and values. (Note the | |
121 | difference from the same flag's meaning when encoding. When encoding, | |
122 | it | |
123 | .I forces | |
124 | the use of the semicolon, and when decoding, it | |
125 | .I permits | |
126 | its use.) | |
b6b9d458 | 127 | .SH EXAMPLE |
128 | The example code below demonstrates converting between a symbol table | |
129 | and a urlencoded representation. The code is untested. | |
130 | .VS | |
131 | #include <stdlib.h> | |
132 | #include <mLib/alloc.h> | |
133 | #include <mLib/dstr.h> | |
134 | #include <mLib/sym.h> | |
135 | #include <mLib/url.h> | |
136 | ||
137 | typedef struct { | |
138 | sym_base _b; | |
139 | char *v; | |
140 | } val; | |
141 | ||
142 | void decode(sym_table *t, const char *p) | |
143 | { | |
144 | url_dctx c; | |
145 | dstr n = DSTR_INIT, v = DSTR_INIT; | |
146 | ||
147 | for (url_initdec(&c, p); url_dec(&c, &n, &v); ) { | |
148 | unsigned f; | |
149 | val *vv = sym_find(t, n.buf, -1, sizeof(*vv), &f); | |
150 | if (f) | |
151 | free(vv->v); | |
152 | vv->v = xstrdup(v.buf); | |
153 | DRESET(&n); | |
154 | DRESET(&v); | |
155 | } | |
156 | dstr_destroy(&n); | |
157 | dstr_destroy(&v); | |
158 | } | |
159 | ||
160 | void encode(sym_table *t, dstr *d) | |
161 | { | |
162 | sym_iter i; | |
163 | url_ectx c; | |
164 | val *v; | |
165 | ||
166 | url_initenc(&c); | |
167 | for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; ) | |
168 | url_enc(&c, d, SYM_NAME(v), v->v); | |
169 | } | |
170 | .VE | |
08da152e | 171 | .SH "SEE ALSO" |
172 | .BR mLib (3). | |
b6b9d458 | 173 | .SH AUTHOR |
9b5ac6ff | 174 | Mark Wooding, <mdw@distorted.org.uk>. |