Switch to GPL v3
[disorder] / lib / split.c
1 /*
2 * This file is part of DisOrder.
3 * Copyright (C) 2004, 2006-2008 Richard Kettlewell
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "common.h"
20
21 #include <ctype.h>
22 #include <errno.h>
23
24 #include "mem.h"
25 #include "split.h"
26 #include "log.h"
27 #include "charset.h"
28 #include "vector.h"
29
/* Return 1 if C is one of the four whitespace characters that separate
 * fields (space, tab, newline, carriage return) and 0 otherwise.  The set
 * is fixed here; no <ctype.h> classification is consulted. */
static inline int space(int c) {
  switch(c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
36
37 static void no_error_handler(const char attribute((unused)) *msg,
38 void attribute((unused)) *u) {
39 }
40
41 /* TODO: handle combining characters attached to delimiters in some
42 * sane way (might include reporting an error) */
43
44 char **split(const char *p,
45 int *np,
46 unsigned flags,
47 void (*error_handler)(const char *msg, void *u),
48 void *u) {
49 char *f, *g;
50 const char *q;
51 struct vector v;
52 size_t l;
53 int qc;
54
55 if(!error_handler)
56 error_handler = no_error_handler;
57 vector_init(&v);
58 while(*p && !(*p == '#' && (flags & SPLIT_COMMENTS))) {
59 if(space(*p)) {
60 ++p;
61 continue;
62 }
63 if((flags & SPLIT_QUOTES) && (*p == '"' || *p == '\'')) {
64 qc = *p++;
65 l = 0;
66 for(q = p; *q && *q != qc; ++q) {
67 if(*q == '\\' && q[1])
68 ++q;
69 ++l;
70 }
71 if(!*q) {
72 error_handler("unterminated quoted string", u);
73 return 0;
74 }
75 f = g = xmalloc_noptr(l + 1);
76 for(q = p; *q != qc;) {
77 if(*q == '\\') {
78 ++q;
79 switch(*q) {
80 case '\\':
81 case '"':
82 case '\'':
83 *g++ = *q++;
84 break;
85 case 'n':
86 ++q;
87 *g++ = '\n';
88 break;
89 default:
90 error_handler("illegal escape sequence", u);
91 return 0;
92 }
93 } else
94 *g++ = *q++;
95 }
96 *g = 0;
97 p = q + 1;
98 } else {
99 for(q = p; *q && !space(*q); ++q)
100 ;
101 l = q - p;
102 f = xstrndup(p, l);
103 p = q;
104 }
105 vector_append(&v, f);
106 }
107 vector_terminate(&v);
108 if(np)
109 *np = v.nvec;
110 return v.vec;
111 }
112
113 /* TODO handle initial combining characters sanely */
114
/* Quote the UTF-8 string S, if necessary, so that it forms a single field.
 *
 * S is returned unchanged (the same pointer) when it is non-empty and
 * contains no byte <= ' ' and none of the characters " \ ' #.  Otherwise
 * a newly allocated string is returned: S wrapped in double quotes, with
 * " and \ preceded by a backslash and newline written as \n.  The empty
 * string is always quoted, yielding "".
 */
const char *quoteutf8(const char *s) {
  const char *in;
  char *result, *out;
  size_t extra = 0;                     /* bytes added by escaping */
  int must_quote = !*s;                 /* empty strings are always quoted */

  /* One pass decides whether quoting is needed and how many extra bytes
   * the escapes will take.  We rely on ASCII characters only ever
   * representing themselves in UTF-8. */
  for(in = s; *in; ++in) {
    const unsigned char c = (unsigned char)*in;

    if(c <= ' ' || c == '"' || c == '\\' || c == '\'' || c == '#')
      must_quote = 1;
    if(c == '"' || c == '\\' || c == '\n')
      ++extra;
  }
  if(!must_quote)
    return s;

  /* in - s is the length of S; add two quotes and the terminator */
  result = out = xmalloc_noptr((size_t)(in - s) + extra + 3);
  *out++ = '"';
  for(in = s; *in; ++in) {
    if(*in == '\n') {
      *out++ = '\\';
      *out++ = 'n';
      continue;
    }
    if(*in == '"' || *in == '\\')
      *out++ = '\\';
    *out++ = *in;
  }
  *out++ = '"';
  *out = 0;
  return result;
}
164
165 /*
166 Local Variables:
167 c-basic-offset:2
168 comment-column:40
169 End:
170 */