Very ragged work-in-progress.
[sod] / pset.lisp
CommitLineData
abdf50aa
MW
1;;; -*-lisp-*-
2;;;
3;;; Collections of properties
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Simple Object Definition system.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Basic definitions.
30
31(defun property-key (name)
32 "Convert NAME into a keyword.
33
34 If NAME isn't a symbol already, then flip its case (using FROB-CASE),
35 replace underscores by hyphens, and intern into the KEYWORD package."
36 (etypecase name
37 (symbol name)
38 (string (intern (substitute #\- #\_ (frob-case name)) :keyword))))
39
40(defun property-type (value)
41 "Guess the right property type to use for VALUE."
1f1d88f5 42 (typecase value
abdf50aa
MW
43 (symbol :symbol)
44 (integer :integer)
45 (string :string)
1f1d88f5
MW
46 (character :char)
47 (c-fragment :frag)
48 (t :other)))
abdf50aa
MW
49
50(defstruct (property
51 (:conc-name p-)
52 (:constructor make-property
53 (name value
54 &key (type (property-type value)) location seenp
55 &aux (key (property-key name)))))
56 "A simple structure for holding a property in a property set.
57
58 The main useful feature is the ability to tick off properties which have
59 been used, so that we can complain about unrecognized properties."
60 (name nil :type (or symbol string))
61 (value nil :type t)
62 (type nil :type symbol)
63 (location (file-location nil) :type file-location)
64 (key nil :type symbol)
65 (seenp nil :type boolean))
66
67(defun make-property-set (&rest plist)
68 "Make a new property set, with given properties.
69
70 This isn't the way to make properties when parsing, but it works well for
71 programmatic generation. The arguments should form a property list
72 (alternating keywords and values is good).
73
74 An attempt is made to guess property types from the Lisp types of the
75 values. This isn't always successful but it's not too bad. The
76 alternative is manufacturing a PROPERTY-VALUE object by hand and stuffing
77 into the set."
78
79 (do ((plist plist (cddr plist))
80 (pset nil (cons (make-property (car plist) (cadr plist)) pset)))
81 ((endp plist) (nreverse pset))))
82
83(defun string-to-symbol (string &optional (package *package*))
84 "Convert STRING to a symbol in PACKAGE.
85
86 If PACKAGE is nil, then parse off a `PACKAGE:' prefix from STRING to
87 identify the package. A doubled colon allows access to internal symbols,
88 and will intern if necessary. Note that escape characters are /not/
89 processed; don't put colons in package names if you want to use them from
90 SOD property sets."
91
abdf50aa
MW
92 (let* ((length (length string))
93 (colon (position #\: string)))
94 (multiple-value-bind (start internalp)
95 (cond ((not colon) (values 0 t))
96 ((and (< (1+ colon) length)
97 (char= (char string (1+ colon)) #\:))
98 (values (+ colon 2) t))
99 (t
100 (values (1+ colon) nil)))
101 (when colon
102 (let* ((package-name (subseq string 0 colon))
103 (found (find-package package-name)))
104 (unless found
105 (error "Unknown package `~A'" package-name))
106 (setf package found)))
107 (let ((name (subseq string start)))
108 (multiple-value-bind (symbol status)
109 (funcall (if internalp #'intern #'find-symbol) name package)
110 (cond ((or internalp (eq status :external))
111 symbol)
112 ((not status)
113 (error "Symbol `~A' not found in package `~A'"
114 name (package-name package)))
115 (t
116 (error "Symbol `~A' not external in package `~A'"
117 name (package-name package)))))))))
118
119(defgeneric coerce-property-value (value type wanted)
120 (:documentation
121 "Convert VALUE, a property of type TYPE, to be of type WANTED.")
122
123 ;; If TYPE matches WANTED, we'll assume that VALUE already has the right
1f1d88f5
MW
124 ;; form. Otherwise, if nothing else matched, then I guess we'll have to
125 ;; say it didn't work.
abdf50aa
MW
126 (:method (value type wanted)
127 (if (eql type wanted)
128 value
129 (error "Incorrect type: expected ~A but found ~A" wanted type)))
130
1f1d88f5
MW
131 ;; If the caller asks for type T then give him the raw thing.
132 (:method (value type (wanted (eql t)))
133 value)
134
abdf50aa
MW
135 ;; Keywords.
136 (:method ((value symbol) (type (eql :symbol)) (wanted (eql :keyword)))
137 value)
138 (:method ((value string) (type (eql :id)) (wanted (eql :keyword)))
139 (string-to-symbol (substitute #\- #\_ (frob-case value)) :keyword))
140 (:method ((value string) (type (eql :string)) (wanted (eql :keyword)))
141 (string-to-symbol (frob-case value) :keyword))
142
143 ;; Symbols.
144 (:method ((value string) (type (eql :id)) (wanted (eql :symbol)))
145 (string-to-symbol (substitute #\- #\_ (frob-case value))))
146 (:method ((value string) (type (eql :string)) (wanted (eql :symbol)))
147 (string-to-symbol (frob-case value)))
148
149 ;; Identifiers.
150 (:method ((value symbol) (type (eql :symbol)) (wanted (eql :id)))
151 (substitute #\_ #\- (frob-case (symbol-name value)))))
152
153(defun get-property (pset name type &optional default)
154 "Fetch a property from a property set.
155
156 If a property NAME is not found in PSET, or if a property is found, but
157 its type doesn't match TYPE, then return DEFAULT and NIL; otherwise return
158 the value and its file location. In the latter case, mark the property as
159 having been used.
160
1f1d88f5
MW
161 The value returned depends on the TYPE argument provided. If you pass NIL
162 then you get back the entire PROPERTY object. If you pass T, then you get
163 whatever was left in the property set, uninterpreted. Otherwise the value
164 is coerced to the right kind of thing (where possible) and returned."
abdf50aa
MW
165
166 (let ((prop (find name pset :key #'p-key)))
167 (with-default-error-location ((and prop (p-location prop)))
168 (cond ((not prop)
169 (values default nil))
170 ((not type)
171 (setf (p-seenp prop) t)
172 (values prop (p-location prop)))
173 (t
174 (setf (p-seenp prop) t)
175 (values (coerce-property-value (p-value prop)
176 (p-type prop)
177 type)
178 (p-location prop)))))))
179
180(defun check-unused-properties (pset)
181 "Issue errors about unused properties in PSET."
182 (dolist (prop pset)
183 (unless (p-seenp prop)
184 (cerror*-with-location (p-location prop) "Unknown property `~A'"
1f1d88f5 185 (p-name prop)))))
abdf50aa
MW
186
187;;;--------------------------------------------------------------------------
188;;; Property set parsing.
189
190(defun parse-expression (lexer)
191 "Parse an expression from the LEXER.
192
1f1d88f5
MW
193 The return values are the expression's VALUE and TYPE; currently the types
194 are :ID, :INTEGER, :STRING, and :CHAR. If an error prevented a sane value
abdf50aa
MW
195 being produced, the TYPE :INVALID is returned.
196
197 Expression syntax is rather limited at the moment:
198
199 expression : term | expression `+' term | expression `-' term
200 term : factor | term `*' factor | term `/' factor
201 factor : primary | `+' factor | `-' factor
202 primary : integer | identifier | string
203 | `(' expression `)'
204 | `?' lisp-expression
205
206 Identifiers are just standalone things. They don't name values. The
207 operators only work on integer values at the moment. (Confusingly, you
208 can manufacture rational numbers using the division operator, but they
209 still get called integers.)"
210
211 (let ((valstack nil)
212 (opstack nil))
213
214 ;; The following is a simple operator-precedence parser: the
215 ;; recursive-descent parser I wrote the first time was about twice the
216 ;; size and harder to extend.
217 ;;
218 ;; The parser flips between two states, OPERAND and OPERATOR. It starts
219 ;; out in OPERAND state, and tries to parse a sequence of prefix
220 ;; operators followed by a primary expression. Once it's found one, it
221 ;; pushes the operand onto the value stack and flips to OPERATOR state;
222 ;; if it fails, it reports a syntax error and exits. The OPERAND state
223 ;; tries to read a sequence of postfix operators followed by an infix
224 ;; operator; if it fails, it assumes that it hit the stuff following the
225 ;; expression and stops.
226 ;;
227 ;; Each operator is pushed onto a stack consisting of lists of the form
228 ;; (FUNC PREC TY*). The PREC is a precedence -- higher numbers mean
229 ;; tighter binding. The TY* are operand types; operands are popped off
230 ;; the operand stack, checked against the requested types, and passed to
231 ;; the FUNC, which returns a new operand to be pushed in their place.
232 ;;
233 ;; Usually, when a binary operator is pushed, existing stacked operators
234 ;; with higher precedence are applied. Whether operators with /equal/
235 ;; precedence are also applied depends on the associativity of the
236 ;; operator: apply equal precedence operators for left-associative
237 ;; operators, don't apply for right-associative. When we reach the end
238 ;; of the expression, all the remaining operators on the stack are
239 ;; applied.
240 ;;
241 ;; Parenthesized subexpressions are implemented using a hack: when we
242 ;; find an open paren in operand position, a fake operator is pushed with
243 ;; an artificially low precedece, which protects the operators beneath
244 ;; from premature application. The fake operator's function reports an
245 ;; error -- this will be triggered only if we reach the end of the
246 ;; expression before a matching close-paren, because the close-paren
247 ;; handler will pop the fake operator before it does any harm.
248
249 (restart-case
250 (labels ((apply-op (op)
251 ;; Apply the single operator list OP to the values on the
252 ;; value stack.
253 (let ((func (pop op))
254 (args nil))
255 (dolist (ty (reverse (cdr op)))
256 (let ((arg (pop valstack)))
257 (cond ((eq (car arg) :invalid)
258 (setf func nil))
259 ((eq (car arg) ty)
260 (push (cdr arg) args))
261 (t
262 (cerror* "Type mismatch: wanted ~A; found ~A"
263 ty (car arg))
264 (setf func nil)))))
265 (if func
266 (multiple-value-bind (type value) (apply func args)
267 (push (cons type value) valstack))
268 (push '(:invalid . nil) valstack))))
269
270 (apply-all (prec)
271 ;; Apply all operators with precedence PREC or higher.
272 (loop
273 (when (or (null opstack) (< (cadar opstack) prec))
274 (return))
275 (apply-op (pop opstack)))))
276
277 (tagbody
278
279 operand
280 ;; Operand state. Push prefix operators, and try to read a
281 ;; primary operand.
282 (case (token-type lexer)
283
284 ;; Aha. A primary. Push it onto the stack, and see if
285 ;; there's an infix operator.
1f1d88f5 286 ((:integer :id :string :char)
abdf50aa
MW
287 (push (cons (token-type lexer)
288 (token-value lexer))
289 valstack)
290 (go operator))
291
292 ;; Look for a Lisp S-expression.
293 (#\?
294 (with-lexer-stream (stream lexer)
295 (let ((value (eval (read stream t))))
296 (push (cons (property-type value) value) valstack)))
297 (go operator))
298
299 ;; Arithmetic unary operators. Push an operator for `+' for
300 ;; the sake of type-checking.
301 (#\+
302 (push (list (lambda (x) (values :integer x))
303 10 :integer)
304 opstack))
305 (#\-
306 (push (list (lambda (x) (values :integer (- x)))
307 10 :integer)
308 opstack))
309
310 ;; The open-paren hack. Push a magic marker which will
311 ;; trigger an error if we hit the end of the expression.
312 ;; Inside the paren, we're still looking for an operand.
313 (#\(
314 (push (list (lambda ()
315 (error "Expected `)' but found ~A"
316 (format-token lexer)))
317 -1)
318 opstack))
319
320 ;; Failed to find anything. Report an error and give up.
321 (t
322 (error "Expected expression but found ~A"
323 (format-token lexer))))
324
325 ;; Assume prefix operators as the default, so go round for more.
326 (next-token lexer)
327 (go operand)
328
329 operator
330 ;; Operator state. Push postfix operators, and try to read an
331 ;; infix operator. It turns out that we're always a token
332 ;; behind here, so catch up.
333 (next-token lexer)
334 (case (token-type lexer)
335
336 ;; Binary operators.
337 (#\+ (apply-all 3)
338 (push (list (lambda (x y) (values :integer (+ x y)))
339 3 :integer :integer)
340 opstack))
341 (#\- (apply-all 3)
342 (push (list (lambda (x y) (values :integer (- x y)))
343 3 :integer :integer)
344 opstack))
345 (#\* (apply-all 5)
346 (push (list (lambda (x y) (values :integer (* x y)))
347 5 :integer :integer)
348 opstack))
349 (#\/ (apply-all 5)
350 (push (list (lambda (x y)
351 (if (zerop y)
352 (progn (cerror* "Division by zero")
353 (values nil :invalid))
354 (values (/ x y) :integer)))
355 5 :integer :integer)
356 opstack))
357
358 ;; The close-paren hack. Finish off the operators pushed
359 ;; since the open-paren. If the operator stack is now empty,
360 ;; this is someone else's paren, so exit. Otherwise pop our
361 ;; magic marker, and continue looking for an operator.
362 (#\) (apply-all 0)
363 (when (null opstack)
364 (go done))
365 (pop opstack)
366 (go operator))
367
368 ;; Nothing useful. Must have hit the end, so leave.
369 (t (go done)))
370
371 ;; Assume we found the binary operator as a default, so snarf a
372 ;; token and head back.
373 (next-token lexer)
374 (go operand)
375
376 done)
377
378 ;; Apply all the pending operators. If there's an unmatched
379 ;; open paren, this will trigger the error message.
380 (apply-all -99)
381
382 ;; If everything worked out, we should have exactly one operand
383 ;; left. This is the one we want.
384 (assert (and (consp valstack)
385 (null (cdr valstack))))
386 (values (cdar valstack) (caar valstack)))
387 (continue ()
388 :report "Return an invalid value and continue"
389 (values nil :invalid)))))
390
391(defun parse-property-set (lexer)
392 "Parse a property set from LEXER.
393
394 If there wasn't one to parse, return nil; this isn't considered an error,
395 and GET-PROPERTY will perfectly happily report defaults for all requested
396 properties."
397
398 (let ((pset nil))
399 (when (require-token lexer #\[ :errorp nil)
400 (loop
401 (let ((name (require-token lexer :id)))
402 (require-token lexer #\=)
403 (multiple-value-bind (value type) (parse-expression lexer)
404 (unless (eq type :invalid)
405 (push (make-property name value
406 :type type
407 :location (file-location lexer))
408 pset))))
409 (unless (require-token lexer #\, :errorp nil)
410 (return)))
411 (require-token lexer #\])
412 (nreverse pset))))
413
414;;;--------------------------------------------------------------------------
415;;; Testing cruft.
416
417#+test
418(with-input-from-string (raw "[role = before, integer = 42 * (3 - 1]")
419 (let* ((in (make-instance 'position-aware-input-stream :stream raw))
420 (lexer (make-instance 'sod-lexer :stream in)))
421 (next-char lexer)
422 (next-token lexer)
423 (multiple-value-call #'values
424 (parse-property-set lexer)
425 (token-type lexer))))
426
427;;;----- That's all, folks --------------------------------------------------