Another day.
[sod] / pset.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Collections of properties
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Simple Object Definition system.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (cl:in-package #:sod)
27
28 ;;;--------------------------------------------------------------------------
29 ;;; Property representation.
30
(defun property-key (name)
  "Return the symbol under which a property called NAME is filed.

   A symbol is handed back exactly as it was given.  A string is passed
   through FROB-CASE, has its underscores replaced by hyphens, and is then
   interned in the KEYWORD package.  Any other kind of NAME is a type
   error."
  (etypecase name
    (string
     (let ((lispy-name (substitute #\- #\_ (frob-case name))))
       (intern lispy-name :keyword)))
    (symbol
     name)))
39
(defun property-type (value)
  "Pick a default property type tag from the Lisp type of VALUE.

   The checks are made in this order: symbols give :SYMBOL, integers give
   :INTEGER, strings give :STRING, characters give :CHAR and C-FRAGMENT
   objects give :FRAG.  Anything else is tagged :OTHER."
  (cond ((symbolp value) :symbol)
        ((integerp value) :integer)
        ((stringp value) :string)
        ((characterp value) :char)
        ((typep value 'c-fragment) :frag)
        (t :other)))
49
(defstruct (property
             (:conc-name p-)
             (:constructor make-property
                 (name value
                  &key ((:location %where))
                       (type (property-type value))
                       seenp
                  &aux (key (property-key name))
                       (location (file-location %where)))))
  "One entry in a property set.

   Besides the name and value, each property carries a flag recording
   whether anybody has asked for it yet; this is what lets us complain
   about properties which nobody recognized.

   The type tag is stored explicitly, rather than being derived from the
   value, because identifiers, strings and symbols must be told apart and
   the Lisp types of the values alone are not enough to do that."

  ;; The name as the caller supplied it.
  (name nil :type (or string symbol))
  ;; The property's value; its interpretation is given by TYPE.
  (value nil :type t)
  ;; Type tag; the constructor defaults it to (PROPERTY-TYPE VALUE).
  (type nil :type symbol)
  ;; Where the property came from; the constructor computes this by calling
  ;; FILE-LOCATION on its :LOCATION argument.
  (location (file-location nil) :type file-location)
  ;; Lookup key; the constructor computes this as (PROPERTY-KEY NAME).
  (key nil :type symbol)
  ;; Set once the property has been fetched.
  (seenp nil :type boolean))
74
(defun string-to-symbol (string &optional (package *package*))
  "Convert STRING to a symbol.

   If STRING contains no colon, it is interned in PACKAGE (by default, the
   current package).  Otherwise the text before the first colon names the
   package to use, overriding PACKAGE:

     * `PKG:NAME' finds the existing external symbol NAME in PKG;
     * `PKG::NAME' allows access to internal symbols, and will intern if
       necessary; and
     * `:NAME' (an empty package name) names a keyword, as it would for the
       Lisp reader, and will intern if necessary.

   An error is signalled if the package doesn't exist, or if a single-colon
   reference names a symbol which is missing or not external.

   Note that escape characters are /not/ processed; don't put colons in
   package names if you want to use them from SOD property sets."

  (let* ((length (length string))
         (colon (position #\: string))
         (doubledp (and colon
                        (< (1+ colon) length)
                        (char= (char string (1+ colon)) #\:)))
         (start (cond ((not colon) 0)
                      (doubledp (+ colon 2))
                      (t (1+ colon))))
         ;; Unqualified and double-colon names may be interned freshly;
         ;; single-colon names must already exist and be external.
         (internalp (or (not colon) doubledp)))

    (cond ((not colon))
          ((zerop colon)
           ;; Empty package prefix: this is keyword syntax.  Every keyword
           ;; is external, so interning here doesn't weaken the
           ;; external-only rule for single-colon references.
           (setf package (find-package :keyword)
                 internalp t))
          (t
           (let* ((package-name (subseq string 0 colon))
                  (found (find-package package-name)))
             (unless found
               (error "Unknown package `~A'" package-name))
             (setf package found))))

    (let ((name (subseq string start)))
      (multiple-value-bind (symbol status)
          (funcall (if internalp #'intern #'find-symbol) name package)
        (cond ((or internalp (eq status :external))
               symbol)
              ((not status)
               (error "Symbol `~A' not found in package `~A'"
                      name (package-name package)))
              (t
               (error "Symbol `~A' not external in package `~A'"
                      name (package-name package))))))))
110
(defgeneric coerce-property-value (value type wanted)
  (:documentation
   "Convert VALUE, a property value tagged with TYPE, into the form WANTED.

   The methods defined here cover the conversions this file needs; more
   can be added for other combinations of TYPE and WANTED.")

  ;; Fallback.  A value whose tag already equals WANTED is passed through
  ;; untouched; any other combination that no more specific method claimed
  ;; is an error.
  (:method (value type wanted)
    (unless (eql type wanted)
      (error "Incorrect type: expected ~A but found ~A" wanted type))
    value)

  ;; Asking for T means `give me whatever is there'.
  (:method (value type (wanted (eql t)))
    value)

  ;; Conversions to keywords.  Identifiers additionally have their
  ;; underscores turned into hyphens.
  (:method ((value symbol) (type (eql :symbol)) (wanted (eql :keyword)))
    value)
  (:method ((value string) (type (eql :id)) (wanted (eql :keyword)))
    (let ((lispy-name (substitute #\- #\_ (frob-case value))))
      (string-to-symbol lispy-name :keyword)))
  (:method ((value string) (type (eql :string)) (wanted (eql :keyword)))
    (let ((flipped (frob-case value)))
      (string-to-symbol flipped :keyword)))

  ;; Conversions to symbols in the default package of STRING-TO-SYMBOL.
  (:method ((value string) (type (eql :id)) (wanted (eql :symbol)))
    (let ((lispy-name (substitute #\- #\_ (frob-case value))))
      (string-to-symbol lispy-name)))
  (:method ((value string) (type (eql :string)) (wanted (eql :symbol)))
    (let ((flipped (frob-case value)))
      (string-to-symbol flipped)))

  ;; Conversion of a symbol to an identifier: the reverse of the above,
  ;; with hyphens becoming underscores.
  (:method ((value symbol) (type (eql :symbol)) (wanted (eql :id)))
    (let ((flipped (frob-case (symbol-name value))))
      (substitute #\_ #\- flipped))))
147
148 ;;;--------------------------------------------------------------------------
149 ;;; Property set representation.
150 ;;;
151 ;;; There shouldn't be any code elsewhere which depends on the
152 ;;; representation. It's changed before; it may change again.
153
(defstruct (pset (:conc-name %pset-)
                 (:constructor %make-pset))
  "A set of properties.

   The underlying hash table is wrapped in a structure of its own so that a
   print function can be defined for it."
  ;; The table of PROPERTY objects.
  (hash (make-hash-table) :type hash-table))
160
;; The raw property-set operations are small enough to be worth inlining.
(declaim (inline make-pset
                 pset-get
                 pset-store
                 pset-map))

(defun make-pset ()
  "Return a fresh, empty property set."
  (%make-pset :hash (make-hash-table)))
166
(defun pset-get (pset key)
  "Return the property stored under KEY in PSET, or NIL if there is none.

   Exactly one value is returned: the `present' flag from the hash table
   lookup is dropped."
  (let ((table (%pset-hash pset)))
    (nth-value 0 (gethash key table))))
172
(defun pset-store (pset prop)
  "File the property PROP in PSET under its key, and return PROP.

   Any property already stored under the same key is replaced.  PSET is
   modified in place."
  (let ((key (p-key prop))
        (table (%pset-hash pset)))
    (setf (gethash key table) prop)))
179
(defun pset-map (func pset)
  "Call FUNC once on each property stored in PSET; return NIL."
  (loop for prop being the hash-values of (%pset-hash pset)
        do (funcall func prop)))
184
185 ;;;--------------------------------------------------------------------------
186 ;;; `Cooked' property set operations.
187
(defun store-property
    (pset name value &key (type (property-type value)) location)
  "Store a property in PSET.

   A new PROPERTY object is made from NAME, VALUE, TYPE and LOCATION and
   filed in PSET, replacing any existing property with the same key.  TYPE
   defaults to a guess based on the Lisp type of VALUE.  Returns the new
   property object."
  ;; The raw storage function is PSET-STORE; there is no %PSET-STORE.
  (pset-store pset
              (make-property name value :type type :location location)))
193
(defun get-property (pset name type &optional default)
  "Fetch a property from a property set.

   If PSET is nil, or contains no property called NAME, then return DEFAULT
   and NIL.  Otherwise mark the property as having been used, and return
   its value and its file location.

   The value returned depends on the TYPE argument provided.  If you pass
   NIL then you get back the entire PROPERTY object.  If you pass T, then
   you get whatever was left in the property set, uninterpreted.  Otherwise
   the value is converted to the right kind of thing by
   COERCE-PROPERTY-VALUE, which signals an error if no conversion from the
   property's own type is available.  The property is marked as used even
   if the conversion fails."

  ;; The raw lookup function is PSET-GET; there is no %PSET-GET.
  (let ((prop (and pset (pset-get pset (property-key name)))))
    (with-default-error-location ((and prop (p-location prop)))
      (cond ((not prop)
             (values default nil))
            (t
             ;; Tick the property off before coercing, so that a type
             ;; error isn't followed by an `unknown property' complaint.
             (setf (p-seenp prop) t)
             (values (if type
                         (coerce-property-value (p-value prop)
                                                (p-type prop)
                                                type)
                         prop)
                     (p-location prop)))))))
222
(defun add-property
    (pset name value &key (type (property-type value)) location)
  "Add a new property to PSET, refusing to overwrite an existing one.

   If PSET already holds a property called NAME then an error is signalled;
   otherwise the property is stored as for STORE-PROPERTY."

  (with-default-error-location (location)
    (let ((clash (get-property pset name nil)))
      (if clash
          (error "Property ~S already defined~@[ at ~A~]"
                 name (p-location clash))
          (store-property pset name value
                          :type type
                          :location location)))))
235
(defun make-property-set (&rest plist)
  "Build and return a new property set from the property list PLIST.

   PLIST consists of alternating names and values; each pair is added with
   ADD-PROPERTY, so a repeated name is an error.  Property types are
   guessed from the Lisp types of the values, which is often but not always
   what you wanted.

   This is intended for making property sets from programs rather than for
   use by the parser."

  (loop with pset = (%make-pset)
        for (name value) on plist by #'cddr
        do (add-property pset name value)
        finally (return pset)))
252
(defmethod print-object ((pset pset) stream)
  ;; Print PSET unreadably, as its type followed by one parenthesized
  ;; `name type value' group for each property.  Groups are separated by a
  ;; space and a linear-style conditional newline.
  (print-unreadable-object (pset stream :type t)
    (pprint-logical-block (stream nil)
      (let ((firstp t))
        ;; The raw iteration function is PSET-MAP; there is no %PSET-MAP.
        (pset-map (lambda (prop)
                    ;; Emit a separator before every group except the first.
                    (cond (firstp (setf firstp nil))
                          (t (write-char #\space stream)
                             (pprint-newline :linear stream)))
                    (format stream "~:@<~S ~@_~S ~@_~S~:>"
                            (p-name prop) (p-type prop) (p-value prop)))
                  pset)))))
264
(defun check-unused-properties (pset)
  "Issue errors about unused properties in PSET.

   Every property whose `seen' flag is still clear is reported, via
   CERROR*-WITH-LOCATION, at the property's own file location."
  ;; The raw iteration function is PSET-MAP; there is no %PSET-MAP.
  (pset-map (lambda (prop)
              (unless (p-seenp prop)
                (cerror*-with-location (p-location prop)
                                       "Unknown property `~A'"
                                       (p-name prop))))
            pset))
273
274 ;;;--------------------------------------------------------------------------
275 ;;; Expression parser.
276
(defun parse-expression (lexer)
  "Parse an expression from the LEXER.

   The return values are the expression's VALUE and TYPE.  Literal tokens
   give the types :ID, :INTEGER, :STRING, and :CHAR; a `?' Lisp expression
   gives whatever type PROPERTY-TYPE guesses for its value.  If an error
   prevented a sane value being produced, the TYPE :INVALID is returned.

   Expression syntax is rather limited at the moment:

   expression : term | expression `+' term | expression `-' term
   term : factor | term `*' factor | term `/' factor
   factor : primary | `+' factor | `-' factor
   primary : integer | identifier | string
           | `(' expression `)'
           | `?' lisp-expression

   Identifiers are just standalone things.  They don't name values.  The
   operators only work on integer values at the moment.  (Confusingly, you
   can manufacture rational numbers using the division operator, but they
   still get called integers.)"

  (let ((valstack nil)
        (opstack nil))

    ;; The following is a simple operator-precedence parser: the
    ;; recursive-descent parser I wrote the first time was about twice the
    ;; size and harder to extend.
    ;;
    ;; The parser flips between two states, OPERAND and OPERATOR.  It starts
    ;; out in OPERAND state, and tries to parse a sequence of prefix
    ;; operators followed by a primary expression.  Once it's found one, it
    ;; pushes the operand onto the value stack and flips to OPERATOR state;
    ;; if it fails, it reports a syntax error and exits.  The OPERATOR state
    ;; tries to read a sequence of postfix operators followed by an infix
    ;; operator; if it fails, it assumes that it hit the stuff following the
    ;; expression and stops.
    ;;
    ;; Each operator is pushed onto a stack consisting of lists of the form
    ;; (FUNC PREC TY*).  The PREC is a precedence -- higher numbers mean
    ;; tighter binding.  The TY* are operand types; operands are popped off
    ;; the operand stack, checked against the requested types, and passed to
    ;; the FUNC, which returns a new operand to be pushed in their place.
    ;; The FUNC returns two values: the TYPE of the result, and then its
    ;; VALUE, in that order.
    ;;
    ;; Usually, when a binary operator is pushed, existing stacked operators
    ;; with higher precedence are applied.  Whether operators with /equal/
    ;; precedence are also applied depends on the associativity of the
    ;; operator: apply equal precedence operators for left-associative
    ;; operators, don't apply for right-associative.  When we reach the end
    ;; of the expression, all the remaining operators on the stack are
    ;; applied.
    ;;
    ;; Parenthesized subexpressions are implemented using a hack: when we
    ;; find an open paren in operand position, a fake operator is pushed with
    ;; an artificially low precedence, which protects the operators beneath
    ;; from premature application.  The fake operator's function reports an
    ;; error -- this will be triggered only if we reach the end of the
    ;; expression before a matching close-paren, because the close-paren
    ;; handler will pop the fake operator before it does any harm.

    (restart-case
        (labels ((apply-op (op)
                   ;; Apply the single operator list OP to the values on the
                   ;; value stack.
                   (let ((func (pop op))
                         (args nil))
                     ;; OP is now (PREC TY*).  Operands come off the stack
                     ;; last-first, so walk the types backwards; pushing
                     ;; onto ARGS restores the original order.
                     (dolist (ty (reverse (cdr op)))
                       (let ((arg (pop valstack)))
                         (cond ((eq (car arg) :invalid)
                                ;; Already reported: just propagate.
                                (setf func nil))
                               ((eq (car arg) ty)
                                (push (cdr arg) args))
                               (t
                                (cerror* "Type mismatch: wanted ~A; found ~A"
                                         ty (car arg))
                                (setf func nil)))))
                     (if func
                         (multiple-value-bind (type value) (apply func args)
                           (push (cons type value) valstack))
                         (push '(:invalid . nil) valstack))))

                 (apply-all (prec)
                   ;; Apply all operators with precedence PREC or higher.
                   (loop
                     (when (or (null opstack) (< (cadar opstack) prec))
                       (return))
                     (apply-op (pop opstack)))))

          (tagbody

           operand
             ;; Operand state.  Push prefix operators, and try to read a
             ;; primary operand.
             (case (token-type lexer)

               ;; Aha.  A primary.  Push it onto the stack, and see if
               ;; there's an infix operator.
               ((:integer :id :string :char)
                (push (cons (token-type lexer)
                            (token-value lexer))
                      valstack)
                (go operator))

               ;; Look for a Lisp S-expression.
               ;;
               ;; NOTE(review): this reads and EVALs a form taken straight
               ;; from the input being parsed, so that input can run
               ;; arbitrary Lisp code.  Only feed this parser trusted
               ;; sources.
               (#\?
                (with-lexer-stream (stream lexer)
                  (let ((value (eval (read stream t))))
                    (push (cons (property-type value) value) valstack)))
                (go operator))

               ;; Arithmetic unary operators.  Push an operator for `+' for
               ;; the sake of type-checking.
               (#\+
                (push (list (lambda (x) (values :integer x))
                            10 :integer)
                      opstack))
               (#\-
                (push (list (lambda (x) (values :integer (- x)))
                            10 :integer)
                      opstack))

               ;; The open-paren hack.  Push a magic marker which will
               ;; trigger an error if we hit the end of the expression.
               ;; Inside the paren, we're still looking for an operand.
               (#\(
                (push (list (lambda ()
                              (error "Expected `)' but found ~A"
                                     (format-token lexer)))
                            -1)
                      opstack))

               ;; Failed to find anything.  Report an error and give up.
               (t
                (error "Expected expression but found ~A"
                       (format-token lexer))))

             ;; Assume prefix operators as the default, so go round for more.
             (next-token lexer)
             (go operand)

           operator
             ;; Operator state.  Push postfix operators, and try to read an
             ;; infix operator.  It turns out that we're always a token
             ;; behind here, so catch up.
             (next-token lexer)
             (case (token-type lexer)

               ;; Binary operators.
               (#\+ (apply-all 3)
                (push (list (lambda (x y) (values :integer (+ x y)))
                            3 :integer :integer)
                      opstack))
               (#\- (apply-all 3)
                (push (list (lambda (x y) (values :integer (- x y)))
                            3 :integer :integer)
                      opstack))
               (#\* (apply-all 5)
                (push (list (lambda (x y) (values :integer (* x y)))
                            5 :integer :integer)
                      opstack))
               ;; Like the other operators, division must return the type
               ;; first and the value second, because that's the order in
               ;; which APPLY-OP binds them.
               (#\/ (apply-all 5)
                (push (list (lambda (x y)
                              (if (zerop y)
                                  (progn (cerror* "Division by zero")
                                         (values :invalid nil))
                                  (values :integer (/ x y))))
                            5 :integer :integer)
                      opstack))

               ;; The close-paren hack.  Finish off the operators pushed
               ;; since the open-paren.  If the operator stack is now empty,
               ;; this is someone else's paren, so exit.  Otherwise pop our
               ;; magic marker, and continue looking for an operator.
               (#\) (apply-all 0)
                (when (null opstack)
                  (go done))
                (pop opstack)
                (go operator))

               ;; Nothing useful.  Must have hit the end, so leave.
               (t (go done)))

             ;; Assume we found the binary operator as a default, so snarf a
             ;; token and head back.
             (next-token lexer)
             (go operand)

           done)

          ;; Apply all the pending operators.  If there's an unmatched
          ;; open paren, this will trigger the error message.
          (apply-all -99)

          ;; If everything worked out, we should have exactly one operand
          ;; left.  This is the one we want.
          (assert (and (consp valstack)
                       (null (cdr valstack))))
          (values (cdar valstack) (caar valstack)))
      (continue ()
        :report "Return an invalid value and continue."
        (values nil :invalid)))))
477
478 ;;;--------------------------------------------------------------------------
479 ;;; Property set parsing.
480
(defun parse-property (lexer pset)
  "Parse one `NAME = EXPRESSION' property from LEXER and add it to PSET.

   Nothing is added if the expression came back with type :INVALID."
  ;; Read the name, then insist on the `=' which must follow it.
  (let ((name (prog1 (require-token lexer :id)
                (require-token lexer #\=))))
    (multiple-value-bind (value type) (parse-expression lexer)
      (if (eq type :invalid)
          nil
          (add-property pset name value :type type :location lexer)))))
488
(defun parse-property-set (lexer)
  "Parse a bracketed, comma-separated property set from LEXER.

   Returns the new property set, or NIL if the input doesn't start with an
   open bracket.  A missing property set is not an error: GET-PROPERTY
   accepts NIL in place of a property set and simply returns the default
   for every property requested."

  (when (require-token lexer #\[ :errorp nil)
    (let ((pset (make-pset)))
      ;; At least one property is required; carry on for as long as each
      ;; one is followed by a comma.
      (loop do (parse-property lexer pset)
            while (require-token lexer #\, :errorp nil))
      (require-token lexer #\])
      pset)))
504
505 ;;;--------------------------------------------------------------------------
506 ;;; Testing cruft.
507
#+test
(with-input-from-string (text "[role = before, integer = 42 * (3 - 1)]")
  (let* ((stream (make-instance 'position-aware-input-stream :stream text))
         (lexer (make-instance 'sod-lexer :stream stream)))
    ;; Fetch the first character and the first token before parsing starts.
    (next-char lexer)
    (next-token lexer)
    ;; Return the parsed property set, followed by the type of the token
    ;; the lexer was left looking at.
    (multiple-value-call #'values
      (parse-property-set lexer)
      (token-type lexer))))
517
518 ;;;----- That's all, folks --------------------------------------------------