Add .gitattributes file.
[sod] / pset.lisp
CommitLineData
abdf50aa
MW
1;;; -*-lisp-*-
2;;;
3;;; Collections of properties
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Simple Object Definition system.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
77027cca 29;;; Property representation.
abdf50aa
MW
30
31(defun property-key (name)
32 "Convert NAME into a keyword.
33
34 If NAME isn't a symbol already, then flip its case (using FROB-CASE),
35 replace underscores by hyphens, and intern into the KEYWORD package."
36 (etypecase name
37 (symbol name)
38 (string (intern (substitute #\- #\_ (frob-case name)) :keyword))))
39
40(defun property-type (value)
77027cca 41 "Guess a sensible property type to use for VALUE."
1f1d88f5 42 (typecase value
abdf50aa
MW
43 (symbol :symbol)
44 (integer :integer)
45 (string :string)
1f1d88f5
MW
46 (character :char)
47 (c-fragment :frag)
48 (t :other)))
abdf50aa
MW
49
50(defstruct (property
51 (:conc-name p-)
52 (:constructor make-property
53 (name value
77027cca
MW
54 &key (type (property-type value))
55 ((:location %loc))
56 seenp
57 &aux (key (property-key name))
58 (location (file-location %loc)))))
abdf50aa
MW
59 "A simple structure for holding a property in a property set.
60
61 The main useful feature is the ability to tick off properties which have
77027cca
MW
62 been used, so that we can complain about unrecognized properties.
63
64 An explicit type tag is necessary because we need to be able to talk
65 distinctly about identifiers, strings and symbols, and we've only got two
66 obvious Lisp types to play with. Sad, but true."
67
68 (name nil :type (or string symbol))
abdf50aa
MW
69 (value nil :type t)
70 (type nil :type symbol)
71 (location (file-location nil) :type file-location)
72 (key nil :type symbol)
73 (seenp nil :type boolean))
74
abdf50aa
MW
75(defun string-to-symbol (string &optional (package *package*))
76 "Convert STRING to a symbol in PACKAGE.
77
78 If PACKAGE is nil, then parse off a `PACKAGE:' prefix from STRING to
79 identify the package. A doubled colon allows access to internal symbols,
80 and will intern if necessary. Note that escape characters are /not/
81 processed; don't put colons in package names if you want to use them from
82 SOD property sets."
83
abdf50aa
MW
84 (let* ((length (length string))
85 (colon (position #\: string)))
86 (multiple-value-bind (start internalp)
87 (cond ((not colon) (values 0 t))
88 ((and (< (1+ colon) length)
89 (char= (char string (1+ colon)) #\:))
90 (values (+ colon 2) t))
91 (t
92 (values (1+ colon) nil)))
93 (when colon
94 (let* ((package-name (subseq string 0 colon))
95 (found (find-package package-name)))
96 (unless found
97 (error "Unknown package `~A'" package-name))
98 (setf package found)))
99 (let ((name (subseq string start)))
100 (multiple-value-bind (symbol status)
101 (funcall (if internalp #'intern #'find-symbol) name package)
102 (cond ((or internalp (eq status :external))
103 symbol)
104 ((not status)
105 (error "Symbol `~A' not found in package `~A'"
106 name (package-name package)))
107 (t
108 (error "Symbol `~A' not external in package `~A'"
109 name (package-name package)))))))))
110
111(defgeneric coerce-property-value (value type wanted)
112 (:documentation
77027cca
MW
113 "Convert VALUE, a property of type TYPE, to be of type WANTED.
114
115 It's sensible to add additional methods to this function, but there are
116 all the ones we need.")
abdf50aa
MW
117
118 ;; If TYPE matches WANTED, we'll assume that VALUE already has the right
1f1d88f5
MW
119 ;; form. Otherwise, if nothing else matched, then I guess we'll have to
120 ;; say it didn't work.
abdf50aa
MW
121 (:method (value type wanted)
122 (if (eql type wanted)
123 value
124 (error "Incorrect type: expected ~A but found ~A" wanted type)))
125
1f1d88f5
MW
126 ;; If the caller asks for type T then give him the raw thing.
127 (:method (value type (wanted (eql t)))
128 value)
129
abdf50aa
MW
130 ;; Keywords.
131 (:method ((value symbol) (type (eql :symbol)) (wanted (eql :keyword)))
132 value)
133 (:method ((value string) (type (eql :id)) (wanted (eql :keyword)))
134 (string-to-symbol (substitute #\- #\_ (frob-case value)) :keyword))
135 (:method ((value string) (type (eql :string)) (wanted (eql :keyword)))
136 (string-to-symbol (frob-case value) :keyword))
137
138 ;; Symbols.
139 (:method ((value string) (type (eql :id)) (wanted (eql :symbol)))
140 (string-to-symbol (substitute #\- #\_ (frob-case value))))
141 (:method ((value string) (type (eql :string)) (wanted (eql :symbol)))
142 (string-to-symbol (frob-case value)))
143
144 ;; Identifiers.
145 (:method ((value symbol) (type (eql :symbol)) (wanted (eql :id)))
146 (substitute #\_ #\- (frob-case (symbol-name value)))))
147
77027cca
MW
148;;;--------------------------------------------------------------------------
149;;; Property set representation.
150;;;
151;;; There shouldn't be any code elsewhere which depends on the
152;;; representation. It's changed before; it may change again.
153
154(defstruct (pset (:constructor %make-pset)
155 (:conc-name %pset-))
156 "A property set.
157
158 Wrapped up in a structure so that we can define a print function."
159 (hash (make-hash-table) :type hash-table))
160
161(declaim (inline make-pset pset-get pset-store pset-map))
162
163(defun make-pset ()
164 "Constructor for property sets."
165 (%make-pset))
166
167(defun pset-get (pset key)
168 "Look KEY up in PSET and return what we find.
169
170 If there's no property by that name, return NIL."
171 (values (gethash key (%pset-hash pset))))
172
173(defun pset-store (pset prop)
174 "Store property PROP in PSET.
175
176 Overwrite or replace any previous property with the same name. Mutates
177 the property set."
178 (setf (gethash (p-key prop) (%pset-hash pset)) prop))
179
180(defun pset-map (func pset)
181 "Call FUNC for each property in PSET."
182 (maphash (lambda (key value) (declare (ignore key)) (funcall func value))
183 (%pset-hash pset)))
184
185;;;--------------------------------------------------------------------------
186;;; `Cooked' property set operations.
187
188(defun store-property
189 (pset name value &key (type (property-type value)) location)
190 "Store a property in PSET."
d9c15186
MW
191 (pset-store pset
192 (make-property name value :type type :location location)))
77027cca 193
abdf50aa
MW
194(defun get-property (pset name type &optional default)
195 "Fetch a property from a property set.
196
197 If a property NAME is not found in PSET, or if a property is found, but
198 its type doesn't match TYPE, then return DEFAULT and NIL; otherwise return
199 the value and its file location. In the latter case, mark the property as
200 having been used.
201
1f1d88f5
MW
202 The value returned depends on the TYPE argument provided. If you pass NIL
203 then you get back the entire PROPERTY object. If you pass T, then you get
204 whatever was left in the property set, uninterpreted. Otherwise the value
77027cca 205 is coerced to the right kind of thing (where possible) and returned.
abdf50aa 206
77027cca
MW
207 If PSET is nil, then return DEFAULT."
208
d9c15186 209 (let ((prop (and pset (pset-get pset (property-key name)))))
abdf50aa
MW
210 (with-default-error-location ((and prop (p-location prop)))
211 (cond ((not prop)
212 (values default nil))
213 ((not type)
214 (setf (p-seenp prop) t)
215 (values prop (p-location prop)))
216 (t
217 (setf (p-seenp prop) t)
218 (values (coerce-property-value (p-value prop)
219 (p-type prop)
220 type)
221 (p-location prop)))))))
222
77027cca
MW
223(defun add-property
224 (pset name value &key (type (property-type value)) location)
225 "Add a property to PSET.
226
227 If a property with the same NAME already exists, report an error."
228
229 (with-default-error-location (location)
230 (let ((existing (get-property pset name nil)))
231 (when existing
232 (error "Property ~S already defined~@[ at ~A~]"
233 name (p-location existing)))
234 (store-property pset name value :type type :location location))))
235
236(defun make-property-set (&rest plist)
237 "Make a new property set, with given properties.
238
239 This isn't the way to make properties when parsing, but it works well for
240 programmatic generation. The arguments should form a property list
241 (alternating keywords and values is good).
242
243 An attempt is made to guess property types from the Lisp types of the
244 values. This isn't always successful but it's not too bad. The
245 alternative is manufacturing a PROPERTY-VALUE object by hand and stuffing
246 into the set."
247
d9c15186 248 (do ((pset (make-pset))
77027cca
MW
249 (plist plist (cddr plist)))
250 ((endp plist) pset)
251 (add-property pset (car plist) (cadr plist))))
252
253(defmethod print-object ((pset pset) stream)
254 (print-unreadable-object (pset stream :type t)
255 (pprint-logical-block (stream nil)
256 (let ((firstp t))
d9c15186
MW
257 (pset-map (lambda (prop)
258 (cond (firstp (setf firstp nil))
259 (t (write-char #\space stream)
260 (pprint-newline :linear stream)))
261 (format stream "~:@<~S ~@_~S ~@_~S~:>"
262 (p-name prop) (p-type prop) (p-value prop)))
263 pset)))))
77027cca 264
abdf50aa
MW
265(defun check-unused-properties (pset)
266 "Issue errors about unused properties in PSET."
d9c15186
MW
267 (when pset
268 (pset-map (lambda (prop)
269 (unless (p-seenp prop)
270 (cerror*-with-location (p-location prop)
271 "Unknown property `~A'"
272 (p-name prop))))
273 pset)))
abdf50aa
MW
274
275;;;--------------------------------------------------------------------------
77027cca 276;;; Expression parser.
abdf50aa
MW
277
278(defun parse-expression (lexer)
279 "Parse an expression from the LEXER.
280
1f1d88f5
MW
281 The return values are the expression's VALUE and TYPE; currently the types
282 are :ID, :INTEGER, :STRING, and :CHAR. If an error prevented a sane value
abdf50aa
MW
283 being produced, the TYPE :INVALID is returned.
284
285 Expression syntax is rather limited at the moment:
286
287 expression : term | expression `+' term | expression `-' term
288 term : factor | term `*' factor | term `/' factor
289 factor : primary | `+' factor | `-' factor
290 primary : integer | identifier | string
291 | `(' expression `)'
292 | `?' lisp-expression
293
294 Identifiers are just standalone things. They don't name values. The
295 operators only work on integer values at the moment. (Confusingly, you
296 can manufacture rational numbers using the division operator, but they
297 still get called integers.)"
298
299 (let ((valstack nil)
300 (opstack nil))
301
302 ;; The following is a simple operator-precedence parser: the
303 ;; recursive-descent parser I wrote the first time was about twice the
304 ;; size and harder to extend.
305 ;;
306 ;; The parser flips between two states, OPERAND and OPERATOR. It starts
307 ;; out in OPERAND state, and tries to parse a sequence of prefix
308 ;; operators followed by a primary expression. Once it's found one, it
309 ;; pushes the operand onto the value stack and flips to OPERATOR state;
310 ;; if it fails, it reports a syntax error and exits. The OPERAND state
311 ;; tries to read a sequence of postfix operators followed by an infix
312 ;; operator; if it fails, it assumes that it hit the stuff following the
313 ;; expression and stops.
314 ;;
315 ;; Each operator is pushed onto a stack consisting of lists of the form
316 ;; (FUNC PREC TY*). The PREC is a precedence -- higher numbers mean
317 ;; tighter binding. The TY* are operand types; operands are popped off
318 ;; the operand stack, checked against the requested types, and passed to
319 ;; the FUNC, which returns a new operand to be pushed in their place.
320 ;;
321 ;; Usually, when a binary operator is pushed, existing stacked operators
322 ;; with higher precedence are applied. Whether operators with /equal/
323 ;; precedence are also applied depends on the associativity of the
324 ;; operator: apply equal precedence operators for left-associative
325 ;; operators, don't apply for right-associative. When we reach the end
326 ;; of the expression, all the remaining operators on the stack are
327 ;; applied.
328 ;;
329 ;; Parenthesized subexpressions are implemented using a hack: when we
330 ;; find an open paren in operand position, a fake operator is pushed with
331 ;; an artificially low precedece, which protects the operators beneath
332 ;; from premature application. The fake operator's function reports an
333 ;; error -- this will be triggered only if we reach the end of the
334 ;; expression before a matching close-paren, because the close-paren
335 ;; handler will pop the fake operator before it does any harm.
336
337 (restart-case
338 (labels ((apply-op (op)
339 ;; Apply the single operator list OP to the values on the
340 ;; value stack.
341 (let ((func (pop op))
342 (args nil))
343 (dolist (ty (reverse (cdr op)))
344 (let ((arg (pop valstack)))
345 (cond ((eq (car arg) :invalid)
346 (setf func nil))
347 ((eq (car arg) ty)
348 (push (cdr arg) args))
349 (t
350 (cerror* "Type mismatch: wanted ~A; found ~A"
351 ty (car arg))
352 (setf func nil)))))
353 (if func
354 (multiple-value-bind (type value) (apply func args)
355 (push (cons type value) valstack))
356 (push '(:invalid . nil) valstack))))
357
358 (apply-all (prec)
359 ;; Apply all operators with precedence PREC or higher.
360 (loop
361 (when (or (null opstack) (< (cadar opstack) prec))
362 (return))
363 (apply-op (pop opstack)))))
364
365 (tagbody
366
367 operand
368 ;; Operand state. Push prefix operators, and try to read a
369 ;; primary operand.
370 (case (token-type lexer)
371
372 ;; Aha. A primary. Push it onto the stack, and see if
373 ;; there's an infix operator.
1f1d88f5 374 ((:integer :id :string :char)
abdf50aa
MW
375 (push (cons (token-type lexer)
376 (token-value lexer))
377 valstack)
378 (go operator))
379
380 ;; Look for a Lisp S-expression.
381 (#\?
382 (with-lexer-stream (stream lexer)
383 (let ((value (eval (read stream t))))
384 (push (cons (property-type value) value) valstack)))
385 (go operator))
386
387 ;; Arithmetic unary operators. Push an operator for `+' for
388 ;; the sake of type-checking.
389 (#\+
390 (push (list (lambda (x) (values :integer x))
391 10 :integer)
392 opstack))
393 (#\-
394 (push (list (lambda (x) (values :integer (- x)))
395 10 :integer)
396 opstack))
397
398 ;; The open-paren hack. Push a magic marker which will
399 ;; trigger an error if we hit the end of the expression.
400 ;; Inside the paren, we're still looking for an operand.
401 (#\(
402 (push (list (lambda ()
403 (error "Expected `)' but found ~A"
404 (format-token lexer)))
405 -1)
406 opstack))
407
408 ;; Failed to find anything. Report an error and give up.
409 (t
410 (error "Expected expression but found ~A"
411 (format-token lexer))))
412
413 ;; Assume prefix operators as the default, so go round for more.
414 (next-token lexer)
415 (go operand)
416
417 operator
418 ;; Operator state. Push postfix operators, and try to read an
419 ;; infix operator. It turns out that we're always a token
420 ;; behind here, so catch up.
421 (next-token lexer)
422 (case (token-type lexer)
423
424 ;; Binary operators.
425 (#\+ (apply-all 3)
426 (push (list (lambda (x y) (values :integer (+ x y)))
427 3 :integer :integer)
428 opstack))
429 (#\- (apply-all 3)
430 (push (list (lambda (x y) (values :integer (- x y)))
431 3 :integer :integer)
432 opstack))
433 (#\* (apply-all 5)
434 (push (list (lambda (x y) (values :integer (* x y)))
435 5 :integer :integer)
436 opstack))
437 (#\/ (apply-all 5)
438 (push (list (lambda (x y)
439 (if (zerop y)
440 (progn (cerror* "Division by zero")
441 (values nil :invalid))
442 (values (/ x y) :integer)))
443 5 :integer :integer)
444 opstack))
445
446 ;; The close-paren hack. Finish off the operators pushed
447 ;; since the open-paren. If the operator stack is now empty,
448 ;; this is someone else's paren, so exit. Otherwise pop our
449 ;; magic marker, and continue looking for an operator.
450 (#\) (apply-all 0)
451 (when (null opstack)
452 (go done))
453 (pop opstack)
454 (go operator))
455
456 ;; Nothing useful. Must have hit the end, so leave.
457 (t (go done)))
458
459 ;; Assume we found the binary operator as a default, so snarf a
460 ;; token and head back.
461 (next-token lexer)
462 (go operand)
463
464 done)
465
466 ;; Apply all the pending operators. If there's an unmatched
467 ;; open paren, this will trigger the error message.
468 (apply-all -99)
469
470 ;; If everything worked out, we should have exactly one operand
471 ;; left. This is the one we want.
472 (assert (and (consp valstack)
473 (null (cdr valstack))))
474 (values (cdar valstack) (caar valstack)))
475 (continue ()
77027cca 476 :report "Return an invalid value and continue."
abdf50aa
MW
477 (values nil :invalid)))))
478
77027cca
MW
479;;;--------------------------------------------------------------------------
480;;; Property set parsing.
481
482(defun parse-property (lexer pset)
483 "Parse a single property from LEXER; add it to PSET."
484 (let ((name (require-token lexer :id)))
485 (require-token lexer #\=)
486 (multiple-value-bind (value type) (parse-expression lexer)
487 (unless (eq type :invalid)
488 (add-property pset name value :type type :location lexer)))))
489
abdf50aa
MW
490(defun parse-property-set (lexer)
491 "Parse a property set from LEXER.
492
493 If there wasn't one to parse, return nil; this isn't considered an error,
494 and GET-PROPERTY will perfectly happily report defaults for all requested
495 properties."
496
77027cca
MW
497 (when (require-token lexer #\[ :errorp nil)
498 (let ((pset (make-pset)))
abdf50aa 499 (loop
77027cca 500 (parse-property lexer pset)
abdf50aa
MW
501 (unless (require-token lexer #\, :errorp nil)
502 (return)))
503 (require-token lexer #\])
77027cca 504 pset)))
abdf50aa
MW
505
506;;;--------------------------------------------------------------------------
507;;; Testing cruft.
508
509#+test
77027cca 510(with-input-from-string (raw "[role = before, integer = 42 * (3 - 1)]")
abdf50aa
MW
511 (let* ((in (make-instance 'position-aware-input-stream :stream raw))
512 (lexer (make-instance 'sod-lexer :stream in)))
513 (next-char lexer)
514 (next-token lexer)
515 (multiple-value-call #'values
516 (parse-property-set lexer)
517 (token-type lexer))))
518
519;;;----- That's all, folks --------------------------------------------------