;;; -*-lisp-*- ;;; ;;; Collections of properties ;;; ;;; (c) 2009 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Simple Object Definition system. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- ;;; Property representation. (defun property-key (name) "Convert NAME into a keyword. If NAME isn't a symbol already, then flip its case (using FROB-CASE), replace underscores by hyphens, and intern into the KEYWORD package." (etypecase name (symbol name) (string (intern (substitute #\- #\_ (frob-case name)) :keyword)))) (defun property-type (value) "Guess a sensible property type to use for VALUE." (typecase value (symbol :symbol) (integer :integer) (string :string) (character :char) (c-fragment :frag) (t :other))) (defstruct (property (:conc-name p-) (:constructor make-property (name value &key (type (property-type value)) ((:location %loc)) seenp &aux (key (property-key name)) (location (file-location %loc))))) "A simple structure for holding a property in a property set. The main useful feature is the ability to tick off properties which have been used, so that we can complain about unrecognized properties. An explicit type tag is necessary because we need to be able to talk distinctly about identifiers, strings and symbols, and we've only got two obvious Lisp types to play with. Sad, but true." (name nil :type (or string symbol)) (value nil :type t) (type nil :type symbol) (location (file-location nil) :type file-location) (key nil :type symbol) (seenp nil :type boolean)) (defun string-to-symbol (string &optional (package *package*)) "Convert STRING to a symbol in PACKAGE. If PACKAGE is nil, then parse off a `PACKAGE:' prefix from STRING to identify the package. A doubled colon allows access to internal symbols, and will intern if necessary. Note that escape characters are /not/ processed; don't put colons in package names if you want to use them from SOD property sets." (let* ((length (length string)) (colon (position #\: string))) (multiple-value-bind (start internalp) (cond ((not colon) (values 0 t)) ((and (< (1+ colon) length) (char= (char string (1+ colon)) #\:)) (values (+ colon 2) t)) (t (values (1+ colon) nil))) (when colon (let* ((package-name (subseq string 0 colon)) (found (find-package package-name))) (unless found (error "Unknown package `~A'" package-name)) (setf package found))) (let ((name (subseq string start))) (multiple-value-bind (symbol status) (funcall (if internalp #'intern #'find-symbol) name package) (cond ((or internalp (eq status :external)) symbol) ((not status) (error "Symbol `~A' not found in package `~A'" name (package-name package))) (t (error "Symbol `~A' not external in package `~A'" name (package-name package))))))))) (defgeneric coerce-property-value (value type wanted) (:documentation "Convert VALUE, a property of type TYPE, to be of type WANTED. It's sensible to add additional methods to this function, but there are all the ones we need.") ;; If TYPE matches WANTED, we'll assume that VALUE already has the right ;; form. Otherwise, if nothing else matched, then I guess we'll have to ;; say it didn't work. (:method (value type wanted) (if (eql type wanted) value (error "Incorrect type: expected ~A but found ~A" wanted type))) ;; If the caller asks for type T then give him the raw thing. (:method (value type (wanted (eql t))) value) ;; Keywords. (:method ((value symbol) (type (eql :symbol)) (wanted (eql :keyword))) value) (:method ((value string) (type (eql :id)) (wanted (eql :keyword))) (string-to-symbol (substitute #\- #\_ (frob-case value)) :keyword)) (:method ((value string) (type (eql :string)) (wanted (eql :keyword))) (string-to-symbol (frob-case value) :keyword)) ;; Symbols. (:method ((value string) (type (eql :id)) (wanted (eql :symbol))) (string-to-symbol (substitute #\- #\_ (frob-case value)))) (:method ((value string) (type (eql :string)) (wanted (eql :symbol))) (string-to-symbol (frob-case value))) ;; Identifiers. (:method ((value symbol) (type (eql :symbol)) (wanted (eql :id))) (substitute #\_ #\- (frob-case (symbol-name value))))) ;;;-------------------------------------------------------------------------- ;;; Property set representation. ;;; ;;; There shouldn't be any code elsewhere which depends on the ;;; representation. It's changed before; it may change again. (defstruct (pset (:constructor %make-pset) (:conc-name %pset-)) "A property set. Wrapped up in a structure so that we can define a print function." (hash (make-hash-table) :type hash-table)) (declaim (inline make-pset pset-get pset-store pset-map)) (defun make-pset () "Constructor for property sets." (%make-pset)) (defun pset-get (pset key) "Look KEY up in PSET and return what we find. If there's no property by that name, return NIL." (values (gethash key (%pset-hash pset)))) (defun pset-store (pset prop) "Store property PROP in PSET. Overwrite or replace any previous property with the same name. Mutates the property set." (setf (gethash (p-key prop) (%pset-hash pset)) prop)) (defun pset-map (func pset) "Call FUNC for each property in PSET." (maphash (lambda (key value) (declare (ignore key)) (funcall func value)) (%pset-hash pset))) ;;;-------------------------------------------------------------------------- ;;; `Cooked' property set operations. (defun store-property (pset name value &key (type (property-type value)) location) "Store a property in PSET." (pset-store pset (make-property name value :type type :location location))) (defun get-property (pset name type &optional default) "Fetch a property from a property set. If a property NAME is not found in PSET, or if a property is found, but its type doesn't match TYPE, then return DEFAULT and NIL; otherwise return the value and its file location. In the latter case, mark the property as having been used. The value returned depends on the TYPE argument provided. If you pass NIL then you get back the entire PROPERTY object. If you pass T, then you get whatever was left in the property set, uninterpreted. Otherwise the value is coerced to the right kind of thing (where possible) and returned. If PSET is nil, then return DEFAULT." (let ((prop (and pset (pset-get pset (property-key name))))) (with-default-error-location ((and prop (p-location prop))) (cond ((not prop) (values default nil)) ((not type) (setf (p-seenp prop) t) (values prop (p-location prop))) (t (setf (p-seenp prop) t) (values (coerce-property-value (p-value prop) (p-type prop) type) (p-location prop))))))) (defun add-property (pset name value &key (type (property-type value)) location) "Add a property to PSET. If a property with the same NAME already exists, report an error." (with-default-error-location (location) (let ((existing (get-property pset name nil))) (when existing (error "Property ~S already defined~@[ at ~A~]" name (p-location existing))) (store-property pset name value :type type :location location)))) (defun make-property-set (&rest plist) "Make a new property set, with given properties. This isn't the way to make properties when parsing, but it works well for programmatic generation. The arguments should form a property list (alternating keywords and values is good). An attempt is made to guess property types from the Lisp types of the values. This isn't always successful but it's not too bad. The alternative is manufacturing a PROPERTY-VALUE object by hand and stuffing into the set." (do ((pset (make-pset)) (plist plist (cddr plist))) ((endp plist) pset) (add-property pset (car plist) (cadr plist)))) (defmethod print-object ((pset pset) stream) (print-unreadable-object (pset stream :type t) (pprint-logical-block (stream nil) (let ((firstp t)) (pset-map (lambda (prop) (cond (firstp (setf firstp nil)) (t (write-char #\space stream) (pprint-newline :linear stream))) (format stream "~:@<~S ~@_~S ~@_~S~:>" (p-name prop) (p-type prop) (p-value prop))) pset))))) (defun check-unused-properties (pset) "Issue errors about unused properties in PSET." (when pset (pset-map (lambda (prop) (unless (p-seenp prop) (cerror*-with-location (p-location prop) "Unknown property `~A'" (p-name prop)))) pset))) ;;;-------------------------------------------------------------------------- ;;; Expression parser. (defun parse-expression (lexer) "Parse an expression from the LEXER. The return values are the expression's VALUE and TYPE; currently the types are :ID, :INTEGER, :STRING, and :CHAR. If an error prevented a sane value being produced, the TYPE :INVALID is returned. Expression syntax is rather limited at the moment: expression : term | expression `+' term | expression `-' term term : factor | term `*' factor | term `/' factor factor : primary | `+' factor | `-' factor primary : integer | identifier | string | `(' expression `)' | `?' lisp-expression Identifiers are just standalone things. They don't name values. The operators only work on integer values at the moment. (Confusingly, you can manufacture rational numbers using the division operator, but they still get called integers.)" (let ((valstack nil) (opstack nil)) ;; The following is a simple operator-precedence parser: the ;; recursive-descent parser I wrote the first time was about twice the ;; size and harder to extend. ;; ;; The parser flips between two states, OPERAND and OPERATOR. It starts ;; out in OPERAND state, and tries to parse a sequence of prefix ;; operators followed by a primary expression. Once it's found one, it ;; pushes the operand onto the value stack and flips to OPERATOR state; ;; if it fails, it reports a syntax error and exits. The OPERAND state ;; tries to read a sequence of postfix operators followed by an infix ;; operator; if it fails, it assumes that it hit the stuff following the ;; expression and stops. ;; ;; Each operator is pushed onto a stack consisting of lists of the form ;; (FUNC PREC TY*). The PREC is a precedence -- higher numbers mean ;; tighter binding. The TY* are operand types; operands are popped off ;; the operand stack, checked against the requested types, and passed to ;; the FUNC, which returns a new operand to be pushed in their place. ;; ;; Usually, when a binary operator is pushed, existing stacked operators ;; with higher precedence are applied. Whether operators with /equal/ ;; precedence are also applied depends on the associativity of the ;; operator: apply equal precedence operators for left-associative ;; operators, don't apply for right-associative. When we reach the end ;; of the expression, all the remaining operators on the stack are ;; applied. ;; ;; Parenthesized subexpressions are implemented using a hack: when we ;; find an open paren in operand position, a fake operator is pushed with ;; an artificially low precedece, which protects the operators beneath ;; from premature application. The fake operator's function reports an ;; error -- this will be triggered only if we reach the end of the ;; expression before a matching close-paren, because the close-paren ;; handler will pop the fake operator before it does any harm. (restart-case (labels ((apply-op (op) ;; Apply the single operator list OP to the values on the ;; value stack. (let ((func (pop op)) (args nil)) (dolist (ty (reverse (cdr op))) (let ((arg (pop valstack))) (cond ((eq (car arg) :invalid) (setf func nil)) ((eq (car arg) ty) (push (cdr arg) args)) (t (cerror* "Type mismatch: wanted ~A; found ~A" ty (car arg)) (setf func nil))))) (if func (multiple-value-bind (type value) (apply func args) (push (cons type value) valstack)) (push '(:invalid . nil) valstack)))) (apply-all (prec) ;; Apply all operators with precedence PREC or higher. (loop (when (or (null opstack) (< (cadar opstack) prec)) (return)) (apply-op (pop opstack))))) (tagbody operand ;; Operand state. Push prefix operators, and try to read a ;; primary operand. (case (token-type lexer) ;; Aha. A primary. Push it onto the stack, and see if ;; there's an infix operator. ((:integer :id :string :char) (push (cons (token-type lexer) (token-value lexer)) valstack) (go operator)) ;; Look for a Lisp S-expression. (#\? (with-lexer-stream (stream lexer) (let ((value (eval (read stream t)))) (push (cons (property-type value) value) valstack))) (go operator)) ;; Arithmetic unary operators. Push an operator for `+' for ;; the sake of type-checking. (#\+ (push (list (lambda (x) (values :integer x)) 10 :integer) opstack)) (#\- (push (list (lambda (x) (values :integer (- x))) 10 :integer) opstack)) ;; The open-paren hack. Push a magic marker which will ;; trigger an error if we hit the end of the expression. ;; Inside the paren, we're still looking for an operand. (#\( (push (list (lambda () (error "Expected `)' but found ~A" (format-token lexer))) -1) opstack)) ;; Failed to find anything. Report an error and give up. (t (error "Expected expression but found ~A" (format-token lexer)))) ;; Assume prefix operators as the default, so go round for more. (next-token lexer) (go operand) operator ;; Operator state. Push postfix operators, and try to read an ;; infix operator. It turns out that we're always a token ;; behind here, so catch up. (next-token lexer) (case (token-type lexer) ;; Binary operators. (#\+ (apply-all 3) (push (list (lambda (x y) (values :integer (+ x y))) 3 :integer :integer) opstack)) (#\- (apply-all 3) (push (list (lambda (x y) (values :integer (- x y))) 3 :integer :integer) opstack)) (#\* (apply-all 5) (push (list (lambda (x y) (values :integer (* x y))) 5 :integer :integer) opstack)) (#\/ (apply-all 5) (push (list (lambda (x y) (if (zerop y) (progn (cerror* "Division by zero") (values nil :invalid)) (values (/ x y) :integer))) 5 :integer :integer) opstack)) ;; The close-paren hack. Finish off the operators pushed ;; since the open-paren. If the operator stack is now empty, ;; this is someone else's paren, so exit. Otherwise pop our ;; magic marker, and continue looking for an operator. (#\) (apply-all 0) (when (null opstack) (go done)) (pop opstack) (go operator)) ;; Nothing useful. Must have hit the end, so leave. (t (go done))) ;; Assume we found the binary operator as a default, so snarf a ;; token and head back. (next-token lexer) (go operand) done) ;; Apply all the pending operators. If there's an unmatched ;; open paren, this will trigger the error message. (apply-all -99) ;; If everything worked out, we should have exactly one operand ;; left. This is the one we want. (assert (and (consp valstack) (null (cdr valstack)))) (values (cdar valstack) (caar valstack))) (continue () :report "Return an invalid value and continue." (values nil :invalid))))) ;;;-------------------------------------------------------------------------- ;;; Property set parsing. (defun parse-property (lexer pset) "Parse a single property from LEXER; add it to PSET." (let ((name (require-token lexer :id))) (require-token lexer #\=) (multiple-value-bind (value type) (parse-expression lexer) (unless (eq type :invalid) (add-property pset name value :type type :location lexer))))) (defun parse-property-set (lexer) "Parse a property set from LEXER. If there wasn't one to parse, return nil; this isn't considered an error, and GET-PROPERTY will perfectly happily report defaults for all requested properties." (when (require-token lexer #\[ :errorp nil) (let ((pset (make-pset))) (loop (parse-property lexer pset) (unless (require-token lexer #\, :errorp nil) (return))) (require-token lexer #\]) pset))) ;;;-------------------------------------------------------------------------- ;;; Testing cruft. #+test (with-input-from-string (raw "[role = before, integer = 42 * (3 - 1)]") (let* ((in (make-instance 'position-aware-input-stream :stream raw)) (lexer (make-instance 'sod-lexer :stream in))) (next-char lexer) (next-token lexer) (multiple-value-call #'values (parse-property-set lexer) (token-type lexer)))) ;;;----- That's all, folks --------------------------------------------------