3 ;;; Infix-to-S-exp translation
5 ;;; (c) 2006 Mark Wooding
8 ;;;----- Licensing notice ---------------------------------------------------
10 ;;; This program is free software; you can redistribute it and/or modify
11 ;;; it under the terms of the GNU General Public License as published by
12 ;;; the Free Software Foundation; either version 2 of the License, or
13 ;;; (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software Foundation,
22 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ;;;--------------------------------------------------------------------------
27 (defpackage #:infix-keywords
29 (:export #:|(| #:|)| #:{ #:} #:|,| #:@ #:|$| #:& #:\| #:~
30 #:and #:or #:not #:xor
31 #:== #:/= #:< #:<= #:> #:>= #:eq #:eql #:equal #:equalp
32 #:+ #:- #:* #:/ #:// #:% #:^ #:= #:!
33 #:+= #:-= #:*= #:%= #:&= #:\|= #:xor= #:<<= #:>>=
41 (:use #:common-lisp #:infix-keywords))
45 ;;;--------------------------------------------------------------------------
;; Export the operator structure's type name, its predicate and the
;; accessors for its name, left/right precedence and function slots.
48 (export '(operator operatorp
49 op-name op-lprec op-rprec op-func))
50 (defstruct (operator (:predicate operatorp)
52 "An operator object. The name serves mainly for documentation. The left
53 and right precedences control operator stacking behaviour. The function
54 is called when this operator is popped off the stack.
56 If the left precedence is not nil, then operators currently on the stack
57 whose /right/-precedence is greater than or equal to this operator's
58 /left/-precedence are popped before this operator can be pushed. If the
59 right precedence is nil, then this operator is not in fact pushed, but
60 processed immediately."
61 (name nil :type symbol :read-only t)
62 (lprec nil :type (or fixnum null) :read-only t)
63 (rprec nil :type (or fixnum null) :read-only t)
65 :type #-ecl (function () t) #+ecl function
68 ;;;--------------------------------------------------------------------------
69 ;;; Global parser state.
72 "The parser input stream. Bound automatically by `read-infix'.")
74 ;;;--------------------------------------------------------------------------
75 ;;; State for one level of `parse-infix'.
78 "Value stack. Contains (partially constructed) Lisp forms.")
80 "Operator stack. Contains operator objects.")
83 "The current token. Could be any Lisp object.")
;; Starts at zero.  `pop-paren' tests it with ZEROP before doing anything
;; else and asserts that it is positive before removing a parenthesis;
;; `parse-infix' also tests it with ZEROP next to its MINPREC comparison.
84 (defvar *paren-depth* 0
85 "Depth of parentheses in the current `parse-infix'. Used to override the
86 minprec restriction.")
88 ;;;--------------------------------------------------------------------------
;; Define the end-of-file marker at compile time, load time and when the
;; file is evaluated directly.  The marker is a cons, and a newly built
;; (cons :eof nil) is never EQL to an earlier one, so the DEFCONSTANT is
;; only evaluated when `eof' is unbound or holds something that is not
;; EQUAL to the marker.  NOTE(review): presumably this guard exists to
;; avoid redefining the constant with a non-EQL value on reload -- confirm.
91 (eval-when (:compile-toplevel :load-toplevel :execute)
92 (let ((value (cons :eof nil)))
93 (unless (and (boundp 'eof)
94 (equal (symbol-value 'eof) value))
95 (defconstant eof (cons :eof nil)
96 "A magical object which `get-token' returns at end-of-file."))))
98 (defun default-get-token ()
99 "Read a token from *stream* and store it in *token*."
100 (flet ((whitespacep (ch)
101 (member ch '(#\newline #\space #\tab #\page)))
103 (member ch '(#\; #\, #\: #\( #\) #\@ #\$ #\[ #\] #\{ #\})))
105 (member ch '(#\# #\| #\\ #\" #\' #\`)))
108 (return-from default-get-token)))
112 (setf ch (read-char *stream* nil nil t))
113 (cond ((null ch) (done eof))
114 ((whitespacep ch) (go top))
115 ((eql ch #\;) (go comment))
116 ((self-delim-p ch) (done (intern (string ch)
118 ((or (macro-char-p ch) (alphanumericp ch)) (go read))
121 (unread-char ch *stream*)
122 (done (read *stream* t nil t))
124 (done (intern (with-output-to-string (out)
127 (setf ch (read-char *stream* nil nil t))
131 ((or (self-delim-p ch)
134 (unread-char ch *stream*)
137 (write-char ch out)))))
141 (case (setf ch (read-char *stream* nil nil t))
143 ((#\newline) (go top))
144 (t (go comment)))))))
;; Tokenizer hook.  It is invoked through FUNCALL with no arguments, and
;; `parse-lambda-list' rebinds it to `get-lambda-token' while it reads a
;; lambda list.
146 (export '*get-token*)
147 (defvar *get-token* #'default-get-token
148 "The current tokenizing function.")
152 "Read a token, and store it in *token*. Indirects via *get-token*."
153 (funcall *get-token*))
155 ;;;--------------------------------------------------------------------------
156 ;;; Stack manipulation.
160 "Push VAL onto the value stack."
165 "Pop a value off the value stack and return it."
169 (defun flushops (prec)
170 "Flush out operators on the operator stack with precedence higher than or
171 equal to PREC. This is used when a new operator is pushed, to ensure that
172 higher-precedence operators snarf their arguments."
176 (let ((head (car *opstk*)))
177 (when (> prec (op-rprec head))
180 (funcall (op-func head)))))
184 "Push the operator OP onto the stack. If the operator has a
185 left-precedence, then operators with higher precedence are flushed (see
186 `flushops'). If the operator has no left-precedence, the operator is
187 invoked immediately."
188 (let ((lp (op-lprec op)))
193 (funcall (op-func op))))
195 ;;;--------------------------------------------------------------------------
200 "Signal that `parse-infix' has reached the end of an expression. This is
201 primarily used by the `)' handler function if it finds there are no
203 (throw 'infix-done nil))
205 (export 'parse-infix)
206 (defun parse-infix (&optional minprec)
207 "Parse an infix expression and return the resulting Lisp form. This is
208 the heart of the whole thing.
210 Expects a token to be ready in *token*; leaves *token* as the first token
211 which couldn't be parsed.
213 The syntax parsed by this function doesn't fit nicely into a BNF, since
214 parsing is affected by the precedences of the various operators. We have
215 low-precedence prefix operators such as `not', for example."
216 (flet ((lookup (items)
217 (dolist (item items (values nil nil))
218 (let ((op (get *token* (car item))))
219 (when op (return (values op (cdr item))))))))
228 (when (eq *token* eof)
229 (error "operand expected; found eof"))
232 (multiple-value-bind (op newstate)
233 (lookup '((prefix . :operand)
234 (operand . :operator)))
239 (setf state :operator))
242 (setf state newstate))
249 (setf state :operator))))
253 (multiple-value-bind (op newstate)
254 (lookup '((infix . :operand)
255 (postfix . :operator)))
263 (zerop *paren-depth*)
265 (< (op-lprec op) minprec))
269 (setf state newstate)))
272 (flushops most-negative-fixnum)
273 (assert (and (consp *valstk*)
274 (null (cdr *valstk*))))
277 ;;;--------------------------------------------------------------------------
278 ;;; Machinery for defining operators.
281 (defmacro defopfunc (op kind &body body)
282 "Defines a magical operator. The operator's name is the symbol OP. The
283 KIND must be one of the symbols `infix', `prefix', `postfix' or `operand'. The body
284 is evaluated when the operator is parsed, and must either push appropriate
285 things on the operator stack or do its own parsing and push a result on
288 (setf (get ',op ',kind)
293 (defmacro definfix (op prec &body body)
294 "Defines an infix operator. The operator's name is the symbol OP. The
295 operator's precedence is specified by PREC, which may be one of the
298 * PREC -- equivalent to (:lassoc PREC)
299 * (:lassoc PREC) -- left-associative with precedence PREC
300 * (:rassoc PREC) -- right-associative with precedence PREC
301 * (LPREC . RPREC) -- independent left- and right-precedences
302 * (LPREC RPREC) -- synonym for the dotted form
304 In fact, (:lassoc PREC) is the same as (PREC . PREC), and (:rassoc PREC)
305 is the same as (PREC . (1- PREC)).
307 The BODY is evaluated when the operator's arguments are fully resolved.
308 It should pop off two arguments and push one result. Nobody will check
309 that this is done correctly."
313 (error "bad precedence spec ~S" prec)))
314 (cond ((integerp prec)
318 ((and (integerp (car prec))
319 (integerp (cdr prec)))
320 (values (car prec) (cdr prec)))
321 ((or (not (consp (cdr prec)))
322 (not (integerp (cadr prec)))
323 (not (null (cddr prec))))
325 ((integerp (car prec))
326 (values (car prec) (cadr prec)))
327 ((eq (car prec) :lassoc)
328 (values (cadr prec) (cadr prec)))
329 ((eq (car prec) :rassoc)
330 (values (cadr prec) (1- (cadr prec))))
334 (setf (get ',op 'infix)
335 (make-operator :name ',op
336 :lprec ,lprec :rprec ,rprec
337 :func (lambda () ,@body)))
340 (eval-when (:compile-toplevel :load-toplevel)
341 (defun do-defunary (kind op prec body)
342 (unless (integerp prec)
343 (error "bad precedence spec ~S" prec))
345 (setf (get ',op ',kind)
346 (make-operator :name ',op
349 (postfix :lprec)) ,prec
350 :func (lambda () ,@body)))
;; PREC must be an integer: `do-defunary' signals "bad precedence spec"
;; for anything else.
354 (defmacro defprefix (op prec &body body)
355 "Defines a prefix operator. The operator's name is the symbol OP. The
356 operator's (right) precedence is PREC. The body is evaluated when the
357 operator's argument is fully determined. It should pop off one argument
358 and push one result."
359 (do-defunary 'prefix op prec body))
;; PREC must be an integer: `do-defunary' signals "bad precedence spec"
;; for anything else.
362 (defmacro defpostfix (op prec &body body)
363 "Defines a postfix operator. The operator's name is the symbol OP. The
364 operator's (left) precedence is PREC. The body is evaluated when the
365 operator's argument is fully determined. It should pop off one argument
366 and push one result."
367 (do-defunary 'postfix op prec body))
369 ;;;--------------------------------------------------------------------------
370 ;;; Infrastructure for operator definitions.
373 (defun delim (delim &optional (requiredp t))
374 "Parse DELIM, and read the next token. Returns t if the DELIM was found,
375 or nil if not (and REQUIREDP was nil)."
376 (cond ((eq *token* delim) (get-token) t)
377 (requiredp (error "expected `~(~A~)'; found ~S" delim *token*))
381 (defun errfunc (&rest args)
382 "Returns a function which reports an error. Useful when constructing
384 (lambda () (apply #'error args)))
(export 'binop-apply)
(defun binop-apply (name)
  "Replace the top two values on the value stack by a call to NAME.
Pops the right-hand operand first and the left-hand operand second, then
pushes the list (NAME LEFT RIGHT) back onto the value stack."
  (let* ((right (popval))
         (left (popval)))
    (pushval (list name left right))))
393 (export 'binop-apply-append)
394 (defun binop-apply-append (name)
395 "As for `binop-apply' but if the second-from-top item on the stack has the
396 form (NAME SOMETHING ...) then fold the top item into the form rather than
398 (let ((y (popval)) (x (popval)))
399 (pushval (if (and (consp x) (eq (car x) name))
(defun unop-apply (name)
  "Wrap the top value on the value stack in a call to NAME.
Pops one value X and pushes the list (NAME X) in its place."
  (let ((operand (popval)))
    (pushval (list name operand))))
409 (export 'unop-apply-toggle)
410 (defun unop-apply-toggle (name)
411 "As for `unop-apply', but if the top item has the form (NAME X) already,
414 (pushval (if (and (consp x)
421 (export 'strip-progn)
422 (defun strip-progn (form)
423 "Return a version of FORM suitable for putting somewhere where there's an
424 implicit `progn'. If FORM has the form (PROGN . FOO) then return FOO,
425 otherwise return (FORM)."
426 (if (and (consp form)
427 (eq (car form) 'progn))
431 (export 'parse-expr-list)
432 (defun parse-expr-list ()
433 "Parse a list of expressions separated by commas."
436 (push (parse-infix 0) stuff)
437 (unless (delim '|,| nil)
441 (export 'parse-ident-list)
442 (defun parse-ident-list ()
443 "Parse a list of symbols separated by commas."
446 (unless (symbolp *token*)
447 (error "expected symbol; found ~S" *token*))
450 (unless (delim '|,| nil)
454 ;;;--------------------------------------------------------------------------
455 ;;; Various simple operators.
;; Operators defined with `binop-apply-append' fold a chain of the same
;; operator into a single form; the file's own tests show `1 + 2 + 3'
;; giving (+ 1 2 3).

;; Sequencing: `,' has the lowest precedence in this table and builds a
;; `progn'.
457 (definfix |,| (:lassoc -1) (binop-apply-append 'progn))
;; Logical connectives map onto Lisp `or' and `and'; `and' binds tighter.
459 (definfix or (:lassoc 10) (binop-apply-append 'or))
460 (definfix and (:lassoc 15) (binop-apply-append 'and))
;; `not' is a prefix operator with precedence 19, just below the
;; comparisons at 20, so it applies to a whole comparison: the tests
;; expect `not b < c' to give (not (< b c)).
462 (defprefix not 19 (unop-apply-toggle 'not))
;; Numeric comparisons.  Infix `==' is emitted as Lisp `='.
464 (definfix == (:lassoc 20) (binop-apply-append '=))
465 (definfix /= (:lassoc 20) (binop-apply-append '/=))
466 (definfix < (:lassoc 20) (binop-apply-append '<))
467 (definfix <= (:lassoc 20) (binop-apply-append '<=))
468 (definfix >= (:lassoc 20) (binop-apply-append '>=))
469 (definfix > (:lassoc 20) (binop-apply-append '>))
;; Object-equality predicates.  Each of these Lisp functions takes exactly
;; two arguments, so a chain such as `a eq b eq c' must not be folded into
;; one call: (eq a b c) is not a valid form.  `binop-apply' nests the chain
;; as (eq (eq a b) c) instead, which is at least well-formed.
470 (definfix eq (:lassoc 20) (binop-apply 'eq))
471 (definfix eql (:lassoc 20) (binop-apply 'eql))
472 (definfix equal (:lassoc 20) (binop-apply 'equal))
473 (definfix equalp (:lassoc 20) (binop-apply 'equalp))
;; Bitwise operators: inclusive and exclusive or share precedence 30, and
;; bitwise and binds more tightly at 35.
475 (definfix \| (:lassoc 30) (binop-apply-append 'logior))
476 (definfix xor (:lassoc 30) (binop-apply-append 'logxor))
477 (definfix & (:lassoc 35) (binop-apply-append 'logand))
;; Both shifts are expressed with `ash'.  For `>>' the shift count, which
;; is on top of the value stack, first goes through (unop-apply-toggle '-);
;; the file's tests expect `1 >> 5' to give (ash 1 (- 5)).
479 (definfix << (:lassoc 40) (binop-apply 'ash))
480 (definfix >> (:lassoc 40) (unop-apply-toggle '-) (binop-apply 'ash))
;; Addition.
482 (definfix + (:lassoc 50) (binop-apply-append '+))
;; Subtraction.  A chain `a - b - c' is still folded into (- a b c), but
;; the fold is only applied when the left operand is a subtraction with at
;; least two arguments.  A one-argument (- X) is a negation (it is what
;; prefix `-' produces), and appending to it would turn `-a - b' into
;; (- a b) instead of (- (- a) b).
483 (definfix - (:lassoc 50)
  (let* ((y (popval))                   ; right operand is on top
         (x (popval)))
    (pushval (if (and (consp x)
                      (eq (car x) '-)
                      (consp (cddr x))) ; two or more arguments already
                 (append x (list y))
                 (list '- x y)))))
;; Multiplicative operators.  `//' is emitted as `floor' and `%' as `mod'.
485 (definfix * (:lassoc 60) (binop-apply-append '*))
486 (definfix / (:lassoc 60) (binop-apply '/))
487 (definfix // (:lassoc 60) (binop-apply 'floor))
488 (definfix % (:lassoc 60) (binop-apply 'mod))
;; Exponentiation is right-associative: the file's tests expect
;; `x ^ y ^ z' to give (expt x (expt y z)).
490 (definfix ^ (:rassoc 70) (binop-apply 'expt))
;; Assignment-like operators use independent precedences.  The left
;; precedence, 120, is higher than every right precedence defined in this
;; table; the right precedence, 5, is lower than every left precedence in
;; this table except that of `,'.
492 (definfix = (120 . 5) (binop-apply 'setf))
493 (definfix += (120 . 5) (binop-apply 'incf))
494 (definfix -= (120 . 5) (binop-apply 'decf))
;; Prefix operators, all with precedence 100.  Unary `+' has an empty
;; body, so its operand is left on the value stack unchanged.
496 (defprefix + 100 nil)
497 (defprefix - 100 (unop-apply-toggle '-))
498 (defprefix ~ 100 (unop-apply-toggle 'lognot))
;; Prefix increment and decrement are emitted as `incf' and `decf'.
500 (defprefix ++ 100 (unop-apply 'incf))
501 (defprefix -- 100 (unop-apply 'decf))
503 ;;(defpostfix ! 110 (unop-apply 'factorial))
506 "An escape to the standard Lisp reader."
507 (pushval (read *stream* t nil t))
510 ;;;--------------------------------------------------------------------------
511 ;;; Parentheses, for grouping and function-calls.
514 (defun push-paren (right)
515 "Pushes a funny parenthesis operator. Since this operator has no left
516 precedence, and very low right precedence, it is pushed over any stack of
517 operators and can only be popped by magic or end-of-file. In the latter
518 case, cause an error."
519 (pushop (make-operator :name right
520 :lprec nil :rprec -1000
521 :func (errfunc "missing `~A'" right)))
526 (defun pop-paren (right)
527 "Pops a parenthesis. If there are no parentheses, maybe they belong to the
528 caller's syntax. Otherwise, pop off operators above the current funny
529 parenthesis operator, and then remove it."
530 (when (zerop *paren-depth*)
534 (unless (eq (op-name (car *opstk*)) right)
535 (error "spurious `~A'" right))
536 (assert (plusp *paren-depth*))
;; Grouping delimiters.  Each opening delimiter is registered as a prefix
;; operator that pushes a marker named after its closing delimiter; each
;; closing delimiter is registered as a postfix operator that hands its own
;; name to `pop-paren', which compares it with the name of the operator on
;; top of the operator stack.
541 (defopfunc |(| prefix (push-paren '\)))
542 (defopfunc |)| postfix (pop-paren '\)))
543 (defopfunc |{| prefix (push-paren '\}))
544 (defopfunc |}| postfix (pop-paren '\}))
546 (defopfunc |(| postfix
548 (pushval (cons (popval) (and (not (eq *token* '|)|)) (parse-expr-list))))
551 ;;;--------------------------------------------------------------------------
552 ;;; Various bits of special syntax.
554 (defopfunc if operand
555 "Parse an `if' form. Syntax:
557 IF ::= `if' CONDITION `then' CONSEQUENCE [`else' ALTERNATIVE]
559 We parse this into an `if' where sensible, or into a `cond' if we see an
560 `else if' pair. The usual `dangling else' rule is followed."
563 (setf cond (parse-infix))
565 (setf cons (parse-infix 0))
566 (if (not (eq *token* 'else))
567 (pushval (list 'if cond cons))
570 (cond ((not (eq *token* 'if))
571 (pushval (list 'if cond cons (parse-infix 0))))
574 (flet ((clause (cond cons)
575 (push (cons cond (strip-progn cons)) clauses)))
579 (setf cond (parse-infix))
581 (setf cons (parse-infix 0))
583 (unless (eq *token* 'else) (return))
588 (clause t (parse-infix 0))
590 (pushval (cons 'cond (nreverse clauses)))))))))))
592 (defun do-letlike (kind)
593 "Parse a `let' form. Syntax:
595 LET ::= `let' | `let*' VARS `in' EXPR
596 VARS ::= VAR | VARS `,' VAR
597 VAR ::= NAME [`=' VALUE]
599 Translates into the obvious Lisp code."
600 (let ((clauses nil) name value)
603 (unless (symbolp *token*)
604 (error "symbol expected, found ~S" *token*))
610 (setf value (parse-infix 0))
611 (push (list name value) clauses))
613 (unless (eq *token* '|,|)
617 (pushval `(,kind ,(nreverse clauses) ,@(strip-progn (parse-infix 0))))))
;; `let' and `let*' share one parser; the argument is the symbol that
;; `do-letlike' places at the head of the form it pushes.
618 (defopfunc let operand (do-letlike 'let))
619 (defopfunc let* operand (do-letlike 'let*))
621 (defopfunc when operand
623 (pushval `(when ,(parse-infix)
624 ,@(progn (delim 'do) (strip-progn (parse-infix 0))))))
626 (defopfunc unless operand
628 (pushval `(unless ,(parse-infix)
629 ,@(progn (delim 'do) (strip-progn (parse-infix 0))))))
631 (defopfunc loop operand
633 (pushval `(loop ,@(strip-progn (parse-infix 0)))))
635 (defopfunc bind operand
637 (let ((ids (parse-ident-list))
638 (valform (progn (delim '=) (parse-infix 0)))
639 (body (if (delim '|,| nil)
643 (strip-progn (parse-infix 0))))))
644 (list (if (and ids (null (cdr ids)))
645 `(let ((,(car ids) ,valform)) ,@body)
646 `(multiple-value-bind ,ids ,valform ,@body))))))
648 (pushval (car (loopy)))))
650 ;;;--------------------------------------------------------------------------
651 ;;; Parsing function bodies and lambda lists.
653 (defun parse-lambda-list ()
654 "Parse an infix-form lambda list and return the Lisp equivalent."
655 (flet ((ampersand-symbol-p (thing)
657 (let ((name (symbol-name thing)))
658 (plusp (length name))
659 (char= (char name 0) #\&))))
662 (when (or (eq *token* '&)
664 (unread-char #\& *stream*)
665 (setf *token* (read *stream* t nil t)))))
667 (let ((*get-token* #'get-lambda-token))
669 (unless (eq *token* '|)|)
672 (cond ((ampersand-symbol-p *token*)
675 (when (eq *token* '|)|)
680 (let ((name *token*))
683 (push (list name (parse-infix 0)) args)
688 (when (delim '|,| nil)
(defun parse-func-name ()
  "Parse a function name starting at the current token and return it.
If the current token is an opening parenthesis, the name is the infix
expression that follows it, and a closing parenthesis is then required.
Otherwise the current token itself is the name, and the next token is
read before returning."
  (if (delim '|(| nil)
      ;; Parenthesised name: parse it, then insist on the closing `)'.
      (let ((name (parse-infix)))
        (delim '|)|)
        name)
      ;; Plain name: remember the token before stepping past it.
      (let ((name *token*))
        (get-token)
        name)))
700 (defopfunc lambda operand
702 (pushval `(lambda ,(parse-lambda-list) ,@(strip-progn (parse-infix 0)))))
704 (defun do-defunlike (kind)
705 "Process a defun-like form."
707 (pushval `(,kind ,(parse-func-name) ,(parse-lambda-list)
708 ,@(strip-progn (parse-infix 0)))))
;; `defun' and `defmacro' share one parser; the argument is the symbol
;; that `do-defunlike' places at the head of the form it pushes.
710 (defopfunc defun operand (do-defunlike 'defun))
711 (defopfunc defmacro operand (do-defunlike 'defmacro))
713 (defun do-fletlike (kind)
714 "Process a flet-like form."
718 (push `(,(parse-func-name) ,(parse-lambda-list)
719 ,@(strip-progn (parse-infix 0)))
721 (unless (delim '|,| nil)
724 (pushval `(,kind ,(nreverse clauses) ,@(strip-progn (parse-infix 0))))))
;; `flet' and `labels' share one parser; the argument is the symbol that
;; `do-fletlike' places at the head of the form it pushes.
726 (defopfunc flet operand (do-fletlike 'flet))
727 (defopfunc labels operand (do-fletlike 'labels))
729 ;;;--------------------------------------------------------------------------
730 ;;; User-interface stuff.
733 (defun read-infix (&optional (*stream* *standard-input*) &key (delim eof))
734 "Reads an infix expression from STREAM and returns the corresponding Lisp.
735 Requires the expression to be delimited properly by DELIM (by default
741 (unless (eq *token* delim)
742 (error "expected ~S; found ~S" delim *token*)))))
744 (export 'install-infix-reader)
745 (defun install-infix-reader
746 (&optional (start #\{) (end #\}) &key dispatch (readtable *readtable*))
747 "Installs a macro character `{ INFIX... }' for translating infix notation
748 to Lisp forms. You also want to (use-package :infix-keywords) if you do
750 (let ((delim (intern (string end) 'infix-keywords)))
751 (flet ((doit (stream &rest noise)
752 (declare (ignore noise))
753 (read-infix stream :delim delim)))
755 (set-dispatch-macro-character dispatch start #'doit readtable)
756 (set-macro-character start #'doit nil readtable))
757 (unless (or (eql start end)
760 (get-macro-character end readtable)
761 (and func (not nontermp))))
762 (set-macro-character end (lambda (&rest noise)
763 (declare (ignore noise))
764 (error "Unexpected `~C'." end))
767 ;;;--------------------------------------------------------------------------
770 (defun test-infix (string)
771 (with-input-from-string (in string)
774 (defun test-tokenize (string &optional (get-token #'get-token))
775 (with-input-from-string (*stream* string)
776 (loop with *token* = nil
777 do (funcall get-token)
778 until (eq *token* eof)
781 (defun testrig (what run tests)
784 for (input . output) in tests
785 for result = (handler-case (funcall run input)
787 (setf error (format nil "~A" err))
789 unless (equal result output)
793 result = ~:[~S~*~;~*error ~A~]
797 (eq result 'fail) result error
800 finally (return ok)))
803 (testrig "tokenize" #'test-tokenize
808 ("&optional" . (& optional))
809 ("(4)" . (|(| 4 |)|))))
812 (testrig "infix" #'test-infix
819 ("1 + 2 + 3" . (+ 1 2 3))
821 ("x += 5" . (incf x 5))
822 ("1 << 5" . (ash 1 5))
823 ("1 >> 5" . (ash 1 (- 5)))
824 ("1 & 5" . (logand 1 5))
825 ("lambda (x, y) x + y" . (lambda (x y) (+ x y)))
826 ("lambda (x, y) (x += y, x - 1)" . (lambda (x y) (incf x y) (- x 1)))
827 ("lambda (x, &optional y = 1) x - y" .
828 (lambda (x &optional (y 1)) (- x y)))
829 ("foo(x, y)" . (foo x y))
830 ("if a == b then x + y" . (if (= a b) (+ x y)))
831 ("if a == b then x + y else x - y" . (if (= a b) (+ x y) (- x y)))
832 ("if a == b then x + y else if a == -b then x - y" .
833 (cond ((= a b) (+ x y)) ((= a (- b)) (- x y))))
834 ("let x = 1 in x ^ 4" . (let ((x 1)) (expt x 4)))
835 ("x ^ y ^ z" . (expt x (expt y z)))
836 ("a < b and not b < c or c > d" .
837 (or (and (< a b) (not (< b c))) (> c d)))
838 ("cdr(x) = nil" . (setf (cdr x) nil))
839 ("labels foo (x) x + 1, bar (x) x - 1 in foo(bar(y))".
840 (labels ((foo (x) (+ x 1)) (bar (x) (- x 1))) (foo (bar y))))
841 ("defun foo (x) x - 6" .
842 (defun foo (x) (- x 6)))
843 ("bind x = 3 in x - 2" . (let ((x 3)) (- x 2)))
844 ("bind x, y = values(1, 2),
846 docs, decls, body = parse-body(body) in complicated" .
847 (multiple-value-bind (x y) (values 1 2)
849 (multiple-value-bind (docs decls body) (parse-body body)
852 ;;;--------------------------------------------------------------------------
856 (flet ((dotrace (func)
858 (trace :function func
862 :print-all *valstk*))))
864 (dolist (s '(if \( \) \:))
865 (dolist (p '(infix prefix postfix))
866 (let ((op (get s p)))
867 (dotrace (etypecase op
869 (operator (op-func op))
871 (dolist (f '(read-infix parse-infix binop-apply unop-apply pushval popval
872 pushop flushops push-paren get-token))
875 ;;;--------------------------------------------------------------------------