\def\@scripts{\futurelet\@ch\@scripts@i}
\atdef ;#1\\{\normalfont\itshape;#1\\}
+\let\@@grammar\grammar
+\def\grammar{\def\textbar{\hbox{$|$}}\@@grammar}
\begingroup\lccode`\~=`\_\lowercase{\endgroup
\def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
\begin{quote}
$\epsilon$ ::=
\end{quote}
-\item $[$@<item>$]$ means an optional @<item>:
+\item @[@<item>@] means an optional @<item>:
\begin{quote}
- \syntax{$[$<item>$]$ ::= $\epsilon$ | <item>}
+ \syntax{@[<item>@] ::= $\epsilon$ | <item>}
\end{quote}
-\item @<item>$^*$ means a sequence of zero or more @<item>s:
+\item @<item>^* means a sequence of zero or more @<item>s:
\begin{quote}
- \syntax{<item>$^*$ ::= $\epsilon$ | <item>$^*$ <item>}
+ \syntax{@<item>^* ::= $\epsilon$ | @<item>^* <item>}
\end{quote}
-\item @<item>$^+$ means a sequence of one or more @<item>s:
+\item @<item>^+ means a sequence of one or more @<item>s:
\begin{quote}
- \syntax{<item>$^+$ ::= <item> <item>$^*$}
+ \syntax{@<item>^+ ::= <item> @<item>^*}
\end{quote}
\item @<item-list> means a sequence of one or more @<item>s separated
by commas:
\subsubsection{Identifiers} \label{sec:syntax.lex.id}
\begin{grammar}
-<identifier> ::= <id-start-char> <id-body-char>$^*$
+<identifier> ::= <id-start-char> @<id-body-char>^*
-<id-start-char> ::= <alpha-char> $|$ "_"
+<id-start-char> ::= <alpha-char> | "_"
-<id-body-char> ::= <id-start-char> $|$ <digit-char>
+<id-body-char> ::= <id-start-char> @! <digit-char>
-<alpha-char> ::= "A" $|$ "B" $|$ \dots\ $|$ "Z"
-\alt "a" $|$ "b" $|$ \dots\ $|$ "z"
+<alpha-char> ::= "A" | "B" | \dots\ | "Z"
+\alt "a" | "b" | \dots\ | "z"
\alt <extended-alpha-char>
-<digit-char> ::= "0" $|$ <nonzero-digit-char>
+<digit-char> ::= "0" | <nonzero-digit-char>
-<nonzero-digit-char> ::= "1" $|$ "2" $| \cdots |$ "9"
+<nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
\end{grammar}
The precise definition of @<alpha-char> is left to the function
\begin{grammar}
<reserved-word> ::=
-"char" $|$ "class" $|$ "code" $|$ "const" $|$ "double" $|$ "enum" $|$
-"extern" $|$ "float" $|$ "import" $|$ "int" $|$ "lisp" $|$ "load" $|$ "long"
-$|$ "restrict" $|$ "short" $|$ "signed" $|$ "struct" $|$ "typename" $|$
-"union" $|$ "unsigned" $|$ "void" $|$ "volatile"
+"char" | "class" | "code" | "const" | "double" | "enum" |
+"extern" | "float" | "import" | "int" | "lisp" | "load" | "long"
+| "restrict" | "short" | "signed" | "struct" | "typename" |
+"union" | "unsigned" | "void" | "volatile"
\end{grammar}
Many of these are borrowed from~C; however, some (e.g., @"import" and
\subsubsection{String and character literals} \label{sec:syntax.lex.string}
\begin{grammar}
-<string-literal> ::= "\"" <string-literal-char>$^*$ "\""
+<string-literal> ::= "\"" @<string-literal-char>^* "\""
<char-literal> ::= "'" <char-literal-char> "'"
\alt <octal-integer>
\alt <hex-integer>
-<decimal-integer> ::= <nonzero-digit-char> <digit-char>$^*$
+<decimal-integer> ::= <nonzero-digit-char> @<digit-char>^*
-<binary-integer> ::= "0" $($"b"$|$"B"$)$ <binary-digit-char>$^+$
+<binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
-<binary-digit-char> ::= "0" $|$ "1"
+<binary-digit-char> ::= "0" | "1"
-<octal-integer> ::= "0" $[$"o"$|$"O"$]$ <octal-digit-char>$^+$
+<octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
-<octal-digit-char> ::= "0" $|$ "1" $| \cdots |$ "7"
+<octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
-<hex-integer> ::= "0" $($"x"$|$"X"$)$ <hex-digit-char>$^+$
+<hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
<hex-digit-char> ::= <digit-char>
-\alt "A" $|$ "B" $|$ "C" $|$ "D" $|$ "E" $|$ "F"
-\alt "a" $|$ "b" $|$ "c" $|$ "d" $|$ "e" $|$ "f"
+\alt "A" | "B" | "C" | "D" | "E" | "F"
+\alt "a" | "b" | "c" | "d" | "e" | "f"
\end{grammar}
Sod understands only integers, not floating-point numbers; its integer syntax
<block-comment> ::=
"/*"
- <not-star>$^*$ $($<star>$^+$ <not-star-or-slash> <not-star>$^*)^*$
- <star>$^*$
+ @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
+ @<star>^*
"*/"
<star> ::= "*"
<not-star-or-slash> ::= any character other than "*" or "/"
-<line-comment> ::= "//" <not-newline>$^*$ <newline>
+<line-comment> ::= "//" @<not-newline>^* <newline>
<newline> ::= a newline character
\subsection{Module syntax} \label{sec:syntax-module}
\begin{grammar}
-<module> ::= <definition>$^*$
+<module> ::= @<definition>^*
<definition> ::= <import-definition>
\alt <load-definition>
\begin{grammar}
<code-definition> ::=
- "code" <identifier> ":" <identifier> $[$<constraints>$]$
+ "code" <identifier> ":" <identifier> @[<constraints>@]
"{" <c-fragment> "}"
<constraints> ::= "[" <constraint-list> "]"
-<constraint> ::= <identifier>$^+$
+<constraint> ::= @<identifier>^+
\end{grammar}
The @<c-fragment> will be output unchanged to one of the output files.
more @<qualifiers>, and one of the following, up to reordering.
\begin{itemize}
\item @<type-name>
-\item @"struct" <identifier>, @"union" <identifier>, @"enum" <identifier>
+\item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
\item @"void"
\item @"char", @"unsigned char", @"signed char"
\item @"short", @"unsigned short", @"signed short"
\begin{grammar}
<declarator> ::=
- <pointer>$^*$ <inner-declarator> <declarator-suffix>$^*$
+ @<pointer>^* <inner-declarator> @<declarator-suffix>^*
<inner-declarator> ::= <identifier> | <qualified-identifier>
\alt "(" <declarator> ")"
<qualified-identifier> ::= <identifier> "." <identifier>
-<pointer> ::= "*" <qualifier>$^*$
+<pointer> ::= "*" @<qualifier>^*
<declarator-suffix> ::= "[" <c-fragment> "]"
\alt "(" <arguments> ")"
<arguments> ::= <empty> | "..."
-\alt <argument-list> $[$"," "..."$]$
+\alt <argument-list> @["," "..."@]
-<argument> ::= <declaration-specifier>$^+$ <argument-declarator>
+<argument> ::= @<declaration-specifier>^+ <argument-declarator>
-<argument-declarator> ::= <declarator> | $[$<abstract-declarator>$]$
+<argument-declarator> ::= <declarator> | @[<abstract-declarator>@]
<abstract-declarator> ::=
- <pointer>$^+$ | <pointer>$^*$ <inner-abstract-declarator>
+ @<pointer>^+ | @<pointer>^* <inner-abstract-declarator>
<inner-abstract-declarator> ::= "(" <abstract-declarator> ")"
-\alt $[$<inner-abstract-declarator>$]$ <declarator-suffix>$^+$
+\alt @[<inner-abstract-declarator>@] @<declarator-suffix>^+
\end{grammar}
The declarator syntax is taken from C, but with some differences.
\begin{grammar}
<full-class-definition> ::=
- $[$<properties>$]$
+ @[<properties>@]
"class" <identifier> ":" <identifier-list>
- "{" <class-item>$^*$ "}"
+ "{" @<class-item>^* "}"
<class-item> ::= <slot-item> ";"
\alt <message-item>
\begin{grammar}
<slot-item> ::=
- $[$<properties>$]$
- <declaration-specifier>$^+$ <init-declarator-list>
+ @[<properties>@]
+ @<declaration-specifier>^+ <init-declarator-list>
-<init-declarator> ::= <declarator> $[$"=" <initializer>$]$
+<init-declarator> ::= <declarator> @["=" <initializer>@]
\end{grammar}
A @<slot-item> defines one or more slots. All instances of the class and any
\subsubsection{Initializer items} \label{sec:syntax.class.init}
\begin{grammar}
-<initializer-item> ::= $[$"class"$]$ <slot-initializer-list>
+<initializer-item> ::= @["class"@] <slot-initializer-list>
<slot-initializer> ::= <qualified-identifier> "=" <initializer>
\begin{grammar}
<message-item> ::=
- $[$<properties>$]$
- <declaration-specifier>$^+$ <declarator> $[$<method-body>$]$
+ @[<properties>@]
+ @<declaration-specifier>^+ <declarator> @[<method-body>@]
\end{grammar}
\subsubsection{Method items} \label{sec:syntax.class.method}
\begin{grammar}
<method-item> ::=
- $[$<properties>$]$
- <declaration-specifier>$^+$ <declarator> <method-body>
+ @[<properties>@]
+ @<declaration-specifier>^+ <declarator> <method-body>
<method-body> ::= "{" <c-fragment> "}" | "extern" ";"
\end{grammar}
(if-parse 2)
(if-char 2)
(expr 1)
+ (label 1)
(acond . cond)
(define-class-slot 3)))
(put (car entry) 'common-lisp-indent-function
;; `inline') since it's meaningless to me.
(defclass declspec ()
- ;; This could have been done with DEFSTRUCT just as well, but a DEFCLASS
- ;; can be tweaked interactively, which is a win at the moment.
+ ;; Despite the fact that it looks pretty trivial, this can't be done with
+ ;; `defstruct' for the simple reason that we add more methods to the
+ ;; accessor functions later.
((label :type keyword :initarg :label :reader ds-label)
(name :type string :initarg :name :reader ds-name)
(kind :type (member type sign size qualifier)
;; A collection of declaration specifiers, and how to merge them together.
(defclass declspecs ()
- ;; Despite the fact that it looks pretty trivial, this can't be done with
- ;; DEFCLASS for the simple reason that we add more methods to the accessor
- ;; functions later.
+ ;; This could have been done with `defstruct' just as well, but a
+ ;; `defclass' can be tweaked interactively, which is a win at the moment.
((type :initform nil :initarg :type :reader ds-type)
(sign :initform nil :initarg :sign :reader ds-sign)
(size :initform nil :initarg :size :reader ds-size)
((null type)
(setf type (gethash :int *declspec-map*))))
(make-simple-type (format nil "~{~@[~A~^ ~]~}"
- (mapcar #'ds-label
+ (mapcar #'ds-name
(remove nil
(list sign size type))))
quals))
(values it t consumedp)
(values (list :declspec) nil consumedp)))))
+(export 'parse-c-type)
(defun parse-c-type (scanner)
"Parse a C type from declaration specifiers.
;;; (funcall FUNC TYPE) returns the derived type. The result of
;;; `parse-declarator' will be of this form.
-(defun parse-declarator (scanner base-type &key abstractp)
- (with-parser-context (token-scanner-context :scanner scanner)
+(export 'parse-declarator)
+(defun parse-declarator (scanner base-type &key centre abstractp)
+ "Parse a C declarator, returning a pair (C-TYPE . NAME).
- (labels ((qualifiers ()
- ;; QUALIFIER*
-
- (parse
- (seq ((quals (list ()
- (scan-declspec
- scanner
- :indicator :qualifier
- :predicate (lambda (ds)
- (and (typep ds 'declspec)
- (eq (ds-kind ds)
- 'qualifier)))))))
- (mapcar #'ds-label quals))))
-
- (star ()
- ;; Prefix: `*' QUALIFIERS
-
- (parse (seq (#\* (quals (qualifiers)))
- (preop "*" (state 9)
- (cons (lambda (type)
- (funcall (car state)
- (make-pointer-type type quals)))
- (cdr state))))))
-
- (prefix-lparen ()
- ;; Prefix: `('
- ;;
- ;; Opening parentheses are treated as prefix operators by the
- ;; expression parsing engine. There's an annoying ambiguity
- ;; in the syntax if abstract declarators are permitted: a `('
- ;; might be either the start of a nested subdeclarator or the
- ;; start of a postfix function argument list. The two are
- ;; disambiguated by stating that if the token following the
- ;; `(' is a `)' or a declaration specifier, then we have a
- ;; postfix argument list.
-
- (parse
- (peek (seq (#\(
- (nil (if (and abstractp
- (eq (token-type scanner) :id)
- (let ((id (token-value scanner)))
- (or (gethash id
- *module-type-map*)
- (gethash id
- *declspec-map*))))
- (values nil nil nil)
- (values t t nil))))
- (lparen #\))))))
-
- (centre ()
- ;; ID | empty
- ;;
- ;; The centre might be empty or contain an identifier,
- ;; depending on the setting of ABSTRACTP.
-
- (parse (or (when (not (eq abstractp t))
- (seq ((id :id)) (cons #'identity id)))
- (when abstractp
- (t (cons #'identity nil))))))
-
- (argument-list ()
- ;; [ ARGUMENT [ `,' ARGUMENT ]* ]
-
- (parse (list ()
- (seq ((base-type (parse-c-type scanner))
- (dtor (parse-declarator scanner
- base-type
- :abstractp :maybe)))
- (make-argument (cdr dtor) (car dtor)))
- #\,)))
-
- (postfix-lparen ()
- ;; Postfix: `(' ARGUMENT-LIST `)'
-
- (parse (seq (#\( (args (argument-list)) #\))
- (postop "()" (state 9)
- (cons (lambda (type)
- (funcall (car state)
- (make-function-type type args)))
- (cdr state))))))
-
- (dimension ()
- ;; `[' C-FRAGMENT ']'
-
- (parse-delimited-fragment scanner #\[ #\]))
-
- (lbracket ()
- ;; Postfix: DIMENSION+
-
- (parse (seq ((dims (list (:min 1) (dimension))))
- (postop "[]" (state 10)
- (cons (lambda (type)
- (funcall (car state)
- (make-array-type type dims)))
- (cdr state)))))))
-
- ;; And now we actually do the declarator parsing.
- (parse (seq ((value (expr (:nestedp nestedp)
-
- ;; An actual operand.
- (centre)
-
- ;; Binary operators. There aren't any.
- nil
-
- ;; Prefix operators.
- (or (star)
- (prefix-lparen))
-
- ;; Postfix operators.
- (or (postfix-lparen)
- (lbracket)
- (when nestedp (seq (#\)) (rparen #\))))))))
- (cons (funcall (car value) base-type) (cdr value)))))))
+ The SCANNER is a token scanner to read from. The BASE-TYPE is the type
+ extracted from the preceding declaration specifiers, as parsed by
+ `parse-c-type'.
+
+ The result contains both the resulting constructed C-TYPE (with any
+ qualifiers etc. as necessary), and the name from the middle of the
+ declarator. The name is parsed using the CENTRE parser provided, and
+ defaults to matching a simple identifier `:id'. This might, e.g., be
+ (? :id) to parse an `abstract declarator' which has optional names.
+
+ There's an annoying ambiguity in the syntax, if an empty CENTRE is
+ permitted. In this case, you must ensure that ABSTRACTP is true so that
+ the appropriate heuristic can be applied. As a convenience, if ABSTRACTP
+ is true then `(? :id)' is used as the default CENTRE."
+ (with-parser-context (token-scanner-context :scanner scanner)
+ (let ((centre-parser (cond (centre centre)
+ (abstractp (parser () (? :id)))
+ (t (parser () :id)))))
+
+ (labels ((qualifiers ()
+ ;; qualifier*
+
+ (parse
+ (seq ((quals (list ()
+ (scan-declspec
+ scanner
+ :indicator :qualifier
+ :predicate (lambda (ds)
+ (and (typep ds 'declspec)
+ (eq (ds-kind ds)
+ 'qualifier)))))))
+ (mapcar #'ds-label quals))))
+
+ (star ()
+ ;; Prefix: `*' qualifiers
+
+ (parse (seq (#\* (quals (qualifiers)))
+ (preop "*" (state 9)
+ (cons (lambda (type)
+ (funcall (car state)
+ (make-pointer-type type quals)))
+ (cdr state))))))
+
+ (next-declspec-p ()
+             ;; Answer whether the next token is a valid declaration
+ ;; specifier, without consuming it.
+ (and (eq (token-type scanner) :id)
+ (let ((id (token-value scanner)))
+ (or (gethash id *module-type-map*)
+ (gethash id *declspec-map*)))))
+
+ (prefix-lparen ()
+ ;; Prefix: `('
+ ;;
+ ;; Opening parentheses are treated as prefix operators by
+ ;; the expression parsing engine. There's an annoying
+ ;; ambiguity in the syntax if abstract declarators are
+ ;; permitted: a `(' might be either the start of a nested
+ ;; subdeclarator or the start of a postfix function argument
+ ;; list. The two are disambiguated by stating that if the
+ ;; token following the `(' is a `)' or a declaration
+ ;; specifier, then we have a postfix argument list.
+ (parse
+ (peek (seq (#\(
+ (nil (if (and abstractp (next-declspec-p))
+ (values nil nil nil)
+ (values t t nil))))
+ (lparen #\))))))
+
+ (centre ()
+ (parse (seq ((name (funcall centre-parser)))
+ (cons #'identity name))))
+
+ (argument-list ()
+ ;; [ argument [ `,' argument ]* ]
+
+ (parse (list ()
+ (seq ((base-type (parse-c-type scanner))
+ (dtor (parse-declarator scanner
+ base-type
+ :abstractp t)))
+ (make-argument (cdr dtor) (car dtor)))
+ #\,)))
+
+ (postfix-lparen ()
+ ;; Postfix: `(' argument-list `)'
+
+ (parse (seq (#\( (args (argument-list)) #\))
+ (postop "()" (state 10)
+ (cons (lambda (type)
+ (funcall (car state)
+ (make-function-type type args)))
+ (cdr state))))))
+
+ (dimension ()
+ ;; `[' c-fragment ']'
+
+ (parse (seq ((frag (parse-delimited-fragment
+ scanner #\[ #\])))
+ (c-fragment-text frag))))
+
+ (lbracket ()
+ ;; Postfix: dimension+
+
+ (parse (seq ((dims (list (:min 1) (dimension))))
+ (postop "[]" (state 10)
+ (cons (lambda (type)
+ (funcall (car state)
+ (make-array-type type dims)))
+ (cdr state)))))))
+
+ ;; And now we actually do the declarator parsing.
+ (parse (seq ((value (expr (:nestedp nestedp)
+
+ ;; An actual operand.
+ (centre)
+
+ ;; Binary operators. There aren't any.
+ nil
+
+ ;; Prefix operators.
+ (or (star)
+ (prefix-lparen))
+
+ ;; Postfix operators.
+ (or (postfix-lparen)
+ (lbracket)
+ (when nestedp (seq (#\)) (rparen #\))))))))
+ (cons (funcall (car value) base-type) (cdr value))))))))
;;;----- That's all, folks --------------------------------------------------
(when (c-type-equal-p a b)
(failure "Assert unequal C types: ~A ~_and ~A" a b)))
+(defun expand-tabs (string)
+ (with-output-to-string (out)
+ (do ((i 0 (1+ i))
+ (char (char string 0) (char string i))
+ (pos 0))
+ ((>= i (length string)))
+ (case char
+ (#\newline (write-char char out)
+ (setf pos 0))
+ (#\tab (write-string " " out :end (- 8 (mod pos 8)))
+ (setf pos (logandc2 (+ pos 8) 7)))
+ (t (write-char char out)
+ (incf pos))))))
+
(defun assert-pp-ctype (type kernel string)
(let* ((*print-right-margin* 77)
(print (with-output-to-string (out)
(pprint-c-type type out kernel))))
- (assert-equal print string
+ (assert-equal (expand-tabs print) (expand-tabs string)
(format nil "Type ~S with kernel ~S ~_prints as `~A' ~_~
rather than `~A'."
type kernel print string))))
int typeflag),
int /*nopenfd*/)")))
+;;;--------------------------------------------------------------------------
+;;; Parsing.
+
+(def-test-method parse-c-type ((test c-types-test) :run nil)
+ (flet ((check (string c-type name)
+ (let* ((char-scanner (make-string-scanner string))
+ (scanner (make-instance 'sod-token-scanner
+ :char-scanner char-scanner
+ :filename "<none>")))
+ (with-parser-context (token-scanner-context :scanner scanner)
+ (define-module ("<temporary>" :truename nil :location scanner)
+ (multiple-value-bind (result winp consumedp)
+ (parse (seq ((ds (parse-c-type scanner))
+ (dc (parse-declarator scanner ds))
+ :eof)
+ dc))
+ (declare (ignore consumedp))
+ (cond ((null c-type)
+ (assert-false winp))
+ (t
+ (assert-true winp)
+ (unless (eq c-type t)
+ (assert-cteqp (car result) c-type))
+ (unless (eq name t)
+ (assert-equal (cdr result) name))))))))))
+
+ (check "int x" (c-type int) "x")
+ (check "int long unsigned long y" (c-type unsigned-long-long) "y")
+ (check "int long int x" nil nil)
+ (check "float v[69][42]" (c-type ([] float "69" "42")) "v")
+ (check "const char *const tab[]"
+ (c-type ([] (* (char :const) :const) ""))
+ "tab")
+ (check "void (*signal(int, void (*)(int)))(int)"
+ (c-type (func (* (func void (nil int)))
+ (nil int)
+ (nil (* (func void (nil int))))))
+ "signal")))
+
;;;----- That's all, folks --------------------------------------------------
(cl:in-package #:sod)
;;;--------------------------------------------------------------------------
-;;; Basic lexical analyser.
+;;; Class implementation.
-(defstruct (pushed-token
- (:constructor make-pushed-token (type value location)))
- "A token that has been pushed back into a lexer for later processing."
- type value location)
+(defmethod shared-initialize :after
+ ((scanner sod-token-scanner) slot-names &key)
+ (default-slot (scanner 'sod-parser::filename slot-names)
+ (scanner-filename (token-scanner-char-scanner scanner))))
-;;; Class definition.
-
-(export 'basic-lexer)
-(defclass basic-lexer ()
- ((stream :initarg :stream :type stream :reader lexer-stream)
- (char :initform nil :type (or character null) :reader lexer-char)
- (pushback-chars :initform nil :type list)
- (token-type :initform nil :accessor token-type)
- (token-value :initform nil :accessor token-value)
- (location :initform nil :reader file-location)
- (pushback-tokens :initform nil :type list))
- (:documentation
- "Base class for lexical analysers.
-
- The lexer reads characters from STREAM, which, for best results, wants to
- be a `position-aware-input-stream'.
-
- The lexer provides one-character lookahead by default: the current
- lookahead character is available to subclasses in the slot CHAR. Before
- beginning lexical analysis, the lookahead character needs to be
- established with `next-char'. If one-character lookahead is insufficient,
- the analyser can push back an arbitrary number of characters using
- `pushback-char'.
-
- The `next-token' function scans and returns the next token from the
- STREAM, and makes it available as TOKEN-TYPE and TOKEN-VALUE, providing
- one-token lookahead. A parser using the lexical analyser can push back
- tokens using `pushback-tokens'.
-
- For convenience, the lexer implements a `file-location' method (delegated
- to the underlying stream)."))
-
-;;; Reading and pushing back characters.
-
-(defmethod next-char ((lexer basic-lexer))
- (with-slots (stream char pushback-chars) lexer
- (setf char (if pushback-chars
- (pop pushback-chars)
- (read-char stream nil)))))
-
-(defmethod pushback-char ((lexer basic-lexer) new-char)
- (with-slots (char pushback-chars) lexer
- (push char pushback-chars)
- (setf char new-char)))
-
-(defmethod fixup-stream* ((lexer basic-lexer) thunk)
- (with-slots (stream char pushback-chars) lexer
- (when pushback-chars
- (error "Lexer has pushed-back characters."))
- (when (slot-boundp lexer 'char)
- (unread-char char stream))
- (unwind-protect
- (funcall thunk stream)
- (setf char (read-char stream nil)))))
-
-;;; Reading and pushing back tokens.
-
-(defmethod next-token :around ((lexer basic-lexer))
- (unless (slot-boundp lexer 'char)
- (next-char lexer)))
-
-(defmethod next-token ((lexer basic-lexer))
- (with-slots (pushback-tokens token-type token-value location) lexer
- (setf (values token-type token-value)
- (if pushback-tokens
- (let ((pushback (pop pushback-tokens)))
- (setf location (pushed-token-location pushback))
- (values (pushed-token-type pushback)
- (pushed-token-value pushback)))
- (scan-token lexer)))))
-
-(defmethod scan-token :around ((lexer basic-lexer))
- (with-default-error-location (lexer)
- (call-next-method)))
-
-(defmethod pushback-token ((lexer basic-lexer) new-token-type
- &optional new-token-value new-location)
- (with-slots (pushback-tokens token-type token-value location) lexer
- (push (make-pushed-token token-type token-value location)
- pushback-tokens)
- (when new-location (setf location new-location))
- (setf token-type new-token-type
- token-value new-token-value)))
-
-;;; Utilities.
-
-(defmethod skip-spaces ((lexer basic-lexer))
- (do ((ch (lexer-char lexer) (next-char lexer)))
- ((not (whitespace-char-p ch)) ch)))
+(defmethod make-scanner-stream ((scanner sod-token-scanner))
+ (make-scanner-stream (token-scanner-char-scanner scanner)))
;;;--------------------------------------------------------------------------
-;;; Our main lexer.
-
-(export 'sod-lexer)
-(defclass sod-lexer (basic-lexer)
- ()
- (:documentation
- "Lexical analyser for the SOD lanuage.
-
- See the `lexer' class for the gory details about the lexer protocol."))
+;;; Indicators and error messages.
-(defmethod scan-token ((lexer sod-lexer))
- (with-slots (stream char keywords location) lexer
- (prog (ch)
+(defvar *indicator-map* (make-hash-table)
+ "Hash table mapping indicator objects to human-readable descriptions.")
- consider
+(defun show-char (stream char &optional colonp atsignp)
+ "Format CHAR to STREAM in a readable way.
- ;; Stash the position of this token so that we can report it later.
- (setf ch (skip-spaces lexer)
- location (file-location stream))
+ Usable in `format''s ~/.../ command."
+ (declare (ignore colonp atsignp))
+ (cond ((null char) (write-string "<eof>" stream))
+ ((and (graphic-char-p char) (char/= char #\space))
+ (format stream "`~C'" char))
+ (t (format stream "<~(~:C~)>" char))))
- ;; Now work out what it is that we're dealing with.
- (cond
-
- ;; End-of-file brings its own peculiar joy.
- ((null ch) (return (values :eof t)))
-
- ;; Strings.
- ((or (char= ch #\") (char= ch #\'))
- (let* ((quote ch)
- (string
- (with-output-to-string (out)
- (loop
- (flet ((getch ()
- (setf ch (next-char lexer))
- (when (null ch)
- (cerror* "Unexpected end of file in ~
- ~:[string~;character~] constant"
- (char= quote #\'))
- (return))))
- (getch)
- (cond ((char= ch quote) (return))
- ((char= ch #\\) (getch)))
- (write-char ch out))))))
- (setf ch (next-char lexer))
- (ecase quote
- (#\" (return (values :string string)))
- (#\' (case (length string)
- (0 (cerror* "Empty character constant")
- (return (values :char #\?)))
- (1 (return (values :char (char string 0))))
- (t (cerror* "Multiple characters in character constant")
- (return (values :char (char string 0)))))))))
-
- ;; Pick out identifiers and keywords.
- ((or (alpha-char-p ch) (char= ch #\_))
-
- ;; Scan a sequence of alphanumerics and underscores. We could
- ;; allow more interesting identifiers, but it would damage our C
- ;; lexical compatibility.
- (let ((id (with-output-to-string (out)
- (loop
- (write-char ch out)
- (setf ch (next-char lexer))
- (when (or (null ch)
- (not (or (alphanumericp ch)
- (char= ch #\_))))
- (return))))))
-
- ;; Done.
- (return (values :id id))))
-
- ;; Pick out numbers. Currently only integers, but we support
- ;; multiple bases.
- ((digit-char-p ch)
-
- ;; Sort out the prefix. If we're looking at `0b', `0o' or `0x'
- ;; (maybe uppercase) then we've got a funny radix to deal with.
- ;; Otherwise, a leading zero signifies octal (daft, I know), else
- ;; we're left with decimal.
- (multiple-value-bind (radix skip-char)
- (if (char/= ch #\0)
- (values 10 nil)
- (case (and (setf ch (next-char lexer))
- (char-downcase ch))
- (#\b (values 2 t))
- (#\o (values 8 t))
- (#\x (values 16 t))
- (t (values 8 nil))))
-
- ;; If we last munched an interesting letter, we need to skip over
- ;; it. That's what the SKIP-CHAR flag is for.
- ;;
- ;; Danger, Will Robinson! If we're just about to eat a radix
- ;; letter, then the next thing must be a digit. For example,
- ;; `0xfatenning' parses as a hex number followed by an identifier
- ;; `0xfa ttening', but `0xturning' is an octal number followed by
- ;; an identifier `0 xturning'.
- (when skip-char
- (let ((peek (next-char lexer)))
- (unless (digit-char-p peek radix)
- (pushback-char lexer ch)
- (return-from scan-token (values :integer 0)))
- (setf ch peek)))
-
- ;; Scan an integer. While there are digits, feed them into the
- ;; accumulator.
- (do ((accum 0 (+ (* accum radix) digit))
- (digit (and ch (digit-char-p ch radix))
- (and ch (digit-char-p ch radix))))
- ((null digit) (return-from scan-token
- (values :integer accum)))
- (setf ch (next-char lexer)))))
-
- ;; A slash might be the start of a comment.
- ((char= ch #\/)
- (setf ch (next-char lexer))
- (case ch
-
- ;; Comment up to the end of the line.
- (#\/
- (loop
- (setf ch (next-char lexer))
- (when (or (null ch) (char= ch #\newline))
- (go scan))))
-
- ;; Comment up to the next `*/'.
- (#\*
- (tagbody
- top
- (case (setf ch (next-char lexer))
- (#\* (go star))
- ((nil) (go done))
- (t (go top)))
- star
- (case (setf ch (next-char lexer))
- (#\* (go star))
- (#\/ (setf ch (next-char lexer))
- (go done))
- ((nil) (go done))
- (t (go top)))
- done)
- (go consider))
-
- ;; False alarm. (The next character is already set up.)
- (t
- (return (values #\/ t)))))
-
- ;; A dot: might be `...'. Tread carefully! We need more lookahead
- ;; than is good for us.
- ((char= ch #\.)
- (setf ch (next-char lexer))
- (cond ((eql ch #\.)
- (setf ch (next-char lexer))
- (cond ((eql ch #\.) (return (values :ellipsis nil)))
- (t (pushback-char lexer #\.)
- (return (values #\. t)))))
- (t
- (return (values #\. t)))))
-
- ;; Anything else is a lone delimiter.
- (t
- (return (multiple-value-prog1
- (values ch t)
- (next-char lexer)))))
-
- scan
- ;; Scan a new character and try again.
- (setf ch (next-char lexer))
- (go consider))))
+;;;--------------------------------------------------------------------------
+;;; Token scanning.
+
+(defmethod scanner-token ((scanner sod-token-scanner))
+ (with-slots (char-scanner line column) scanner
+ (with-parser-context (character-scanner-context :scanner char-scanner)
+
+ (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
+ ;; Scan and return a sequence of digits.
+ (parse (many (acc init (+ (* acc radix) it) :min min)
+ (label (list :digit radix)
+ (filter (lambda (ch)
+ (digit-char-p ch radix))))))))
+
+ ;; Skip initial junk, and remember the place.
+ (loop
+ (setf (scanner-line scanner) (scanner-line char-scanner)
+ (scanner-column scanner) (scanner-column char-scanner))
+ (cond-parse (:consumedp cp :expected exp)
+ ((satisfies whitespace-char-p) (parse :whitespace))
+ ((scan-comment char-scanner))
+ (t (if cp (lexer-error char-scanner exp cp) (return)))))
+
+ ;; Now parse something.
+ (cond-parse (:consumedp cp :expected exp)
+
+ ;; Alphanumerics mean we read an identifier.
+ ((or #\_ (satisfies alpha-char-p))
+ (values :id (with-output-to-string (out)
+ (write-char it out)
+ (parse (many (nil nil (write-char it out))
+ (or #\_ (satisfies alphanumericp)))))))
+
+ ;; Quotes introduce a literal.
+ ((seq ((quote (or #\" #\'))
+ (contents (many (out (make-string-output-stream)
+ (progn (write-char it out) out)
+ :final (get-output-stream-string out))
+ (or (and #\\ :any) (not quote))))
+ (nil (char quote)))
+ (ecase quote
+ (#\" contents)
+ (#\' (case (length contents)
+ (1 (char contents 0))
+ (0 (cerror* "Empty character literal") #\?)
+ (t (cerror* "Too many characters in literal")
+ (char contents 0))))))
+ (values (etypecase it
+ (character :char)
+ (string :string))
+ it))
+
+ ;; Zero introduces a chosen-radix integer.
+ ((and #\0
+ (or (and (or #\b #\B) (scan-digits :radix 2))
+ (and (or #\o #\O) (scan-digits :radix 8))
+ (and (or #\x #\X) (scan-digits :radix 16))
+ (scan-digits :radix 8 :min 0)))
+ (values :int it))
+
+ ;; Any other digit forces radix-10.
+ ((seq ((d (filter digit-char-p))
+ (i (scan-digits :radix 10 :min 0 :init d)))
+ i)
+ (values :int it))
+
+ ;; Some special punctuation sequences are single tokens.
+ ("..." (values :ellipsis nil))
+
+ ;; Any other character is punctuation.
+ (:any (values it nil))
+
+ ;; End of file means precisely that.
+ (:eof (values :eof nil))
+
+ ;; Report errors and try again. Because we must have consumed some
+ ;; input in order to get here (we've matched both :any and :eof) we
+ ;; must make progress on every call.
+ (t
+ (assert cp)
+ (lexer-error char-scanner exp cp)
+ (scanner-token scanner)))))))
;;;----- That's all, folks --------------------------------------------------
(cl:in-package #:sod)
;;;--------------------------------------------------------------------------
-;;; Accessors.
+;;; Class definition.
-(export 'lexer-char)
-(defgeneric lexer-char (lexer)
+(export 'sod-token-scanner)
+(defclass sod-token-scanner (token-scanner)
+ ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
(:documentation
- "Return the current lookahead character from the LEXER.
+ "A token scanner for SOD input files.
- When the lexer is first created, there is no lookahead character: you must
- `prime the pump' by calling `next-char'. The lexer represents
- encountering the end of its input stream by setting the lookahead
- character to nil. At this point it is still possible to push back
- characters."))
+ Not a lot here, apart from a character scanner to read from and the
+ standard token scanner infrastructure."))
;;;--------------------------------------------------------------------------
-;;; Formatting tokens.
-
-(defgeneric format-token (token-type &optional token-value)
- (:documentation
- "Return a string describing a token with the specified type and value.")
- (:method ((token-type (eql :eof)) &optional token-value)
- (declare (ignore token-value))
- "<end-of-file>")
- (:method ((token-type (eql :string)) &optional token-value)
- (declare (ignore token-value))
- "<string-literal>")
- (:method ((token-type (eql :char)) &optional token-value)
- (declare (ignore token-value))
- "<character-literal>")
- (:method ((token-type (eql :id)) &optional token-value)
- (format nil "<identifier~@[ `~A'~]>" token-value))
- (:method ((token-type symbol) &optional token-value)
- (declare (ignore token-value))
- (check-type token-type keyword)
- (format nil "`~(~A~)'" token-type))
- (:method ((token-type character) &optional token-value)
- (declare (ignore token-value))
- (format nil "~:[<~:C>~;`~C'~]"
- (and (graphic-char-p token-type)
- (char/= token-type #\space))
- token-type)))
-
-;;;--------------------------------------------------------------------------
-;;; Reading and pushing back characters.
-
-(export 'next-char)
-(defgeneric next-char (lexer)
- (:documentation
- "Fetch the next character from the LEXER's input stream.
-
- Read a character from the input stream, and store it in the LEXER's CHAR
- slot. The character stored is returned. If characters have been pushed
- back then pushed-back characters are used instead of the input stream. If
- there are no more characters to be read then the lookahead character is
- nil. Returns the new lookahead character.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(export 'pushback-char)
-(defgeneric pushback-char (lexer char)
- (:documentation
- "Push the CHAR back into the lexer.
-
- Make CHAR be the current lookahead character (stored in the LEXER's CHAR
- slot). The previous lookahead character is pushed down, and will be made
- available again once this character is consumed by NEXT-CHAR.
-
- (This function is primarily intended for the use of lexer subclasses.)"))
-
-(defgeneric fixup-stream* (lexer thunk)
- (:documentation
- "Helper function for `with-lexer-stream'.
-
- This function does the main work for `with-lexer-stream'. The THUNK is
- invoked on a single argument, the LEXER's underlying STREAM."))
-
-(export 'with-lexer-stream)
-(defmacro with-lexer-stream ((streamvar lexer) &body body)
- "Evaluate BODY with STREAMVAR bound to the LEXER's input stream.
-
- The STREAM is fixed up so that the next character read (e.g., using
- `read-char') will be the lexer's current lookahead character. Once the
- BODY completes, the next character in the stream is read and set as the
- lookahead character. It is an error if the lexer has pushed-back
- characters (since these can't be pushed back into the input stream
- properly)."
-
- `(fixup-stream* ,lexer (lambda (,streamvar) ,@body)))
+;;; Indicators and error messages.
+
+(export 'define-indicator)
+(defun define-indicator (indicator description)
+ "Associate an INDICATOR with its textual DESCRIPTION.
+
+   Updates the `*indicator-map*'.
+ (setf (gethash indicator *indicator-map*) description)
+ indicator)
+
+(export 'syntax-error)
+(defun syntax-error (scanner expected &key (continuep t))
+ "Signal a (maybe) continuable syntax error."
+ (labels ((show-token (type value)
+ (if (characterp type)
+ (format nil "~/sod::show-char/" type)
+ (case type
+ (:id (format nil "<identifier~@[ `~A'~]>" value))
+ (:string "<string-literal>")
+ (:char "<character-literal>")
+ (:eof "<end-of-file>")
+ (:ellipsis "`...'")
+ (t (format nil "<? ~S~@[ ~S~]>" type value)))))
+ (show-expected (thing)
+ (acond ((gethash thing *indicator-map*) it)
+ ((atom thing) (show-token thing nil))
+ ((eq (car thing) :id)
+ (format nil "`~A'" (cadr thing)))
+ (t (format nil "<? ~S>" thing)))))
+ (funcall (if continuep #'cerror* #'error)
+ "Syntax error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~A"
+ (mapcar #'show-expected expected)
+ (show-token (token-type scanner) (token-value scanner)))))
+
+(export 'lexer-error)
+(defun lexer-error (char-scanner expected consumedp)
+ "Signal a continuable lexical error."
+  (cerror* "Lexical error: ~
+	    expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+	    but found ~/sod::show-char/~
+	    ~@[ at ~A~]"
+ (mapcar (lambda (exp)
+ (typecase exp
+ (character (format nil "~/sod::show-char/" exp))
+ (string (format nil "`~A'" exp))
+ ((cons (eql :digit) *) (format nil "<radix-~A digit>"
+ (cadr exp)))
+ ((eql :eof) "<end-of-file>")
+ ((eql :any) "<character>")
+ (t (format nil "<? ~S>" exp))))
+ expected)
+ (and (not (scanner-at-eof-p char-scanner))
+ (scanner-current-char char-scanner))
+ (and consumedp (file-location char-scanner))))
;;;--------------------------------------------------------------------------
-;;; Reading and pushing back tokens.
-
-(export 'scan-token)
-(defgeneric scan-token (lexer)
- (:documentation
- "Internal protocol for scanning tokens from an input stream.
-
- Implementing a method on this function is the main responsibility of LEXER
- subclasses; it is called by the user-facing `next-token' function.
-
- The method should consume characters (using `next-char') as necessary, and
- return two values: a token type and token value. These will be stored in
- the corresponding slots in the lexer object in order to provide the user
- with one-token lookahead."))
-
-(export 'next-token)
-(defgeneric next-token (lexer)
- (:documentation
- "Scan a token from an input stream.
-
- This function scans a token from an input stream. Two values are
- returned: a `token type' and a `token value'. These are opaque to the
- LEXER base class, but the intent is that the token type be significant to
- determining the syntax of the input, while the token value carries any
- additional information about the token's semantic content. The token type
- and token value are also made available for lookahead via accessors
- TOKEN-TYPE and TOKEN-VALUE on the `lexer' object.
-
- The new lookahead token type and value are returned as two separate
- values.
-
- If tokens have been pushed back (see `pushback-token') then they are
- returned one by one instead of scanning the stream."))
-
-(export 'pushback-token)
-(defgeneric pushback-token (lexer token-type &optional token-value location)
- (:documentation
- "Push a token back into the lexer.
-
- Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token.
- The previous lookahead token is pushed down, and will be made available
- agan once this new token is consumed by NEXT-TOKEN. If LOCATION is
- non-nil then `file-location' is saved and replaced by LOCATION. The
- TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need
- not be values which can actually be returned by NEXT-TOKEN."))
-
-;;;--------------------------------------------------------------------------
-;;; Utilities.
-
-(export 'skip-spaces)
-(defgeneric skip-spaces (lexer)
- (:documentation
- "Skip over whitespace characters in the LEXER.
-
- There must be a lookahead character; when the function returns, the
- lookahead character will be a non-whitespace character or nil if there
- were no non-whitespace characters remaining. Returns the new lookahead
- character."))
-
-(export 'require-token)
-(defun require-token
- (lexer wanted-token-type &key (errorp t) (consumep t) default)
- "Require a particular token to appear.
-
- If the LEXER's current lookahead token has type `wanted-token-type' then
- consume it (using `next-token') and return its value. Otherwise, if the
- token doesn't have the requested type then signal a continuable error
- describing the situation and return DEFAULT (which defaults to nil).
-
- If ERRORP is false then no error is signalled; this is useful for
- consuming or checking for optional punctuation. If CONSUMEP is false then
- a matching token is not consumed; non-matching tokens are never consumed."
-
- (with-slots (token-type token-value) lexer
- (cond ((eql token-type wanted-token-type)
- (prog1 token-value
- (when consumep (next-token lexer))))
- (errorp
- (cerror* "Expected ~A but found ~A"
- (format-token wanted-token-type)
- (format-token token-type token-value))
- default)
- (t
- default))))
+;;; Lexical analysis utilities.
+
+(defun scan-comment (char-scanner)
+ "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.
+
+ The result isn't interesting."
+ (with-parser-context (character-scanner-context :scanner char-scanner)
+ (parse (or (and "/*"
+ (and (skip-many ()
+ (and (skip-many () (not #\*))
+ (label "*/" (skip-many (:min 1) #\*)))
+ (not #\/))
+ #\/))
+ (and "//"
+ (skip-many () (not #\newline))
+ (? #\newline))))))
;;;----- That's all, folks --------------------------------------------------
+++ /dev/null
-;;; -*-lisp-*-
-;;;
-;;; Lexical analysis for input parser
-;;;
-;;; (c) 2009 Straylight/Edgeware
-;;;
-
-;;;----- Licensing notice ---------------------------------------------------
-;;;
-;;; This file is part of the Sensble Object Design, an object system for C.
-;;;
-;;; SOD is free software; you can redistribute it and/or modify
-;;; it under the terms of the GNU General Public License as published by
-;;; the Free Software Foundation; either version 2 of the License, or
-;;; (at your option) any later version.
-;;;
-;;; SOD is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;;; GNU General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with SOD; if not, write to the Free Software Foundation,
-;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-(cl:in-package #:sod)
-
-;;;--------------------------------------------------------------------------
-;;; Class definition.
-
-(export 'sod-token-scanner)
-(defclass sod-token-scanner (token-scanner)
- ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
- (:documentation
- "A token scanner for SOD input files.
-
- Not a lot here, apart from a character scanner to read from and the
- standard token scanner infrastructure."))
-
-(defmethod shared-initialize :after
- ((scanner sod-token-scanner) slot-names &key)
- (default-slot (scanner 'sod-parser::filename slot-names)
- (scanner-filename (token-scanner-char-scanner scanner))))
-
-;;;--------------------------------------------------------------------------
-;;; Utilities.
-
-(defun show-char (stream char &optional colonp atsignp)
- "Format CHAR to STREAM in a readable way.
-
- Usable in `format''s ~/.../ command."
- (declare (ignore colonp atsignp))
- (cond ((null char) (write-string "<eof>" stream))
- ((and (graphic-char-p char) (char/= char #\space))
- (format stream "`~C'" char))
- (t (format stream "<~(~:C~)>" char))))
-
-(defun scan-comment (scanner)
- "Scan a comment (either `/* ... */' or `// ...') from SCANNER.
-
- The result isn't interesting."
- (with-parser-context (character-scanner-context :scanner scanner)
- (parse (or (and "/*"
- (and (skip-many ()
- (and (skip-many () (not #\*))
- (label "*/" (skip-many (:min 1) #\*)))
- (not #\/))
- #\/))
- (and "//"
- (skip-many () (not #\newline))
- (? #\newline))))))
-
-(defmethod make-scanner-stream ((scanner sod-token-scanner))
- (make-scanner-stream (token-scanner-char-scanner scanner)))
-
-;;;--------------------------------------------------------------------------
-;;; Error reporting.
-
-(defvar *indicator-map* (make-hash-table)
- "Hash table mapping indicator objects to human-readable descriptions.")
-
-(defun define-indicator (indicator description)
- (setf (gethash indicator *indicator-map*) description)
- indicator)
-
-(export 'syntax-error)
-(defun syntax-error (scanner expected &key (continuep t))
- "Signal a (maybe) continuable syntax error."
- (labels ((show-token (type value)
- (if (characterp type)
- (format nil "~/sod::show-char/" type)
- (case type
- (:id (format nil "<identifier~@[ `~A'~]>" value))
- (:string "<string-literal>")
- (:char "<character-literal>")
- (:eof "<end-of-file>")
- (:ellipsis "`...'")
- (t (format nil "<? ~S~@[ ~S~]>" type value)))))
- (show-expected (thing)
- (acond ((gethash thing *indicator-map*) it)
- ((atom thing) (show-token thing nil))
- ((eq (car thing) :id)
- (format nil "`~A'" (cadr thing)))
- (t (format nil "<? ~S>" thing)))))
- (funcall (if continuep #'cerror* #'error)
- "Syntax error: ~
- expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
- but found ~A"
- (mapcar #'show-expected expected)
- (show-token (token-type scanner) (token-value scanner)))))
-
-(export 'lexer-error)
-(defun lexer-error (char-scanner expected consumedp)
- "Signal a continuable lexical error."
- (cerror* "Lexical error: ~
- expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~
- but found ~/sod::show-char/~
- ~@[ at ~A~]"
- (mapcar (lambda (exp)
- (typecase exp
- (character (format nil "~/sod::show-char/" exp))
- (string (format nil "`~A'" exp))
- ((cons (eql :digit) *) (format nil "<radix-~A digit>"
- (cadr exp)))
- ((eql :eof) "<end-of-file>")
- ((eql :any) "<character>")
- (t (format nil "<? ~S>" exp))))
- expected)
- (and (not (scanner-at-eof-p char-scanner))
- (scanner-current-char char-scanner))
- (and consumedp (file-location char-scanner))))
-
-;;;--------------------------------------------------------------------------
-;;; Token scanner protocol implementation.
-
-(defmethod scanner-token ((scanner sod-token-scanner))
- (with-slots (char-scanner line column) scanner
- (with-parser-context (character-scanner-context :scanner char-scanner)
-
- (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
- ;; Scan an return a sequence of digits.
- (parse (many (acc init (+ (* acc radix) it) :min min)
- (label (list :digit radix)
- (filter (lambda (ch)
- (digit-char-p ch radix))))))))
-
- ;; Skip initial junk, and remember the place.
- (loop
- (setf (scanner-line scanner) (scanner-line char-scanner)
- (scanner-column scanner) (scanner-column char-scanner))
- (cond-parse (:consumedp cp :expected exp)
- ((satisfies whitespace-char-p) (parse :whitespace))
- ((scan-comment char-scanner))
- (t (if cp (lexer-error char-scanner exp cp) (return)))))
-
- ;; Now parse something.
- (cond-parse (:consumedp cp :expected exp)
-
- ;; Alphanumerics mean we read an identifier.
- ((or #\_ (satisfies alpha-char-p))
- (values :id (with-output-to-string (out)
- (write-char it out)
- (parse (many (nil nil (write-char it out))
- (or #\_ (satisfies alphanumericp)))))))
-
- ;; Quotes introduce a literal.
- ((seq ((quote (or #\" #\'))
- (contents (many (out (make-string-output-stream)
- (progn (write-char it out) out)
- :final (get-output-stream-string out))
- (or (and #\\ :any) (not quote))))
- (nil (char quote)))
- (ecase quote
- (#\" contents)
- (#\' (case (length contents)
- (1 (char contents 0))
- (0 (cerror* "Empty character literal") #\?)
- (t (cerror* "Too many characters in literal")
- (char contents 0))))))
- (values (etypecase it
- (character :char)
- (string :string))
- it))
-
- ;; Zero introduces a chosen-radix integer.
- ((and #\0
- (or (and (or #\b #\B) (scan-digits :radix 2))
- (and (or #\o #\O) (scan-digits :radix 8))
- (and (or #\x #\X) (scan-digits :radix 16))
- (scan-digits :radix 8 :min 0)))
- (values :int it))
-
- ;; Any other digit forces radix-10.
- ((seq ((d (filter digit-char-p))
- (i (scan-digits :radix 10 :min 0 :init d)))
- i)
- (values :int it))
-
- ;; Some special punctuation sequences are single tokens.
- ("..." (values :ellipsis nil))
-
- ;; Any other character is punctuation.
- (:any (values it nil))
-
- ;; End of file means precisely that.
- (:eof (values :eof nil))
-
- ;; Report errors and try again. Because we must have consumed some
- ;; input in order to get here (we've matched both :any and :eof) we
- ;; must make progress on every call.
- (t
- (assert cp)
- (lexer-error char-scanner exp cp)
- (scanner-token scanner)))))))
-
-;;;----- That's all, folks --------------------------------------------------
(when (and truename (not (eq (module-state *module*) t)))
(remhash truename *module-map*)))))
+(defun call-with-temporary-module (thunk)
+ "Invoke THUNK in the context of a temporary module, returning its values.
+
+ This is mainly useful for testing things which depend on module variables.
+ This is the functionality underlying `with-temporary-module'."
+ (let ((*module* (make-instance 'module
+ :name "<temp>"
+ :state nil)))
+ (call-with-module-environment
+ (lambda ()
+ (module-import *builtin-module*)
+ (funcall thunk)))))
+
;;;--------------------------------------------------------------------------
;;; Type definitions.
;; `class' id [`:' id-list] `{' class-item* `}'
(with-parser-context (token-scanner-context :scanner scanner)
+ (labels ((parse-item ()
+ ;; class-item ::= property-set
(parse (seq ("class"
(name :id)
(supers (? (seq (#\: (supers (list (:min 1) :id #\,)))
,@(and truenamep `(:truename ,truename))
,@(and locationp `(:location ,location))))
+(export 'with-temporary-module)
+(defmacro with-temporary-module ((&key) &body body)
+ "Evaluate BODY within the context of a temporary module."
+ `(call-with-temporary-module (lambda () ,@body)))
+
;;;----- That's all, folks --------------------------------------------------
(seq (#\&) and)
(seq (#\|) or)))
(token (or (seq (#\() lp)
+ (seq (#\[) lb)
(seq (#\-) neg)
(seq (#\!) not)))
(token (or (seq (#\!) fact)
- (when nestedp (seq (#\)) rp))))))
+ (when nestedp
+ (or (seq (#\)) rp)
+ (seq (#\]) rb)))))))
(next (or :any (t :eof))))
(cons value next))))))
(assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t)
(assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
(assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
-
(assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6"
'((or (not (= (+ 1 2) 3))
(/= (- 6 3) (/ 12 6)))
;;; along with SOD; if not, write to the Free Software Foundation,
;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-(defun play (args)
- "Parse and evaluate a simple expression.
+;;;--------------------------------------------------------------------------
+;;; The expression parser.
+
+(flet ((dispatch (name args &rest spec)
+ (acond ((find :invalid args :key #'car)
+ (cons :invalid nil))
+ ((find-if (lambda (item)
+ (every (lambda (type arg)
+ (eql type (car arg)))
+ (cddr item)
+ args))
+ spec)
+ (cons (car it) (apply (cadr it)
+ (mapcar #'cdr args))))
+ (t
+ (cerror* "Type mismatch: operator `~A' applied to ~
+ types ~{~(~A~)~#[~; and ~;, ~]~}"
+ name
+ (mapcar #'car args))
+ (cons :invalid nil)))))
+ (let ((add (binop "+" (x y 5)
+ (dispatch "+" (list x y) (list :int #'+ :int :int))))
+ (sub (binop "-" (x y 5)
+ (dispatch "-" (list x y) (list :int #'- :int :int))))
+ (mul (binop "*" (x y 7)
+ (dispatch "*" (list x y) (list :int #'* :int :int))))
+ (div (binop "/" (x y 7)
+ (dispatch "/" (list x y)
+ (list :int
+ (lambda (x y)
+ (cond ((zerop y)
+ (cerror*
+ "Division by zero")
+ (cons :invalid nil))
+ (t
+ (floor x y))))
+ :int :int))))
+ (nop (preop "+" (x 9)
+ (dispatch "+" (list x) (list :int #'+ :int))))
+ (neg (preop "-" (x 9)
+ (dispatch "-" (list x) (list :int #'- :int))))
+ (lp (lparen #\)))
+ (rp (rparen #\))))
+
+ (defun parse-expression (scanner)
+ "Parse and evaluate a simple expression.
The result is a pair (TYPE . VALUE). Currently, type types are `:id',
- `:int', `:string', and `:char'. If an error prevented a sane ; value from
+ `:int', `:string', and `:char'. If an error prevented a sane value from
being produced, the type `:invalid' is returned.
The syntax of expressions is rather limited at the moment, but more may be
primary: int | id | string | `(' expression `)' | `?' lisp-expression
Only operators for dealing with integers are provided."
-
- (labels ((type-dispatch (name args &rest spec)
- (acond ((find :invalid args :key #'car)
- (cons :invalid nil))
- ((find-if (lambda (item)
- (every (lambda (type arg)
- (eql type (car arg)))
- (cddr item)
- args))
- spec)
- (cons (car it) (apply (cadr it)
- (mapcar #'cdr args))))
- (t
- (cerror* "Type mismatch: operator `~A' applied to ~
- types ~{~(~A~)~#[~; and ~;, ~]~}"
- name
- (mapcar #'car args))
- (cons :invalid nil))))
- (add (x y) (type-dispatch "+" (list x y)
- (list :integer #'+ :integer :integer)))
- (sub (x y) (type-dispatch "-" (list x y)
- (list :integer #'- :integer :integer)))
- (mul (x y) (type-dispatch "*" (list x y)
- (list :integer #'* :integer :integer)))
- (div (x y) (type-dispatch "/" (list x y)
- (list :integer
- (lambda (x y)
- (cond ((zerop y)
- (cerror*
- "Division by zero")
- (cons :invalid nil))
- (t
- (floor x y))))
- :integer :integer)))
- (nop (x) (type-dispatch "+" (list x)
- (list :integer #'+ :integer)))
- (neg (x) (type-dispatch "-" (list x)
- (list :integer #'- :integer))))
-
- (with-parser-context (token-scanner-context :scanner scanner)
- (parse (expr (lisp (flet ((prop (type value)
- (scanner-step scanner)
- (values (cons type value) t t)))
- (case (token-type scanner)
- (:int
- (prop :integer (token-value scanner)))
- ((:id :char :string)
- (prop (token-type scanner) (token-value scanner)))
- (#\?
- (let* ((stream (make-scanner-stream scanner))
- (sexp (read stream t)))
+ (with-parser-context (token-scanner-context :scanner scanner)
+ (parse (expr (:nestedp nestedp)
+ (lisp (flet ((prop (type value)
(scanner-step scanner)
- (values (cons (property-type sexp) sexp)
- t t)))
- (t
- (values (list :int :id :char :string #\?)
- nil nil)))))
+ (values (cons type value) t t)))
+ (case (token-type scanner)
+ ((:int :id :char :string)
+ (prop (token-type scanner)
+ (token-value scanner)))
+ (#\?
+ (let* ((stream (make-scanner-stream scanner))
+ (sexp (read stream t)))
+ (scanner-step scanner)
+ (values (cons (property-type sexp) sexp)
+ t t)))
+ (t
+ (values (list :int :id :char :string #\?)
+ nil nil)))))
+ (or (seq (#\+) add)
+ (seq (#\-) sub)
+ (seq (#\*) mul)
+ (seq (#\/) div))
+ (or (seq (#\() lp)
+ (seq (#\+) nop)
+ (seq (#\-) neg))
+ (when nestedp (seq (#\)) rp))))))))
+
+;;;--------------------------------------------------------------------------
+;;; Parsing property sets.
(defun parse-property (scanner pset)
"Parse a single property using the SCANNER; add it to the PSET."
- ;; id `=' expression
+ ;; property ::= id `=' expression
+ (with-parser-context (token-scanner-context :scanner scanner)
+ (parse (seq ((name :id) #\= (result (parse-expression scanner)))
+ (let ((type (car result))
+ (value (cdr result)))
+ (unless (eq type :invalid)
+ (add-property pset name value
+ :type type
+ :location scanner)))))))
+
+(export 'parse-property-set)
+(defun parse-property-set (scanner)
+ "Parse an optional property set from the SCANNER and return it, or `nil'."
+ ;; property-set ::= [`[' property-list `]']
+ (with-parser-context (token-scanner-context :scanner scanner)
+ (parse (? (seq (#\[
+ (pset (many (pset (make-property-set) pset)
+ (parse-property scanner pset)
+ #\,))
+ #\])
+ pset)))))
;;;----- That's all, folks --------------------------------------------------
(defgeneric property-type (value)
(:documentation "Guess a sensible property type to use for VALUE.")
(:method ((value symbol)) :symbol)
- (:method ((value integer)) :integer)
+ (:method ((value integer)) :int)
(:method ((value string)) :string)
(:method ((value character)) :char)
(:method (value) :other))
(:file "package" :depends-on ("parser"))
+ ;; Lexical analysis.
+ (:file "lexer-proto" :depends-on ("package" "parser"))
+ (:file "lexer-impl" :depends-on ("lexer-proto"))
+ (:file "fragment-parse" :depends-on ("lexer-proto"))
+
;; C type representation protocol.
(:file "c-types-proto" :depends-on ("package"))
(:file "c-types-impl" :depends-on ("c-types-proto"))
+ (:file "c-types-parse" :depends-on ("c-types-proto" "fragment-parse"))
;; Property set protocol.
(:file "pset-proto" :depends-on ("package"))
(:file "pset-impl" :depends-on ("pset-proto"))
- (:file "pset-parse" :depends-on ("pset-proto" "lexical-parse"))
-
- ;; Lexical analysis.
- (:file "lexical-parse" :depends-on ("parser"))
- (:file "fragment-parse" :depends-on ("lexical-parse"))
+ (:file "pset-parse" :depends-on ("pset-proto" "lexer-proto"))
;; Code generation protocol.
(:file "codegen-proto" :depends-on ("package"))
("module-proto" "pset-proto" "c-types-class-impl" "builtin"))
(:file "builtin" :depends-on ("module-proto" "pset-proto" "classes"
"c-types-impl" "c-types-class-impl"))
+ #+no
(:file "module-parse" :depends-on ("module-impl"
- "lexical-parse" "fragment-parse"))
+ "lexer-proto" "fragment-parse"))
;; Output.
(:file "output-proto" :depends-on ("package"))