--- /dev/null
+;;; -*-lisp-*-
+;;;
+;;; Lexical analysis for input parser
+;;;
+;;; (c) 2009 Straylight/Edgeware
+;;;
+
+;;;----- Licensing notice ---------------------------------------------------
+;;;
+;;; This file is part of the Sensble Object Design, an object system for C.
+;;;
+;;; SOD is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 2 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; SOD is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with SOD; if not, write to the Free Software Foundation,
+;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+(cl:in-package #:sod)
+
+;;;--------------------------------------------------------------------------
+;;; Class definition.
+
+(export 'sod-token-scanner)
+(defclass sod-token-scanner (token-scanner)
+ ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
+ (:documentation
+ "A token scanner for SOD input files.
+
+ Not a lot here, apart from a character scanner to read from and the
+ standard token scanner infrastructure."))
+
+(defmethod shared-initialize :after
+ ((scanner sod-token-scanner) slot-names &key)
+ (default-slot (scanner 'sod-parser::filename slot-names)
+ (scanner-filename (token-scanner-char-scanner scanner))))
+
+;;;--------------------------------------------------------------------------
+;;; Utilities.
+
+(defun show-char (stream char &optional colonp atsignp)
+ "Format CHAR to STREAM in a readable way.
+
+ Usable in `format''s ~/.../ command."
+ (declare (ignore colonp atsignp))
+ (cond ((null char) (write-string "<eof>" stream))
+ ((and (graphic-char-p char) (char/= char #\space))
+ (format stream "`~C'" char))
+ (t (format stream "<~(~:C~)>" char))))
+
+(defun scan-comment (scanner)
+ "Scan a comment (either `/* ... */' or `// ...') from SCANNER.
+
+ The result isn't interesting."
+ (with-parser-context (character-scanner-context :scanner scanner)
+ (parse (or (and "/*"
+ (and (skip-many ()
+ (and (skip-many () (not #\*))
+ (label "*/" (skip-many (:min 1) #\*)))
+ (not #\/))
+ #\/))
+ (and "//"
+ (skip-many () (not #\newline))
+ (? #\newline))))))
+
+(defmethod make-scanner-stream ((scanner sod-token-scanner))
+ (make-scanner-stream (token-scanner-char-scanner scanner)))
+
+;;;--------------------------------------------------------------------------
+;;; Error reporting.
+
+(defvar *indicator-map* (make-hash-table)
+ "Hash table mapping indicator objects to human-readable descriptions.")
+
+(defun define-indicator (indicator description)
+ (setf (gethash indicator *indicator-map*) description)
+ indicator)
+
+(export 'syntax-error)
+(defun syntax-error (scanner expected &key (continuep t))
+ "Signal a (maybe) continuable syntax error."
+ (labels ((show-token (type value)
+ (if (characterp type)
+ (format nil "~/sod::show-char/" type)
+ (case type
+ (:id (format nil "<identifier~@[ `~A'~]>" value))
+ (:string "<string-literal>")
+ (:char "<character-literal>")
+ (:eof "<end-of-file>")
+ (:ellipsis "`...'")
+ (t (format nil "<? ~S~@[ ~S~]>" type value)))))
+ (show-expected (thing)
+ (acond ((gethash thing *indicator-map*) it)
+ ((atom thing) (show-token thing nil))
+ ((eq (car thing) :id)
+ (format nil "`~A'" (cadr thing)))
+ (t (format nil "<? ~S>" thing)))))
+ (funcall (if continuep #'cerror* #'error)
+ "Syntax error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
+ but found ~A"
+ (mapcar #'show-expected expected)
+ (show-token (token-type scanner) (token-value scanner)))))
+
+(export 'lexer-error)
+(defun lexer-error (char-scanner expected consumedp)
+ "Signal a continuable lexical error."
+ (cerror* "Lexical error: ~
+ expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~
+ but found ~/sod::show-char/~
+ ~@[ at ~A~]"
+ (mapcar (lambda (exp)
+ (typecase exp
+ (character (format nil "~/sod::show-char/" exp))
+ (string (format nil "`~A'" exp))
+ ((cons (eql :digit) *) (format nil "<radix-~A digit>"
+ (cadr exp)))
+ ((eql :eof) "<end-of-file>")
+ ((eql :any) "<character>")
+ (t (format nil "<? ~S>" exp))))
+ expected)
+ (and (not (scanner-at-eof-p char-scanner))
+ (scanner-current-char char-scanner))
+ (and consumedp (file-location char-scanner))))
+
+;;;--------------------------------------------------------------------------
+;;; Token scanner protocol implementation.
+
+(defmethod scanner-token ((scanner sod-token-scanner))
+ (with-slots (char-scanner line column) scanner
+ (with-parser-context (character-scanner-context :scanner char-scanner)
+
+ (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
+ ;; Scan an return a sequence of digits.
+ (parse (many (acc init (+ (* acc radix) it) :min min)
+ (label (list :digit radix)
+ (filter (lambda (ch)
+ (digit-char-p ch radix))))))))
+
+ ;; Skip initial junk, and remember the place.
+ (loop
+ (setf (scanner-line scanner) (scanner-line char-scanner)
+ (scanner-column scanner) (scanner-column char-scanner))
+ (cond-parse (:consumedp cp :expected exp)
+ ((satisfies whitespace-char-p) (parse :whitespace))
+ ((scan-comment char-scanner))
+ (t (if cp (lexer-error char-scanner exp cp) (return)))))
+
+ ;; Now parse something.
+ (cond-parse (:consumedp cp :expected exp)
+
+ ;; Alphanumerics mean we read an identifier.
+ ((or #\_ (satisfies alpha-char-p))
+ (values :id (with-output-to-string (out)
+ (write-char it out)
+ (parse (many (nil nil (write-char it out))
+ (or #\_ (satisfies alphanumericp)))))))
+
+ ;; Quotes introduce a literal.
+ ((seq ((quote (or #\" #\'))
+ (contents (many (out (make-string-output-stream)
+ (progn (write-char it out) out)
+ :final (get-output-stream-string out))
+ (or (and #\\ :any) (not quote))))
+ (nil (char quote)))
+ (ecase quote
+ (#\" contents)
+ (#\' (case (length contents)
+ (1 (char contents 0))
+ (0 (cerror* "Empty character literal") #\?)
+ (t (cerror* "Too many characters in literal")
+ (char contents 0))))))
+ (values (etypecase it
+ (character :char)
+ (string :string))
+ it))
+
+ ;; Zero introduces a chosen-radix integer.
+ ((and #\0
+ (or (and (or #\b #\B) (scan-digits :radix 2))
+ (and (or #\o #\O) (scan-digits :radix 8))
+ (and (or #\x #\X) (scan-digits :radix 16))
+ (scan-digits :radix 8 :min 0)))
+ (values :int it))
+
+ ;; Any other digit forces radix-10.
+ ((seq ((d (filter digit-char-p))
+ (i (scan-digits :radix 10 :min 0 :init d)))
+ i)
+ (values :int it))
+
+ ;; Some special punctuation sequences are single tokens.
+ ("..." (values :ellipsis nil))
+
+ ;; Any other character is punctuation.
+ (:any (values it nil))
+
+ ;; End of file means precisely that.
+ (:eof (values :eof nil))
+
+ ;; Report errors and try again. Because we must have consumed some
+ ;; input in order to get here (we've matched both :any and :eof) we
+ ;; must make progress on every call.
+ (t
+ (assert cp)
+ (lexer-error char-scanner exp cp)
+ (scanner-token scanner)))))))
+
+;;;----- That's all, folks --------------------------------------------------