X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/bf090e021a5c20da452a4841cdfb8eb78e29544e..aa14a4cddcb96b681d5c19a2ec8bad382f43b264:/src/lexical-parse.lisp?ds=sidebyside diff --git a/src/lexical-parse.lisp b/src/lexical-parse.lisp new file mode 100644 index 0000000..1e9a76c --- /dev/null +++ b/src/lexical-parse.lisp @@ -0,0 +1,216 @@ +;;; -*-lisp-*- +;;; +;;; Lexical analysis for input parser +;;; +;;; (c) 2009 Straylight/Edgeware +;;; + +;;;----- Licensing notice --------------------------------------------------- +;;; +;;; This file is part of the Sensble Object Design, an object system for C. +;;; +;;; SOD is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; SOD is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with SOD; if not, write to the Free Software Foundation, +;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(cl:in-package #:sod) + +;;;-------------------------------------------------------------------------- +;;; Class definition. + +(export 'sod-token-scanner) +(defclass sod-token-scanner (token-scanner) + ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) + (:documentation + "A token scanner for SOD input files. + + Not a lot here, apart from a character scanner to read from and the + standard token scanner infrastructure.")) + +(defmethod shared-initialize :after + ((scanner sod-token-scanner) slot-names &key) + (default-slot (scanner 'sod-parser::filename slot-names) + (scanner-filename (token-scanner-char-scanner scanner)))) + +;;;-------------------------------------------------------------------------- +;;; Utilities. + +(defun show-char (stream char &optional colonp atsignp) + "Format CHAR to STREAM in a readable way. + + Usable in `format''s ~/.../ command." + (declare (ignore colonp atsignp)) + (cond ((null char) (write-string "" stream)) + ((and (graphic-char-p char) (char/= char #\space)) + (format stream "`~C'" char)) + (t (format stream "<~(~:C~)>" char)))) + +(defun scan-comment (scanner) + "Scan a comment (either `/* ... */' or `// ...') from SCANNER. + + The result isn't interesting." + (with-parser-context (character-scanner-context :scanner scanner) + (parse (or (and "/*" + (and (skip-many () + (and (skip-many () (not #\*)) + (label "*/" (skip-many (:min 1) #\*))) + (not #\/)) + #\/)) + (and "//" + (skip-many () (not #\newline)) + (? #\newline)))))) + +(defmethod make-scanner-stream ((scanner sod-token-scanner)) + (make-scanner-stream (token-scanner-char-scanner scanner))) + +;;;-------------------------------------------------------------------------- +;;; Error reporting. + +(defvar *indicator-map* (make-hash-table) + "Hash table mapping indicator objects to human-readable descriptions.") + +(defun define-indicator (indicator description) + (setf (gethash indicator *indicator-map*) description) + indicator) + +(export 'syntax-error) +(defun syntax-error (scanner expected &key (continuep t)) + "Signal a (maybe) continuable syntax error." + (labels ((show-token (type value) + (if (characterp type) + (format nil "~/sod::show-char/" type) + (case type + (:id (format nil "" value)) + (:string "") + (:char "") + (:eof "") + (:ellipsis "`...'") + (t (format nil "" type value))))) + (show-expected (thing) + (acond ((gethash thing *indicator-map*) it) + ((atom thing) (show-token thing nil)) + ((eq (car thing) :id) + (format nil "`~A'" (cadr thing))) + (t (format nil "" thing))))) + (funcall (if continuep #'cerror* #'error) + "Syntax error: ~ + expected ~{~#[~;~A~;~A or ~A~:;~A, ~]~} ~ + but found ~A" + (mapcar #'show-expected expected) + (show-token (token-type scanner) (token-value scanner))))) + +(export 'lexer-error) +(defun lexer-error (char-scanner expected consumedp) + "Signal a continuable lexical error." + (cerror* "Lexical error: ~ + expected ~{~#[~;~A~;~A or ~A~;:~A, ~]~} ~ + but found ~/sod::show-char/~ + ~@[ at ~A~]" + (mapcar (lambda (exp) + (typecase exp + (character (format nil "~/sod::show-char/" exp)) + (string (format nil "`~A'" exp)) + ((cons (eql :digit) *) (format nil "" + (cadr exp))) + ((eql :eof) "") + ((eql :any) "") + (t (format nil "" exp)))) + expected) + (and (not (scanner-at-eof-p char-scanner)) + (scanner-current-char char-scanner)) + (and consumedp (file-location char-scanner)))) + +;;;-------------------------------------------------------------------------- +;;; Token scanner protocol implementation. + +(defmethod scanner-token ((scanner sod-token-scanner)) + (with-slots (char-scanner line column) scanner + (with-parser-context (character-scanner-context :scanner char-scanner) + + (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) + ;; Scan an return a sequence of digits. + (parse (many (acc init (+ (* acc radix) it) :min min) + (label (list :digit radix) + (filter (lambda (ch) + (digit-char-p ch radix)))))))) + + ;; Skip initial junk, and remember the place. + (loop + (setf (scanner-line scanner) (scanner-line char-scanner) + (scanner-column scanner) (scanner-column char-scanner)) + (cond-parse (:consumedp cp :expected exp) + ((satisfies whitespace-char-p) (parse :whitespace)) + ((scan-comment char-scanner)) + (t (if cp (lexer-error char-scanner exp cp) (return))))) + + ;; Now parse something. + (cond-parse (:consumedp cp :expected exp) + + ;; Alphanumerics mean we read an identifier. + ((or #\_ (satisfies alpha-char-p)) + (values :id (with-output-to-string (out) + (write-char it out) + (parse (many (nil nil (write-char it out)) + (or #\_ (satisfies alphanumericp))))))) + + ;; Quotes introduce a literal. + ((seq ((quote (or #\" #\')) + (contents (many (out (make-string-output-stream) + (progn (write-char it out) out) + :final (get-output-stream-string out)) + (or (and #\\ :any) (not quote)))) + (nil (char quote))) + (ecase quote + (#\" contents) + (#\' (case (length contents) + (1 (char contents 0)) + (0 (cerror* "Empty character literal") #\?) + (t (cerror* "Too many characters in literal") + (char contents 0)))))) + (values (etypecase it + (character :char) + (string :string)) + it)) + + ;; Zero introduces a chosen-radix integer. + ((and #\0 + (or (and (or #\b #\B) (scan-digits :radix 2)) + (and (or #\o #\O) (scan-digits :radix 8)) + (and (or #\x #\X) (scan-digits :radix 16)) + (scan-digits :radix 8 :min 0))) + (values :int it)) + + ;; Any other digit forces radix-10. + ((seq ((d (filter digit-char-p)) + (i (scan-digits :radix 10 :min 0 :init d))) + i) + (values :int it)) + + ;; Some special punctuation sequences are single tokens. + ("..." (values :ellipsis nil)) + + ;; Any other character is punctuation. + (:any (values it nil)) + + ;; End of file means precisely that. + (:eof (values :eof nil)) + + ;; Report errors and try again. Because we must have consumed some + ;; input in order to get here (we've matched both :any and :eof) we + ;; must make progress on every call. + (t + (assert cp) + (lexer-error char-scanner exp cp) + (scanner-token scanner))))))) + +;;;----- That's all, folks --------------------------------------------------