| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Lexical analysis for input parser |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (cl:in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Class definition. |
| 30 | |
(export 'sod-token-scanner)
(defclass sod-token-scanner (token-scanner)
  ((char-scanner :reader token-scanner-char-scanner
                 :initarg :char-scanner))
  (:documentation
   "Token scanner for SOD input files.

   Beyond the standard token scanner infrastructure inherited from
   `token-scanner', this holds just one thing: the character scanner from
   which the input is read."))
| 39 | |
(defmethod shared-initialize :after
    ((scanner sod-token-scanner) slot-names &key)
  ;; Offer a value for the `sod-parser::filename' slot via `default-slot':
  ;; the filename reported by the underlying character scanner.
  (default-slot (scanner 'sod-parser::filename slot-names)
    (let ((chars (token-scanner-char-scanner scanner)))
      (scanner-filename chars))))
| 44 | |
| 45 | ;;;-------------------------------------------------------------------------- |
| 46 | ;;; Utilities. |
| 47 | |
(defun show-char (stream char &optional colonp atsignp)
  "Write a human-readable rendering of CHAR to STREAM.

   CHAR may be nil, which is written as `<eof>'.  A graphic character other
   than space is written in quotes; any other character is written as its
   spelled-out name, in lowercase, between angle brackets.

   The argument list suits `format''s ~/.../ directive; the COLONP and
   ATSIGNP flags are accepted but ignored."
  (declare (ignore colonp atsignp))
  (if (null char)
      (write-string "<eof>" stream)
      (let ((plainp (and (graphic-char-p char) (char/= char #\space))))
        (format stream (if plainp "`~C'" "<~(~:C~)>") char))))
| 57 | |
(defun scan-comment (scanner)
  "Scan a comment (either `/* ... */' or `// ...') from SCANNER.

   SCANNER is handed directly to `character-scanner-context', so it must be
   a character scanner.  The result isn't interesting."
  (with-parser-context (character-scanner-context :scanner scanner)
    (parse (or
            ;; Block comment.  After the opening `/*', repeatedly skip a
            ;; (possibly empty) run of non-`*' characters followed by one or
            ;; more `*'s; the comment ends at a `/'.  The `(not #\/)' form is
            ;; presumably `skip-many''s separator argument, so that a run of
            ;; stars followed by anything other than `/' simply carries on
            ;; with the comment body -- TODO confirm against the definition
            ;; of `skip-many'.  The star run is labelled "*/", presumably so
            ;; that a failure here is reported as a missing closing
            ;; delimiter.
            (and "/*"
                 (and (skip-many ()
                        (and (skip-many () (not #\*))
                             (label "*/" (skip-many (:min 1) #\*)))
                        (not #\/))
                      #\/))
            ;; Line comment.  After `//', skip everything up to a newline,
            ;; and then the newline itself if there is one.
            (and "//"
                 (skip-many () (not #\newline))
                 (? #\newline))))))
| 72 | |
(defmethod make-scanner-stream ((scanner sod-token-scanner))
  "Delegate to `make-scanner-stream' on SCANNER's character scanner."
  (let ((chars (token-scanner-char-scanner scanner)))
    (make-scanner-stream chars)))
| 75 | |
| 76 | ;;;-------------------------------------------------------------------------- |
| 77 | ;;; Error reporting. |
| 78 | |
(defvar *indicator-map* (make-hash-table)
  "Table associating indicator objects with human-readable descriptions.")

(defun define-indicator (indicator description)
  "Record DESCRIPTION as the description of INDICATOR in `*indicator-map*'.

   Any previous description of INDICATOR is replaced.  Returns INDICATOR."
  (prog1 indicator
    (setf (gethash indicator *indicator-map*) description)))
| 85 | |
(export 'syntax-error)
(defun syntax-error (scanner expected &key (continuep t))
  "Report a syntax error at SCANNER's current token.

   EXPECTED is a list of things which would have been acceptable.  Each is
   described by its entry in `*indicator-map*' if it has one; otherwise an
   atom is described as a token type, a list headed by `:id' as the quoted
   identifier name, and anything else as `<? ...>'.  The token actually
   found is taken from `token-type' and `token-value' of SCANNER.

   The error is signalled using `cerror*' if CONTINUEP is non-nil (the
   default), and using `error' otherwise."
  (labels ((describe-token (type value)
             ;; Return a string describing a token of TYPE with VALUE.
             (cond ((characterp type)
                    (format nil "~/sod::show-char/" type))
                   ((eq type :id)
                    (format nil "<identifier~@[ `~A'~]>" value))
                   ((eq type :string) "<string-literal>")
                   ((eq type :char) "<character-literal>")
                   ((eq type :eof) "<end-of-file>")
                   ((eq type :ellipsis) "`...'")
                   (t (format nil "<? ~S~@[ ~S~]>" type value))))
           (describe-expected (thing)
             ;; Return a string describing one element of EXPECTED.
             (let ((description (gethash thing *indicator-map*)))
               (cond (description description)
                     ((atom thing) (describe-token thing nil))
                     ((eq (car thing) :id)
                      (format nil "`~A'" (cadr thing)))
                     (t (format nil "<? ~S>" thing))))))
    (let ((wanted (mapcar #'describe-expected expected))
          (found (describe-token (token-type scanner)
                                 (token-value scanner)))
          (signaller (if continuep #'cerror* #'error)))
      (funcall signaller
               "Syntax error: ~
                expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
                but found ~A"
               wanted
               found))))
| 111 | |
(export 'lexer-error)
(defun lexer-error (char-scanner expected consumedp)
  "Signal a continuable lexical error.

   EXPECTED is a list of things which would have been acceptable at this
   point.  Each element is described according to its type: a character is
   shown using `show-char'; a string is quoted; a list headed by `:digit' is
   shown as a digit in the radix given by its second element; `:eof' and
   `:any' have fixed descriptions; anything else is shown as `<? ...>'.

   The character reported as found is CHAR-SCANNER's current character, or
   end-of-file if the scanner is at the end of its input.  If CONSUMEDP is
   non-nil then the file location of CHAR-SCANNER is appended to the
   message.  The error is signalled using `cerror*'."
  ;; Within the `~{...~}' iteration, `~#[' dispatches on the number of items
  ;; still to be printed: none prints `<bug>', one prints the item, two
  ;; prints `A or B', and the `~:;' default clause prints `A, ' for each
  ;; item before that.  The default clause must be introduced by `~:;': with
  ;; a plain `~;' it only matches exactly three remaining items, and longer
  ;; lists select no clause at all, so nothing is consumed.
  (cerror* "Lexical error: ~
            expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
            but found ~/sod::show-char/~
            ~@[ at ~A~]"
           (mapcar (lambda (exp)
                     (typecase exp
                       (character (format nil "~/sod::show-char/" exp))
                       (string (format nil "`~A'" exp))
                       ((cons (eql :digit) *) (format nil "<radix-~A digit>"
                                                      (cadr exp)))
                       ((eql :eof) "<end-of-file>")
                       ((eql :any) "<character>")
                       (t (format nil "<? ~S>" exp))))
                   expected)
           ;; A nil here makes `show-char' print `<eof>'.
           (and (not (scanner-at-eof-p char-scanner))
                (scanner-current-char char-scanner))
           (and consumedp (file-location char-scanner))))
| 132 | |
| 133 | ;;;-------------------------------------------------------------------------- |
| 134 | ;;; Token scanner protocol implementation. |
| 135 | |
(defmethod scanner-token ((scanner sod-token-scanner))
  "Read the next token from SCANNER's character scanner.

   Whitespace and comments before the token are skipped, and the character
   scanner's line and column at the start of the token are recorded in
   SCANNER.  Two values are returned, a token type and a token value:

     * `:id' and the identifier name, as a string;
     * `:string' and the contents of a string literal;
     * `:char' and the character from a character literal;
     * `:int' and the integer value;
     * `:ellipsis' and nil, for `...';
     * the character itself and nil, for any other character; or
     * `:eof' and nil, at end of file.

   Lexical errors are reported through `lexer-error' and `cerror*'."
  (with-slots (char-scanner line column) scanner
    (with-parser-context (character-scanner-context :scanner char-scanner)

      (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
               ;; Scan and return a sequence of at least MIN digits in the
               ;; given RADIX.  The result starts at INIT and is scaled by
               ;; RADIX before each further digit is added; IT is
               ;; presumably the digit's weight as returned by
               ;; `digit-char-p' -- confirm against `filter'.
               (parse (many (acc init (+ (* acc radix) it) :min min)
                        (label (list :digit radix)
                          (filter (lambda (ch)
                                    (digit-char-p ch radix))))))))

        ;; Skip initial junk, and remember the place.  The position is
        ;; refreshed on every trip round the loop, so it ends up describing
        ;; the point just after the last piece of junk.
        (loop
          (setf (scanner-line scanner) (scanner-line char-scanner)
                (scanner-column scanner) (scanner-column char-scanner))
          (cond-parse (:consumedp cp :expected exp)
            ((satisfies whitespace-char-p) (parse :whitespace))
            ((scan-comment char-scanner))
            ;; Nothing matched.  If input was consumed on the way then
            ;; report the problem and go round again; otherwise we've
            ;; reached the start of a token.
            (t (if cp (lexer-error char-scanner exp cp) (return)))))

        ;; Now parse something.
        (cond-parse (:consumedp cp :expected exp)

          ;; A letter or underscore begins an identifier, which continues
          ;; with any number of alphanumerics and underscores.
          ((or #\_ (satisfies alpha-char-p))
           (values :id (with-output-to-string (out)
                         (write-char it out)
                         (parse (many (nil nil (write-char it out))
                                  (or #\_ (satisfies alphanumericp)))))))

          ;; Quotes introduce a literal.  A backslash causes the following
          ;; character to be taken as part of the contents, whatever it is.
          ;; A double-quoted literal yields its contents as a string; a
          ;; single-quoted literal yields a single character, with an error
          ;; reported if it doesn't contain exactly one.
          ((seq ((quote (or #\" #\'))
                 (contents (many (out (make-string-output-stream)
                                      (progn (write-char it out) out)
                                      :final (get-output-stream-string out))
                             (or (and #\\ :any) (not quote))))
                 (nil (char quote)))
             (ecase quote
               (#\" contents)
               (#\' (case (length contents)
                      (1 (char contents 0))
                      (0 (cerror* "Empty character literal") #\?)
                      (t (cerror* "Too many characters in literal")
                         (char contents 0))))))
           (values (etypecase it
                     (character :char)
                     (string :string))
                   it))

          ;; Zero introduces a chosen-radix integer: `b', `o' or `x' (in
          ;; either case) select radix 2, 8 or 16 and require at least one
          ;; digit; otherwise any following digits are read in radix 8.
          ((and #\0
                (or (and (or #\b #\B) (scan-digits :radix 2))
                    (and (or #\o #\O) (scan-digits :radix 8))
                    (and (or #\x #\X) (scan-digits :radix 16))
                    (scan-digits :radix 8 :min 0)))
           (values :int it))

          ;; Any other digit forces radix-10.  The first digit seeds the
          ;; accumulation of the rest.
          ((seq ((d (filter digit-char-p))
                 (i (scan-digits :radix 10 :min 0 :init d)))
             i)
           (values :int it))

          ;; Some special punctuation sequences are single tokens.
          ("..." (values :ellipsis nil))

          ;; Any other character is punctuation.
          (:any (values it nil))

          ;; End of file means precisely that.
          (:eof (values :eof nil))

          ;; Report errors and try again.  Because we must have consumed some
          ;; input in order to get here (otherwise one of the `:any' and
          ;; `:eof' clauses above would have matched) we must make progress
          ;; on every call.
          (t
           (assert cp)
           (lexer-error char-scanner exp cp)
           (scanner-token scanner)))))))
| 215 | |
| 216 | ;;;----- That's all, folks -------------------------------------------------- |