| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Protocol for lexical analysis |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (cl:in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Class definition. |
| 30 | |
| 31 | (export 'sod-token-scanner) |
| 32 | (defclass sod-token-scanner (token-scanner) |
| 33 | ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) |
| 34 | (:documentation |
| 35 | "A token scanner for SOD input files. |
| 36 | |
| 37 | Not a lot here, apart from a character scanner to read from and the |
| 38 | standard token scanner infrastructure.")) |
| 39 | |
| 40 | ;;;-------------------------------------------------------------------------- |
| 41 | ;;; Indicators and error messages. |
| 42 | |
| 43 | (defvar *indicator-map* (make-hash-table) |
| 44 | "Hash table mapping indicator objects to human-readable descriptions.") |
| 45 | |
| 46 | (export 'define-indicator) |
| 47 | (defun define-indicator (indicator description) |
| 48 | "Associate an INDICATOR with its textual DESCRIPTION. |
| 49 | |
| 50 | Updates the the `*indicator-map*'." |
| 51 | (setf (gethash indicator *indicator-map*) description) |
| 52 | indicator) |
| 53 | |
| 54 | (export 'syntax-error) |
| 55 | (define-condition syntax-error (parser-error base-syntax-error) |
| 56 | ((found :type cons)) |
| 57 | (:report (lambda (error stream) |
| 58 | (labels ((show-token (type value) |
| 59 | (if (characterp type) (show-char type) |
| 60 | (case type |
| 61 | (:id (format nil "<identifier~@[ `~A'~]>" |
| 62 | value)) |
| 63 | (:int "<integer-literal>") |
| 64 | (:string "<string-literal>") |
| 65 | (:char "<character-literal>") |
| 66 | (:eof "<end-of-file>") |
| 67 | (:ellipsis "`...'") |
| 68 | (:shl "`<<'") |
| 69 | (:shr "`>>'") |
| 70 | (:eq "`=='") |
| 71 | (:ne "`!='") |
| 72 | (:le "`<='") |
| 73 | (:ge "`>='") |
| 74 | (:and "`&&'") |
| 75 | (:or "`||'") |
| 76 | (t (format nil "<? ~S~@[ ~S~]>" type value))))) |
| 77 | (show-expected (thing) |
| 78 | (acond ((gethash thing *indicator-map*) it) |
| 79 | ((atom thing) (show-token thing nil)) |
| 80 | ((eq (car thing) :id) |
| 81 | (format nil "`~A'" (cadr thing))) |
| 82 | (t (format nil "<? ~S>" thing))))) |
| 83 | (report-parser-error error stream |
| 84 | #'show-expected |
| 85 | (lambda (found) |
| 86 | (show-token (car found) |
| 87 | (cdr found)))))))) |
| 88 | (defun syntax-error (scanner expected &key (continuep t) location) |
| 89 | "Signal a (maybe) continuable syntax error." |
| 90 | (funcall (if continuep #'cerror*-with-location #'error-with-location) |
| 91 | (or location scanner) 'syntax-error |
| 92 | :expected expected |
| 93 | :found (cons (token-type scanner) (token-value scanner)))) |
| 94 | |
| 95 | (export 'lexer-error) |
| 96 | (define-condition lexer-error (parser-error base-lexer-error) |
| 97 | ((found :type (or character nil))) |
| 98 | (:report (lambda (error stream) |
| 99 | (flet ((show-expected (exp) |
| 100 | (typecase exp |
| 101 | (character (show-char exp)) |
| 102 | (string (format nil "`~A'" exp)) |
| 103 | ((cons (eql :digit) *) |
| 104 | (format nil "<radix-~A digit>" (cadr exp))) |
| 105 | ((eql :eof) "<end-of-file>") |
| 106 | ((eql :any) "<character>") |
| 107 | (t (format nil "<? ~S>" exp))))) |
| 108 | (report-parser-error error stream |
| 109 | #'show-expected #'show-char))))) |
| 110 | (defun lexer-error (char-scanner expected &key location) |
| 111 | "Signal a continuable lexical error." |
| 112 | (cerror*-with-location (or location char-scanner) 'lexer-error |
| 113 | :expected expected |
| 114 | :found (and (not (scanner-at-eof-p char-scanner)) |
| 115 | (scanner-current-char char-scanner)))) |
| 116 | |
| 117 | (export 'skip-until) |
| 118 | (defparse skip-until (:context (context token-scanner-context) |
| 119 | (&key (keep-end nil keep-end-p)) |
| 120 | &rest token-types) |
| 121 | "Discard tokens until we find one listed in TOKEN-TYPES. |
| 122 | |
| 123 | Each of the TOKEN-TYPES is an expression which evaluates to either a |
| 124 | two-item list (TYPE VALUE), or a singleton TYPE; the latter is equivalent |
| 125 | to a list (TYPE t). Such a pair matches a token with the corresponding |
| 126 | TYPE and VALUE, except that a VALUE of `t' matches any token value. |
| 127 | |
| 128 | If KEEP-END is true then retain the found token for later; otherwise |
| 129 | discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given; |
| 130 | otherwise false. If end-of-file is encountered then the indicator list is |
| 131 | simply the list of TOKEN-TYPES; otherwise the result is `nil'." |
| 132 | `(%skip-until ,(parser-scanner context) |
| 133 | (list ,@token-types) |
| 134 | :keep-end ,(if keep-end-p keep-end |
| 135 | (> (length token-types) 1)))) |
| 136 | |
| 137 | (export 'error) |
| 138 | (defparse error (:context (context token-scanner-context) |
| 139 | (&key ignore-unconsumed force-progress) |
| 140 | sub &optional (recover t) &body body) |
| 141 | "Try to parse SUB; if it fails then report an error, and parse RECOVER. |
| 142 | |
| 143 | This is the main way to recover from errors and continue parsing. Even |
| 144 | then, it's not especially brilliant. |
| 145 | |
| 146 | If the SUB parser succeeds then just propagate its result: it's like we |
| 147 | were never here. Otherwise, try to recover in a sensible way so we can |
| 148 | continue parsing. The details of this recovery are subject to change, but |
| 149 | the final action is generally to invoke the RECOVER parser and return its |
| 150 | result. |
| 151 | |
| 152 | If IGNORE-UNCONSUMED evaluates non-nil, then just propagate a failure of |
| 153 | SUB if it didn't consume input. (This makes it suitable for use where the |
| 154 | parser containing `error' might be optional.)" |
| 155 | `(parse-error-recover ,(parser-scanner context) |
| 156 | (parser () ,sub) |
| 157 | (parser () ,recover) |
| 158 | :ignore-unconsumed ,ignore-unconsumed |
| 159 | :force-progress ,force-progress |
| 160 | :action ,(and body `(lambda () ,@body)))) |
| 161 | |
| 162 | (export 'must) |
| 163 | (defparse must (:context (context token-scanner-context) |
| 164 | sub &optional default) |
| 165 | "Try to parse SUB; if it fails, report an error, and return DEFAULT. |
| 166 | |
| 167 | This parser can't actually fail." |
| 168 | `(parse (error () ,sub (t ,default)))) |
| 169 | |
| 170 | ;;;-------------------------------------------------------------------------- |
| 171 | ;;; Lexical analysis utilities. |
| 172 | |
| 173 | (export 'scan-comment) |
| 174 | (defun scan-comment (char-scanner) |
| 175 | "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER. |
| 176 | |
| 177 | The result isn't interesting." |
| 178 | (with-parser-context (character-scanner-context :scanner char-scanner) |
| 179 | (let ((start (file-location char-scanner))) |
| 180 | (parse (or (and "/*" |
| 181 | (lisp (let ((state nil)) |
| 182 | (loop (cond ((scanner-at-eof-p char-scanner) |
| 183 | (lexer-error char-scanner |
| 184 | (list "*/")) |
| 185 | (info-with-location |
| 186 | start "Comment started here") |
| 187 | (return (values nil t t))) |
| 188 | ((char= (scanner-current-char |
| 189 | char-scanner) |
| 190 | #\*) |
| 191 | (setf state '*) |
| 192 | (scanner-step char-scanner)) |
| 193 | ((and (eq state '*) |
| 194 | (char= (scanner-current-char |
| 195 | char-scanner) |
| 196 | #\/)) |
| 197 | (scanner-step char-scanner) |
| 198 | (return (values nil t t))) |
| 199 | (t |
| 200 | (setf state nil) |
| 201 | (scanner-step char-scanner))))))) |
| 202 | (and "//" |
| 203 | (skip-many () (not #\newline)) |
| 204 | (? #\newline))))))) |
| 205 | |
| 206 | ;;;----- That's all, folks -------------------------------------------------- |