;;; -*-lisp-*-
;;;
;;; Implementation of lexical analysis protocol.
;;;
;;; (c) 2009 Straylight/Edgeware
;;;

;;;----- Licensing notice ---------------------------------------------------
;;;
;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2 of the License, or
;;; (at your option) any later version.
;;;
;;; SOD is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with SOD; if not, write to the Free Software Foundation,
;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

(cl:in-package #:sod)

;;;--------------------------------------------------------------------------
;;; Class implementation.

(defmethod shared-initialize :after
    ((scanner sod-token-scanner) slot-names &key)
  ;; Take the default for our own `filename' slot from the underlying
  ;; character scanner.  (Presumably `default-slot' only stores the value if
  ;; the slot hasn't been given one already -- confirm against its
  ;; definition.)
  (default-slot (scanner 'sod-parser::filename slot-names)
    (scanner-filename (token-scanner-char-scanner scanner))))

(defmethod make-scanner-stream ((scanner sod-token-scanner))
  ;; A token scanner has no stream of its own: delegate to the character
  ;; scanner which it wraps.
  (make-scanner-stream (token-scanner-char-scanner scanner)))

;;;--------------------------------------------------------------------------
;;; Indicators and error messages.

(defun show-char (char)
  "Format CHAR as a string in a readable way.

   CHAR is a character or nil.  A graphic character other than space is
   shown quoted, as `x'; any other character is shown by its lowercase name
   in angle brackets, e.g., `<space>'; and nil is shown as `<eof>'."
  (cond ((null char)
         ;; Never return an empty string here: it would leave a hole in any
         ;; message built from the result.
         "<eof>")
        ((and (graphic-char-p char) (char/= char #\space))
         (format nil "`~C'" char))
        (t
         (format nil "<~(~:C~)>" char))))

(defun %skip-until (scanner token-types
                    &key (keep-end (not (null (cdr token-types)))))
  "This is the implementation of the `skip-until' parser.

   Discard tokens from SCANNER until the current token matches one of the
   TOKEN-TYPES.  Each element of TOKEN-TYPES is either an atom TYPE, which
   matches any token of that type, or a list (TYPE VALUE), which matches a
   token of that type whose value is `equal' to VALUE (a VALUE of `t'
   matches any value).

   If KEEP-END is true, the matching token is left as the current token;
   otherwise it is stepped over too.  KEEP-END defaults to true exactly when
   more than one token type is given.

   Returns three values.  On a match: nil, t, and a consumed flag.  If
   end-of-file is reached first: TOKEN-TYPES, nil, and a flag which is true
   if any tokens were discarded."
  (do ((consumedp nil t))
      ((let ((type (token-type scanner))
             (value (token-value scanner)))
         (some (lambda (spec)
                 (multiple-value-bind (want-type want-value)
                     (cond ((listp spec) (values (car spec) (cadr spec)))
                           (t (values spec t)))
                   (and (eq want-type type)
                        (or (eq want-value t)
                            (equal want-value value)))))
               token-types))
       (unless keep-end (scanner-step scanner))
       ;; NOTE(review): the consumed flag is forced true when KEEP-END is
       ;; set, rather than when the end token was stepped over.  This looks
       ;; as if it might be inverted; behaviour is left unchanged here --
       ;; confirm against the callers' expectations.
       (values nil t (or keep-end consumedp)))
    (when (scanner-at-eof-p scanner)
      (return (values token-types nil consumedp)))
    (scanner-step scanner)))

(defun parse-error-recover (scanner parser recover
                            &key ignore-unconsumed force-progress action)
  "This is the implementation of the `error' parser.

   Call PARSER (a function of no arguments, returning the usual three values
   RESULT, WIN and CONSUMEDP).  If it succeeds, return its values unchanged.
   The same happens if it fails without consuming anything and either
   IGNORE-UNCONSUMED is true or SCANNER is at end-of-file.

   Otherwise, report a syntax error using RESULT; call ACTION, if it was
   provided, with no arguments; if FORCE-PROGRESS is true and PARSER
   consumed nothing, step SCANNER past the current token; and finally call
   RECOVER with no arguments, returning whatever it returns."
  (multiple-value-bind (result win consumedp) (funcall parser)
    (cond ((or win
               (and (not consumedp)
                    (or ignore-unconsumed
                        (scanner-at-eof-p scanner))))

           ;; If we succeeded, or if we didn't consume any tokens and the
           ;; caller's OK with that, then there's nothing for us to do here.
           ;; On the other hand, if we failed, didn't consume any tokens, and
           ;; we're at end-of-file, then there's not much hope of making
           ;; onward progress, so in this case we propagate the failure
           ;; rather than trying to recover.  And we assume that the
           ;; continuation will somehow arrange to report the problem, and
           ;; avoid inundating the user with error reports.
           (values result win consumedp))

          (t

           ;; Now we have to do some kind of sensible error recovery.  The
           ;; important thing to do here is to make sure that we make some
           ;; progress.  If we consumed any tokens then we're fine, and we'll
           ;; just try the provided recovery strategy.  Otherwise, if we're
           ;; not at EOF, then we can ensure progress by discarding the
           ;; current token.  Finally, if we are at EOF then our best bet is
           ;; simply to propagate the current failure back to the caller, but
           ;; we handled that case above.
           (syntax-error scanner result)
           (when action (funcall action))
           (when (and force-progress (not consumedp))
             (scanner-step scanner))
           (funcall recover)))))

;;;--------------------------------------------------------------------------
;;; Token scanning.

(defmethod scanner-token ((scanner sod-token-scanner))
  ;; Read the next token from the underlying character scanner.  Leading
  ;; whitespace and comments are skipped first, and the position of the
  ;; token's start is recorded in SCANNER.  Returns the token type and the
  ;; token value: `:id' and a string; `:string' and a string; `:char' and a
  ;; character; `:int' and an integer; a keyword naming a multi-character
  ;; punctuation sequence; a single punctuation character; or `:eof'.
  (with-slots (char-scanner line column) scanner
    (with-parser-context (character-scanner-context :scanner char-scanner)

      (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
               ;; Scan and return a sequence of digits.
               (parse (many (acc init (+ (* acc radix) it) :min min)
                        (label (list :digit radix)
                               (filter (lambda (ch)
                                         (digit-char-p ch radix)))))))
             (start-floc ()
               ;; This is a little nasty.  We scan the first token during
               ;; instance initialization, as a result of `shared-initialize'
               ;; on `token-scanner'.  Unfortunately, this happens before
               ;; we've had a chance to initialize our own `filename' slot.
               ;; This means that we can't use the SCANNER as a file
               ;; location, however tempting it might be.  So we have this
               ;; hack.
               (make-file-location (scanner-filename char-scanner)
                                   (scanner-line scanner)
                                   (scanner-column scanner))))

        ;; Skip initial junk, and remember the place.
        (loop
          (setf (scanner-line scanner) (scanner-line char-scanner)
                (scanner-column scanner) (scanner-column char-scanner))
          (cond-parse (:consumedp cp :expected exp)
            ((satisfies whitespace-char-p) (parse :whitespace))
            ((scan-comment char-scanner))
            (t (if cp (lexer-error char-scanner exp) (return)))))

        ;; Now parse something.
        (cond-parse (:consumedp cp :expected exp)

          ;; Alphanumerics mean we read an identifier.
          ((or #\_ (satisfies alpha-char-p))
           (values :id
                   (with-output-to-string (out)
                     (write-char it out)
                     (parse (many (nil nil (write-char it out))
                              (or #\_ (satisfies alphanumericp)))))))

          ;; Quotes introduce a literal.
          ((seq ((quote (or #\" #\'))
                 (contents (many (out (make-string-output-stream)
                                      (progn (write-char it out) out)
                                      :final (get-output-stream-string out))
                             (or (and #\\ :any) (not quote))))
                 (nil (or (char quote)
                          (seq (:eof)
                            (lexer-error char-scanner (list quote))
                            (info-with-location
                             (start-floc) "Literal started here")))))
             (ecase quote
               (#\" contents)
               (#\' (case (length contents)
                      (1 (char contents 0))
                      (0 (cerror*-with-location (start-floc)
                                                'simple-lexer-error
                                                :format-control
                                                "Empty character literal")
                         #\?)
                      (t (cerror*-with-location (start-floc)
                                                'simple-lexer-error
                                                :format-control
                                                "Too many characters ~
                                                 in character literal")
                         (char contents 0))))))
           (values (etypecase it
                     (character :char)
                     (string :string))
                   it))

          ;; Zero introduces a chosen-radix integer.
          ((and #\0
                (or (and (or #\b #\B) (scan-digits :radix 2))
                    (and (or #\o #\O) (scan-digits :radix 8))
                    (and (or #\x #\X) (scan-digits :radix 16))
                    (scan-digits :radix 8 :min 0)))
           (values :int it))

          ;; Any other digit forces radix-10.
          ((seq ((d (filter digit-char-p))
                 (i (scan-digits :radix 10 :min 0 :init d)))
             i)
           (values :int it))

          ;; Some special punctuation sequences are single tokens.
          ("..." (values :ellipsis nil))
          ("==" (values :eq))
          ("!=" (values :ne))
          ("<=" (values :le))
          (">=" (values :ge))
          ("&&" (values :and))
          ("||" (values :or))
          ("<<" (values :shl))
          (">>" (values :shr))

          ;; Any other character is punctuation.
          (:any (values it nil))

          ;; End of file means precisely that.
          (:eof (values :eof nil))

          ;; Report errors and try again.  Because we must have consumed some
          ;; input in order to get here (we've matched both :any and :eof) we
          ;; must make progress on every call.
          (t
           (assert cp)
           (lexer-error char-scanner exp)
           (scanner-token scanner)))))))

;;;----- That's all, folks --------------------------------------------------