Today's wip.
[sod] / src / lexer-proto.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for lexical analysis
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
239fa5bd 29;;; Class definition.
dea4d055 30
239fa5bd
MW
31(export 'sod-token-scanner)
32(defclass sod-token-scanner (token-scanner)
33 ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
dea4d055 34 (:documentation
239fa5bd 35 "A token scanner for SOD input files.
dea4d055 36
239fa5bd
MW
37 Not a lot here, apart from a character scanner to read from and the
38 standard token scanner infrastructure."))
dea4d055 39
dea4d055 40;;;--------------------------------------------------------------------------
239fa5bd
MW
41;;; Indicators and error messages.
42
43(export 'define-indicator)
44(defun define-indicator (indicator description)
45 "Associate an INDICATOR with its textual DESCRIPTION.
46
47 Updates the the `*indicator-map*'."
48 (setf (gethash indicator *indicator-map*) description)
49 indicator)
50
51(export 'syntax-error)
52(defun syntax-error (scanner expected &key (continuep t))
53 "Signal a (maybe) continuable syntax error."
54 (labels ((show-token (type value)
55 (if (characterp type)
56 (format nil "~/sod::show-char/" type)
57 (case type
58 (:id (format nil "<identifier~@[ `~A'~]>" value))
048d0b2d 59 (:int "<integer-literal>")
239fa5bd
MW
60 (:string "<string-literal>")
61 (:char "<character-literal>")
62 (:eof "<end-of-file>")
63 (:ellipsis "`...'")
64 (t (format nil "<? ~S~@[ ~S~]>" type value)))))
65 (show-expected (thing)
66 (acond ((gethash thing *indicator-map*) it)
67 ((atom thing) (show-token thing nil))
68 ((eq (car thing) :id)
69 (format nil "`~A'" (cadr thing)))
70 (t (format nil "<? ~S>" thing)))))
71 (funcall (if continuep #'cerror* #'error)
72 "Syntax error: ~
73 expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
74 but found ~A"
75 (mapcar #'show-expected expected)
76 (show-token (token-type scanner) (token-value scanner)))))
77
78(export 'lexer-error)
79(defun lexer-error (char-scanner expected consumedp)
80 "Signal a continuable lexical error."
81 (cerror* "Lexical error: ~
82 expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~
83 but found ~/sod::show-char/~
84 ~@[ at ~A~]"
85 (mapcar (lambda (exp)
86 (typecase exp
87 (character (format nil "~/sod::show-char/" exp))
88 (string (format nil "`~A'" exp))
89 ((cons (eql :digit) *) (format nil "<radix-~A digit>"
90 (cadr exp)))
91 ((eql :eof) "<end-of-file>")
92 ((eql :any) "<character>")
93 (t (format nil "<? ~S>" exp))))
94 expected)
95 (and (not (scanner-at-eof-p char-scanner))
96 (scanner-current-char char-scanner))
97 (and consumedp (file-location char-scanner))))
dea4d055 98
048d0b2d
MW
99(defparse skip-until (:context (context token-scanner-context)
100 (&key (keep-end nil keep-end-p))
101 &rest token-types)
102 "Discard tokens until we find one listed in TOKEN-TYPES.
103
104 If KEEP-END is true then retain the found token for later; otherwise
105 discard it. KEEP-END defaults to true if multiple TOKEN-TYPES are given;
106 otherwise false. If end-of-file is encountered then the indicator list is
107 simply the list of TOKEN-TYPES; otherwise the result is `nil'."
108 `(skip-until ,(parser-scanner context)
109 (list ,@token-types)
110 :keep-end ,(if keep-end-p keep-end
111 (> (length token-types) 1))))
112
113(defparse error (:context (context token-scanner-context)
114 (&key) sub &optional (recover t))
115 "Try to parse SUB; if it fails then report an error, and parse RECOVER.
116
117 This is the main way to recover from errors and continue parsing. Even
118 then, it's not especially brilliant.
119
120 If the SUB parser succeeds then just propagate its result: it's like we
121 were never here. Otherwise, try to recover in a sensible way so we can
122 continue parsing. The details of this recovery are subject to change, but
123 the final action is generally to invoke the RECOVER parser and return its
124 result."
125 `(parse-error-recover ,(parser-scanner context)
126 (parser () ,sub)
127 (parser () ,recover)))
128
dea4d055 129;;;--------------------------------------------------------------------------
239fa5bd
MW
130;;; Lexical analysis utilities.
131
132(defun scan-comment (char-scanner)
133 "Scan a comment (either `/* ... */' or `// ...') from CHAR-SCANNER.
134
135 The result isn't interesting."
136 (with-parser-context (character-scanner-context :scanner char-scanner)
137 (parse (or (and "/*"
138 (and (skip-many ()
139 (and (skip-many () (not #\*))
140 (label "*/" (skip-many (:min 1) #\*)))
141 (not #\/))
142 #\/))
143 (and "//"
144 (skip-many () (not #\newline))
145 (? #\newline))))))
dea4d055
MW
146
147;;;----- That's all, folks --------------------------------------------------