Daily work in progress.
[sod] / src / lexer-impl.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Implementation of lexical analysis protocol.
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensible Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
239fa5bd 29;;; Class implementation.
dea4d055 30
239fa5bd
MW
(defmethod shared-initialize :after
    ((self sod-token-scanner) slots &key)
  ;; After the standard slot initialization, give the FILENAME slot a
  ;; default taken from the underlying character scanner.  The decision
  ;; of whether the slot actually needs filling is left to DEFAULT-SLOT,
  ;; which is handed the instance, the slot name and the SLOTS argument.
  (default-slot (self 'sod-parser::filename slots)
    ;; This form is supplied as the body of DEFAULT-SLOT rather than being
    ;; computed up front, so the macro controls whether it is evaluated.
    (scanner-filename (token-scanner-char-scanner self))))
dea4d055 35
239fa5bd
MW
(defmethod make-scanner-stream ((token-scanner sod-token-scanner))
  ;; A token scanner has no stream of its own: build the stream from the
  ;; character scanner it wraps.
  (let ((chars (token-scanner-char-scanner token-scanner)))
    (make-scanner-stream chars)))
dea4d055
MW
38
39;;;--------------------------------------------------------------------------
239fa5bd 40;;; Indicators and error messages.
dea4d055 41
239fa5bd
MW
(defvar *indicator-map*
  (make-hash-table :test #'eql)
  "Lookup table from indicator objects to their human-readable descriptions.

   Keys are compared with EQL.")
dea4d055 44
239fa5bd
MW
(defun show-char (stream char &optional colonp atsignp)
  "Write a human-readable rendering of CHAR to STREAM.

   CHAR may be nil, which stands for end-of-file and is shown as `<eof>'.
   A graphic character other than space is shown between quotes; anything
   else is shown as its lower-cased name between angle brackets.

   The COLONP and ATSIGNP arguments are accepted and ignored, so that the
   function can be called from `format''s ~/.../ directive."
  (declare (ignore colonp atsignp))
  (if (null char)
      (write-string "<eof>" stream)
      (let ((visiblep (and (graphic-char-p char)
                           (char/= char #\space))))
        (if visiblep
            (format stream "`~C'" char)
            (format stream "<~(~:C~)>" char)))))
dea4d055 54
239fa5bd
MW
55;;;--------------------------------------------------------------------------
56;;; Token scanning.
57
;;; Read the next token from SCANNER and return it as two values: the token
;;; type and the token value.  The pairs produced below are:
;;;
;;;   :id        a string holding the identifier name
;;;   :string    a string holding the contents of a double-quoted literal
;;;   :char      a character taken from a single-quoted literal
;;;   :int       an integer
;;;   :ellipsis  nil
;;;   :eof       nil
;;;   (other)    the value matched by :any as the type, with nil as value
;;;
;;; Before the token proper is read, whitespace and comments are skipped and
;;; the position of the character scanner is copied into SCANNER.
58(defmethod scanner-token ((scanner sod-token-scanner))
 ;; NOTE(review): LINE and COLUMN are named in this WITH-SLOTS but the body
 ;; never mentions them; positions are written through the SCANNER-LINE and
 ;; SCANNER-COLUMN accessors instead.
59 (with-slots (char-scanner line column) scanner
60 (with-parser-context (character-scanner-context :scanner char-scanner)
61
62 (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
63 ;; Scan and return a sequence of digits.
 ;; The accumulator starts at INIT and each step computes
 ;; ACC * RADIX + IT; at least MIN digits are required.  IT is
 ;; presumably the digit weight returned by DIGIT-CHAR-P through
 ;; FILTER -- TODO confirm against the parser macros.
64 (parse (many (acc init (+ (* acc radix) it) :min min)
65 (label (list :digit radix)
66 (filter (lambda (ch)
67 (digit-char-p ch radix))))))))
68
69 ;; Skip initial junk, and remember the place.
 ;; The position is copied from the character scanner at the top of
 ;; every iteration, so once the loop exits it reflects the point
 ;; just after the last piece of junk skipped.
70 (loop
71 (setf (scanner-line scanner) (scanner-line char-scanner)
72 (scanner-column scanner) (scanner-column char-scanner))
73 (cond-parse (:consumedp cp :expected exp)
74 ((satisfies whitespace-char-p) (parse :whitespace))
75 ((scan-comment char-scanner))
 ;; Neither alternative matched.  If CP is true, report a lexer
 ;; error and go round again; otherwise leave the loop.
76 (t (if cp (lexer-error char-scanner exp cp) (return)))))
77
78 ;; Now parse something.
79 (cond-parse (:consumedp cp :expected exp)
80
81 ;; Alphanumerics mean we read an identifier.
 ;; The first character must be an underscore or alphabetic; later
 ;; ones may be underscores or alphanumeric.  All of them are
 ;; collected into a fresh string.
82 ((or #\_ (satisfies alpha-char-p))
83 (values :id (with-output-to-string (out)
84 (write-char it out)
85 (parse (many (nil nil (write-char it out))
86 (or #\_ (satisfies alphanumericp)))))))
87
88 ;; Quotes introduce a literal.
 ;; The contents are made of backslash-then-any-character pairs and
 ;; of characters other than the opening quote, and the closing quote
 ;; must be the same character as the opening one.  Presumably only
 ;; the character after a backslash reaches the output -- TODO confirm
 ;; what value the AND parser yields.
89 ((seq ((quote (or #\" #\'))
90 (contents (many (out (make-string-output-stream)
91 (progn (write-char it out) out)
92 :final (get-output-stream-string out))
93 (or (and #\\ :any) (not quote))))
94 (nil (char quote)))
 ;; A double-quoted literal yields its contents as a string.  A
 ;; single-quoted literal yields one character: an empty literal
 ;; is reported and replaced by a question mark, and an over-long
 ;; literal is reported and truncated to its first character.
95 (ecase quote
96 (#\" contents)
97 (#\' (case (length contents)
98 (1 (char contents 0))
99 (0 (cerror* "Empty character literal") #\?)
100 (t (cerror* "Too many characters in literal")
101 (char contents 0))))))
 ;; The token type follows from the Lisp type of the value built above.
102 (values (etypecase it
103 (character :char)
104 (string :string))
105 it))
106
107 ;; Zero introduces a chosen-radix integer.
 ;; A letter b, o or x in either case selects radix 2, 8 or 16 and
 ;; must be followed by at least one digit, since SCAN-DIGITS defaults
 ;; MIN to 1.  With no radix letter, zero or more octal digits are
 ;; read, so a lone zero is accepted.
108 ((and #\0
109 (or (and (or #\b #\B) (scan-digits :radix 2))
110 (and (or #\o #\O) (scan-digits :radix 8))
111 (and (or #\x #\X) (scan-digits :radix 16))
112 (scan-digits :radix 8 :min 0)))
113 (values :int it))
114
115 ;; Any other digit forces radix-10.
 ;; The result of the first digit match is passed as INIT so that
 ;; the remaining digits accumulate on top of it.
116 ((seq ((d (filter digit-char-p))
117 (i (scan-digits :radix 10 :min 0 :init d)))
118 i)
119 (values :int it))
120
121 ;; Some special punctuation sequences are single tokens.
122 ("..." (values :ellipsis nil))
123
124 ;; Any other character is punctuation.
125 (:any (values it nil))
126
127 ;; End of file means precisely that.
128 (:eof (values :eof nil))
129
130 ;; Report errors and try again. Because we must have consumed some
131 ;; input in order to get here (we've matched both :any and :eof) we
132 ;; must make progress on every call.
133 (t
134 (assert cp)
135 (lexer-error char-scanner exp cp)
136 (scanner-token scanner)))))))
dea4d055
MW
137
138;;;----- That's all, folks --------------------------------------------------