| 1 | %%% -*-latex-*- |
| 2 | %%% |
| 3 | %%% Description of the parsing machinery |
| 4 | %%% |
| 5 | %%% (c) 2015 Straylight/Edgeware |
| 6 | %%% |
| 7 | |
| 8 | %%%----- Licensing notice --------------------------------------------------- |
| 9 | %%% |
| 10 | %%% This file is part of the Sensible Object Design, an object system for C. |
| 11 | %%% |
| 12 | %%% SOD is free software; you can redistribute it and/or modify |
| 13 | %%% it under the terms of the GNU General Public License as published by |
| 14 | %%% the Free Software Foundation; either version 2 of the License, or |
| 15 | %%% (at your option) any later version. |
| 16 | %%% |
| 17 | %%% SOD is distributed in the hope that it will be useful, |
| 18 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | %%% GNU General Public License for more details. |
| 21 | %%% |
| 22 | %%% You should have received a copy of the GNU General Public License |
| 23 | %%% along with SOD; if not, write to the Free Software Foundation, |
| 24 | %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | \chapter{Parsing} \label{ch:parsing} |
| 27 | |
| 28 | %%%-------------------------------------------------------------------------- |
| 29 | \section{The parser protocol} \label{sec:parsing.proto} |
| 30 | |
| 31 | For the purpose of Sod's parsing library, \emph{parsing} is the process of |
| 32 | reading a sequence of input items, in order, and computing an output value. |
| 33 | |
| 34 | A \emph{parser} is an expression which consumes zero or more input items and |
| 35 | returns three values: a \emph{result}, a \emph{success flag}, and a |
| 36 | \emph{consumed flag}. The two flags are (generalized) booleans. If the |
| 37 | success flag is non-nil, then the parser is said to have \emph{succeeded}, |
| 38 | and the result is the parser's output. If the success flag is nil then the |
| 39 | parser is said to have \emph{failed}, and the result is a list of |
| 40 | \emph{indicators}. Finally, the consumed flag is non-nil if the parser |
| 41 | consumed any input items. |
| 42 | |
| 43 | \begin{describe}{fun}{combine-parser-failures @<failures> @> @<list>} |
| 44 | \end{describe} |
| 45 | |
| 46 | \begin{describe}{fun}{parse-empty \&optional @<value> @> @<function>} |
| 47 | \end{describe} |
| 48 | |
| 49 | \begin{describe}{fun} |
| 50 | {parse-fail @<indicator> \&optional @<consumedp> @> @<function>} |
| 51 | \end{describe} |
| 52 | |
| 53 | %%%-------------------------------------------------------------------------- |
| 54 | \section{Streams} \label{sec:parsing.streams} |
| 55 | |
| 56 | \begin{describe*} |
| 57 | {\dhead{cls}{position-aware-stream \&key :file :line :column} |
| 58 | \dhead{gf}{position-aware-stream-file @<stream> @> @<pathname>} |
| 59 | \dhead{gf}{setf (position-aware-stream-file @<stream>) @<pathname>} |
| 60 | \dhead{gf}{position-aware-stream-line @<stream> @> @<fixnum>} |
| 61 | \dhead{gf}{setf (position-aware-stream-line @<stream>) @<fixnum>} |
| 62 | \dhead{gf}{position-aware-stream-column @<stream> @> @<fixnum>} |
| 63 | \dhead{gf}{setf (position-aware-stream-column @<stream>) @<fixnum>}} |
| 64 | \end{describe*} |
| 65 | |
| 66 | \begin{describe*} |
| 67 | {\dhead{cls}{position-aware-input-stream \&key :file :line :column} |
| 68 | \dhead{cls}{position-aware-output-stream \&key :file :line :column}} |
| 69 | \end{describe*} |
| 70 | |
| 71 | \begin{describe}{gf}{stream-pathname @<stream> @> @<pathname-or-nil>} |
| 72 | \begin{describe}{meth}{stream} |
| 73 | {stream-pathname (@<stream> stream) @> nil} |
| 74 | \end{describe} |
| 75 | \begin{describe}{meth}{file-stream} |
| 76 | {stream-pathname (@<stream> file-stream) @> @<pathname>} |
| 77 | \end{describe} |
| 78 | \begin{describe}{meth}{position-aware-stream} |
| 79 | {stream-pathname (@<stream> position-aware-stream) @> @<pathname>} |
| 80 | \end{describe} |
| 81 | \end{describe} |
| 82 | |
| 83 | \begin{describe}{gf}{stream-line-and-column @<stream> @> @<line> @<column>} |
| 84 | \begin{describe}{meth}{stream} |
| 85 | {stream-line-and-column (@<stream> stream) @> nil nil} |
| 86 | \end{describe} |
| 87 | \begin{describe}{meth}{position-aware-stream} |
| 88 | {stream-line-and-column (@<stream> position-aware-stream) |
| 89 | \nlret @<line> @<column>} |
| 90 | \end{describe} |
| 91 | \end{describe} |
| 92 | |
| 93 | %%%-------------------------------------------------------------------------- |
| 94 | \section{File locations} \label{sec:parsing.floc} |
| 95 | |
| 96 | \begin{describe}{cls}{file-location} |
| 97 | \end{describe} |
| 98 | |
| 99 | \begin{describe}{fun}{file-location-p @<object> @> @<generalized-boolean>} |
| 100 | \end{describe} |
| 101 | |
| 102 | \begin{describe}{fun} |
| 103 | {make-file-location @<filename> \&optional @<line> @<column> |
| 104 | @> @<file-location>} |
| 105 | \end{describe} |
| 106 | |
| 107 | \begin{describe*} |
| 108 | {\dhead{fun}{file-location-filename @<floc> @> @<string-or-nil>} |
| 109 | \dhead{fun}{file-location-line @<floc> @> @<fixnum-or-nil>} |
| 110 | \dhead{fun}{file-location-column @<floc> @> @<fixnum-or-nil>}} |
| 111 | \end{describe*} |
| 112 | |
| 113 | \begin{describe}{gf}{file-location @<object> @> @<floc>} |
| 114 | \begin{describe}{meth}{file-location} |
| 115 | {file-location (@<floc> file-location) @> @<floc>} |
| 116 | \end{describe} |
| 117 | \begin{describe}{meth}{stream} |
| 118 | {file-location (@<stream> stream) @> @<floc>} |
| 119 | \end{describe} |
| 120 | \begin{describe}{meth}{t} |
| 121 | {file-location (@<any> t) @> @<floc>} |
| 122 | \end{describe} |
| 123 | \end{describe} |
| 124 | |
| 125 | \begin{describe}{cls}{condition-with-location (condition) \&key :location} |
| 126 | \end{describe} |
| 127 | |
| 128 | \begin{describe}{meth}{condition-with-location} |
| 129 | {file-location (@<condition> condition-with-location) @> @<floc>} |
| 130 | \end{describe} |
| 131 | |
| 132 | \begin{describe*} |
| 133 | {\quad\=\quad\=\kill |
| 134 | \dhead{cls} |
| 135 | {error-with-location (condition-with-location error) \\ \> |
| 136 | \&key :location} |
| 137 | \dhead{cls} |
| 138 | {warning-with-location (condition-with-location warning) \\ \> |
| 139 | \&key :location} |
| 140 | \dhead{cls} |
| 141 | {information-with-location (condition-with-location information) \\ \> |
| 142 | \&key :location} |
| 143 | \dhead{cls} |
| 144 | {enclosing-condition-with-location (condition) \\ \> |
| 145 | \&key :location} |
| 146 | \dhead{cls} |
| 147 | {enclosing-error-with-location |
| 148 | (enclosing-condition-with-location error) \\ \> |
| 149 | \&key :condition :location} |
| 150 | \dhead{cls} |
| 151 | {enclosing-warning-with-location |
| 152 | (enclosing-condition-with-location warning) \\ \> |
| 153 | \&key :condition :location} |
| 154 | \dhead{cls} |
| 155 | {enclosing-information-with-location |
| 156 | (enclosing-condition-with-location information) \\ \> |
| 157 | \&key :condition :location} |
| 158 | \dhead{cls} |
| 159 | {simple-condition-with-location |
| 160 | (condition-with-location simple-condition) \\ \> |
| 161 | \&key :format-control :format-arguments :location} |
| 162 | \dhead{cls} |
| 163 | {simple-error-with-location |
| 164 | (error-with-location simple-error) \\ \> |
| 165 | \&key :format-control :format-arguments :location} |
| 166 | \dhead{cls} |
| 167 | {simple-warning-with-location |
| 168 | (warning-with-location simple-warning) \\ \> |
| 169 | \&key :format-control :format-arguments :location} |
| 170 | \dhead{cls} |
| 171 | {simple-information-with-location |
| 172 | (information-with-location simple-information) \\ \> |
| 173 | \&key :format-control :format-arguments :location}} |
| 174 | \end{describe*} |
| 175 | |
| 176 | \begin{describe}{gf} |
| 177 | {enclosing-condition-with-location-type @<condition> @> @<symbol>} |
| 178 | \end{describe} |
| 179 | |
| 180 | \begin{describe}{fun} |
| 181 | {make-condition-with-location @<default-type> @<floc> |
| 182 | @<datum> \&rest @<arguments> |
| 183 | \nlret @<condition-with-location>} |
| 184 | \end{describe} |
| 185 | |
| 186 | \begin{describe*} |
| 187 | {\dhead{fun}{error-with-location @<floc> @<datum> \&rest @<arguments>} |
| 188 | \dhead{fun}{cerror-with-location @<floc> @<continue-string> |
| 189 | @<datum> \&rest @<arguments>} |
| 190 | \dhead{fun}{cerror*-with-location @<floc> @<datum> \&rest @<arguments>} |
| 191 | \dhead{fun}{warn-with-location @<floc> @<datum> \&rest @<arguments>} |
| 192 | \dhead{fun}{info-with-location @<floc> @<datum> \&rest @<arguments>}} |
| 193 | \end{describe*} |
| 194 | |
| 195 | \begin{describe*} |
| 196 | {\dhead{cls}{parser-error (error) \\ \ind |
| 197 | \&key :expected :found \-} |
| 198 | \dhead{gf}{parser-error-expected @<condition> @> @<list>} |
| 199 | \dhead{gf}{parser-error-found @<condition> @> @<value>}} |
| 200 | \end{describe*} |
| 201 | |
| 202 | \begin{describe}{fun} |
| 203 | {report-parser-error @<error> @<stream> @<show-expected> @<show-found>} |
| 204 | \end{describe} |
| 205 | |
| 206 | \begin{describe*} |
| 207 | {\quad\=\kill |
| 208 | \dhead{cls}{base-lexer-error (error-with-location) \&key :location} |
| 209 | \dhead{cls}{simple-lexer-error |
| 210 | (base-lexer-error simple-error-with-location) \\\> |
| 211 | \&key :format-control :format-arguments :location} |
| 212 | \dhead{cls}{base-syntax-error (error-with-location) \&key :location} |
| 213 | \dhead{cls}{simple-syntax-error |
| 214 | (base-syntax-error simple-error-with-location) \\\> |
| 215 | \&key :format-control :format-arguments :location}} |
| 216 | \end{describe*} |
| 217 | |
| 218 | \begin{describe}{mac} |
| 219 | {with-default-error-location (@<floc>) @<declaration>^* @<form>^* |
| 220 | @> @<value>^*} |
| 221 | \end{describe} |
| 222 | |
| 223 | \begin{describe}{gf}{classify-condition @<condition> @> @<string>} |
| 224 | \begin{describe*} |
| 225 | {\dhead{meth}{error} |
| 226 | {classify-condition (@<condition> error) @> @<string>} |
| 227 | \dhead{meth}{warning} |
| 228 | {classify-condition (@<condition> warning) @> @<string>} |
| 229 | \dhead{meth}{information} |
| 230 | {classify-condition (@<condition> information) |
| 231 | @> @<string>} |
| 232 | \dhead{meth}{base-lexer-error} |
| 233 | {classify-condition (@<condition> base-lexer-error) |
| 234 | @> @<string>} |
| 235 | \dhead{meth}{base-syntax-error} |
| 236 | {classify-condition (@<condition> base-syntax-error) |
| 237 | @> @<string>}} |
| 238 | \end{describe*} |
| 239 | \end{describe} |
| 240 | |
| 241 | \begin{describe}{mac} |
| 242 | {count-and-report-errors () @<declaration>^* @<form>^* |
| 243 | @> @<value> @<n-errors> @<n-warnings>} |
| 244 | \end{describe} |
| 245 | |
| 246 | %%%-------------------------------------------------------------------------- |
| 247 | \section{Scanners} \label{sec:parsing.scanner} |
| 248 | |
| 249 | A \emph{scanner} is an object which keeps track of a parser's progress as it |
| 250 | works through its input. There's no common base class for scanners: a |
| 251 | scanner is simply any object which implements the scanner protocol described |
| 252 | here. |
| 253 | |
| 254 | A scanner maintains a sequence of items to read. It can step forwards |
| 255 | through the items, one at a time, until it reaches the end (if, indeed, the |
| 256 | sequence is finite, which it needn't be). Until that point, there is a |
| 257 | current item, though there's no protocol for accessing it at this level |
| 258 | because the nature of the items is left unspecified. |
| 259 | |
| 260 | Some scanners support an additional \emph{place-capture} protocol which |
| 261 | allows rewinding the scanner to an earlier point in the input so that it can |
| 262 | be scanned again. |
| 263 | |
| 264 | |
| 265 | \subsection{Basic scanner protocol} \label{sec:parsing.scanner.basic} |
| 266 | |
| 267 | The basic protocol supports stepping the scanner forward through its input |
| 268 | sequence, and detecting the end of the sequence. |
| 269 | |
| 270 | \begin{describe}{gf}{scanner-step @<scanner>} |
| 271 | Advance the @<scanner> to the next item, which becomes current. |
| 272 | |
| 273 | It is an error to step the scanner if the scanner is at end-of-file. |
| 274 | \end{describe} |
| 275 | |
| 276 | \begin{describe}{gf}{scanner-at-eof-p @<scanner> @> @<generalized-boolean>} |
| 277 | Return non-nil if the scanner is at end-of-file, i.e., there are no more |
| 278 | items to read. |
| 279 | |
| 280 | If nil is returned, there is a current item, and it is safe to step the |
| 281 | scanner again; otherwise, it is an error to query the current item or to |
| 282 | step the scanner. |
| 283 | \end{describe} |
| 284 | |
| 285 | |
| 286 | \subsection{Place-capture scanner protocol} \label{sec:parsing.scanner.place} |
| 287 | |
| 288 | The place-capture protocol allows rewinding to an earlier point in the |
| 289 | sequence. Not all scanners support the place-capture protocol. |
| 290 | |
| 291 | To rewind a scanner to a particular point, that point must be \emph{captured} |
| 292 | as a \emph{place} when it's current -- so you must know in advance that this |
| 293 | is an interesting place that's worth capturing. The type of place returned |
| 294 | depends on the type of scanner. Given a captured place, the scanner can be |
| 295 | rewound to the position held in it. |
| 296 | |
| 297 | Depending on how the scanner works, holding onto a captured place might |
| 298 | consume a lot of memory or cause poor performance. For example, if the |
| 299 | scanner is reading from an input stream, having a captured place means that |
| 300 | data from that point on must be buffered in case the program needs to rewind |
| 301 | the scanner and read that data again. Therefore it's possible to |
| 302 | \emph{release} a place when it turns out not to be needed any more. |
| 303 | |
| 304 | \begin{describe}{gf}{scanner-capture-place @<scanner> @> @<place>} |
| 305 | Capture the @<scanner>'s current position as a place, and return the place. |
| 306 | \end{describe} |
| 307 | |
| 308 | \begin{describe}{gf}{scanner-restore-place @<scanner> @<place>} |
| 309 | Rewind the @<scanner> to the state it was in when @<place> was captured. |
| 310 | In particular, the item that was current when the @<place> was captured |
| 311 | becomes current again. |
| 312 | |
| 313 | It is an error to restore a @<place> that has been released, or if the |
| 314 | @<place> wasn't captured from the @<scanner>. |
| 315 | \end{describe} |
| 316 | |
| 317 | \begin{describe}{gf}{scanner-release-place @<scanner> @<place>} |
| 318 | Release the @<place>, to avoid having to maintain the ability to restore |
| 319 | it after it's not needed any more. |
| 320 | |
| 321 | It is an error if the @<place> wasn't captured from the @<scanner>. |
| 322 | \end{describe} |
| 323 | |
| 324 | \begin{describe}{mac} |
| 325 | {with-scanner-place (@<place> @<scanner>) @<declaration>^* @<form>^* |
| 326 | @> @<value>^*} |
| 327 | Capture the @<scanner>'s current position as a place, and evaluate the @<form>s |
| 328 | as an implicit progn with the variable @<place> bound to the captured |
| 329 | place. When control leaves the @<form>s, the place is released. The |
| 330 | return values are the values of the final @<form>. |
| 331 | \end{describe} |
| 332 | |
| 333 | |
| 334 | \subsection{Scanner file-location protocol} \label{sec:parsing.scanner.floc} |
| 335 | |
| 336 | Some scanners participate in the file-location protocol |
| 337 | (\xref{sec:parsing.floc}). They implement a method on @|file-location| which |
| 338 | collects the necessary information using scanner-specific functions described |
| 339 | here. |
| 340 | |
| 341 | \begin{describe}{fun}{scanner-file-location @<scanner> @> @<file-location>} |
| 342 | Return a @|file-location| object describing the current position of the |
| 343 | @<scanner>. |
| 344 | |
| 345 | This calls the @|scanner-filename|, @|scanner-line| and @|scanner-column| |
| 346 | generic functions on the scanner, and uses these to fill in an appropriate |
| 347 | @|file-location|. |
| 348 | |
| 349 | Since there are default methods on these generic functions, it is not an |
| 350 | error to call @|scanner-file-location| on any kind of value, but it might |
| 351 | not be very useful. This function exists to do the work of appropriately |
| 352 | specialized methods on @|file-location|. |
| 353 | \end{describe} |
| 354 | |
| 355 | \begin{describe*} |
| 356 | {\dhead{gf}{scanner-filename @<scanner> @> @<string>} |
| 357 | \dhead{gf}{scanner-line @<scanner> @> @<integer>} |
| 358 | \dhead{gf}{scanner-column @<scanner> @> @<integer>}} |
| 359 | Return the filename, line and column components of the @<scanner>'s current |
| 360 | position, for use in assembling a @<file-location>: see the |
| 361 | @|scanner-file-location| function. |
| 362 | |
| 363 | There are default methods on all three generic functions which simply |
| 364 | return nil. |
| 365 | \end{describe*} |
| 366 | |
| 367 | |
| 368 | \subsection{Character scanners} \label{sec:parsing.scanner.char} |
| 369 | |
| 370 | Character scanners are scanners which read sequences of characters. |
| 371 | |
| 372 | \begin{describe}{cls}{character-scanner () \&key} |
| 373 | Base class for character scanners. This provides some very basic |
| 374 | functionality. |
| 375 | |
| 376 | Not all character scanners are subclasses of @|character-scanner|. |
| 377 | \end{describe} |
| 378 | |
| 379 | \begin{describe}{gf}{scanner-current-char @<scanner> @> @<character>} |
| 380 | Returns the current character. |
| 381 | \end{describe} |
| 382 | |
| 383 | \begin{describe}{gf}{scanner-unread @<scanner> @<character>} |
| 384 | Rewind the @<scanner> by one step. The @<character> must be the previous |
| 385 | current character, and becomes the current character again. It is an error |
| 386 | if: the @<scanner> has reached end-of-file; the @<scanner> has never been |
| 387 | stepped; or @<character> was not the previous current character. |
| 388 | \end{describe} |
| 389 | |
| 390 | \begin{describe}{gf} |
| 391 | {scanner-interval @<scanner> @<place-a> \&optional @<place-b> |
| 392 | @> @<string>} |
| 393 | Return the characters in the @<scanner>'s input from @<place-a> up to (but |
| 394 | not including) @<place-b>. |
| 395 | |
| 396 | The characters are returned as a string. If @<place-b> is omitted, return |
| 397 | the characters up to (but not including) the current position. It is an |
| 398 | error if @<place-b> precedes @<place-a> or they are from different |
| 399 | scanners. |
| 400 | |
| 401 | This function is a character-scanner-specific extension to the |
| 402 | place-capture protocol; not all character scanners implement the |
| 403 | place-capture protocol, and some that do may not implement this function. |
| 404 | \end{describe} |
| 405 | |
| 406 | \subsubsection{Stream access to character scanners} |
| 407 | Sometimes it can be useful to apply the standard Lisp character input |
| 408 | operations to the sequence of characters held by a character scanner. |
| 409 | |
| 410 | \begin{describe}{gf}{make-scanner-stream @<scanner> @> @<stream>} |
| 411 | Returns a fresh input @|stream| object which fetches input characters from |
| 412 | the character scanner object @<scanner>. Reading characters from the |
| 413 | stream steps the scanner. The stream will reach end-of-file when the |
| 414 | scanner reports end-of-file. If the scanner implements the file-location |
| 415 | protocol then reading from the stream will change the file location in an |
| 416 | appropriate manner. |
| 417 | |
| 418 | This is mostly useful for applying standard Lisp stream functions, most |
| 419 | particularly the @|read| function, in the middle of a parsing operation. |
| 420 | \end{describe} |
| 421 | |
| 422 | \begin{describe}{cls}{character-scanner-stream (stream) \&key :scanner} |
| 423 | A Common Lisp input @|stream| object which works using the character |
| 424 | scanner protocol. Any @<scanner> which implements the basic scanner and |
| 425 | character scanner protocols is suitable. See @|make-scanner-stream|. |
| 426 | \end{describe} |
| 427 | |
| 428 | |
| 429 | \subsection{String scanners} \label{sec:parsing.scanner.string} |
| 430 | |
| 431 | A \emph{string scanner} is a simple kind of character scanner which reads |
| 432 | input from a string object. String scanners implement the character scanner |
| 433 | and place-capture protocols. |
| 434 | |
| 435 | \begin{describe}{cls}{string-scanner} |
| 436 | The class of string scanners. The @|string-scanner| class is not a |
| 437 | subclass of @|character-scanner|. |
| 438 | \end{describe} |
| 439 | |
| 440 | \begin{describe}{fun}{string-scanner-p @<value> @> @<generalized-boolean>} |
| 441 | Return non-nil if @<value> is a @|string-scanner| object; otherwise return |
| 442 | nil. |
| 443 | \end{describe} |
| 444 | |
| 445 | \begin{describe}{fun} |
| 446 | {make-string-scanner @<string> \&key :start :end @> @<string-scanner>} |
| 447 | Construct and return a fresh @|string-scanner| object. The new scanner |
| 448 | will read characters from @<string>, starting at index @<start> (which |
| 449 | defaults to zero), and continuing until it reaches index @<end> (defaults |
| 450 | to the end of the @<string>). |
| 451 | \end{describe} |
| 452 | |
| 453 | |
| 454 | \subsection{Character buffer scanners} \label{sec:parsing.scanner.charbuf} |
| 455 | |
| 456 | A \emph{character buffer scanner}, or \emph{charbuf scanner} for short, is an |
| 457 | efficient scanner for reading characters from an input stream. Charbuf |
| 458 | scanners implement the basic scanner, character buffer, place-capture, and |
| 459 | file-location protocols. |
| 460 | |
| 461 | \begin{describe}{cls} |
| 462 | {charbuf-scanner (character-scanner) |
| 463 | \&key :stream :filename :line :column} |
| 464 | The class of charbuf scanners. The scanner will read characters from |
| 465 | @<stream>. Charbuf scanners implement the file-location protocol: the |
| 466 | initial location is set from the given @<filename>, @<line> and @<column>; |
| 467 | the scanner will update the location as it reads its input. |
| 468 | \end{describe} |
| 469 | |
| 470 | \begin{describe}{cls} |
| 471 | {charbuf-scanner-stream (character-scanner-stream) \&key :scanner} |
| 472 | \end{describe} |
| 473 | |
| 474 | \begin{describe}{cls}{charbuf-scanner-place} |
| 475 | The class of place objects captured by a charbuf scanner. |
| 476 | \end{describe} |
| 477 | |
| 478 | \begin{describe}{fun} |
| 479 | {charbuf-scanner-place-p @<value> @> @<generalized-boolean>} |
| 480 | Type predicate for charbuf scanner places: returns non-nil if @<value> is a |
| 481 | place captured by a charbuf scanner, and nil otherwise. |
| 482 | \end{describe} |
| 483 | |
| 484 | \begin{describe}{gf} |
| 485 | {charbuf-scanner-map @<scanner> @<func> \&optional @<fail> |
| 486 | \nlret @<result> @<success-flag> @<consumed-flag>} |
| 487 | Read characters from the @<scanner>'s buffers. |
| 488 | |
| 489 | This is intended to be an efficient and versatile interface for reading |
| 490 | characters from a scanner in bulk. The function @<func> is invoked |
| 491 | repeatedly, as if by |
| 492 | \begin{prog} |
| 493 | (multiple-value-bind (@<donep> @<used>) \\ \ind\ind |
| 494 | (funcall @<func> @<buf> @<start> @<end>) \-\\ |
| 495 | \textrm\ldots) |
| 496 | \end{prog} |
| 497 | The argument @<buf> is a simple string; @<start> and @<end> are two |
| 498 | nonnegative fixnums, indicating that the subsequence of @<buf> between |
| 499 | @<start> (inclusive) and @<end> (exclusive) should be processed. If |
| 500 | @<func>'s return value @<donep> is nil then @<used> is ignored: the |
| 501 | function has consumed the entire buffer and wishes to read more. If |
| 502 | @<donep> is non-nil, then @<used> must be a fixnum such that $@<start> \le |
| 503 | @<used> \le @<end>$: the function has consumed the buffer as far as @<used> |
| 504 | (exclusive) and has completed successfully. |
| 505 | |
| 506 | If end-of-file is encountered before @<func> completes successfully then it |
| 507 | fails: the @<fail> function is called with no arguments, and is expected to |
| 508 | return two values. If omitted, @<fail> defaults to |
| 509 | \begin{prog} |
| 510 | (lambda () \\ \ind |
| 511 | (values nil nil)) |
| 512 | \end{prog} |
| 513 | |
| 514 | The @|charbuf-scanner-map| function returns three values. The first value |
| 515 | is the non-nil @<donep> value returned by @<func> if @|charbuf-scanner-map| |
| 516 | succeeded, or the first value returned by @<fail>; the second value is @|t| |
| 517 | on success, or the second value returned by @<fail>; the third value is |
| 518 | non-nil if @<func> consumed any input, i.e., it returned with @<donep> nil |
| 519 | at least once, or with $@<used> > @<start>$. |
| 520 | \end{describe} |
| 521 | |
| 522 | |
| 523 | \subsection{Token scanners} \label{sec:parsing.scanner.token} |
| 524 | |
| 525 | \begin{describe}{cls} |
| 526 | {token-scanner () \&key :filename (:line 1) (:column 0)} |
| 527 | \end{describe} |
| 528 | |
| 529 | \begin{describe*} |
| 530 | {\dhead{gf}{setf (scanner-line @<scanner>) @<fixnum>} |
| 531 | \dhead{gf}{setf (scanner-column @<scanner>) @<fixnum>}} |
| 532 | \end{describe*} |
| 533 | |
| 534 | \begin{describe}{gf}{token-type @<scanner> @> @<type>} |
| 535 | \end{describe} |
| 536 | |
| 537 | \begin{describe}{gf}{token-value @<scanner> @> @<value>} |
| 538 | \end{describe} |
| 539 | |
| 540 | \begin{describe}{gf}{scanner-token @<scanner> @> @<type> @<value>} |
| 541 | \end{describe} |
| 542 | |
| 543 | \begin{describe}{ty}{token-scanner-place} |
| 544 | \end{describe} |
| 545 | |
| 546 | \begin{describe}{fun} |
| 547 | {token-scanner-place-p @<value> @> @<generalized-boolean>} |
| 548 | \end{describe} |
| 549 | |
| 550 | |
| 551 | \subsection{List scanners} |
| 552 | |
| 553 | \begin{describe}{ty}{list-scanner} |
| 554 | \end{describe} |
| 555 | |
| 556 | \begin{describe}{fun}{list-scanner-p @<value> @> @<generalized-boolean>} |
| 557 | \end{describe} |
| 558 | |
| 559 | \begin{describe}{fun}{make-list-scanner @<list> @> @<list-scanner>} |
| 560 | \end{describe} |
| 561 | |
| 562 | %%%-------------------------------------------------------------------------- |
| 563 | \section{Parser contexts and parser syntax} \label{sec:parsing.syntax} |
| 564 | |
| 565 | |
| 566 | \subsection{Parser contexts} \label{sec:parsing.syntax.contexts} |
| 567 | |
| 568 | \begin{describe}{mac} |
| 569 | {with-parser-context |
| 570 | (@<context-class> @{ @<init-keyword> @<value> @}^*) \\ \ind |
| 571 | @<declaration>^* \\ |
| 572 | @<form>^* |
| 573 | \-\nlret @<value>^*} |
| 574 | \end{describe} |
| 575 | |
| 576 | \begin{describe}{gf}{expand-parser-spec @<context> @<spec> @> @<form>} |
| 577 | \end{describe} |
| 578 | |
| 579 | \begin{describe}{gf} |
| 580 | {expand-parser-form @<context> @<head> @<tail> @> @<form>} |
| 581 | \end{describe} |
| 582 | |
| 583 | \begin{describe}{gf}{wrap-parser @<context> @<form> @> @<wrapped-form>} |
| 584 | \end{describe} |
| 585 | |
| 586 | \begin{describe}{mac} |
| 587 | {defparse @<name> (@[[ :context (@<var> @<context-class>) @]] |
| 588 | @<destructuring-lambda-list-item>^*) \\ \ind |
| 589 | @[[ @<declaration>^* @! @<doc-string> @]] \\ |
| 590 | @<form>^* |
| 591 | \-\nlret @<name>} |
| 592 | \end{describe} |
| 593 | |
| 594 | \begin{describe}{lmac} |
| 595 | {parse @<parser> @> @<result> @<success-flag> @<consumed-flag>} |
| 596 | \end{describe} |
| 597 | |
| 598 | \begin{describe}{mac} |
| 599 | {parser @<lambda-list> |
| 600 | @[[ @<declaration>^* @! @<doc-string> @]] |
| 601 | @<parser> |
| 602 | @> @<function>} |
| 603 | \end{describe} |
| 604 | |
| 605 | \begin{describe}{gf}{parser-at-eof-p @<context> @> @<form>} |
| 606 | \end{describe} |
| 607 | |
| 608 | \begin{describe}{gf}{parser-step @<context> @> @<form>} |
| 609 | \end{describe} |
| 610 | |
| 611 | \begin{describe}{mac} |
| 612 | {if-parse (@[[ \=:result @<result-var> @! |
| 613 | :expected @<expected-var> @! \+\\ |
| 614 | :consumedp @<consumed-var> @]]) \-\\ \ind\ind |
| 615 | @<parser> \-\\ |
| 616 | @<consequent> \\ |
| 617 | @[@<alternative>@] |
| 618 | \-\nlret @<value>^*} |
| 619 | \end{describe} |
| 620 | |
| 621 | \begin{describe}{mac} |
| 622 | {when-parse (@[@<result-var>@]) @<parser> \\ \ind |
| 623 | @<form>^* |
| 624 | \-\nlret @<value>^*} |
| 625 | \end{describe} |
| 626 | |
| 627 | \begin{describe}{mac} |
| 628 | {cond-parse (@[[ \=:result @<result-var> @! |
| 629 | :expected @<expected-var> @! \+\\ |
| 630 | :consumedp @<consumed-var> @]]) \-\\ \ind |
| 631 | @{ (@<parser> @<form>^*) @}^* |
| 632 | \-\nlret @<value>^*} |
| 633 | \end{describe} |
| 634 | |
| 635 | \begin{describe}{cls}{list-parser () \&key :var} |
| 636 | \end{describe} |
| 637 | |
| 638 | |
| 639 | \subsection{Basic parser syntax} \label{sec:parsing.syntax.basic} |
| 640 | |
| 641 | \begin{describe}{parse}{:eof} |
| 642 | \end{describe} |
| 643 | |
| 644 | \begin{describe}{parseform}{lisp @<form>^*} |
| 645 | \end{describe} |
| 646 | |
| 647 | \begin{describe}{parseform}{label @<parser>} |
| 648 | \end{describe} |
| 649 | |
| 650 | \begin{describe}{parse}{t} |
| 651 | \end{describe} |
| 652 | |
| 653 | \begin{describe}{parseform}{t @<value>} |
| 654 | \end{describe} |
| 655 | |
| 656 | \begin{describe}{parse}{nil} |
| 657 | \end{describe} |
| 658 | |
| 659 | \begin{describe}{parseform}{nil @<indicator>} |
| 660 | \end{describe} |
| 661 | |
| 662 | \begin{describe}{parseform}{when @<cond> @<parser>} |
| 663 | \end{describe} |
| 664 | |
| 665 | \begin{describe}{parseform} |
| 666 | {seq (@{ @<atomic-parser-spec> @! |
| 667 | (@[@<var>@] @<parser>) @}^*) \\ \ind |
| 668 | @<form>^*} |
| 669 | \end{describe} |
| 670 | |
| 671 | \begin{describe}{parseform}{and @<parser>^*} |
| 672 | \end{describe} |
| 673 | |
| 674 | \begin{describe}{parseform}{or @<parser>^*} |
| 675 | \end{describe} |
| 676 | |
| 677 | \begin{describe}{parseform}{? @<parser> @[@<default>@]} |
| 678 | \end{describe} |
| 679 | |
| 680 | \begin{describe}{parseform} |
| 681 | {many (\=@<accumulator-var> @<init-form> @<update-form> \+\\ |
| 682 | @[[ \=:new @<new-var> @! :final @<final-form> @! \+\\ |
| 683 | :min @<minimum> @! :max @<maximum> @! \\ |
| 684 | :commitp @<commitp> @]]) \-\-\\ \ind |
| 685 | @<item-parser> @[@<sep-parser>@]} |
| 686 | \end{describe} |
| 687 | |
| 688 | \begin{describe}{parseform} |
| 689 | {list (@[[ :min @<minimum> @! :max @<maximum> @! |
| 690 | :commitp @<commitp> @]]) \\ \ind |
| 691 | @<item-parser> @[@<sep-parser>@]} |
| 692 | \end{describe} |
| 693 | |
| 694 | \begin{describe}{parseform} |
| 695 | {skip-many (@[[ :min @<minimum> @! :max @<maximum> @! |
| 696 | :commitp @<commitp> @]]) \\ \ind |
| 697 | @<item-parser> @[@<sep-parser>@]} |
| 698 | \end{describe} |
| 699 | |
| 700 | \begin{describe}{fun}{call-pluggable-parser @<symbol> \&rest @<args>} |
| 701 | \end{describe} |
| 702 | |
| 703 | \begin{describe}{parseform}{plug @<symbol> @<arg>^*} |
| 704 | \end{describe} |
| 705 | |
| 706 | \begin{describe}{fun} |
| 707 | {pluggable-parser-add @<symbol> @<tag> @<parser-function>} |
| 708 | \end{describe} |
| 709 | |
| 710 | \begin{describe}{mac} |
| 711 | {define-pluggable-parser @<symbol> @<tag> @<lambda-list> |
| 712 | @[[ @<declaration>^* @! @<doc-string> @]] |
| 713 | @<form>^*} |
| 714 | \end{describe} |
| 715 | |
| 716 | |
| 717 | \subsection{Place-capture protocol} \label{sec:parsing.syntax.place} |
| 718 | |
| 719 | \begin{describe}{gf}{parser-capture-place @<context> @> @<form>} |
| 720 | \end{describe} |
| 721 | |
| 722 | \begin{describe}{gf}{parser-restore-place @<context> @<place> @> @<form>} |
| 723 | \end{describe} |
| 724 | |
| 725 | \begin{describe}{gf}{parser-release-place @<context> @<place> @> @<form>} |
| 726 | \end{describe} |
| 727 | |
| 728 | \begin{describe}{gf} |
| 729 | {parser-places-must-be-released-p @<context> @> @<generalized-boolean>} |
| 730 | \end{describe} |
| 731 | |
| 732 | \begin{describe}{mac} |
| 733 | {with-parser-place (@<place-var> @<context>) |
| 734 | @[[ @<declaration>^* @! @<doc-string> @]] |
| 735 | @<form>^*} |
| 736 | \end{describe} |
| 737 | |
| 738 | \begin{describe}{parseform}{peek @<parser>} |
| 739 | \end{describe} |
| 740 | |
| 741 | \begin{describe}{parseform}{commit} |
| 742 | \end{describe} |
| 743 | |
| 744 | |
| 745 | \subsection{Character parsers} \label{sec:parsing.syntax.character} |
| 746 | |
| 747 | \begin{describe}{cls}{character-parser-context () \&key} |
| 748 | \end{describe} |
| 749 | |
| 750 | \begin{describe}{gf}{parser-current-char @<context> @> @<form>} |
| 751 | \end{describe} |
| 752 | |
| 753 | \begin{describe}{cls} |
| 754 | {string-parser (character-parser-context) \&key :string :index :length} |
| 755 | \end{describe} |
| 756 | |
| 757 | \begin{describe}{parseform} |
| 758 | {if-char (@[@<result-var>@]) @<condition> @<consequent> @<alternative>} |
| 759 | \end{describe} |
| 760 | |
| 761 | \begin{describe}{parseform}{char @<character>} |
| 762 | \end{describe} |
| 763 | |
| 764 | \begin{describe}{parse}[char]{@<character>} |
| 765 | \end{describe} |
| 766 | |
| 767 | \begin{describe}{parse}[string]{@<string>} |
| 768 | \end{describe} |
| 769 | |
| 770 | \begin{describe}{parse}{:any} |
| 771 | \end{describe} |
| 772 | |
| 773 | \begin{describe}{parseform}{satisfies @<predicate>} |
| 774 | \end{describe} |
| 775 | |
| 776 | \begin{describe}{parseform}{not @<character>} |
| 777 | \end{describe} |
| 778 | |
| 779 | \begin{describe}{parseform}{filter @<predicate>} |
| 780 | \end{describe} |
| 781 | |
| 782 | \begin{describe}{parse}{:whitespace} |
| 783 | \end{describe} |
| 784 | |
| 785 | \begin{describe}{cls}{token-parser-context () \&key} |
| 786 | \end{describe} |
| 787 | |
| 788 | \begin{describe}{gf}{parser-token-type @<context> @> @<form>} |
| 789 | \end{describe} |
| 790 | |
| 791 | \begin{describe}{gf}{parser-token-value @<context> @> @<form>} |
| 792 | \end{describe} |
| 793 | |
| 794 | \begin{describe}{parseform}{token @<type> @[@<value>@] @[:peekp @<peek>@]} |
| 795 | \end{describe} |
| 796 | |
| 797 | \begin{describe}{parse}[atom]{@<atom>} |
| 798 | \end{describe} |
| 799 | |
| 800 | |
| 801 | \subsection{Scanner contexts} \label{sec:parsing.syntax.scanner} |
| 802 | |
| 803 | \begin{describe}{cls}{scanner-context () \&key :scanner} |
| 804 | \end{describe} |
| 805 | |
| 806 | \begin{describe}{gf}{parser-scanner @<context> @> @<symbol>} |
| 807 | \end{describe} |
| 808 | |
| 809 | \begin{describe}{cls} |
| 810 | {character-scanner-context (scanner-context character-parser-context) |
| 811 | \&key :scanner} |
| 812 | \end{describe} |
| 813 | |
| 814 | \begin{describe}{cls} |
| 815 | {token-scanner-context (scanner-context token-parser-context) |
| 816 | \&key :scanner} |
| 817 | \end{describe} |
| 818 | |
| 819 | |
| 820 | \subsection{Expression parsing} \label{sec:parsing.syntax.expression} |
| 821 | |
| 822 | \begin{describe}{gf}{operator-push-action @<left> @<right>} |
| 823 | \end{describe} |
| 824 | |
| 825 | \begin{describe}{parseform} |
| 826 | {expr \=(@[[ :nestedp @<nestedp-var> @]]) \+\\ |
| 827 | @<operand-parser> @<binop-parser> |
| 828 | @<preop-parser> @<postop-parser>} |
| 829 | \end{describe} |
| 830 | |
| 831 | \begin{describe}{gf}{operator-left-precedence @<operator> @> @<prec>} |
| 832 | \end{describe} |
| 833 | |
| 834 | \begin{describe}{gf}{operator-right-precedence @<operator> @> @<prec>} |
| 835 | \end{describe} |
| 836 | |
| 837 | \begin{describe}{gf}{operator-associativity @<operator> @> @<assoc>} |
| 838 | \end{describe} |
| 839 | |
| 840 | \begin{describe}{cls}{prefix-operator () \&key} |
| 841 | \end{describe} |
| 842 | |
| 843 | \begin{describe}{cls}{simple-operator () \&key :name :function} |
| 844 | \end{describe} |
| 845 | |
| 846 | \begin{describe}{cls} |
| 847 | {simple-unary-operator (simple-operator) \&key :name :function} |
| 848 | \end{describe} |
| 849 | |
| 850 | \begin{describe*} |
| 851 | {\quad\=\kill |
| 852 | \dhead{cls}{simple-binary-operator (simple-operator) \\ \> |
| 853 | \&key :name :function |
| 854 | :lprec :rprec :associativity} |
| 855 | \dhead{cls}{simple-postfix-operator (simple-unary-operator) \\ \> |
| 856 | \&key :name :function :lprec :rprec} |
| 857 | \dhead{cls}{simple-prefix-operator |
| 858 | (prefix-operator simple-unary-operator) \\ \> |
| 859 | \&key :name :function :rprec}} |
| 860 | \end{describe*} |
| 861 | |
| 862 | \begin{describe*} |
| 863 | {\dhead{mac}{preop @<name> (@<operand-var> @<lprec>) |
| 864 | @<declaration>^* @<form>^* |
| 865 | @> @<prefix-operator>} |
| 866 | \dhead{mac}{postop @<name> |
| 867 | (@<operand-var> @<lprec> @[[ :rprec @<rprec> @]]) |
| 868 | @<declaration>^* @<form>^* |
| 869 | \nlret @<postfix-operator>} |
| 870 | \dhead{mac}{binop @<name> (@<operand-var> @<lprec> @<rprec> @<assoc>) |
| 871 | @<declaration>^* @<form>^* |
| 872 | @> @<binary-operator>}} |
| 873 | \end{describe*} |
| 874 | |
| 875 | \begin{describe*} |
| 876 | {\dhead{cls}{open-parenthesis (parenthesis prefix-operator) \&key :tag} |
| 877 | \dhead{cls}{close-parenthesis (parenthesis) \&key :tag}} |
| 878 | \end{describe*} |
| 879 | |
| 880 | \begin{describe*} |
| 881 | {\dhead{fun}{lparen @<tag> @> @<open-paren>} |
| 882 | \dhead{fun}{rparen @<tag> @> @<close-paren>}} |
| 883 | \end{describe*} |
| 884 | |
| 885 | %%%------------------------------------------------------------------------- |
| 886 | \section{Lexical analyser} \label{sec:parsing.lexical} |
| 887 | |
| 888 | \begin{describe}{cls} |
| 889 | {sod-token-scanner (token-scanner) |
| 890 | \&key :filename (:line 1) (:column 0) :char-scanner} |
| 891 | \end{describe} |
| 892 | |
| 893 | \begin{describe}{fun}{define-indicator @<indicator> @<description>} |
| 894 | \end{describe} |
| 895 | |
| 896 | \begin{describe*} |
| 897 | {\dhead{cls}{lexer-error (parser-error base-lexer-error) \\ \ind |
| 898 | \&key :expected :found :location \-} |
| 899 | \dhead{cls}{syntax-error (parser-error base-syntax-error) \\ \ind |
| 900 | \&key :expected :found :location \-}} |
| 901 | \end{describe*} |
| 902 | |
| 903 | \begin{describe}{fun} |
| 904 | {syntax-error @<scanner> @<expected> \&key :continuep :location} |
| 905 | \end{describe} |
| 906 | |
| 907 | \begin{describe}{fun} |
| 908 | {lexer-error @<char-scanner> @<expected> \&key :location} |
| 909 | \end{describe} |
| 910 | |
| 911 | \begin{describe}{parseform} |
| 912 | {skip-until (@[[ :keep-end @<keep-end-flag> @]]) @<token-type>^*} |
| 913 | \end{describe} |
| 914 | |
| 915 | \begin{describe}{parseform} |
| 916 | {error (@[[ :ignore-unconsumed @<flag> @! |
| 917 | :force-process @<flag> @]]) \\ \ind\ind |
| 918 | @<sub-parser> @<recover-parser> \-\\ |
| 919 | @<declaration>^* \\ |
| 920 | @<form>^*} |
| 921 | \end{describe} |
| 922 | |
| 923 | \begin{describe}{parseform}{must @<sub-parser> @[@<default>@]} |
| 924 | \end{describe} |
| 925 | |
| 926 | \begin{describe}{fun} |
| 927 | {scan-comment @<char-scanner> |
| 928 | @> @<result> @<success-flag> @<consumed-flag>} |
| 929 | \end{describe} |
| 930 | |
| 931 | %%%----- That's all, folks -------------------------------------------------- |
| 932 | |
| 933 | %%% Local variables: |
| 934 | %%% mode: LaTeX |
| 935 | %%% TeX-master: "sod.tex" |
| 936 | %%% TeX-PDF-mode: t |
| 937 | %%% End: |