mdw@git.distorted.org.uk Git - sod/blob - doc/syntax.tex

   1 %%% -*-latex-*-
   2 %%%
   3 %%% Module syntax
   4 %%%
   5 %%% (c) 2015 Straylight/Edgeware
   6 %%%
   7
   8 %%%----- Licensing notice ---------------------------------------------------
   9 %%%
  10 %%% This file is part of the Sensible Object Design, an object system for C.
  11 %%%
  12 %%% SOD is free software; you can redistribute it and/or modify
  13 %%% it under the terms of the GNU General Public License as published by
  14 %%% the Free Software Foundation; either version 2 of the License, or
  15 %%% (at your option) any later version.
  16 %%%
  17 %%% SOD is distributed in the hope that it will be useful,
  18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 %%% GNU General Public License for more details.
  21 %%%
  22 %%% You should have received a copy of the GNU General Public License
  23 %%% along with SOD; if not, write to the Free Software Foundation,
  24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 \chapter{Module syntax} \label{ch:syntax}
  27
  28 %%%--------------------------------------------------------------------------
  29 \section{Lexical syntax} \label{sec:syntax.lex}
  30
  31 Whitespace and comments are discarded.  The remaining characters are
  32 collected into tokens according to the following syntax.
  33
  34 \begin{grammar}
  35 <token> ::= <identifier>
  36 \alt <string-literal>
  37 \alt <char-literal>
  38 \alt <integer-literal>
  39 \alt <punctuation>
  40 \end{grammar}
  41
  42 This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
  43 munch} rule: at each stage we take the longest sequence of characters which
  44 could be a token.
  45
  46
  47 \subsection{Identifiers} \label{sec:syntax.lex.id}
  48
  49 \begin{grammar}
  50 <identifier> ::= <id-start-char> @<id-body-char>^*
  51
  52 <id-start-char> ::= <alpha-char> | "_"
  53
  54 <id-body-char> ::= <id-start-char> @! <digit-char>
  55
  56 <alpha-char> ::= "A" | "B" $| \cdots |$ "Z"
  57   | "a" | "b" $| \cdots |$ "z"
  58   | <extended-alpha-char>
  59
  60 <digit-char> ::= "0" | <nonzero-digit-char>
  61
  62 <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
  63 \end{grammar}
  64
  65 The precise definition of @<alpha-char> is left to the function
  66 @|alpha-char-p| in the hosting Lisp system.  For portability, programmers are
  67 encouraged to limit themselves to the standard ASCII letters.
  68
  69 There are no reserved words at the lexical level, but the higher-level syntax
  70 recognizes certain identifiers as \emph{keywords} in some contexts.  There is
  71 also an ambiguity (inherited from C) in the declaration syntax which is
  72 settled by distinguishing type names from other identifiers at a lexical
  73 level.
  74
  75
  76 \subsection{String and character literals} \label{sec:syntax.lex.string}
  77
  78 \begin{grammar}
  79 <string-literal> ::= "\"" @<string-literal-char>^* "\""
  80
  81 <char-literal> ::= "'" <char-literal-char> "'"
  82
  83 <string-literal-char> ::= "\\" <char>
  84   | any character other than "\\" or "\""
  85
  86 <char-literal-char> ::= "\\" <char>
  87   | any character other than "\\" or "'"
  88
  89 <char> ::= any single character
  90 \end{grammar}
  91
  92 The syntax for string and character literals differs from~C.  In particular,
  93 escape sequences such as @`\textbackslash n' are not recognized.  The use
  94 of string and character literals in Sod, outside of C~fragments, is limited,
  95 and the simple syntax seems adequate.  For the sake of future compatibility,
  96 the use of character sequences which resemble C escape sequences is
  97 discouraged.
  98
  99
 100 \subsection{Integer literals} \label{sec:syntax.lex.int}
 101
 102 \begin{grammar}
 103 <integer-literal> ::= <decimal-integer>
 104   | <binary-integer>
 105   | <octal-integer>
 106   | <hex-integer>
 107
 108 <decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
 109
 110 <binary-integer> ::= "0" @("b" @! "B"@) @<binary-digit-char>^+
 111
 112 <binary-digit-char> ::= "0" | "1"
 113
 114 <octal-integer> ::= "0" @["o" @! "O"@] @<octal-digit-char>^+
 115
 116 <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
 117
 118 <hex-integer> ::= "0" @("x" @! "X"@) @<hex-digit-char>^+
 119
 120 <hex-digit-char> ::= <digit-char>
 121   | "A" | "B" | "C" | "D" | "E" | "F"
 122   | "a" | "b" | "c" | "d" | "e" | "f"
 123 \end{grammar}
 124
 125 Sod understands only integers, not floating-point numbers; its integer syntax
 126 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 127 binary.  However, length and signedness indicators are not permitted.
 128
 129
 130 \subsection{Punctuation} \label{sec:syntax.lex.punct}
 131
 132 \begin{grammar}
 133 <punctuation> ::= "\dots"
 134 \alt any nonalphanumeric character other than "_", "\"", or "'"
 135 \end{grammar}
 136
 137
 138 \subsection{Comments} \label{sec:syntax.lex.comment}
 139
 140 \begin{grammar}
 141 <comment> ::= <block-comment> | <line-comment>
 142
 143 <block-comment> ::=
 144   "/*"
 145   @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
 146   @<star>^*
 147   "*/"
 148
 149 <star> ::= "*"
 150
 151 <not-star> ::= any character other than "*"
 152
 153 <not-star-or-slash> ::= any character other than "*" or  "/"
 154
 155 <line-comment> ::= "/\,/" @<not-newline>^* <newline>
 156
 157 <newline> ::= a newline character
 158
 159 <not-newline> ::= any character other than newline
 160 \end{grammar}
 161
 162 Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
 163 @|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
 164 ignored.
 165
 166
 167 \subsection{Special nonterminals} \label{sec:syntax.lex.special}
 168
 169 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 170 two special nonterminals occur in the module syntax.
 171
 172 \subsubsection{S-expressions}
 173 \begin{grammar}
 174 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 175 \end{grammar}
 176
 177 When an S-expression is expected, the Sod parser simply calls the host Lisp
 178 system's @|read| function.  Sod modules are permitted to modify the read
 179 table to extend the S-expression syntax.
 180
 181 S-expressions are self-delimiting, so no end-marker is needed.
 182
 183 \subsubsection{C fragments}
 184 \begin{grammar}
 185 <c-fragment> ::= a sequence of C tokens, with matching brackets
 186 \end{grammar}
 187
 188 Sequences of C code are simply stored and written to the output unchanged
 189 during translation.  They are read using a simple scanner which nonetheless
 190 understands C comments and string and character literals.
 191
 192 A C fragment is terminated by one of a small number of delimiter characters
 193 determined by the immediately surrounding context -- usually some kind of
 194 bracket.  The first such delimiter character which is not enclosed in
 195 brackets, braces or parentheses ends the fragment.
 196
 197 %%%--------------------------------------------------------------------------
 198 \section{C types} \label{sec:syntax.type}
 199
 200 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 201 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 202 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 203 `implicit @|int|' is forbidden), and storage-class specifiers are not
 204 recognized.
 205
 206
 207 \subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
 208
 209 \begin{grammar}
 210 <declaration-specifier> ::= <type-name>
 211 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 212 \alt "void" | "char" | "int" | "float" | "double"
 213 \alt "short" | "long"
 214 \alt "signed" | "unsigned"
 215 \alt "bool" | "_Bool"
 216 \alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 217 \alt <qualifier>
 218 \alt <storage-specifier>
 219 \alt <atomic-type>
 220 \alt <other-declspec>
 221
 222 <qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
 223
 224 <plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
 225
 226 <atomic-type> ::= <atomic> "(" <plain-type> ")"
 227
 228 <atomic> ::= "atomic" | "_Atomic"
 229
 230 <storage-specifier> ::= <alignas> "(" <c-fragment> ")"
 231
 232 <alignas> ::= "alignas" | "_Alignas"
 233
 234 <type-name> ::= <identifier>
 235 \end{grammar}
 236
 237 Declaration specifiers may appear in any order.  However, not all
 238 combinations are permitted.  A sequence of declaration specifiers must consist of zero or
 239 more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
 240 following, up to reordering:
 241 \begin{itemize}
 242 \item @<type-name>;
 243 \item @<atomic-type>;
 244 \item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
 245 \item @"void";
 246 \item @"_Bool", @"bool";
 247 \item @"char"; @"unsigned char"; @"signed char";
 248 \item @"short", @"signed short", @"short int", @"signed short int";
 249   @"unsigned short", @"unsigned short int";
 250 \item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
 251 \item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
 252   long", @"unsigned long int";
 253 \item @"long long", @"signed long long", @"long long int", @"signed long long
 254   int"; @"unsigned long long", @"unsigned long long int";
 255 \item @"float"; @"double"; @"long double";
 256 \item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
 257   imaginary"; @"long double _Imaginary", @"long double imaginary";
 258 \item @"float _Complex", @"float complex"; @"double _Complex", @"double
 259   complex"; @"long double _Complex", @"long double complex".
 260 \end{itemize}
 261 All of these have their usual C meanings.  Groups separated by commas mean
 262 the same thing, and Sod will not preserve the distinction.
 263
 264 Almost all of these mean the same as they do in C.  There are some minor
 265 differences:
 266 \begin{itemize}
 267 \item In C, the `tag' namespace is shared between @|struct|, @|union|, and
 268   @|enum|; Sod has three distinct namespaces for tags.  This may be fixed in
 269   the future.
 270 \item The @<other-declspec> production is a syntactic extension point, where
 271   extensions can introduce their own additions to the type system.
 272 \end{itemize}
 273
 274 C standards from C99 onwards have tended to introduce new keywords beginning
 275 with an underscore followed by an uppercase letter, so as to avoid conflicts
 276 with existing code.  More conventional spellings are then provided by macros
 277 in new header files.  For example, C99 introduced @"_Bool", and a header file
 278 @|<stdbool.h>| which defines the macro @|bool|.  Sod recognizes both the ugly
 279 underscore names and the more conventional macro names on input, but always
 280 emits the ugly names.  This doesn't cause a compatibility problem in Sod,
 281 because Sod's parser recognizes keywords only in the appropriate context.
 282 For example, the (ill-advised) slot declaration
 283 \begin{prog}
 284   bool bool;
 285 \end{prog}
 286 is completely acceptable, and will cause the C structure member
 287 \begin{prog}
 288   \_Bool bool;
 289 \end{prog}
 290 to be emitted on output, which will be acceptable to C as long as
 291 @|<stdbool.h>| is not included.
 292
 293 A @<type-name> is an identifier which has been declared as being a type name,
 294 using the @"typename" or @"class" definitions.  The following type names are
 295 defined in the built-in module.
 296 \begin{itemize}
 297 \item @|va_list|
 298 \item @|size_t|
 299 \item @|ptrdiff_t|
 300 \item @|wchar_t|
 301 \end{itemize}
 302
 303
 304 \subsection{Declarators} \label{sec:syntax.type.declarator}
 305
 306 \begin{grammar}
 307 <declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
 308
 309 <primary-declarator>$[k, a]$ ::= $k$
 310 \alt "(" <primary-declarator>$[k, a]$ ")"
 311 \alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
 312
 313 <pointer> ::= "*" @<qualifier>^*
 314
 315 <declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
 316 \alt "(" $a$ ")"
 317
 318 <argument-list> ::= $\epsilon$ | "\dots"
 319 \alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
 320
 321 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 322
 323 <abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
 324
 325 <argument-declarator> ::=
 326   <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
 327
 328 <simple-declarator> ::=
 329   <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
 330 \end{grammar}
 331
 332 The declarator syntax is taken from C, but with some differences.
 333 \begin{itemize}
 334 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 335   closing square bracket.  This allows array dimensions to contain arbitrary
 336   constant expressions.
 337 \item A declarator may have either a single @<identifier> at its centre or a
 338   pair of @<identifier>s separated by a @`.'; this is used to refer to
 339   slots or messages defined in superclasses.
 340 \end{itemize}
 341 The remaining differences are (I hope) a matter of presentation rather than
 342 substance.
 343
 344 There is additional syntax to support messages and methods which accept
 345 keyword arguments.
 346
 347 \begin{grammar}
 348 <keyword-argument> ::= <argument> @["=" <c-fragment>@]
 349
 350 <keyword-argument-list> ::=
 351   @[<list>$[\mbox{@<argument>}]$@]
 352   "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
 353
 354 <method-argument-list> ::= <argument-list> @! <keyword-argument-list>
 355
 356 <dotted-name> ::= <identifier> "." <identifier>
 357
 358 <keyword-declarator>$[k]$ ::=
 359   <declarator>$[k, \mbox{@<method-argument-list>}]$
 360 \end{grammar}
 361
 362 %%%--------------------------------------------------------------------------
 363 \section{Properties} \label{sec:syntax.prop}
 364
 365 \begin{grammar}
 366 <properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 367
 368 <property> ::= <identifier> "=" <expression>
 369
 370 <expression> ::= <additive>
 371
 372 <additive> ::= <term>
 373   | <additive> "+" <term>
 374   | <additive> "--" <term>
 375
 376 <term> ::= <factor>
 377   | <term> "*" <factor>
 378   | <term> "/" <factor>
 379
 380 <factor> ::= <primary>
 381   | "!" <factor> | "~" <factor>
 382   | "+" <factor> | "--" <factor>
 383
 384 <primary> ::=
 385      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 386 \alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
 387   | "(" <expression> ")"
 388 \end{grammar}
 389
 390 \emph{Property sets} are a means for associating miscellaneous information
 391 with compile-time metaobjects such as modules, classes, messages, methods,
 392 slots, and initializers.  By using property sets, additional information can
 393 be passed to extensions without the need to introduce idiosyncratic syntax.
 394 (That said, extensions can add additional first-class syntax, if necessary.)
 395
 396 An error is reported if an unrecognized property is associated with an
 397 object.
 398
 399
 400 \subsection{Property values} \label{sec:syntax.prop.value}
 401
 402 A property has a name, given as an @<identifier>, and a value computed by
 403 evaluating an @<expression>.  The value can be one of a number of types.
 404
 405 \begin{itemize}
 406
 407 \item An @<integer-literal> denotes a value of type @|int|.
 408
 409 \item Similarly @<string-literal> and @<char-literal> denote @|string| and
 410   @|char| values respectively.  Note that, as properties, characters are
 411   quite distinct from integers, whereas in C, a character literal denotes a
 412   value of type @|int|.
 413
 414 \item There are no variables in the property-value syntax.  Rather, an
 415   @<identifier> denotes that identifier, as a value of type @|id|.
 416
 417 \item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
 418   between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
 419   void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
 420
 421 \item A @<c-fragment> within braces denotes the tokens between (and not
 422   including) the braces, as a value of type @|c-fragment|.
 423
 424 \end{itemize}
 425
 426 As shown in the grammar, there are four binary operators, @"+" (addition),
 427 @"--" (subtraction), @"*" (multiplication), and @"/" (division);
 428 multiplication and division have higher precedence than addition and
 429 subtraction, and operators of the same precedence associate left-to-right.
 430 There are also unary @"+" (no effect) and @"--" (negation) operators, with
 431 higher precedence.  All of the above operators act only on integer operands
 432 and yield integer results.  (Although the unary @"+" operator yields its
 433 operand unchanged, an error is still reported if it is applied to a
 434 non-integer value.)  There are currently no bitwise, logical, or comparison
 435 operators.
 436
 437 Finally, an S-expression preceded by @|?| causes the expression to be read in
 438 the current package (which is always @|sod-user| at the start of a module)
 439 and immediately evaluated (using @|eval|); the resulting value is converted
 440 into a property value using the \descref{gf}{decode-property}[generic
 441 function].
 442
 443
 444 \subsection{Property output types and coercions}
 445 \label{sec:syntax.prop.coerce}
 446
 447 When a property value is inspected by the Sod translator, or an extension, it
 448 is \emph{coerced} so as to conform to a requested output type.  This coercion
 449 process is performed by the \descref{gf}{coerce-property-value}[generic
 450 function], and additional output types and coercions can be defined by
 451 extensions.  The built-in output types and coercions, from the value types listed
 452 above, are as follows.
 453
 454 \begin{itemize}
 455
 456 \item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
 457   correspond to the like-named value types described above.  No coercions to
 458   these output types are defined for the described value types.\footnote{%
 459     There is a coercion to @|id| from the value type @|symbol|, but it is
 460     only possible to generate a property value of type @|symbol| using Lisp.}
 461
 462 \item The output type @|type| denotes a C type, as does the value type
 463   @|type|.  In addition, a value of type @|id| can be coerced to a C type if
 464   it is the name of a class, a type name explicitly declared by @|typename|,
 465   or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
 466   @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
 467   or @|va_list|.
 468
 469 \item The @|boolean| output type denotes a boolean value, which may be either
 470   true or false.  A value of type @|id| is considered true if it is @|true|,
 471   @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
 472   @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
 473   any other identifier where a boolean value is wanted.  A value of type
 474   @|int| is considered true if it is nonzero, or false if it is zero.
 475
 476 \item The @|symbol| output type denotes a Lisp symbol.
 477
 478   A value of type @|id| is coerced to a symbol as follows.  First, the
 479   identifier name is subjected to \emph{case inversion}: if all of the
 480   letters in the name have the same case, either upper or lower, then they
 481   are replaced with the corresponding letters in the opposite case, lower or
 482   upper; if the name contains letters of both cases, then it is not changed.
 483   For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
 484   remains as it is.  Second, the name is subjected to \emph{separator
 485   switching}: all underscores in the name are replaced with hyphens (and
 486   \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
 487   first place).  Finally, the resulting name is interned in the current
 488   package, which will usually be @|sod-user| unless changed explicitly by the
 489   module.
 490
 491   A value of type @|string| is coerced to a symbol as follows.  If the string
 492   contains no colons, then it is case-inverted (but not separator-switched)
 493   and interned in the current package.  Otherwise, the string either has the
 494   form $p @|:| q$, where $q$ does not begin with a colon (the
 495   \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
 496   where $p$ does not contain a colon.  Both $p$ and $q$ are case-inverted
 497   (but not separator-switched).  If $p$ does not name a package, then an
 498   error is reported; as a special case, if $p$ is empty, then it is
 499   considered to name the @|keyword| package.  Otherwise, $q$ is looked up as
 500   a symbol name in package~$p$; in the single-colon case, if the symbol is
 501   not an exported symbol in package~$p$, then an error is reported; in the
 502   double-colon case, $q$ is interned in package~$p$ (and so there needn't be
 503   an exported symbol -- or, indeed, any symbol at all -- named $q$
 504   beforehand).
 505
 506 \item The @|keyword| output type denotes symbols within the @|keyword|
 507   package.  Values of type @|id| or @|string| can be coerced to a @|keyword|
 508   in the same way as to a @|symbol|, as described above, only the converted
 509   name is looked up in the @|keyword| package rather than the current
 510   package.  (A @|string| can override this by specifying an explicit package
 511   name, but this is unlikely to be very helpful.)
 512
 513 \end{itemize}
 514
 515 %%%--------------------------------------------------------------------------
 516 \section{Module syntax} \label{sec:syntax.module}
 517
 518 \begin{grammar}
 519 <module> ::= @<definition>^*
 520
 521 <definition> ::= <property-definition> \fixme{undefined}
 522 \alt <import-definition>
 523 \alt <load-definition>
 524 \alt <lisp-definition>
 525 \alt <code-definition>
 526 \alt <typename-definition>
 527 \alt <class-definition>
 528 \alt <other-definition> \fixme{undefined}
 529 \end{grammar}
 530
 531 A @<module> is the top-level syntactic item: a source file presented to Sod
 532 is expected to conform with the @<module> syntax.
 533
 534 A module consists of a sequence of definitions.
 535
 536 \fixme{describe syntax; expand}
 537 Properties:
 538 \begin{description}
 539 \item[@|module_class|] A symbol naming the Lisp class to use to
 540   represent the module.
 541 \item[@|guard|] An identifier to use as the guard symbol used to prevent
 542   multiple inclusion in the header file.
 543 \end{description}
 544
 545
 546 \subsection{Simple definitions} \label{sec:syntax.module.simple}
 547
 548 \subsubsection{Importing modules}
 549 \begin{grammar}
 550 <import-definition> ::= "import" <string> ";"
 551 \end{grammar}
 552
 553 The module named @<string> is processed and its definitions made available.
 554
 555 A search is made for a module source file as follows.
 556 \begin{itemize}
 557 \item The module name @<string> is converted into a filename by appending
 558   @`.sod', if it has no extension already.\footnote{%
 559     Technically, what happens is @|(merge-pathnames name (make-pathname :type
 560     "SOD" :case :common))|, so exactly what this means varies according to
 561     the host system.} %
 562 \item The file is looked for relative to the directory containing the
 563   importing module.
 564 \item If that fails, then the file is looked for in each directory on the
 565   module search path in turn.
 566 \item If the file still isn't found, an error is reported and the import
 567   fails.
 568 \end{itemize}
 569 At this point, if the file has previously been imported, nothing further
 570 happens.\footnote{%
 571   This check is done using @|truename|, so it should see through simple
 572   tricks like symbolic links.  However, it may be confused by fancy things
 573   like bind mounts and so on.} %
 574
 575 Recursive imports, either direct or indirect, are an error.
 576
 577 \subsubsection{Loading extensions}
 578 \begin{grammar}
 579 <load-definition> ::= "load" <string> ";"
 580 \end{grammar}
 581
 582 The Lisp file named @<string> is loaded and evaluated.
 583
 584 A search is made for a Lisp source file as follows.
 585 \begin{itemize}
 586 \item The name @<string> is converted into a filename by appending @`.lisp',
 587   if it has no extension already.\footnote{%
 588     Technically, what happens is @|(merge-pathnames name (make-pathname :type
 589     "LISP" :case :common))|, so exactly what this means varies according to
 590     the host system.} %
 591 \item A search is then made in the same manner as for module imports
 592   (\xref{sec:syntax.module.simple}).
 593 \end{itemize}
 594 If the file is found, it is loaded using the host Lisp's @|load| function.
 595
 596 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 597 existing compiled files.  The right way to package a substantial extension to
 598 the Sod translator is to provide the extension as a standard ASDF system (or
 599 similar) and leave a dropping @|foo-extension.lisp| in the module path saying
 600 something like
 601 \begin{prog}
 602   (asdf:load-system :foo-extension)
 603 \end{prog}
 604 which will arrange for the extension to be compiled if necessary.
 605
 606 (This approach means that the language doesn't need to depend on any
 607 particular system definition facility.  It's bad enough already that it
 608 depends on Common Lisp.)
 609
 610 \subsubsection{Lisp escapes}
 611 \begin{grammar}
 612 <lisp-definition> ::= "lisp" <s-expression> ";"
 613 \end{grammar}
 614
 615 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 616
 617 \begin{boxy}[Warning!]
 618   This means that hostile Sod modules are a security hazard.  Lisp code can
 619   read and write files, start other programs, and make network connections.
 620   Don't install Sod modules from sources that you don't trust.\footnote{%
 621     Presumably you were going to run the corresponding code at some point, so
 622     this isn't as unusually scary as it sounds.  But please be careful.} %
 623 \end{boxy}
 624
 625 \subsubsection{Declaring type names}
 626 \begin{grammar}
 627 <typename-definition> ::=
 628   "typename" <list>$[\mbox{@<identifier>}]$ ";"
 629 \end{grammar}
 630
 631 Each @<identifier> is declared as naming a C type.  This is important because
 632 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 633 done by distinguishing type names from other identifiers.
 634
 635 Don't declare class names using @"typename"; use @"class" forward
 636 declarations instead.
 637
 638
 639 \subsection{Literal code} \label{sec:syntax.module.literal}
 640
 641 \begin{grammar}
 642 <code-definition> ::=
 643   "code" <identifier> ":" <item-name> @[<constraints>@]
 644   "{" <c-fragment> "}"
 645
 646 <constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 647
 648 <constraint> ::= @<item-name>^+
 649
 650 <item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 651 \end{grammar}
 652
 653 The @<c-fragment> will be output unchanged to one of the output files.
 654
 655 The first @<identifier> is the symbolic name of an output file.  Predefined
 656 output file names are @|c| and @|h|, which are the implementation code and
 657 header file respectively; other output files can be defined by extensions.
 658
 659 Output items are named with a sequence of identifiers, separated by
 660 whitespace, and enclosed in parentheses.  As an abbreviation, a name
 661 consisting of a single identifier may be written as just that identifier,
 662 without the parentheses.
 663
 664 The @<constraints> provide a means for specifying where in the output file
 665 the output item should appear.  (Note the two kinds of square brackets shown
 666 in the syntax: literal square brackets must appear around the constraints if
 667 they are present, but the constraints may be omitted entirely.)  Each comma-separated @<constraint>
 668 is a sequence of names of output items, and indicates that the output items
 669 must appear in the order given -- though the translator is free to insert
 670 additional items in between them.  (The particular output items needn't be
 671 defined already -- indeed, they needn't be defined ever.)
 672
 673 There is a predefined output item @|includes| in both the @|c| and @|h|
 674 output files which is a suitable place for inserting @|\#include|
 675 preprocessor directives in order to declare types and functions for use
 676 elsewhere in the generated output files.
 677
 678
 679 \subsection{Class definitions} \label{sec:syntax.module.class}
 680
 681 \begin{grammar}
 682 <class-definition> ::= <class-forward-declaration>
 683 \alt <full-class-definition>
 684 \end{grammar}
 685
 686 \subsubsection{Forward declarations}
 687 \begin{grammar}
 688 <class-forward-declaration> ::= "class" <identifier> ";"
 689 \end{grammar}
 690
 691 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 692 to name a class which is currently undefined.  Forward declarations are
 693 necessary in order to resolve certain kinds of circularity.  For example,
 694 \begin{prog}
 695 class Sub;                                                      \\+
 696
 697 class Super: SodObject \{                                       \\ \ind
 698   Sub *sub;                                                   \-\\
 699 \};                                                             \\+
 700
 701 class Sub: Super \{                                             \\ \ind
 702   /* \dots\ */                                                \-\\
 703 \};
 704 \end{prog}
 705
 706 \subsubsection{Full class definitions}
 707 \begin{grammar}
 708 <full-class-definition> ::=
 709   @[<properties>@]
 710   "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
 711   "{" @<properties-class-item>^* "}"
 712
 713 <properties-class-item> ::= @[<properties>@] <class-item>
 714
 715 <class-item> ::= <slot-item>
 716 \alt <initializer-item>
 717 \alt <initarg-item>
 718 \alt <fragment-item>
 719 \alt <message-item>
 720 \alt <method-item>
 721 \alt <other-item> \fixme{undefined}
 722 \end{grammar}
 723
 724 A full class definition provides a complete description of a class.
 725
 726 The first @<identifier> gives the name of the class.  It is an error to
 727 give the name of an existing class (other than a forward-referenced class),
 728 or an existing type name.  It is conventional to give classes `MixedCase'
 729 names, to distinguish them from other kinds of identifiers.
 730
 731 The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
 732 class.  It is an error if any of these @<identifier>s does not name a defined
 733 class.  The superclass list is required, and must not be empty; listing
 734 @|SodObject| as your class's superclass is a good choice if nothing else
 735 seems suitable.  A class with no direct superclasses is called a \emph{root
 736 class}.  It is not possible to define a root class in the Sod language: you
 737 must use Lisp to do this, and it's quite involved.
 738
 739 The @<properties> provide additional information.  The standard class
 740 properties are as follows.
 741 \begin{description}
 742 \item[@|lisp_class|] The name of the Lisp class to use within the translator
 743   to represent this class.  The property value must be an identifier; the
 744   default is @|sod_class|.  Extensions may define classes with additional
 745   behaviour, and may recognize additional class properties.
 746 \item[@|metaclass|] The name of the Sod metaclass for this class.  In the
 747   generated code, a class is itself an instance of another class -- its
 748   \emph{metaclass}.  The metaclass defines which slots the class will have,
 749   which messages it will respond to, and what its behaviour will be when it
 750   receives them.  The property value must be an identifier naming a defined
 751   subclass of @|SodClass|.  The default metaclass is @|SodClass|.
 752   See \xref{sec:concepts.metaclasses} for more details.
 753 \item[@|nick|] A nickname for the class, to be used to distinguish it from
 754   other classes in various limited contexts.  The property value must be an
 755   identifier; the default is constructed by forcing the class name to
 756   lower-case.
 757 \end{description}
 758
 759 The class body consists of a sequence of @<class-item>s enclosed in braces.
 760 These items are discussed in the following sections.
 761
 762 \subsubsection{Slot items}
 763 \begin{grammar}
 764 <slot-item> ::=
 765   @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 766
 767 <init-declarator> ::= <simple-declarator> @["=" <initializer>@]
 768 \end{grammar}
 769
 770 A @<slot-item> defines one or more slots.  All instances of the class and any
 771 subclass will contain these slots, with the names and types given by the
 772 @<declaration-specifiers> and the @<declarators>.  Slot declarators may not
 773 contain dotted names.
 774
 775 It is not possible to declare a slot with function type: such an item is
 776 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 777 functions are fine.
 778
 779 Properties:
 780 \begin{description}
 781 \item[@|slot_class|] A symbol naming the Lisp class to use to represent the
 782   direct slot.
 783 \item[@|initarg|] An identifier naming an initialization argument which can
 784   be used to provide a value for the slot.  See
 785   \xref{sec:concepts.lifecycle.birth} for the details.
 786 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 787   the initarg.  Only permitted if @|initarg| is also set.
 788 \end{description}
 789
 790 An @<initializer>, if present, is treated as if a separate
 791 @<initializer-item> containing the slot name and initializer were present.
 792 For example,
 793 \begin{prog}
 794 [nick = eg]                                                     \\
 795 class Example: Super \{                                         \\ \ind
 796   int foo = 17;                                               \-\\
 797 \};
 798 \end{prog}
 799 means the same as
 800 \begin{prog}
 801 [nick = eg]                                                     \\
 802 class Example: Super \{                                         \\ \ind
 803   int foo;                                                      \\
 804   eg.foo = 17;                                                \-\\
 805 \};
 806 \end{prog}
 807
 808 \subsubsection{Initializer items}
 809 \begin{grammar}
 810 <initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 811
 812 <slot-initializer> ::= <dotted-name> @["=" <initializer>@]
 813
 814 <initializer> ::= <c-fragment>
 815 \end{grammar}
 816
 817 An @<initializer-item> provides an initial value for one or more slots.  If
 818 prefixed by @|class|, then the initial values are for class slots (i.e.,
 819 slots of the class object itself); otherwise they are for instance slots.
 820
 821 The first component of the @<dotted-name> must be the nickname of one of the
 822 class's superclasses (including itself); the second must be the name of a
 823 slot defined in that superclass.
 824
 825 Properties:
 826 \begin{description}
 827 \item[@|initializer_class|] A symbol naming the Lisp class to use to
 828   represent the initializer.
 829 \item[@|initarg|] An identifier naming an initialization argument which can
 830   be used to provide a value for the slot.  See
 831   \xref{sec:concepts.lifecycle.birth} for the details.  An initializer item
 832   must have either an @|initarg| property, or an initializer expression, or
 833   both.
 834 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 835   the initarg.  Only permitted if @|initarg| is also set.
 836 \end{description}
 837
 838 Each class may define at most one initializer item with an explicit
 839 initializer expression for a given slot.
 840
 841 \subsubsection{Initarg items}
 842 \begin{grammar}
 843 <initarg-item> ::=
 844   "initarg"
 845   @<declaration-specifier>^+
 846   <list>$[\mbox{@<init-declarator>}]$ ";"
 847 \end{grammar}
 848 Properties:
 849 \begin{description}
 850 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 851   the initarg.
 852 \end{description}
 853
 854 \subsubsection{Fragment items}
 855 \begin{grammar}
 856 <fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
 857
 858 <fragment-kind> ::= "init" | "teardown"
 859 \end{grammar}
 860
 861 \subsubsection{Message items}
 862 \begin{grammar}
 863 <message-item> ::=
 864   @<declaration-specifier>^+
 865   <keyword-declarator>$[\mbox{@<identifier>}]$
 866   @[<method-body>@]
 867 \end{grammar}
 868 Properties:
 869 \begin{description}
 870 \item[@|message_class|] A symbol naming the Lisp class to use to represent
 871   the message.
 872 \item[@|combination|] A keyword naming the aggregating method combination to
 873   use.
 874 \item[@|most_specific|] A keyword, either @`first' or @`last', according to
 875   whether the most specific applicable method should be invoked first or
 876   last.
 877 \end{description}
 878
 879 Properties for the @|custom| aggregating method combination:
 880 \begin{description}
 881 \item[@|retvar|] An identifier for the return value from the effective
 882   method.  The default is @|sod__ret|.  Only permitted if the message return
 883   type is not @|void|.
 884 \item[@|valvar|] An identifier holding each return value from a direct method
 885   in the effective method.  The default is @|sod__val|.  Only permitted if
 886   the method return type (see @|methty| below) is not @|void|.
 887 \item[@|methty|] A C type, which is the return type for direct methods of
 888   this message.  The default is the return type of the message.
 889 \item[@|decls|] A code fragment containing declarations to be inserted at the
 890   head of the effective method body.  The default is to insert nothing.
 891 \item[@|before|] A code fragment containing initialization to be performed at
 892   the beginning of the effective method body.  The default is to insert
 893   nothing.
 894 \item[@|empty|] A code fragment executed if there are no primary methods;
 895   it should usually store a suitable (identity) value in @<retvar>.  The
 896   default is not to emit an effective method at all if there are no primary
 897   methods.
 898 \item[@|first|] A code fragment to set the return value after calling the
 899   first applicable direct method.  The default is to use the @|each|
 900   fragment.
 901 \item[@|each|] A code fragment to set the return value after calling a direct
 902   method.  If @|first| is also set, then it is used after the first direct
 903   method instead of this.  The default is to insert nothing, which is
 904   probably not what you want.
 905 \item[@|after|] A code fragment inserted at the end of the effective method
 906   body.  The default is to insert nothing.
 907 \item[@|count|] An identifier naming a variable to be declared in the
 908   effective method body, of type @|size_t|, holding the number of applicable
 909   methods.  The default is not to provide such a variable.
 910 \end{description}
 911
 912 \subsubsection{Method items}
 913 \begin{grammar}
 914 <method-item> ::=
 915   @<declaration-specifier>^+
 916   <keyword-declarator>$[\mbox{@<dotted-name>}]$
 917   <method-body>
 918
 919 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 920 \end{grammar}
 921 Properties:
 922 \begin{description}
 923 \item[@|method_class|] A symbol naming the Lisp class to use to represent
 924   the direct method.
 925 \item[@|role|] A keyword naming the direct method's rôle.  For the built-in
 926   `simple' message classes, the acceptable rôle names are @|before|,
 927   @|after|, and @|around|.  By default, a primary method is constructed.
 928 \end{description}
 929
 930 %%%----- That's all, folks --------------------------------------------------
 931
 932 %%% Local variables:
 933 %%% mode: LaTeX
 934 %%% TeX-master: "sod.tex"
 935 %%% TeX-PDF-mode: t
 936 %%% End: