Static instance support.
[sod] / doc / syntax.tex
CommitLineData
1f7d590d
MW
1%%% -*-latex-*-
2%%%
3%%% Module syntax
4%%%
5%%% (c) 2015 Straylight/Edgeware
6%%%
7
8%%%----- Licensing notice ---------------------------------------------------
9%%%
e0808c47 10%%% This file is part of the Sensible Object Design, an object system for C.
1f7d590d
MW
11%%%
12%%% SOD is free software; you can redistribute it and/or modify
13%%% it under the terms of the GNU General Public License as published by
14%%% the Free Software Foundation; either version 2 of the License, or
15%%% (at your option) any later version.
16%%%
17%%% SOD is distributed in the hope that it will be useful,
18%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
19%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20%%% GNU General Public License for more details.
21%%%
22%%% You should have received a copy of the GNU General Public License
23%%% along with SOD; if not, write to the Free Software Foundation,
24%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26\chapter{Module syntax} \label{ch:syntax}
27
28%%%--------------------------------------------------------------------------
68a620ab 29\section{Lexical syntax} \label{sec:syntax.lex}
1f7d590d
MW
30
31Whitespace and comments are discarded. The remaining characters are
32collected into tokens according to the following syntax.
33
34\begin{grammar}
35<token> ::= <identifier>
36\alt <string-literal>
37\alt <char-literal>
38\alt <integer-literal>
39\alt <punctuation>
40\end{grammar}
41
42This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
43munch} rule: at each stage we take the longest sequence of characters which
44could be a token.
45
68a620ab
MW
46
47\subsection{Identifiers} \label{sec:syntax.lex.id}
1f7d590d
MW
48
49\begin{grammar}
50<identifier> ::= <id-start-char> @<id-body-char>^*
51
52<id-start-char> ::= <alpha-char> | "_"
53
b04739d0 54<id-body-char> ::= <id-start-char> | <digit-char>
1f7d590d 55
b04739d0
MW
56<alpha-char> ::= "A" | "B" | $\cdots$ | "Z"
57 | "a" | "b" | $\cdots$ | "z"
f575cdca 58 | <extended-alpha-char>
1f7d590d
MW
59
60<digit-char> ::= "0" | <nonzero-digit-char>
61
b04739d0 62<nonzero-digit-char> ::= "1" | "2" | $\cdots$ | "9"
1f7d590d
MW
63\end{grammar}
64
65The precise definition of @<alpha-char> is left to the function
92e590b0
MW
66@|alpha-char-p| in the hosting Lisp system. For portability, programmers are
67encouraged to limit themselves to the standard ASCII letters.
1f7d590d
MW
68
69There are no reserved words at the lexical level, but the higher-level syntax
70recognizes certain identifiers as \emph{keywords} in some contexts. There is
71also an ambiguity (inherited from C) in the declaration syntax which is
72settled by distinguishing type names from other identifiers at a lexical
73level.
74
68a620ab
MW
75
76\subsection{String and character literals} \label{sec:syntax.lex.string}
1f7d590d
MW
77
78\begin{grammar}
79<string-literal> ::= "\"" @<string-literal-char>^* "\""
80
81<char-literal> ::= "'" <char-literal-char> "'"
82
f575cdca
MW
83<string-literal-char> ::= "\\" <char>
84 | any character other than "\\" or "\""
1f7d590d 85
f575cdca
MW
86<char-literal-char> ::= "\\" <char>
87 | any character other than "\\" or "'"
1f7d590d
MW
88
89<char> ::= any single character
90\end{grammar}
91
92The syntax for string and character literals differs from~C. In particular,
93escape sequences such as @`\textbackslash n' are not recognized. The use
94of string and character literals in Sod, outside of C~fragments, is limited,
95and the simple syntax seems adequate. For the sake of future compatibility,
96the use of character sequences which resemble C escape sequences is
97discouraged.
98
a59e2904
MW
99
100\subsection{Integer literals} \label{sec:syntax.lex.int}
1f7d590d
MW
101
102\begin{grammar}
103<integer-literal> ::= <decimal-integer>
f575cdca
MW
104 | <binary-integer>
105 | <octal-integer>
106 | <hex-integer>
1f7d590d 107
cc0bcf39 108<decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
1f7d590d 109
b04739d0 110<binary-integer> ::= "0" @("b" | "B"@) @<binary-digit-char>^+
1f7d590d
MW
111
112<binary-digit-char> ::= "0" | "1"
113
b04739d0 114<octal-integer> ::= "0" @["o" | "O"@] @<octal-digit-char>^+
1f7d590d 115
b04739d0 116<octal-digit-char> ::= "0" | "1" | $\cdots$ | "7"
1f7d590d 117
b04739d0 118<hex-integer> ::= "0" @("x" | "X"@) @<hex-digit-char>^+
1f7d590d
MW
119
120<hex-digit-char> ::= <digit-char>
f575cdca
MW
121 | "A" | "B" | "C" | "D" | "E" | "F"
122 | "a" | "b" | "c" | "d" | "e" | "f"
1f7d590d
MW
123\end{grammar}
124
125Sod understands only integers, not floating-point numbers; its integer syntax
126goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
127binary. However, length and signedness indicators are not permitted.
128
68a620ab
MW
129
130\subsection{Punctuation} \label{sec:syntax.lex.punct}
1f7d590d
MW
131
132\begin{grammar}
fe471148
MW
133<punctuation> ::= "<<" | ">>" | "||" | "&&"
134 | "<=" | ">=" | "==" | "!=" | "\dots"
f575cdca 135\alt any nonalphanumeric character other than "_", "\"", or "'"
1f7d590d
MW
136\end{grammar}
137
68a620ab
MW
138
139\subsection{Comments} \label{sec:syntax.lex.comment}
1f7d590d
MW
140
141\begin{grammar}
f575cdca 142<comment> ::= <block-comment> | <line-comment>
1f7d590d
MW
143
144<block-comment> ::=
145 "/*"
146 @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
147 @<star>^*
148 "*/"
149
150<star> ::= "*"
151
152<not-star> ::= any character other than "*"
153
154<not-star-or-slash> ::= any character other than "*" or "/"
155
20f9c213 156<line-comment> ::= "/\,/" @<not-newline>^* <newline>
1f7d590d
MW
157
158<newline> ::= a newline character
159
160<not-newline> ::= any character other than newline
161\end{grammar}
162
20f9c213
MW
163Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
164@|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
165ignored.
1f7d590d 166
68a620ab
MW
167
168\subsection{Special nonterminals} \label{sec:syntax.lex.special}
1f7d590d
MW
169
170Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
171two special nonterminals occur in the module syntax.
172
68a620ab 173\subsubsection{S-expressions}
1f7d590d
MW
174\begin{grammar}
175<s-expression> ::= an S-expression, as parsed by the Lisp reader
176\end{grammar}
177
178When an S-expression is expected, the Sod parser simply calls the host Lisp
68a620ab
MW
179system's @|read| function. Sod modules are permitted to modify the read
180table to extend the S-expression syntax.
1f7d590d
MW
181
182S-expressions are self-delimiting, so no end-marker is needed.
183
68a620ab 184\subsubsection{C fragments}
1f7d590d
MW
185\begin{grammar}
186<c-fragment> ::= a sequence of C tokens, with matching brackets
187\end{grammar}
188
189Sequences of C code are simply stored and written to the output unchanged
190during translation. They are read using a simple scanner which nonetheless
191understands C comments and string and character literals.
192
193A C fragment is terminated by one of a small number of delimiter characters
a5b9e2be
MW
194determined by the immediately surrounding context -- usually some kind of
195bracket. The first such delimiter character which is not enclosed in
196brackets, braces or parentheses ends the fragment.
1f7d590d 197
68a620ab 198%%%--------------------------------------------------------------------------
a58527f3
MW
199\section{C types} \label{sec:syntax.type}
200
201Sod's syntax for C types closely mirrors the standard C syntax. A C type has
202two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
203Sod, a type must contain at least one @<declaration-specifier> (i.e.,
204`implicit @|int|' is forbidden), and storage-class specifiers are not
205recognized.
206
207
208\subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
209
210\begin{grammar}
211<declaration-specifier> ::= <type-name>
212\alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
213\alt "void" | "char" | "int" | "float" | "double"
214\alt "short" | "long"
215\alt "signed" | "unsigned"
216\alt "bool" | "_Bool"
217\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
218\alt <qualifier>
219\alt <storage-specifier>
220\alt <atomic-type>
221\alt <other-declspec>
222
223<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
224
225<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
226
227<atomic-type> ::= <atomic> "(" <plain-type> ")"
228
229<atomic> ::= "atomic" | "_Atomic"
230
231<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
232
233<alignas> ::= "alignas" | "_Alignas"
234
235<type-name> ::= <identifier>
236\end{grammar}
237
238Declaration specifiers may appear in any order. However, not all
239combinations are permitted. A declaration specifier must consist of zero or
240more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
241following, up to reordering:
242\begin{itemize}
243\item @<type-name>;
244\item @<atomic-type>;
245\item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
246\item @"void";
247\item @"_Bool", @"bool";
248\item @"char"; @"unsigned char"; @"signed char";
249\item @"short", @"signed short", @"short int", @"signed short int";
250 @"unsigned short", @"unsigned short int";
251\item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
252\item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
253 long", @"unsigned long int";
254\item @"long long", @"signed long long", @"long long int", @"signed long long
255 int"; @"unsigned long long", @"unsigned long long int";
256\item @"float"; @"double"; @"long double";
257\item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
258 imaginary"; @"long double _Imaginary", @"long double imaginary";
259\item @"float _Complex", @"float complex"; @"double _Complex", @"double
260 complex"; @"long double _Complex", @"long double complex".
261\end{itemize}
262All of these have their usual C meanings. Groups separated by commas mean
263the same thing, and Sod will not preserve the distinction.
264
265Almost all of these mean the same as they do in C. There are some minor
266differences:
267\begin{itemize}
268\item In C, the `tag' namespace is shared between @|struct|, @|union|, and
269 @|enum|; Sod has three distinct namespaces for tags. This may be fixed in
270 the future.
271\item The @<other-declspec> production is a syntactic extension point, where
272 extensions can introduce their own additions to the type system.
273\end{itemize}
274
275C standards from C99 onwards have tended to introduce new keywords beginning
276with an underscore followed by an uppercase letter, so as to avoid conflicts
277with existing code. More conventional spellings are then provided by macros
278in new header files. For example, C99 introduced @"_Bool", and a header file
279@|<stdbool.h>| which defines the macro @|bool|. Sod recognizes both the ugly
280underscore names and the more conventional macro names on input, but always
281emits the ugly names. This doesn't cause a compatibility problem in Sod,
282because Sod's parser recognizes keywords only in the appropriate context.
283For example, the (ill-advised) slot declaration
284\begin{prog}
285 bool bool;
286\end{prog}
287is completely acceptable, and will cause the C structure member
288\begin{prog}
289 \_Bool bool;
290\end{prog}
291to be emitted on output, which will be acceptable to C as long as
292@|<stdbool.h>| is not included.
293
294A @<type-name> is an identifier which has been declared as being a type name,
295using the @"typename" or @"class" definitions. The following type names are
296defined in the built-in module.
297\begin{itemize}
298\item @|va_list|
299\item @|size_t|
300\item @|ptrdiff_t|
301\item @|wchar_t|
302\end{itemize}
303
304
305\subsection{Declarators} \label{sec:syntax.type.declarator}
306
307\begin{grammar}
308<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
309
310<primary-declarator>$[k, a]$ ::= $k$
311\alt "(" <primary-declarator>$[k, a]$ ")"
312\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
313
314<pointer> ::= "*" @<qualifier>^*
315
316<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
317\alt "(" $a$ ")"
318
319<argument-list> ::= $\epsilon$ | "\dots"
320\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
321
322<argument> ::= @<declaration-specifier>^+ <argument-declarator>
323
324<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
325
326<argument-declarator> ::=
b04739d0 327 <declarator>$[\mbox{@<identifier> | $\epsilon$}, \mbox{@<argument-list>}]$
a58527f3
MW
328
329<simple-declarator> ::=
330 <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
331\end{grammar}
332
333The declarator syntax is taken from C, but with some differences.
334\begin{itemize}
335\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
336 closing square bracket. This allows array dimensions to contain arbitrary
337 constant expressions.
338\item A declarator may have either a single @<identifier> at its centre or a
339 pair of @<identifier>s separated by a @`.'; this is used to refer to
340 slots or messages defined in superclasses.
341\end{itemize}
342The remaining differences are (I hope) a matter of presentation rather than
343substance.
344
345There is additional syntax to support messages and methods which accept
346keyword arguments.
347
348\begin{grammar}
349<keyword-argument> ::= <argument> @["=" <c-fragment>@]
350
351<keyword-argument-list> ::=
352 @[<list>$[\mbox{@<argument>}]$@]
353 "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
354
b04739d0 355<method-argument-list> ::= <argument-list> | <keyword-argument-list>
a58527f3
MW
356
357<dotted-name> ::= <identifier> "." <identifier>
358
359<keyword-declarator>$[k]$ ::=
360 <declarator>$[k, \mbox{@<method-argument-list>}]$
361\end{grammar}
362
363%%%--------------------------------------------------------------------------
b0d55f11
MW
364\section{Properties} \label{sec:syntax.prop}
365
366\begin{grammar}
367<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
368
369<property> ::= <identifier> "=" <expression>
370
fe471148
MW
371<expression> ::= <logical-or>
372
373<logical-or> ::= <logical-and>
374 | <logical-or> "||" <logical-and>
375
376<logical-and> ::= <bitwise-or>
377 | <logical-and> "&&" <bitwise-or>
378
379<bitwise-or> ::= <bitwise-xor>
380 | <bitwise-or> "|" <bitwise-xor>
381
382<bitwise-xor> ::= <bitwise-and>
383 | <bitwise-xor> "^" <bitwise-and>
384
385<bitwise-and> ::= <equality>
386 | <bitwise-and> "&" <equality>
387
388<equality> ::= <ordering>
389 | <equality> "==" <ordering>
390 | <equality> "!=" <ordering>
391
392<ordering> ::= <shift>
393 | <ordering> "<" <shift>
394 | <ordering> "<=" <shift>
395 | <ordering> ">=" <shift>
396 | <ordering> ">" <shift>
397
398<shift> ::= <additive>
399 | <shift> "<<" <additive>
400 | <shift> ">>" <additive>
b0d55f11 401
f575cdca
MW
402<additive> ::= <term>
403 | <additive> "+" <term>
404 | <additive> "--" <term>
b0d55f11 405
f575cdca
MW
406<term> ::= <factor>
407 | <term> "*" <factor>
408 | <term> "/" <factor>
409
410<factor> ::= <primary>
411 | "!" <factor> | "~" <factor>
412 | "+" <factor> | "--" <factor>
b0d55f11
MW
413
414<primary> ::=
415 <integer-literal> | <string-literal> | <char-literal> | <identifier>
f575cdca
MW
416\alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
417 | "(" <expression> ")"
b0d55f11
MW
418\end{grammar}
419
420\emph{Property sets} are a means for associating miscellaneous information
421with compile-time metaobjects such as modules, classes, messages, methods,
422slots, and initializers. By using property sets, additional information can
423be passed to extensions without the need to introduce idiosyncratic syntax.
424(That said, extensions can add additional first-class syntax, if necessary.)
425
426An error is reported if an unrecognized property is associated with an
427object.
428
429
430\subsection{Property values} \label{sec:syntax.prop.value}
431
432A property has a name, given as an @<identifier>, and a value computed by
433evaluating an @<expression>. The value can be one of a number of types.
434
435\begin{itemize}
436
437\item An @<integer-literal> denotes a value of type @|int|.
438
439\item Similarly @<string-literal> and @<char-literal> denote @|string| and
440 @|char| values respectively. Note that, as properties, characters are
441 quite distinct from integers, whereas in C, a character literal denotes a
442 value of type @|int|.
443
444\item There are no variables in the property-value syntax. Rather, an
445 @<identifier> denotes that identifier, as a value of type @|id|.
446
447\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
448 between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
449 void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
450
451\item A @<c-fragment> within braces denotes the tokens between (and not
452 including) the braces, as a value of type @|c-fragment|.
453
454\end{itemize}
455
456As shown in the grammar, there are four binary operators, @"+" (addition),
457@"--" (subtraction), @"*" (multiplication), and @"/" (division);
458multiplication and division have higher precedence than addition and
459subtraction, and operators of the same precedence associate left-to-right.
460There are also unary @"+" (no effect) and @"--" (negation) operators, with
461higher precedence. All of the above operators act only on integer operands
462and yield integer results. (Although the unary @"+" operator yields its
463operand unchanged, an error is still reported if it is applied to a
464non-integer value.) There are currently no bitwise, logical, or comparison
465operators.
466
467Finally, an S-expression preceded by @|?| causes the expression to be read in
468the current package (which is always @|sod-user| at the start of a module)
469and immediately evaluated (using @|eval|); the resulting value is converted
e8d70b1b
MW
470into a property value using the \descref{gf}{decode-property}[generic
471function].
b0d55f11
MW
472
473
474\subsection{Property output types and coercions}
475\label{sec:syntax.prop.coerce}
476
477When a property value is inspected by the Sod translator, or an extension, it
478is \emph{coerced} so as to conform to a requested output type. This coercion
e8d70b1b
MW
479process is performed by the \descref{gf}{coerce-property-value}[generic
480function], and additional output types and coercions can be defined by
b0d55f11
MW
481extensions. The built-in output types and coercions, from the value types listed
482above, are as follows.
483
484\begin{itemize}
485
486\item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
487 correspond to the like-named value types described above. No coercions to
488 these output types are defined for the described value types.\footnote{%
489 There is a coercion to @|id| from the value type @|symbol|, but it is
490 only possible to generate a property value of type @|symbol| using Lisp.}
491
492\item The output type @|type| denotes a C type, as does the value type
493 @|type|. In addition, a value of type @|id| can be coerced to a C type if
494 it is the name of a class, a type name explicitly declared by @|typename|,
495 or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
496 @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
497 or @|va_list|.
498
499\item The @|boolean| output type denotes a boolean value, which may be either
500 true or false. A value of type @|id| is considered true if it is @|true|,
eaa0e159
MW
501 @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
502 @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
503 any other identifier where a boolean value is wanted. A value of type
504 @|int| is considered true if it is nonzero, or false if it is zero.
b0d55f11
MW
505
506\item The @|symbol| output type denotes a Lisp symbol.
507
508 A value of type @|id| is coerced to a symbol as follows. First, the
509 identifier name is subjected to \emph{case inversion}: if all of the
510 letters in the name have the same case, either upper or lower, then they
511 are replaced with the corresponding letters in the opposite case, lower or
512 upper; if the name contains letters of both cases, then it is not changed.
513 For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
514 remains as it is. Second, the name is subjected to \emph{separator
81bc32d4 515 switching}: all underscores in the name are replaced with hyphens (and
b0d55f11
MW
516 \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
517 first place). Finally, the resulting name is interned in the current
518 package, which will usually be @|sod-user| unless changed explicitly by the
519 module.
520
521 A value of type @|string| is coerced to a symbol as follows. If the string
522 contains no colons, then it is case-inverted (but not separator-switched)
523 and interned in the current package. Otherwise, the string either has the
524 form $p @|:| q$, where $q$ does not begin with a colon (the
525 \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
526 where $p$ does not contain a colon. Both $p$ and $q$ are case-inverted
527 (but not separator-switched). If $p$ does not name a package, then an
528 error is reported; as a special case, if $p$ is empty, then it is
529 considered to name the @|keyword| package. Otherwise, $q$ is looked up as
530 a symbol name in package~$p$; in the single-colon case, if the symbol is
531 not an exported symbol in package~$p$, then an error is reported; in the
532 double-colon case, $q$ is interned in package~$p$ (and so there needn't be
533 an exported symbol -- or, indeed, any symbol at all -- named $q$
534 beforehand).
535
536\item The @|keyword| output type denotes symbols within the @|keyword|
537 package. Values of type @|id| or @|string| can be coerced to a @|keyword|
538 in the same way as to a @|symbol|, as described above, only the converted
539 name is looked up in the @|keyword| package rather than the current
540 package. (A @|string| can override this by specifying an explicit package
541 name, but this is unlikely to be very helpful.)
542
543\end{itemize}
544
545%%%--------------------------------------------------------------------------
68a620ab 546\section{Module syntax} \label{sec:syntax.module}
1f7d590d
MW
547
548\begin{grammar}
549<module> ::= @<definition>^*
550
0df66309
MW
551<definition> ::= <property-definition> \fixme{undefined}
552\alt <import-definition>
1f7d590d
MW
553\alt <load-definition>
554\alt <lisp-definition>
555\alt <code-definition>
556\alt <typename-definition>
557\alt <class-definition>
\alt <static-instance-definition>
fa548bb1 558\alt <other-definition> \fixme{undefined}
1f7d590d
MW
559\end{grammar}
560
eb41dc76
MW
561A @<module> is the top-level syntactic item: a source file presented to Sod
562is expected to conform with the @<module> syntax.
563
564A module consists of a sequence of definitions.
1f7d590d 565
6390b845 566\fixme{describe syntax; expand}
8399be6f
MW
567Properties:
568\begin{description}
ba375a80 569\item[@|module_class|] A symbol naming the Lisp class to use to
8399be6f 570 represent the module.
ba375a80 571\item[@|guard|] An identifier to use as the guard symbol used to prevent
8399be6f
MW
572 multiple inclusion in the header file.
573\end{description}
574
575
68a620ab 576\subsection{Simple definitions} \label{sec:syntax.module.simple}
1f7d590d 577
68a620ab 578\subsubsection{Importing modules}
1f7d590d
MW
579\begin{grammar}
580<import-definition> ::= "import" <string> ";"
581\end{grammar}
582
583The module named @<string> is processed and its definitions made available.
584
585A search is made for a module source file as follows.
586\begin{itemize}
587\item The module name @<string> is converted into a filename by appending
588 @`.sod', if it has no extension already.\footnote{%
92e590b0
MW
589 Technically, what happens is @|(merge-pathnames name (make-pathname :type
590 "SOD" :case :common))|, so exactly what this means varies according to
591 the host system.} %
1f7d590d
MW
592\item The file is looked for relative to the directory containing the
593 importing module.
594\item If that fails, then the file is looked for in each directory on the
595 module search path in turn.
596\item If the file still isn't found, an error is reported and the import
597 fails.
598\end{itemize}
599At this point, if the file has previously been imported, nothing further
600happens.\footnote{%
92e590b0 601 This check is done using @|truename|, so it should see through simple
1f7d590d
MW
602 tricks like symbolic links. However, it may be confused by fancy things
603 like bind mounts and so on.} %
604
605Recursive imports, either direct or indirect, are an error.
606
68a620ab 607\subsubsection{Loading extensions}
1f7d590d
MW
608\begin{grammar}
609<load-definition> ::= "load" <string> ";"
610\end{grammar}
611
612The Lisp file named @<string> is loaded and evaluated.
613
614A search is made for a Lisp source file as follows.
615\begin{itemize}
616\item The name @<string> is converted into a filename by appending @`.lisp',
617 if it has no extension already.\footnote{%
92e590b0
MW
618 Technically, what happens is @|(merge-pathnames name (make-pathname :type
619 "LISP" :case :common))|, so exactly what this means varies according to
620 the host system.} %
1f7d590d
MW
621\item A search is then made in the same manner as for module imports
622 (\xref{sec:syntax.module.simple}).
623\end{itemize}
92e590b0 624If the file is found, it is loaded using the host Lisp's @|load| function.
1f7d590d
MW
625
626Note that Sod doesn't attempt to compile Lisp files, or even to look for
627existing compiled files. The right way to package a substantial extension to
628the Sod translator is to provide the extension as a standard ASDF system (or
ba375a80 629similar) and leave a dropping @|foo-extension.lisp| in the module path saying
1f7d590d 630something like
92e590b0
MW
631\begin{prog}
632 (asdf:load-system :foo-extension)
633\end{prog}
1f7d590d
MW
634which will arrange for the extension to be compiled if necessary.
635
636(This approach means that the language doesn't need to depend on any
637particular system definition facility. It's bad enough already that it
638depends on Common Lisp.)
639
68a620ab 640\subsubsection{Lisp escapes}
1f7d590d
MW
641\begin{grammar}
642<lisp-definition> ::= "lisp" <s-expression> ";"
643\end{grammar}
644
645The @<s-expression> is evaluated immediately. It can do anything it likes.
646
eae50115
MW
647\begin{boxy}[Warning!]
648 This means that hostile Sod modules are a security hazard. Lisp code can
649 read and write files, start other programs, and make network connections.
650 Don't install Sod modules from sources that you don't trust.\footnote{%
651 Presumably you were going to run the corresponding code at some point, so
652 this isn't as unusually scary as it sounds. But please be careful.} %
653\end{boxy}
1f7d590d 654
68a620ab 655\subsubsection{Declaring type names}
1f7d590d
MW
656\begin{grammar}
657<typename-definition> ::=
ea08dc56 658 "typename" <list>$[\mbox{@<identifier>}]$ ";"
1f7d590d
MW
659\end{grammar}
660
661Each @<identifier> is declared as naming a C type. This is important because
662the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
663done by distinguishing type names from other identifiers.
664
665Don't declare class names using @"typename"; use @"class" forward
666declarations instead.
667
68a620ab
MW
668
669\subsection{Literal code} \label{sec:syntax.module.literal}
1f7d590d
MW
670
671\begin{grammar}
672<code-definition> ::=
40f2456e 673 "code" <reason> ":" <item-name> @[<constraints>@]
1f7d590d 674 "{" <c-fragment> "}"
54ea6ee8
MW
675\alt
676 "code" <reason> ":" <constraints> ";"
1f7d590d 677
40f2456e
MW
678<reason> ::= <identifier>
679
ea08dc56 680<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
1f7d590d 681
4fc52153
MW
682<constraint> ::= @<item-name>^+
683
b04739d0 684<item-name> ::= <identifier> | "(" @<identifier>^+ ")"
1f7d590d
MW
685\end{grammar}
686
687The @<c-fragment> will be output unchanged to one of the output files.
688
689The first @<identifier> is the symbolic name of an output file. Predefined
ba375a80 690output file names are @|c| and @|h|, which are the implementation code and
1f7d590d
MW
691header file respectively; other output files can be defined by extensions.
692
4fc52153
MW
693Output items are named with a sequence of identifiers, separated by
694whitespace, and enclosed in parentheses. As an abbreviation, a name
695consisting of a single identifier may be written as just that identifier,
696without the parentheses.
1f7d590d
MW
697
698The @<constraints> provide a means for specifying where in the output file
699the output item should appear. (Note the two kinds of square brackets shown
700in the syntax: square brackets must appear around the constraints if they are
701present, but the constraints may be omitted.) Each comma-separated @<constraint>
4fc52153
MW
702is a sequence of names of output items, and indicates that the output items
703must appear in the order given -- though the translator is free to insert
704additional items in between them. (The particular output items needn't be
705defined already -- indeed, they needn't be defined ever.)
1f7d590d 706
ba375a80
MW
707There is a predefined output item @|includes| in both the @|c| and @|h|
708output files which is a suitable place for inserting @|\#include|
1f7d590d
MW
709preprocessor directives in order to declare types and functions for use
710elsewhere in the generated output files.
711
1f7d590d 712
00d59354
MW
713\subsection{Static instance definitions} \label{sec:syntax.module.instance}
714
715\begin{grammar}
716<static-instance-definition> ::=
717 "instance" <identifier> <identifier>
718 @[":" <list>$[\mbox{@<instance-initializer>}]$@] ";"
719
720<instance-initializer> ::= <identifier> "." <identifier> "=" <c-fragment>
721\end{grammar}
722
723Properties:
724\begin{description}
725\item[@"extern"] A boolean flag: if true, then the instance is public, and
726 will be declared in the output header file; if false (the default), then
727 the instance is only available to code defined within the module.
728\item[@"const"] A boolean flag: if true (the default), then the instance is
729 read-only, and may end up in write-protected storage at run-time; if false,
730 then the instance will be writable.
731\end{description}
732
733
68a620ab 734\subsection{Class definitions} \label{sec:syntax.module.class}
1f7d590d
MW
735
736\begin{grammar}
737<class-definition> ::= <class-forward-declaration>
738\alt <full-class-definition>
739\end{grammar}
740
68a620ab 741\subsubsection{Forward declarations}
1f7d590d
MW
742\begin{grammar}
743<class-forward-declaration> ::= "class" <identifier> ";"
744\end{grammar}
745
746A @<class-forward-declaration> informs Sod that an @<identifier> will be used
747to name a class which is currently undefined. Forward declarations are
748necessary in order to resolve certain kinds of circularity. For example,
7119ea4e 749\begin{prog}
020b9e2b
MW
750class Sub; \\+
751
fd040f06 752class Super: SodObject \{ \\ \ind
020b9e2b
MW
753 Sub *sub; \-\\
754\}; \\+
755
fd040f06 756class Sub: Super \{ \\ \ind
020b9e2b 757 /* \dots\ */ \-\\
7119ea4e
MW
758\};
759\end{prog}
1f7d590d 760
68a620ab 761\subsubsection{Full class definitions}
1f7d590d
MW
762\begin{grammar}
763<full-class-definition> ::=
764 @[<properties>@]
ea08dc56
MW
765 "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
766 "{" @<properties-class-item>^* "}"
1f7d590d 767
391c5a34
MW
768<properties-class-item> ::= @[<properties>@] <class-item>
769
770<class-item> ::= <slot-item>
771\alt <initializer-item>
b2983f35 772\alt <initarg-item>
a42893dd 773\alt <fragment-item>
1f7d590d
MW
774\alt <message-item>
775\alt <method-item>
fa548bb1 776\alt <other-item> \fixme{undefined}
1f7d590d
MW
777\end{grammar}
778
779A full class definition provides a complete description of a class.
780
781The first @<identifier> gives the name of the class. It is an error to
782give the name of an existing class (other than a forward-referenced class),
783or an existing type name. It is conventional to give classes `MixedCase'
784names, to distinguish them from other kinds of identifiers.
785
ea08dc56
MW
786The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
787class. It is an error if any of these @<identifier>s does not name a defined
8d952432
MW
788class. The superclass list is required, and must not be empty; listing
789@|SodObject| as your class's superclass is a good choice if nothing else
1aedcc8a
MW
790seems suitable. A class with no direct superclasses is called a \emph{root
791class}. It is not possible to define a root class in the Sod language: you
792must use Lisp to do this, and it's quite involved.
1f7d590d
MW
793
794The @<properties> provide additional information. The standard class
795properties are as follows.
796\begin{description}
ba375a80 797\item[@|lisp_class|] The name of the Lisp class to use within the translator
1f7d590d 798 to represent this class. The property value must be an identifier; the
ba375a80 799 default is @|sod_class|. Extensions may define classes with additional
1f7d590d 800 behaviour, and may recognize additional class properties.
ba375a80 801\item[@|metaclass|] The name of the Sod metaclass for this class. In the
1f7d590d
MW
802 generated code, a class is itself an instance of another class -- its
803 \emph{metaclass}. The metaclass defines which slots the class will have,
804 which messages it will respond to, and what its behaviour will be when it
805 receives them. The property value must be an identifier naming a defined
ba375a80 806 subclass of @|SodClass|. The default metaclass is @|SodClass|.
9cd46aef 807 See \xref{sec:concepts.metaclasses} for more details.
ba375a80 808\item[@|nick|] A nickname for the class, to be used to distinguish it from
1f7d590d
MW
809 other classes in various limited contexts. The property value must be an
810 identifier; the default is constructed by forcing the class name to
811 lower-case.
812\end{description}
813
814The class body consists of a sequence of @<class-item>s enclosed in braces.
815These items are discussed in the following sections.
816
68a620ab 817\subsubsection{Slot items}
1f7d590d
MW
818\begin{grammar}
819<slot-item> ::=
ea08dc56 820 @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
1f7d590d 821
0bc19f1c 822<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
1f7d590d
MW
823\end{grammar}
824
825A @<slot-item> defines one or more slots. All instances of the class and any
826subclass will contain these slots, with the names and types given by the
827@<declaration-specifier>s and the @<declarator>s. Slot declarators may not
bc7dff5c 828contain dotted names.
1f7d590d
MW
829
830It is not possible to declare a slot with function type: such an item is
831interpreted as being a @<message-item> or @<method-item>. Pointers to
832functions are fine.
833
8399be6f
MW
834Properties:
835\begin{description}
ba375a80 836\item[@|slot_class|] A symbol naming the Lisp class to use to represent the
8399be6f 837 direct slot.
ba375a80 838\item[@|initarg|] An identifier naming an initialization argument which can
8399be6f
MW
839 be used to provide a value for the slot. See
840 \xref{sec:concepts.lifecycle.birth} for the details.
ba375a80
MW
841\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
842 the initarg. Only permitted if @|initarg| is also set.
8399be6f
MW
843\end{description}
844
1f7d590d
MW
845An @<initializer>, if present, is treated as if a separate
846@<initializer-item> containing the slot name and initializer were present.
847For example,
7119ea4e 848\begin{prog}
020b9e2b 849[nick = eg] \\
fd040f06 850class Example: Super \{ \\ \ind
020b9e2b 851 int foo = 17; \-\\
7119ea4e
MW
852\};
853\end{prog}
1f7d590d 854means the same as
7119ea4e 855\begin{prog}
020b9e2b 856[nick = eg] \\
fd040f06 857class Example: Super \{ \\ \ind
020b9e2b
MW
858 int foo; \\
859 eg.foo = 17; \-\\
7119ea4e
MW
860\};
861\end{prog}
1f7d590d 862
68a620ab 863\subsubsection{Initializer items}
1f7d590d 864\begin{grammar}
391c5a34 865<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
1f7d590d 866
b2983f35 867<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
1f7d590d 868
054e8f8f 869<initializer> ::= <c-fragment>
1f7d590d
MW
870\end{grammar}
871
872An @<initializer-item> provides an initial value for one or more slots. If
ba375a80 873prefixed by @|class|, then the initial values are for class slots (i.e.,
1f7d590d
MW
874slots of the class object itself); otherwise they are for instance slots.
875
bc7dff5c
MW
876The first component of the @<dotted-name> must be the nickname of one of the
877class's superclasses (including itself); the second must be the name of a
878slot defined in that superclass.
1f7d590d 879
8399be6f
MW
880Properties:
881\begin{description}
ba375a80 882\item[@|initializer_class|] A symbol naming the Lisp class to use to
8399be6f 883 represent the initializer.
ba375a80 884\item[@|initarg|] An identifier naming an initialization argument which can
8399be6f
MW
885 be used to provide a value for the slot. See
886 \xref{sec:concepts.lifecycle.birth} for the details. An initializer item
887 must have either an @|initarg| property, or an initializer expression, or
888 both.
ba375a80
MW
889\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
890 the initarg. Only permitted if @|initarg| is also set.
8399be6f 891\end{description}
b2983f35
MW
892
893Each class may define at most one initializer item with an explicit
894initializer expression for a given slot.
895
896\subsubsection{Initarg items}
897\begin{grammar}
898<initarg-item> ::=
899 "initarg"
900 @<declaration-specifier>^+
901 <list>$[\mbox{@<init-declarator>}]$ ";"
902\end{grammar}
0e5c0b9e
MW
903Properties:
904\begin{description}
ba375a80 905\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
0e5c0b9e
MW
906 the initarg.
907\end{description}
b2983f35 908
a42893dd
MW
909\subsubsection{Fragment items}
910\begin{grammar}
911<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
912
913<fragment-kind> ::= "init" | "teardown"
914\end{grammar}
915
68a620ab 916\subsubsection{Message items}
1f7d590d
MW
917\begin{grammar}
918<message-item> ::=
391c5a34
MW
919 @<declaration-specifier>^+
920 <keyword-declarator>$[\mbox{@<identifier>}]$
921 @[<method-body>@]
1f7d590d 922\end{grammar}
8399be6f
MW
923Properties:
924\begin{description}
ba375a80 925\item[@|message_class|] A symbol naming the Lisp class to use to represent
8399be6f 926 the message.
e895be21
MW
927\item[@|readonly|] A boolean indicating whether the message guarantees not to
928 modify its receiver. If this is true, the receiver will be declared
929 @"const".
ba375a80 930\item[@|combination|] A keyword naming the aggregating method combination to
8399be6f 931 use.
ba375a80 932\item[@|most_specific|] A keyword, either @`first' or @`last', according to
8399be6f
MW
933 whether the most specific applicable method should be invoked first or
934 last.
935\end{description}
936
937Properties for the @|custom| aggregating method combination:
938\begin{description}
ba375a80 939\item[@|retvar|] An identifier for the return value from the effective
8399be6f
MW
940 method. The default is @|sod__ret|. Only permitted if the message return
941 type is not @|void|.
ba375a80 942\item[@|valvar|] An identifier holding each return value from a direct method
8399be6f 943 in the effective method. The default is @|sod__val|. Only permitted if
ba375a80
MW
944 the method return type (see @|methty| below) is not @|void|.
945\item[@|methty|] A C type, which is the return type for direct methods of
054e8f8f 946 this message. The default is the return type of the message.
ba375a80 947\item[@|decls|] A code fragment containing declarations to be inserted at the
8399be6f 948 head of the effective method body. The default is to insert nothing.
ba375a80 949\item[@|before|] A code fragment containing initialization to be performed at
8399be6f
MW
950 the beginning of the effective method body. The default is to insert
951 nothing.
ba375a80 952\item[@|empty|] A code fragment executed if there are no primary methods;
b07535d8
MW
953 it should usually store a suitable (identity) value in @<retvar>. The
954 default is not to emit an effective method at all if there are no primary
955 methods.
ba375a80
MW
956\item[@|first|] A code fragment to set the return value after calling the
957 first applicable direct method. The default is to use the @|each|
8399be6f 958 fragment.
ba375a80
MW
959\item[@|each|] A code fragment to set the return value after calling a direct
960 method. If @|first| is also set, then it is used after the first direct
8399be6f
MW
961 method instead of this. The default is to insert nothing, which is
962 probably not what you want.
ba375a80 963\item[@|after|] A code fragment inserted at the end of the effective method
8399be6f 964 body. The default is to insert nothing.
ba375a80 965\item[@|count|] An identifier naming a variable to be declared in the
8399be6f
MW
966 effective method body, of type @|size_t|, holding the number of applicable
967 methods. The default is not to provide such a variable.
968\end{description}
1f7d590d 969
68a620ab 970\subsubsection{Method items}
1f7d590d
MW
971\begin{grammar}
972<method-item> ::=
391c5a34
MW
973 @<declaration-specifier>^+
974 <keyword-declarator>$[\mbox{@<dotted-name>}]$
ea08dc56 975 <method-body>
1f7d590d
MW
976
977<method-body> ::= "{" <c-fragment> "}" | "extern" ";"
978\end{grammar}
8399be6f
MW
979Properties:
980\begin{description}
ba375a80 981\item[@|method_class|] A symbol naming the Lisp class to use to represent
8399be6f 982 the direct method.
ba375a80 983\item[@|role|] A keyword naming the direct method's rôle. For the built-in
8399be6f
MW
984 `simple' message classes, the acceptable rôle names are @|before|,
985 @|after|, and @|around|. By default, a primary method is constructed.
986\end{description}
1f7d590d 987
1f7d590d
MW
988%%%----- That's all, folks --------------------------------------------------
989
990%%% Local variables:
991%%% mode: LaTeX
992%%% TeX-master: "sod.tex"
993%%% TeX-PDF-mode: t
994%%% End: