doc/syntax.tex: Clarify the term `root class'.
[sod] / doc / syntax.tex
CommitLineData
1f7d590d
MW
1%%% -*-latex-*-
2%%%
3%%% Module syntax
4%%%
5%%% (c) 2015 Straylight/Edgeware
6%%%
7
8%%%----- Licensing notice ---------------------------------------------------
9%%%
e0808c47 10%%% This file is part of the Sensible Object Design, an object system for C.
1f7d590d
MW
11%%%
12%%% SOD is free software; you can redistribute it and/or modify
13%%% it under the terms of the GNU General Public License as published by
14%%% the Free Software Foundation; either version 2 of the License, or
15%%% (at your option) any later version.
16%%%
17%%% SOD is distributed in the hope that it will be useful,
18%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
19%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20%%% GNU General Public License for more details.
21%%%
22%%% You should have received a copy of the GNU General Public License
23%%% along with SOD; if not, write to the Free Software Foundation,
24%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26\chapter{Module syntax} \label{ch:syntax}
27
28%%%--------------------------------------------------------------------------
68a620ab 29\section{Lexical syntax} \label{sec:syntax.lex}
1f7d590d
MW
30
31Whitespace and comments are discarded. The remaining characters are
32collected into tokens according to the following syntax.
33
34\begin{grammar}
35<token> ::= <identifier>
36\alt <string-literal>
37\alt <char-literal>
38\alt <integer-literal>
39\alt <punctuation>
40\end{grammar}
41
42This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
43munch} rule: at each stage we take the longest sequence of characters which
44could be a token.
45
68a620ab
MW
46
47\subsection{Identifiers} \label{sec:syntax.lex.id}
1f7d590d
MW
48
49\begin{grammar}
50<identifier> ::= <id-start-char> @<id-body-char>^*
51
52<id-start-char> ::= <alpha-char> | "_"
53
54<id-body-char> ::= <id-start-char> @! <digit-char>
55
56<alpha-char> ::= "A" | "B" | \dots\ | "Z"
57\alt "a" | "b" | \dots\ | "z"
58\alt <extended-alpha-char>
59
60<digit-char> ::= "0" | <nonzero-digit-char>
61
cee29adc 62<nonzero-digit-char> ::= "1" | "2" $| \ldots |$ "9"
1f7d590d
MW
63\end{grammar}
64
65The precise definition of @<alpha-char> is left to the function
92e590b0
MW
66@|alpha-char-p| in the hosting Lisp system. For portability, programmers are
67encouraged to limit themselves to the standard ASCII letters.
1f7d590d
MW
68
69There are no reserved words at the lexical level, but the higher-level syntax
70recognizes certain identifiers as \emph{keywords} in some contexts. There is
71also an ambiguity (inherited from C) in the declaration syntax which is
72settled by distinguishing type names from other identifiers at a lexical
73level.
74
68a620ab
MW
75
76\subsection{String and character literals} \label{sec:syntax.lex.string}
1f7d590d
MW
77
78\begin{grammar}
79<string-literal> ::= "\"" @<string-literal-char>^* "\""
80
81<char-literal> ::= "'" <char-literal-char> "'"
82
83<string-literal-char> ::= any character other than "\\" or "\""
84\alt "\\" <char>
85
86<char-literal-char> ::= any character other than "\\" or "'"
87\alt "\\" <char>
88
89<char> ::= any single character
90\end{grammar}
91
92The syntax for string and character literals differs from~C. In particular,
93escape sequences such as @`\textbackslash n' are not recognized. The use
94of string and character literals in Sod, outside of C~fragments, is limited,
95and the simple syntax seems adequate. For the sake of future compatibility,
96the use of character sequences which resemble C escape sequences is
97discouraged.
98
a59e2904
MW
99
100\subsection{Integer literals} \label{sec:syntax.lex.int}
1f7d590d
MW
101
102\begin{grammar}
103<integer-literal> ::= <decimal-integer>
104\alt <binary-integer>
105\alt <octal-integer>
106\alt <hex-integer>
107
cc0bcf39 108<decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
1f7d590d
MW
109
110<binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
111
112<binary-digit-char> ::= "0" | "1"
113
114<octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
115
cee29adc 116<octal-digit-char> ::= "0" | "1" $| \ldots |$ "7"
1f7d590d
MW
117
118<hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
119
120<hex-digit-char> ::= <digit-char>
121\alt "A" | "B" | "C" | "D" | "E" | "F"
122\alt "a" | "b" | "c" | "d" | "e" | "f"
123\end{grammar}
124
125Sod understands only integers, not floating-point numbers; its integer syntax
126goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
127binary. However, length and signedness indicators are not permitted.
128
68a620ab
MW
129
130\subsection{Punctuation} \label{sec:syntax.lex.punct}
1f7d590d
MW
131
132\begin{grammar}
133<punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
134\end{grammar}
135
68a620ab
MW
136
137\subsection{Comments} \label{sec:syntax.lex.comment}
1f7d590d
MW
138
139\begin{grammar}
140<comment> ::= <block-comment>
141\alt <line-comment>
142
143<block-comment> ::=
144 "/*"
145 @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
146 @<star>^*
147 "*/"
148
149<star> ::= "*"
150
151<not-star> ::= any character other than "*"
152
153<not-star-or-slash> ::= any character other than "*" or "/"
154
20f9c213 155<line-comment> ::= "/\,/" @<not-newline>^* <newline>
1f7d590d
MW
156
157<newline> ::= a newline character
158
159<not-newline> ::= any character other than newline
160\end{grammar}
161
20f9c213
MW
162Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
163@|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
164ignored.
1f7d590d 165
68a620ab
MW
166
167\subsection{Special nonterminals} \label{sec:syntax.lex.special}
1f7d590d
MW
168
169Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
170two special nonterminals occur in the module syntax.
171
68a620ab 172\subsubsection{S-expressions}
1f7d590d
MW
173\begin{grammar}
174<s-expression> ::= an S-expression, as parsed by the Lisp reader
175\end{grammar}
176
177When an S-expression is expected, the Sod parser simply calls the host Lisp
68a620ab
MW
178system's @|read| function. Sod modules are permitted to modify the read
179table to extend the S-expression syntax.
1f7d590d
MW
180
181S-expressions are self-delimiting, so no end-marker is needed.
182
68a620ab 183\subsubsection{C fragments}
1f7d590d
MW
184\begin{grammar}
185<c-fragment> ::= a sequence of C tokens, with matching brackets
186\end{grammar}
187
188Sequences of C code are simply stored and written to the output unchanged
189during translation. They are read using a simple scanner which nonetheless
190understands C comments and string and character literals.
191
192A C fragment is terminated by one of a small number of delimiter characters
a5b9e2be
MW
193determined by the immediately surrounding context -- usually some kind of
194bracket. The first such delimiter character which is not enclosed in
195brackets, braces or parentheses ends the fragment.
1f7d590d 196
68a620ab 197%%%--------------------------------------------------------------------------
a58527f3
MW
198\section{C types} \label{sec:syntax.type}
199
200Sod's syntax for C types closely mirrors the standard C syntax. A C type has
201two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
202Sod, a type must contain at least one @<declaration-specifier> (i.e.,
203`implicit @|int|' is forbidden), and storage-class specifiers are not
204recognized.
205
206
207\subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
208
209\begin{grammar}
210<declaration-specifier> ::= <type-name>
211\alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
212\alt "void" | "char" | "int" | "float" | "double"
213\alt "short" | "long"
214\alt "signed" | "unsigned"
215\alt "bool" | "_Bool"
216\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
217\alt <qualifier>
218\alt <storage-specifier>
219\alt <atomic-type>
220\alt <other-declspec>
221
222<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
223
224<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
225
226<atomic-type> ::= <atomic> "(" <plain-type> ")"
227
228<atomic> ::= "atomic" | "_Atomic"
229
230<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
231
232<alignas> ::= "alignas" | "_Alignas"
233
234<type-name> ::= <identifier>
235\end{grammar}
236
237Declaration specifiers may appear in any order. However, not all
238combinations are permitted. A sequence of declaration specifiers must consist of zero or
239more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
240following, up to reordering:
241\begin{itemize}
242\item @<type-name>;
243\item @<atomic-type>;
244\item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
245\item @"void";
246\item @"_Bool", @"bool";
247\item @"char"; @"unsigned char"; @"signed char";
248\item @"short", @"signed short", @"short int", @"signed short int";
249 @"unsigned short", @"unsigned short int";
250\item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
251\item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
252 long", @"unsigned long int";
253\item @"long long", @"signed long long", @"long long int", @"signed long long
254 int"; @"unsigned long long", @"unsigned long long int";
255\item @"float"; @"double"; @"long double";
256\item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
257 imaginary"; @"long double _Imaginary", @"long double imaginary";
258\item @"float _Complex", @"float complex"; @"double _Complex", @"double
259 complex"; @"long double _Complex", @"long double complex".
260\end{itemize}
261All of these have their usual C meanings. Within an item, spellings separated by commas (rather than semicolons) mean
262the same thing, and Sod will not preserve the distinction.
263
264Almost all of these mean the same as they do in C. There are some minor
265differences:
266\begin{itemize}
267\item In C, the `tag' namespace is shared between @|struct|, @|union|, and
268 @|enum|; Sod has three distinct namespaces for tags. This may be fixed in
269 the future.
270\item The @<other-declspec> production is a syntactic extension point, where
271 extensions can introduce their own additions to the type system.
272\end{itemize}
273
274C standards from C99 onwards have tended to introduce new keywords beginning
275with an underscore followed by an uppercase letter, so as to avoid conflicts
276with existing code. More conventional spellings are then provided by macros
277in new header files. For example, C99 introduced @"_Bool", and a header file
278@|<stdbool.h>| which defines the macro @|bool|. Sod recognizes both the ugly
279underscore names and the more conventional macro names on input, but always
280emits the ugly names. This doesn't cause a compatibility problem in Sod,
281because Sod's parser recognizes keywords only in the appropriate context.
282For example, the (ill-advised) slot declaration
283\begin{prog}
284 bool bool;
285\end{prog}
286is completely acceptable, and will cause the C structure member
287\begin{prog}
288 \_Bool bool;
289\end{prog}
290to be emitted on output, which will be acceptable to C as long as
291@|<stdbool.h>| is not included.
292
293A @<type-name> is an identifier which has been declared as being a type name,
294using the @"typename" or @"class" definitions. The following type names are
295defined in the built-in module.
296\begin{itemize}
297\item @|va_list|
298\item @|size_t|
299\item @|ptrdiff_t|
300\item @|wchar_t|
301\end{itemize}
302
303
304\subsection{Declarators} \label{sec:syntax.type.declarator}
305
306\begin{grammar}
307<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
308
309<primary-declarator>$[k, a]$ ::= $k$
310\alt "(" <primary-declarator>$[k, a]$ ")"
311\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
312
313<pointer> ::= "*" @<qualifier>^*
314
315<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
316\alt "(" $a$ ")"
317
318<argument-list> ::= $\epsilon$ | "\dots"
319\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
320
321<argument> ::= @<declaration-specifier>^+ <argument-declarator>
322
323<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
324
325<argument-declarator> ::=
326 <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
327
328<simple-declarator> ::=
329 <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
330\end{grammar}
331
332The declarator syntax is taken from C, but with some differences.
333\begin{itemize}
334\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
335 closing square bracket. This allows array dimensions to contain arbitrary
336 constant expressions.
337\item A declarator may have either a single @<identifier> at its centre or a
338 pair of @<identifier>s separated by a @`.'; this is used to refer to
339 slots or messages defined in superclasses.
340\end{itemize}
341The remaining differences are (I hope) a matter of presentation rather than
342substance.
343
344There is additional syntax to support messages and methods which accept
345keyword arguments.
346
347\begin{grammar}
348<keyword-argument> ::= <argument> @["=" <c-fragment>@]
349
350<keyword-argument-list> ::=
351 @[<list>$[\mbox{@<argument>}]$@]
352 "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
353
354<method-argument-list> ::= <argument-list> @! <keyword-argument-list>
355
356<dotted-name> ::= <identifier> "." <identifier>
357
358<keyword-declarator>$[k]$ ::=
359 <declarator>$[k, \mbox{@<method-argument-list>}]$
360\end{grammar}
361
362%%%--------------------------------------------------------------------------
b0d55f11
MW
363\section{Properties} \label{sec:syntax.prop}
364
365\begin{grammar}
366<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
367
368<property> ::= <identifier> "=" <expression>
369
370<expression> ::= <term> | <expression> "+" <term> | <expression> "--" <term>
371
372<term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
373
374<factor> ::= <primary> | "+" <factor> | "--" <factor>
375
376<primary> ::=
377 <integer-literal> | <string-literal> | <char-literal> | <identifier>
378\alt "<" <plain-type> ">"
379\alt "{" <c-fragment> "}"
380\alt "?" <s-expression>
381\alt "(" <expression> ")"
382\end{grammar}
383
384\emph{Property sets} are a means for associating miscellaneous information
385with compile-time metaobjects such as modules, classes, messages, methods,
386slots, and initializers. By using property sets, additional information can
387be passed to extensions without the need to introduce idiosyncratic syntax.
388(That said, extensions can add additional first-class syntax, if necessary.)
389
390An error is reported if an unrecognized property is associated with an
391object.
392
393
394\subsection{Property values} \label{sec:syntax.prop.value}
395
396A property has a name, given as an @<identifier>, and a value computed by
397evaluating an @<expression>. The value can be one of a number of types.
398
399\begin{itemize}
400
401\item An @<integer-literal> denotes a value of type @|int|.
402
403\item Similarly @<string-literal> and @<char-literal> denote @|string| and
404 @|char| values respectively. Note that, as properties, characters are
405 quite distinct from integers, whereas in C, a character literal denotes a
406 value of type @|int|.
407
408\item There are no variables in the property-value syntax. Rather, an
409 @<identifier> denotes that identifier, as a value of type @|id|.
410
411\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
412 between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
413 void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
414
415\item A @<c-fragment> within braces denotes the tokens between (and not
416 including) the braces, as a value of type @|c-fragment|.
417
418\end{itemize}
419
420As shown in the grammar, there are four binary operators, @"+" (addition),
421@"--" (subtraction), @"*" (multiplication), and @"/" (division);
422multiplication and division have higher precedence than addition and
423subtraction, and operators of the same precedence associate left-to-right.
424There are also unary @"+" (no effect) and @"--" (negation) operators, with
425higher precedence. All of the above operators act only on integer operands
426and yield integer results. (Although the unary @"+" operator yields its
427operand unchanged, an error is still reported if it is applied to a
428non-integer value.) There are currently no bitwise, logical, or comparison
429operators.
430
431Finally, an S-expression preceded by @|?| causes the expression to be read in
432the current package (which is always @|sod-user| at the start of a module)
433and immediately evaluated (using @|eval|); the resulting value is converted
e8d70b1b
MW
434into a property value using the \descref{gf}{decode-property}[generic
435function].
b0d55f11
MW
436
437
438\subsection{Property output types and coercions}
439\label{sec:syntax.prop.coerce}
440
441When a property value is inspected by the Sod translator, or an extension, it
442is \emph{coerced} so as to conform to a requested output type. This coercion
e8d70b1b
MW
443process is performed by the \descref{gf}{coerce-property-value}[generic
444function], and additional output types and coercions can be defined by
b0d55f11
MW
445extensions. The built-in output types, and their coercions from the value types listed
446above, are as follows.
447
448\begin{itemize}
449
450\item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
451 correspond to the like-named value types described above. No coercions to
452 these output types are defined for the described value types.\footnote{%
453 There is a coercion to @|id| from the value type @|symbol|, but it is
454 only possible to generate a property value of type @|symbol| using Lisp.}
455
456\item The output type @|type| denotes a C type, as does the value type
457 @|type|. In addition, a value of type @|id| can be coerced to a C type if
458 it is the name of a class, a type name explicitly declared by @|typename|,
459 or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
460 @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
461 or @|va_list|.
462
463\item The @|boolean| output type denotes a boolean value, which may be either
464 true or false. A value of type @|id| is considered true if it is @|true|,
465 @|t|, @|yes|, @|on|, or @|verily|; or false if it is @|false|, @|nil|,
466 @|no|, @|off|, or @|nowise|; it is erroneous to provide any other
467 identifier where a boolean value is wanted. A value of type @|int| is
468 considered true if it is nonzero, or false if it is zero.
469
470\item The @|symbol| output type denotes a Lisp symbol.
471
472 A value of type @|id| is coerced to a symbol as follows. First, the
473 identifier name is subjected to \emph{case inversion}: if all of the
474 letters in the name have the same case, either upper or lower, then they
475 are replaced with the corresponding letters in the opposite case, lower or
476 upper; if the name contains letters of both cases, then it is not changed.
477 For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
478 remains as it is. Second, the name is subjected to \emph{separator
81bc32d4 479 switching}: all underscores in the name are replaced with hyphens (and
b0d55f11
MW
480 \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
481 first place). Finally, the resulting name is interned in the current
482 package, which will usually be @|sod-user| unless changed explicitly by the
483 module.
484
485 A value of type @|string| is coerced to a symbol as follows. If the string
486 contains no colons, then it is case-inverted (but not separator-switched)
487 and interned in the current package. Otherwise, the string either has the
488 form $p @|:| q$, where $q$ does not begin with a colon (the
489 \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
490 where $p$ does not contain a colon. Both $p$ and $q$ are case-inverted
491 (but not separator-switched). If $p$ does not name a package, then an
492 error is reported; as a special case, if $p$ is empty, then it is
493 considered to name the @|keyword| package. Otherwise, $q$ is looked up as
494 a symbol name in package~$p$; in the single-colon case, if the symbol is
495 not an exported symbol in package~$p$, then an error is reported; in the
496 double-colon case, $q$ is interned in package~$p$ (and so there needn't be
497 an exported symbol -- or, indeed, any symbol at all -- named $q$
498 beforehand).
499
500\item The @|keyword| output type denotes symbols within the @|keyword|
501 package. Values of type @|id| or @|string| can be coerced to a @|keyword|
502 in the same way as to a @|symbol|, as described above, only the converted
503 name is looked up in the @|keyword| package rather than the current
504 package. (A @|string| can override this by specifying an explicit package
505 name, but this is unlikely to be very helpful.)
506
507\end{itemize}
508
509%%%--------------------------------------------------------------------------
68a620ab 510\section{Module syntax} \label{sec:syntax.module}
1f7d590d
MW
511
512\begin{grammar}
513<module> ::= @<definition>^*
514
0df66309
MW
515<definition> ::= <property-definition> \fixme{undefined}
516\alt <import-definition>
1f7d590d
MW
517\alt <load-definition>
518\alt <lisp-definition>
519\alt <code-definition>
520\alt <typename-definition>
521\alt <class-definition>
fa548bb1 522\alt <other-definition> \fixme{undefined}
1f7d590d
MW
523\end{grammar}
524
eb41dc76
MW
525A @<module> is the top-level syntactic item: a source file presented to Sod
526is expected to conform with the @<module> syntax.
527
528A module consists of a sequence of definitions.
1f7d590d 529
6390b845 530\fixme{describe syntax; expand}
8399be6f
MW
531Properties:
532\begin{description}
ba375a80 533\item[@|module_class|] A symbol naming the Lisp class to use to
8399be6f 534 represent the module.
ba375a80 535\item[@|guard|] An identifier to use as the guard symbol used to prevent
8399be6f
MW
536 multiple inclusion in the header file.
537\end{description}
538
539
68a620ab 540\subsection{Simple definitions} \label{sec:syntax.module.simple}
1f7d590d 541
68a620ab 542\subsubsection{Importing modules}
1f7d590d
MW
543\begin{grammar}
544<import-definition> ::= "import" <string> ";"
545\end{grammar}
546
547The module named @<string> is processed and its definitions made available.
548
549A search is made for a module source file as follows.
550\begin{itemize}
551\item The module name @<string> is converted into a filename by appending
552 @`.sod', if it has no extension already.\footnote{%
92e590b0
MW
553 Technically, what happens is @|(merge-pathnames name (make-pathname :type
554 "SOD" :case :common))|, so exactly what this means varies according to
555 the host system.} %
1f7d590d
MW
556\item The file is looked for relative to the directory containing the
557 importing module.
558\item If that fails, then the file is looked for in each directory on the
559 module search path in turn.
560\item If the file still isn't found, an error is reported and the import
561 fails.
562\end{itemize}
563At this point, if the file has previously been imported, nothing further
564happens.\footnote{%
92e590b0 565 This check is done using @|truename|, so it should see through simple
1f7d590d
MW
566 tricks like symbolic links. However, it may be confused by fancy things
567 like bind mounts and so on.} %
568
569Recursive imports, either direct or indirect, are an error.
570
68a620ab 571\subsubsection{Loading extensions}
1f7d590d
MW
572\begin{grammar}
573<load-definition> ::= "load" <string> ";"
574\end{grammar}
575
576The Lisp file named @<string> is loaded and evaluated.
577
578A search is made for a Lisp source file as follows.
579\begin{itemize}
580\item The name @<string> is converted into a filename by appending @`.lisp',
581 if it has no extension already.\footnote{%
92e590b0
MW
582 Technically, what happens is @|(merge-pathnames name (make-pathname :type
583 "LISP" :case :common))|, so exactly what this means varies according to
584 the host system.} %
1f7d590d
MW
585\item A search is then made in the same manner as for module imports
586 (\xref{sec:syntax.module}).
587\end{itemize}
92e590b0 588If the file is found, it is loaded using the host Lisp's @|load| function.
1f7d590d
MW
589
590Note that Sod doesn't attempt to compile Lisp files, or even to look for
591existing compiled files. The right way to package a substantial extension to
592the Sod translator is to provide the extension as a standard ASDF system (or
ba375a80 593similar) and leave a dropping @|foo-extension.lisp| in the module path saying
1f7d590d 594something like
92e590b0
MW
595\begin{prog}
596 (asdf:load-system :foo-extension)
597\end{prog}
1f7d590d
MW
598which will arrange for the extension to be compiled if necessary.
599
600(This approach means that the language doesn't need to depend on any
601particular system definition facility. It's bad enough already that it
602depends on Common Lisp.)
603
68a620ab 604\subsubsection{Lisp escapes}
1f7d590d
MW
605\begin{grammar}
606<lisp-definition> ::= "lisp" <s-expression> ";"
607\end{grammar}
608
609The @<s-expression> is evaluated immediately. It can do anything it likes.
610
eae50115
MW
611\begin{boxy}[Warning!]
612 This means that hostile Sod modules are a security hazard. Lisp code can
613 read and write files, start other programs, and make network connections.
614 Don't install Sod modules from sources that you don't trust.\footnote{%
615 Presumably you were going to run the corresponding code at some point, so
616 this isn't as unusually scary as it sounds. But please be careful.} %
617\end{boxy}
1f7d590d 618
68a620ab 619\subsubsection{Declaring type names}
1f7d590d
MW
620\begin{grammar}
621<typename-definition> ::=
ea08dc56 622 "typename" <list>$[\mbox{@<identifier>}]$ ";"
1f7d590d
MW
623\end{grammar}
624
625Each @<identifier> is declared as naming a C type. This is important because
626the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
627done by distinguishing type names from other identifiers.
628
629Don't declare class names using @"typename"; use @"class" forward
630declarations instead.
631
68a620ab
MW
632
633\subsection{Literal code} \label{sec:syntax.module.literal}
1f7d590d
MW
634
635\begin{grammar}
636<code-definition> ::=
4fc52153 637 "code" <identifier> ":" <item-name> @[<constraints>@]
1f7d590d
MW
638 "{" <c-fragment> "}"
639
ea08dc56 640<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
1f7d590d 641
4fc52153
MW
642<constraint> ::= @<item-name>^+
643
644<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
1f7d590d
MW
645\end{grammar}
646
647The @<c-fragment> will be output unchanged to one of the output files.
648
649The first @<identifier> is the symbolic name of an output file. Predefined
ba375a80 650output file names are @|c| and @|h|, which are the implementation code and
1f7d590d
MW
651header file respectively; other output files can be defined by extensions.
652
4fc52153
MW
653Output items are named with a sequence of identifiers, separated by
654whitespace, and enclosed in parentheses. As an abbreviation, a name
655consisting of a single identifier may be written as just that identifier,
656without the parentheses.
1f7d590d
MW
657
658The @<constraints> provide a means for specifying where in the output file
659the output item should appear. (Note the two kinds of square brackets shown
660in the syntax: square brackets must appear around the constraints if they are
661present, but the constraints as a whole may be omitted.) Each comma-separated @<constraint>
4fc52153
MW
662is a sequence of names of output items, and indicates that the output items
663must appear in the order given -- though the translator is free to insert
664additional items in between them. (The particular output items needn't be
665defined already -- indeed, they needn't be defined ever.)
1f7d590d 666
ba375a80
MW
667There is a predefined output item @|includes| in both the @|c| and @|h|
668output files which is a suitable place for inserting @|\#include|
1f7d590d
MW
669preprocessor directives in order to declare types and functions for use
670elsewhere in the generated output files.
671
1f7d590d 672
68a620ab 673\subsection{Class definitions} \label{sec:syntax.module.class}
1f7d590d
MW
674
675\begin{grammar}
676<class-definition> ::= <class-forward-declaration>
677\alt <full-class-definition>
678\end{grammar}
679
68a620ab 680\subsubsection{Forward declarations}
1f7d590d
MW
681\begin{grammar}
682<class-forward-declaration> ::= "class" <identifier> ";"
683\end{grammar}
684
685A @<class-forward-declaration> informs Sod that an @<identifier> will be used
686to name a class which is currently undefined. Forward declarations are
687necessary in order to resolve certain kinds of circularity. For example,
7119ea4e 688\begin{prog}
020b9e2b
MW
689class Sub; \\+
690
fd040f06 691class Super: SodObject \{ \\ \ind
020b9e2b
MW
692 Sub *sub; \-\\
693\}; \\+
694
fd040f06 695class Sub: Super \{ \\ \ind
020b9e2b 696 /* \dots\ */ \-\\
7119ea4e
MW
697\};
698\end{prog}
1f7d590d 699
68a620ab 700\subsubsection{Full class definitions}
1f7d590d
MW
701\begin{grammar}
702<full-class-definition> ::=
703 @[<properties>@]
ea08dc56
MW
704 "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
705 "{" @<properties-class-item>^* "}"
1f7d590d 706
391c5a34
MW
707<properties-class-item> ::= @[<properties>@] <class-item>
708
709<class-item> ::= <slot-item>
710\alt <initializer-item>
b2983f35 711\alt <initarg-item>
a42893dd 712\alt <fragment-item>
1f7d590d
MW
713\alt <message-item>
714\alt <method-item>
fa548bb1 715\alt <other-item> \fixme{undefined}
1f7d590d
MW
716\end{grammar}
717
718A full class definition provides a complete description of a class.
719
720The first @<identifier> gives the name of the class. It is an error to
721give the name of an existing class (other than a forward-referenced class),
722or an existing type name. It is conventional to give classes `MixedCase'
723names, to distinguish them from other kinds of identifiers.
724
ea08dc56
MW
725The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
726class. It is an error if any of these @<identifier>s does not name a defined
8d952432
MW
727class. The superclass list is required, and must not be empty; listing
728@|SodObject| as your class's superclass is a good choice if nothing else
1aedcc8a
MW
729seems suitable. A class with no direct superclasses is called a \emph{root
730class}. It is not possible to define a root class in the Sod language: you
731must use Lisp to do this, and it's quite involved.
1f7d590d
MW
732
733The @<properties> provide additional information. The standard class
734properties are as follows.
735\begin{description}
ba375a80 736\item[@|lisp_class|] The name of the Lisp class to use within the translator
1f7d590d 737 to represent this class. The property value must be an identifier; the
ba375a80 738 default is @|sod_class|. Extensions may define classes with additional
1f7d590d 739 behaviour, and may recognize additional class properties.
ba375a80 740\item[@|metaclass|] The name of the Sod metaclass for this class. In the
1f7d590d
MW
741 generated code, a class is itself an instance of another class -- its
742 \emph{metaclass}. The metaclass defines which slots the class will have,
743 which messages it will respond to, and what its behaviour will be when it
744 receives them. The property value must be an identifier naming a defined
ba375a80 745 subclass of @|SodClass|. The default metaclass is @|SodClass|.
9cd46aef 746 See \xref{sec:concepts.metaclasses} for more details.
ba375a80 747\item[@|nick|] A nickname for the class, to be used to distinguish it from
1f7d590d
MW
748 other classes in various limited contexts. The property value must be an
749 identifier; the default is constructed by forcing the class name to
750 lower-case.
751\end{description}
752
753The class body consists of a sequence of @<class-item>s enclosed in braces.
754These items are discussed in the following sections.
755
68a620ab 756\subsubsection{Slot items}
1f7d590d
MW
757\begin{grammar}
758<slot-item> ::=
ea08dc56 759 @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
1f7d590d 760
0bc19f1c 761<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
1f7d590d
MW
762\end{grammar}
763
764A @<slot-item> defines one or more slots. All instances of the class and any
765subclass will contain these slots, with the names and types given by the
766@<declaration-specifier>s and the @<declarator>s. Slot declarators may not
bc7dff5c 767contain dotted names.
1f7d590d
MW
768
769It is not possible to declare a slot with function type: such an item is
770interpreted as being a @<message-item> or @<method-item>. Pointers to
771functions are fine.
772
8399be6f
MW
773Properties:
774\begin{description}
ba375a80 775\item[@|slot_class|] A symbol naming the Lisp class to use to represent the
8399be6f 776 direct slot.
ba375a80 777\item[@|initarg|] An identifier naming an initialization argument which can
8399be6f
MW
778 be used to provide a value for the slot. See
779 \xref{sec:concepts.lifecycle.birth} for the details.
ba375a80
MW
780\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
781 the initarg. Only permitted if @|initarg| is also set.
8399be6f
MW
782\end{description}
783
1f7d590d
MW
784An @<initializer>, if present, is treated as if a separate
785@<initializer-item> containing the slot name and initializer were present.
786For example,
7119ea4e 787\begin{prog}
020b9e2b 788[nick = eg] \\
fd040f06 789class Example: Super \{ \\ \ind
020b9e2b 790 int foo = 17; \-\\
7119ea4e
MW
791\};
792\end{prog}
1f7d590d 793means the same as
7119ea4e 794\begin{prog}
020b9e2b 795[nick = eg] \\
fd040f06 796class Example: Super \{ \\ \ind
020b9e2b
MW
797 int foo; \\
798 eg.foo = 17; \-\\
7119ea4e
MW
799\};
800\end{prog}
1f7d590d 801
68a620ab 802\subsubsection{Initializer items}
1f7d590d 803\begin{grammar}
391c5a34 804<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
1f7d590d 805
b2983f35 806<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
1f7d590d 807
054e8f8f 808<initializer> ::= <c-fragment>
1f7d590d
MW
809\end{grammar}
810
811An @<initializer-item> provides an initial value for one or more slots. If
ba375a80 812prefixed by @|class|, then the initial values are for class slots (i.e.,
1f7d590d
MW
813slots of the class object itself); otherwise they are for instance slots.
814
bc7dff5c
MW
815The first component of the @<dotted-name> must be the nickname of one of the
816class's superclasses (including itself); the second must be the name of a
817slot defined in that superclass.
1f7d590d 818
8399be6f
MW
819Properties:
820\begin{description}
ba375a80 821\item[@|initializer_class|] A symbol naming the Lisp class to use to
8399be6f 822 represent the initializer.
ba375a80 823\item[@|initarg|] An identifier naming an initialization argument which can
8399be6f
MW
824 be used to provide a value for the slot. See
825 \xref{sec:concepts.lifecycle.birth} for the details. An initializer item
826 must have either an @|initarg| property, or an initializer expression, or
827 both.
ba375a80
MW
828\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
829 the initarg. Only permitted if @|initarg| is also set.
8399be6f 830\end{description}
b2983f35
MW
831
832Each class may define at most one initializer item with an explicit
833initializer expression for a given slot.
834
835\subsubsection{Initarg items}
836\begin{grammar}
837<initarg-item> ::=
838 "initarg"
839 @<declaration-specifier>^+
840 <list>$[\mbox{@<init-declarator>}]$ ";"
841\end{grammar}
0e5c0b9e
MW
842Properties:
843\begin{description}
ba375a80 844\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
0e5c0b9e
MW
845 the initarg.
846\end{description}
b2983f35 847
a42893dd
MW
848\subsubsection{Fragment items}
849\begin{grammar}
850<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
851
852<fragment-kind> ::= "init" | "teardown"
853\end{grammar}
854
68a620ab 855\subsubsection{Message items}
1f7d590d
MW
856\begin{grammar}
857<message-item> ::=
391c5a34
MW
858 @<declaration-specifier>^+
859 <keyword-declarator>$[\mbox{@<identifier>}]$
860 @[<method-body>@]
1f7d590d 861\end{grammar}
8399be6f
MW
862Properties:
863\begin{description}
ba375a80 864\item[@|message_class|] A symbol naming the Lisp class to use to represent
8399be6f 865 the message.
ba375a80 866\item[@|combination|] A keyword naming the aggregating method combination to
8399be6f 867 use.
ba375a80 868\item[@|most_specific|] A keyword, either @`first' or @`last', according to
8399be6f
MW
869 whether the most specific applicable method should be invoked first or
870 last.
871\end{description}
872
873Properties for the @|custom| aggregating method combination:
874\begin{description}
ba375a80 875\item[@|retvar|] An identifier for the return value from the effective
8399be6f
MW
876 method. The default is @|sod__ret|. Only permitted if the message return
877 type is not @|void|.
ba375a80 878\item[@|valvar|] An identifier holding each return value from a direct method
8399be6f 879 in the effective method. The default is @|sod__val|. Only permitted if
ba375a80
MW
880 the method return type (see @|methty| below) is not @|void|.
881\item[@|methty|] A C type, which is the return type for direct methods of
054e8f8f 882 this message. The default is the return type of the message.
ba375a80 883\item[@|decls|] A code fragment containing declarations to be inserted at the
8399be6f 884 head of the effective method body. The default is to insert nothing.
ba375a80 885\item[@|before|] A code fragment containing initialization to be performed at
8399be6f
MW
886 the beginning of the effective method body. The default is to insert
887 nothing.
ba375a80 888\item[@|empty|] A code fragment executed if there are no primary methods;
b07535d8
MW
889 it should usually store a suitable (identity) value in @<retvar>. The
890 default is not to emit an effective method at all if there are no primary
891 methods.
ba375a80
MW
892\item[@|first|] A code fragment to set the return value after calling the
893 first applicable direct method. The default is to use the @|each|
8399be6f 894 fragment.
ba375a80
MW
895\item[@|each|] A code fragment to set the return value after calling a direct
896 method. If @|first| is also set, then it is used after the first direct
8399be6f
MW
897 method instead of this. The default is to insert nothing, which is
898 probably not what you want.
ba375a80 899\item[@|after|] A code fragment inserted at the end of the effective method
8399be6f 900 body. The default is to insert nothing.
ba375a80 901\item[@|count|] An identifier naming a variable to be declared in the
8399be6f
MW
902 effective method body, of type @|size_t|, holding the number of applicable
903 methods. The default is not to provide such a variable.
904\end{description}
1f7d590d 905
68a620ab 906\subsubsection{Method items}
1f7d590d
MW
907\begin{grammar}
908<method-item> ::=
391c5a34
MW
909 @<declaration-specifier>^+
910 <keyword-declarator>$[\mbox{@<dotted-name>}]$
ea08dc56 911 <method-body>
1f7d590d
MW
912
913<method-body> ::= "{" <c-fragment> "}" | "extern" ";"
914\end{grammar}
8399be6f
MW
915Properties:
916\begin{description}
ba375a80 917\item[@|method_class|] A symbol naming the Lisp class to use to represent
8399be6f 918 the direct method.
ba375a80 919\item[@|role|] A keyword naming the direct method's rôle. For the built-in
8399be6f
MW
920 `simple' message classes, the acceptable rôle names are @|before|,
921 @|after|, and @|around|. By default, a primary method is constructed.
922\end{description}
1f7d590d 923
1f7d590d
MW
924%%%----- That's all, folks --------------------------------------------------
925
926%%% Local variables:
927%%% mode: LaTeX
928%%% TeX-master: "sod.tex"
929%%% TeX-PDF-mode: t
930%%% End: