doc/syntax.tex: Reformat the various grammar fragments.
[sod] / doc / syntax.tex
... / ...
CommitLineData
1%%% -*-latex-*-
2%%%
3%%% Module syntax
4%%%
5%%% (c) 2015 Straylight/Edgeware
6%%%
7
8%%%----- Licensing notice ---------------------------------------------------
9%%%
10%%% This file is part of the Sensible Object Design, an object system for C.
11%%%
12%%% SOD is free software; you can redistribute it and/or modify
13%%% it under the terms of the GNU General Public License as published by
14%%% the Free Software Foundation; either version 2 of the License, or
15%%% (at your option) any later version.
16%%%
17%%% SOD is distributed in the hope that it will be useful,
18%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
19%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20%%% GNU General Public License for more details.
21%%%
22%%% You should have received a copy of the GNU General Public License
23%%% along with SOD; if not, write to the Free Software Foundation,
24%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26\chapter{Module syntax} \label{ch:syntax}
27
28%%%--------------------------------------------------------------------------
29\section{Lexical syntax} \label{sec:syntax.lex}
30
31Whitespace and comments are discarded. The remaining characters are
32collected into tokens according to the following syntax.
33
34\begin{grammar}
35<token> ::= <identifier>
36\alt <string-literal>
37\alt <char-literal>
38\alt <integer-literal>
39\alt <punctuation>
40\end{grammar}
41
42This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
43munch} rule: at each stage we take the longest sequence of characters which
44could be a token.
45
46
47\subsection{Identifiers} \label{sec:syntax.lex.id}
48
49\begin{grammar}
50<identifier> ::= <id-start-char> @<id-body-char>^*
51
52<id-start-char> ::= <alpha-char> | "_"
53
54<id-body-char> ::= <id-start-char> @! <digit-char>
55
56<alpha-char> ::= "A" | "B" $| \cdots |$ "Z"
57 | "a" | "b" $| \cdots |$ "z"
58 | <extended-alpha-char>
59
60<digit-char> ::= "0" | <nonzero-digit-char>
61
62<nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
63\end{grammar}
64
65The precise definition of @<alpha-char> is left to the function
66@|alpha-char-p| in the hosting Lisp system. For portability, programmers are
67encouraged to limit themselves to the standard ASCII letters.
68
69There are no reserved words at the lexical level, but the higher-level syntax
70recognizes certain identifiers as \emph{keywords} in some contexts. There is
71also an ambiguity (inherited from C) in the declaration syntax which is
72settled by distinguishing type names from other identifiers at a lexical
73level.
74
75
76\subsection{String and character literals} \label{sec:syntax.lex.string}
77
78\begin{grammar}
79<string-literal> ::= "\"" @<string-literal-char>^* "\""
80
81<char-literal> ::= "'" <char-literal-char> "'"
82
83<string-literal-char> ::= "\\" <char>
84 | any character other than "\\" or "\""
85
86<char-literal-char> ::= "\\" <char>
87 | any character other than "\\" or "'"
88
89<char> ::= any single character
90\end{grammar}
91
92The syntax for string and character literals differs from~C. In particular,
93escape sequences such as @`\textbackslash n' are not recognized. The use
94of string and character literals in Sod, outside of C~fragments, is limited,
95and the simple syntax seems adequate. For the sake of future compatibility,
96the use of character sequences which resemble C escape sequences is
97discouraged.
98
99
100\subsection{Integer literals} \label{sec:syntax.lex.int}
101
102\begin{grammar}
103<integer-literal> ::= <decimal-integer>
104 | <binary-integer>
105 | <octal-integer>
106 | <hex-integer>
107
108<decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
109
110<binary-integer> ::= "0" @("b" @! "B"@) @<binary-digit-char>^+
111
112<binary-digit-char> ::= "0" | "1"
113
114<octal-integer> ::= "0" @["o" @! "O"@] @<octal-digit-char>^+
115
116<octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
117
118<hex-integer> ::= "0" @("x" @! "X"@) @<hex-digit-char>^+
119
120<hex-digit-char> ::= <digit-char>
121 | "A" | "B" | "C" | "D" | "E" | "F"
122 | "a" | "b" | "c" | "d" | "e" | "f"
123\end{grammar}
124
125Sod understands only integers, not floating-point numbers; its integer syntax
126goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
127binary. However, length and signedness indicators are not permitted.
128
129
130\subsection{Punctuation} \label{sec:syntax.lex.punct}
131
132\begin{grammar}
133<punctuation> ::= "\dots"
134\alt any nonalphanumeric character other than "_", "\"", or "'"
135\end{grammar}
136
137
138\subsection{Comments} \label{sec:syntax.lex.comment}
139
140\begin{grammar}
141<comment> ::= <block-comment> | <line-comment>
142
143<block-comment> ::=
144 "/*"
145 @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
146 @<star>^*
147 "*/"
148
149<star> ::= "*"
150
151<not-star> ::= any character other than "*"
152
153<not-star-or-slash> ::= any character other than "*" or "/"
154
155<line-comment> ::= "/\,/" @<not-newline>^* <newline>
156
157<newline> ::= a newline character
158
159<not-newline> ::= any character other than newline
160\end{grammar}
161
162Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
163@|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
164ignored.
165
166
167\subsection{Special nonterminals} \label{sec:syntax.lex.special}
168
169Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
170two special nonterminals occur in the module syntax.
171
172\subsubsection{S-expressions}
173\begin{grammar}
174<s-expression> ::= an S-expression, as parsed by the Lisp reader
175\end{grammar}
176
177When an S-expression is expected, the Sod parser simply calls the host Lisp
178system's @|read| function. Sod modules are permitted to modify the read
179table to extend the S-expression syntax.
180
181S-expressions are self-delimiting, so no end-marker is needed.
182
183\subsubsection{C fragments}
184\begin{grammar}
185<c-fragment> ::= a sequence of C tokens, with matching brackets
186\end{grammar}
187
188Sequences of C code are simply stored and written to the output unchanged
189during translation. They are read using a simple scanner which nonetheless
190understands C comments and string and character literals.
191
192A C fragment is terminated by one of a small number of delimiter characters
193determined by the immediately surrounding context -- usually some kind of
194bracket. The first such delimiter character which is not enclosed in
195brackets, braces or parentheses ends the fragment.
196
197%%%--------------------------------------------------------------------------
198\section{C types} \label{sec:syntax.type}
199
200Sod's syntax for C types closely mirrors the standard C syntax. A C type has
201two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
202Sod, a type must contain at least one @<declaration-specifier> (i.e.,
203`implicit @|int|' is forbidden), and storage-class specifiers are not
204recognized.
205
206
207\subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
208
209\begin{grammar}
210<declaration-specifier> ::= <type-name>
211\alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
212\alt "void" | "char" | "int" | "float" | "double"
213\alt "short" | "long"
214\alt "signed" | "unsigned"
215\alt "bool" | "_Bool"
216\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
217\alt <qualifier>
218\alt <storage-specifier>
219\alt <atomic-type>
220\alt <other-declspec>
221
222<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
223
224<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
225
226<atomic-type> ::= <atomic> "(" <plain-type> ")"
227
228<atomic> ::= "atomic" | "_Atomic"
229
230<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
231
232<alignas> ::= "alignas" | "_Alignas"
233
234<type-name> ::= <identifier>
235\end{grammar}
236
237Declaration specifiers may appear in any order. However, not all
238combinations are permitted. A declaration specifier must consist of zero or
239more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
240following, up to reordering:
241\begin{itemize}
242\item @<type-name>;
243\item @<atomic-type>;
244\item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
245\item @"void";
246\item @"_Bool", @"bool";
247\item @"char"; @"unsigned char"; @"signed char";
248\item @"short", @"signed short", @"short int", @"signed short int";
249 @"unsigned short", @"unsigned short int";
250\item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
251\item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
252 long", @"unsigned long int";
253\item @"long long", @"signed long long", @"long long int", @"signed long long
254 int"; @"unsigned long long", @"unsigned long long int";
255\item @"float"; @"double"; @"long double";
256\item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
257 imaginary"; @"long double _Imaginary", @"long double imaginary";
258\item @"float _Complex", @"float complex"; @"double _Complex", @"double
259 complex"; @"long double _Complex", @"long double complex".
260\end{itemize}
261All of these have their usual C meanings. Groups separated by commas mean
262the same thing, and Sod will not preserve the distinction.
263
264Almost all of these mean the same as they do in C. There are some minor
265differences:
266\begin{itemize}
267\item In C, the `tag' namespace is shared between @|struct|, @|union|, and
268 @|enum|; Sod has three distinct namespaces for tags. This may be fixed in
269 the future.
270\item The @<other-declspec> production is a syntactic extension point, where
271 extensions can introduce their own additions to the type system.
272\end{itemize}
273
274C standards from C99 onwards have tended to introduce new keywords beginning
275with an underscore followed by an uppercase letter, so as to avoid conflicts
276with existing code. More conventional spellings are then provided by macros
277in new header files. For example, C99 introduced @"_Bool", and a header file
278@|<stdbool.h>| which defines the macro @|bool|. Sod recognizes both the ugly
279underscore names and the more conventional macro names on input, but always
280emits the ugly names. This doesn't cause a compatibility problem in Sod,
281because Sod's parser recognizes keywords only in the appropriate context.
282For example, the (ill-advised) slot declaration
283\begin{prog}
284 bool bool;
285\end{prog}
286is completely acceptable, and will cause the C structure member
287\begin{prog}
288 \_Bool bool;
289\end{prog}
290to be emitted on output, which will be acceptable to C as long as
291@|<stdbool.h>| is not included.
292
293A @<type-name> is an identifier which has been declared as being a type name,
294using the @"typename" or @"class" definitions. The following type names are
295defined in the built-in module.
296\begin{itemize}
297\item @|va_list|
298\item @|size_t|
299\item @|ptrdiff_t|
300\item @|wchar_t|
301\end{itemize}
302
303
304\subsection{Declarators} \label{sec:syntax.type.declarator}
305
306\begin{grammar}
307<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
308
309<primary-declarator>$[k, a]$ ::= $k$
310\alt "(" <primary-declarator>$[k, a]$ ")"
311\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
312
313<pointer> ::= "*" @<qualifier>^*
314
315<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
316\alt "(" $a$ ")"
317
318<argument-list> ::= $\epsilon$ | "\dots"
319\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
320
321<argument> ::= @<declaration-specifier>^+ <argument-declarator>
322
323<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
324
325<argument-declarator> ::=
326 <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
327
328<simple-declarator> ::=
329 <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
330\end{grammar}
331
332The declarator syntax is taken from C, but with some differences.
333\begin{itemize}
334\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
335 closing square bracket. This allows array dimensions to contain arbitrary
336 constant expressions.
337\item A declarator may have either a single @<identifier> at its centre or a
338 pair of @<identifier>s separated by a @`.'; this is used to refer to
339 slots or messages defined in superclasses.
340\end{itemize}
341The remaining differences are (I hope) a matter of presentation rather than
342substance.
343
344There is additional syntax to support messages and methods which accept
345keyword arguments.
346
347\begin{grammar}
348<keyword-argument> ::= <argument> @["=" <c-fragment>@]
349
350<keyword-argument-list> ::=
351 @[<list>$[\mbox{@<argument>}]$@]
352 "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
353
354<method-argument-list> ::= <argument-list> @! <keyword-argument-list>
355
356<dotted-name> ::= <identifier> "." <identifier>
357
358<keyword-declarator>$[k]$ ::=
359 <declarator>$[k, \mbox{@<method-argument-list>}]$
360\end{grammar}
361
362%%%--------------------------------------------------------------------------
363\section{Properties} \label{sec:syntax.prop}
364
365\begin{grammar}
366<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
367
368<property> ::= <identifier> "=" <expression>
369
370<expression> ::= <additive>
371
372<additive> ::= <term>
373 | <additive> "+" <term>
374 | <additive> "--" <term>
375
376<term> ::= <factor>
377 | <term> "*" <factor>
378 | <term> "/" <factor>
379
380<factor> ::= <primary>
381 | "!" <factor> | "~" <factor>
382 | "+" <factor> | "--" <factor>
383
384<primary> ::=
385 <integer-literal> | <string-literal> | <char-literal> | <identifier>
386\alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
387 | "(" <expression> ")"
388\end{grammar}
389
390\emph{Property sets} are a means for associating miscellaneous information
391with compile-time metaobjects such as modules, classes, messages, methods,
392slots, and initializers. By using property sets, additional information can
393be passed to extensions without the need to introduce idiosyncratic syntax.
394(That said, extensions can add additional first-class syntax, if necessary.)
395
396An error is reported if an unrecognized property is associated with an
397object.
398
399
400\subsection{Property values} \label{sec:syntax.prop.value}
401
402A property has a name, given as an @<identifier>, and a value computed by
403evaluating an @<expression>. The value can be one of a number of types.
404
405\begin{itemize}
406
407\item An @<integer-literal> denotes a value of type @|int|.
408
409\item Similarly @<string-literal> and @<char-literal> denote @|string| and
410 @|char| values respectively. Note that, as properties, characters are
411 quite distinct from integers, whereas in C, a character literal denotes a
412 value of type @|int|.
413
414\item There are no variables in the property-value syntax. Rather, an
415 @<identifier> denotes that identifier, as a value of type @|id|.
416
417\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
418 between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
419 void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
420
421\item A @<c-fragment> within braces denotes the tokens between (and not
422 including) the braces, as a value of type @|c-fragment|.
423
424\end{itemize}
425
426As shown in the grammar, there are four binary operators, @"+" (addition),
427@"--" (subtraction), @"*" (multiplication), and @"/" (division);
428multiplication and division have higher precedence than addition and
429subtraction, and operators of the same precedence associate left-to-right.
430There are also unary @"+" (no effect) and @"--" (negation) operators, with
431higher precedence. All of the above operators act only on integer operands
432and yield integer results. (Although the unary @"+" operator yields its
433operand unchanged, an error is still reported if it is applied to a
434non-integer value.) There are currently no bitwise, logical, or comparison
435operators.
436
437Finally, an S-expression preceded by @|?| causes the expression to be read in
438the current package (which is always @|sod-user| at the start of a module)
439and immediately evaluated (using @|eval|); the resulting value is converted
440into a property value using the \descref{gf}{decode-property}[generic
441function].
442
443
444\subsection{Property output types and coercions}
445\label{sec:syntax.prop.coerce}
446
447When a property value is inspected by the Sod translator, or an extension, it
448is \emph{coerced} so as to conform to a requested output type. This coercion
449process is performed by the \descref{gf}{coerce-property-value}[generic
450function], and additional output types and coercions can be defined by
451extensions. The built-in output types and coercions, from the value types
452listed above, are as follows.
453
454\begin{itemize}
455
456\item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
457 correspond to the like-named value types described above. No coercions to
458 these output types are defined for the described value types.\footnote{%
459 There is a coercion to @|id| from the value type @|symbol|, but it is
460 only possible to generate a property value of type @|symbol| using Lisp.}
461
462\item The output type @|type| denotes a C type, as does the value type
463 @|type|. In addition, a value of type @|id| can be coerced to a C type if
464 it is the name of a class, a type name explicitly declared by @|typename|,
465 or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
466 @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
467 or @|va_list|.
468
469\item The @|boolean| output type denotes a boolean value, which may be either
470 true or false. A value of type @|id| is considered true if it is @|true|,
471 @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
472 @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
473 any other identifier where a boolean value is wanted. A value of type
474 @|int| is considered true if it is nonzero, or false if it is zero.
475
476\item The @|symbol| output type denotes a Lisp symbol.
477
478 A value of type @|id| is coerced to a symbol as follows. First, the
479 identifier name is subjected to \emph{case inversion}: if all of the
480 letters in the name have the same case, either upper or lower, then they
481 are replaced with the corresponding letters in the opposite case, lower or
482 upper; if the name contains letters of both cases, then it is not changed.
483 For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
484 remains as it is. Second, the name is subjected to \emph{separator
485 switching}: all underscores in the name are replaced with hyphens (and
486 \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
487 first place). Finally, the resulting name is interned in the current
488 package, which will usually be @|sod-user| unless changed explicitly by the
489 module.
490
491 A value of type @|string| is coerced to a symbol as follows. If the string
492 contains no colons, then it is case-inverted (but not separator-switched)
493 and interned in the current package. Otherwise, the string either has the
494 form $p @|:| q$, where $q$ does not begin with a colon (the
495 \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
496 where $p$ does not contain a colon. Both $p$ and $q$ are case-inverted
497 (but not separator-switched). If $p$ does not name a package, then an
498 error is reported; as a special case, if $p$ is empty, then it is
499 considered to name the @|keyword| package. Otherwise, $q$ is looked up as
500 a symbol name in package~$p$; in the single-colon case, if the symbol is
501 not an exported symbol in package~$p$, then an error is reported; in the
502 double-colon case, $q$ is interned in package~$p$ (and so there needn't be
503 an exported symbol -- or, indeed, any symbol at all -- named $q$
504 beforehand).
505
506\item The @|keyword| output type denotes symbols within the @|keyword|
507 package. Values of type @|id| or @|string| can be coerced to a @|keyword|
508 in the same way as to a @|symbol|, as described above, only the converted
509 name is looked up in the @|keyword| package rather than the current
510 package. (A @|string| can override this by specifying an explicit package
511 name, but this is unlikely to be very helpful.)
512
513\end{itemize}
514
515%%%--------------------------------------------------------------------------
516\section{Module syntax} \label{sec:syntax.module}
517
518\begin{grammar}
519<module> ::= @<definition>^*
520
521<definition> ::= <property-definition> \fixme{undefined}
522\alt <import-definition>
523\alt <load-definition>
524\alt <lisp-definition>
525\alt <code-definition>
526\alt <typename-definition>
527\alt <class-definition>
528\alt <other-definition> \fixme{undefined}
529\end{grammar}
530
531A @<module> is the top-level syntactic item: a source file presented to Sod
532is expected to conform with the @<module> syntax.
533
534A module consists of a sequence of definitions.
535
536\fixme{describe syntax; expand}
537Properties:
538\begin{description}
539\item[@|module_class|] A symbol naming the Lisp class to use to
540 represent the module.
541\item[@|guard|] An identifier to use as the guard symbol used to prevent
542 multiple inclusion in the header file.
543\end{description}
544
545
546\subsection{Simple definitions} \label{sec:syntax.module.simple}
547
548\subsubsection{Importing modules}
549\begin{grammar}
550<import-definition> ::= "import" <string> ";"
551\end{grammar}
552
553The module named @<string> is processed and its definitions made available.
554
555A search is made for a module source file as follows.
556\begin{itemize}
557\item The module name @<string> is converted into a filename by appending
558 @`.sod', if it has no extension already.\footnote{%
559 Technically, what happens is @|(merge-pathnames name (make-pathname :type
560 "SOD" :case :common))|, so exactly what this means varies according to
561 the host system.} %
562\item The file is looked for relative to the directory containing the
563 importing module.
564\item If that fails, then the file is looked for in each directory on the
565 module search path in turn.
566\item If the file still isn't found, an error is reported and the import
567 fails.
568\end{itemize}
569At this point, if the file has previously been imported, nothing further
570happens.\footnote{%
571 This check is done using @|truename|, so it should see through simple
572 tricks like symbolic links. However, it may be confused by fancy things
573 like bind mounts and so on.} %
574
575Recursive imports, either direct or indirect, are an error.
576
577\subsubsection{Loading extensions}
578\begin{grammar}
579<load-definition> ::= "load" <string> ";"
580\end{grammar}
581
582The Lisp file named @<string> is loaded and evaluated.
583
584A search is made for a Lisp source file as follows.
585\begin{itemize}
586\item The name @<string> is converted into a filename by appending @`.lisp',
587 if it has no extension already.\footnote{%
588 Technically, what happens is @|(merge-pathnames name (make-pathname :type
589 "LISP" :case :common))|, so exactly what this means varies according to
590 the host system.} %
591\item A search is then made in the same manner as for module imports
592 (\xref{sec:syntax.module.simple}).
593\end{itemize}
594If the file is found, it is loaded using the host Lisp's @|load| function.
595
596Note that Sod doesn't attempt to compile Lisp files, or even to look for
597existing compiled files. The right way to package a substantial extension to
598the Sod translator is to provide the extension as a standard ASDF system (or
599similar) and leave a dropping @|foo-extension.lisp| in the module path saying
600something like
601\begin{prog}
602 (asdf:load-system :foo-extension)
603\end{prog}
604which will arrange for the extension to be compiled if necessary.
605
606(This approach means that the language doesn't need to depend on any
607particular system definition facility. It's bad enough already that it
608depends on Common Lisp.)
609
610\subsubsection{Lisp escapes}
611\begin{grammar}
612<lisp-definition> ::= "lisp" <s-expression> ";"
613\end{grammar}
614
615The @<s-expression> is evaluated immediately. It can do anything it likes.
616
617\begin{boxy}[Warning!]
618 This means that hostile Sod modules are a security hazard. Lisp code can
619 read and write files, start other programs, and make network connections.
620 Don't install Sod modules from sources that you don't trust.\footnote{%
621 Presumably you were going to run the corresponding code at some point, so
622 this isn't as unusually scary as it sounds. But please be careful.} %
623\end{boxy}
624
625\subsubsection{Declaring type names}
626\begin{grammar}
627<typename-definition> ::=
628 "typename" <list>$[\mbox{@<identifier>}]$ ";"
629\end{grammar}
630
631Each @<identifier> is declared as naming a C type. This is important because
632the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
633done by distinguishing type names from other identifiers.
634
635Don't declare class names using @"typename"; use @"class" forward
636declarations instead.
637
638
639\subsection{Literal code} \label{sec:syntax.module.literal}
640
641\begin{grammar}
642<code-definition> ::=
643 "code" <identifier> ":" <item-name> @[<constraints>@]
644 "{" <c-fragment> "}"
645
646<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
647
648<constraint> ::= @<item-name>^+
649
650<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
651\end{grammar}
652
653The @<c-fragment> will be output unchanged to one of the output files.
654
655The first @<identifier> is the symbolic name of an output file. Predefined
656output file names are @|c| and @|h|, which are the implementation code and
657header file respectively; other output files can be defined by extensions.
658
659Output items are named with a sequence of identifiers, separated by
660whitespace, and enclosed in parentheses. As an abbreviation, a name
661consisting of a single identifier may be written as just that identifier,
662without the parentheses.
663
664The @<constraints> provide a means for specifying where in the output file
665the output item should appear. (Note the two kinds of square brackets shown
666in the syntax: square brackets must appear around the constraints if they are
667present, but the constraints themselves may be omitted.) Each comma-separated @<constraint>
668is a sequence of names of output items, and indicates that the output items
669must appear in the order given -- though the translator is free to insert
670additional items in between them. (The particular output items needn't be
671defined already -- indeed, they needn't be defined ever.)
672
673There is a predefined output item @|includes| in both the @|c| and @|h|
674output files which is a suitable place for inserting @|\#include|
675preprocessor directives in order to declare types and functions for use
676elsewhere in the generated output files.
677
678
679\subsection{Class definitions} \label{sec:syntax.module.class}
680
681\begin{grammar}
682<class-definition> ::= <class-forward-declaration>
683\alt <full-class-definition>
684\end{grammar}
685
686\subsubsection{Forward declarations}
687\begin{grammar}
688<class-forward-declaration> ::= "class" <identifier> ";"
689\end{grammar}
690
691A @<class-forward-declaration> informs Sod that an @<identifier> will be used
692to name a class which is currently undefined. Forward declarations are
693necessary in order to resolve certain kinds of circularity. For example,
694\begin{prog}
695class Sub; \\+
696
697class Super: SodObject \{ \\ \ind
698 Sub *sub; \-\\
699\}; \\+
700
701class Sub: Super \{ \\ \ind
702 /* \dots\ */ \-\\
703\};
704\end{prog}
705
706\subsubsection{Full class definitions}
707\begin{grammar}
708<full-class-definition> ::=
709 @[<properties>@]
710 "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
711 "{" @<properties-class-item>^* "}"
712
713<properties-class-item> ::= @[<properties>@] <class-item>
714
715<class-item> ::= <slot-item>
716\alt <initializer-item>
717\alt <initarg-item>
718\alt <fragment-item>
719\alt <message-item>
720\alt <method-item>
721\alt <other-item> \fixme{undefined}
722\end{grammar}
723
724A full class definition provides a complete description of a class.
725
726The first @<identifier> gives the name of the class. It is an error to
727give the name of an existing class (other than a forward-referenced class),
728or an existing type name. It is conventional to give classes `MixedCase'
729names, to distinguish them from other kinds of identifiers.
730
731The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
732class. It is an error if any of these @<identifier>s does not name a defined
733class. The superclass list is required, and must not be empty; listing
734@|SodObject| as your class's superclass is a good choice if nothing else
735seems suitable. A class with no direct superclasses is called a \emph{root
736class}. It is not possible to define a root class in the Sod language: you
737must use Lisp to do this, and it's quite involved.
738
739The @<properties> provide additional information. The standard class
740properties are as follows.
741\begin{description}
742\item[@|lisp_class|] The name of the Lisp class to use within the translator
743 to represent this class. The property value must be an identifier; the
744 default is @|sod_class|. Extensions may define classes with additional
745 behaviour, and may recognize additional class properties.
746\item[@|metaclass|] The name of the Sod metaclass for this class. In the
747 generated code, a class is itself an instance of another class -- its
748 \emph{metaclass}. The metaclass defines which slots the class will have,
749 which messages it will respond to, and what its behaviour will be when it
750 receives them. The property value must be an identifier naming a defined
751 subclass of @|SodClass|. The default metaclass is @|SodClass|.
752 See \xref{sec:concepts.metaclasses} for more details.
753\item[@|nick|] A nickname for the class, to be used to distinguish it from
754 other classes in various limited contexts. The property value must be an
755 identifier; the default is constructed by forcing the class name to
756 lower-case.
757\end{description}
758
759The class body consists of a sequence of @<class-item>s enclosed in braces.
760These items are discussed in the following sections.
761
762\subsubsection{Slot items}
763\begin{grammar}
764<slot-item> ::=
765 @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
766
767<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
768\end{grammar}
769
770A @<slot-item> defines one or more slots. All instances of the class and any
771subclass will contain these slots, with the names and types given by the
772@<declaration-specifier>s and the @<declarator>s. Slot declarators may not
773contain dotted names.
774
775It is not possible to declare a slot with function type: such an item is
776interpreted as being a @<message-item> or @<method-item>. Pointers to
777functions are fine.
778
779Properties:
780\begin{description}
781\item[@|slot_class|] A symbol naming the Lisp class to use to represent the
782 direct slot.
783\item[@|initarg|] An identifier naming an initialization argument which can
784 be used to provide a value for the slot. See
785 \xref{sec:concepts.lifecycle.birth} for the details.
786\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
787 the initarg. Only permitted if @|initarg| is also set.
788\end{description}
789
790An @<initializer>, if present, is treated as if a separate
791@<initializer-item> containing the slot name and initializer were present.
792For example,
793\begin{prog}
794[nick = eg] \\
795class Example: Super \{ \\ \ind
796 int foo = 17; \-\\
797\};
798\end{prog}
799means the same as
800\begin{prog}
801[nick = eg] \\
802class Example: Super \{ \\ \ind
803 int foo; \\
804 eg.foo = 17; \-\\
805\};
806\end{prog}
807
808\subsubsection{Initializer items}
809\begin{grammar}
810<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
811
812<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
813
814<initializer> ::= <c-fragment>
815\end{grammar}
816
817An @<initializer-item> provides an initial value for one or more slots. If
818prefixed by @|class|, then the initial values are for class slots (i.e.,
819slots of the class object itself); otherwise they are for instance slots.
820
821The first component of the @<dotted-name> must be the nickname of one of the
822class's superclasses (including itself); the second must be the name of a
823slot defined in that superclass.
824
825Properties:
826\begin{description}
827\item[@|initializer_class|] A symbol naming the Lisp class to use to
828 represent the initializer.
829\item[@|initarg|] An identifier naming an initialization argument which can
830 be used to provide a value for the slot. See
831 \xref{sec:concepts.lifecycle.birth} for the details. An initializer item
832 must have either an @|initarg| property, or an initializer expression, or
833 both.
834\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
835 the initarg. Only permitted if @|initarg| is also set.
836\end{description}
837
838Each class may define at most one initializer item with an explicit
839initializer expression for a given slot.
840
841\subsubsection{Initarg items}
842\begin{grammar}
843<initarg-item> ::=
844 "initarg"
845 @<declaration-specifier>^+
846 <list>$[\mbox{@<init-declarator>}]$ ";"
847\end{grammar}
848Properties:
849\begin{description}
850\item[@|initarg_class|] A symbol naming the Lisp class to use to represent
851 the initarg.
852\end{description}
853
854\subsubsection{Fragment items}
855\begin{grammar}
856<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
857
858<fragment-kind> ::= "init" | "teardown"
859\end{grammar}
860
861\subsubsection{Message items}
862\begin{grammar}
863<message-item> ::=
864 @<declaration-specifier>^+
865 <keyword-declarator>$[\mbox{@<identifier>}]$
866 @[<method-body>@]
867\end{grammar}
868Properties:
869\begin{description}
870\item[@|message_class|] A symbol naming the Lisp class to use to represent
871 the message.
872\item[@|combination|] A keyword naming the aggregating method combination to
873 use.
874\item[@|most_specific|] A keyword, either @`first' or @`last', according to
875 whether the most specific applicable method should be invoked first or
876 last.
877\end{description}
878
879Properties for the @|custom| aggregating method combination:
880\begin{description}
881\item[@|retvar|] An identifier for the return value from the effective
882 method. The default is @|sod__ret|. Only permitted if the message return
883 type is not @|void|.
884\item[@|valvar|] An identifier holding each return value from a direct method
885 in the effective method. The default is @|sod__val|. Only permitted if
886 the method return type (see @|methty| below) is not @|void|.
887\item[@|methty|] A C type, which is the return type for direct methods of
888 this message. The default is the return type of the message.
889\item[@|decls|] A code fragment containing declarations to be inserted at the
890 head of the effective method body. The default is to insert nothing.
891\item[@|before|] A code fragment containing initialization to be performed at
892 the beginning of the effective method body. The default is to insert
893 nothing.
894\item[@|empty|] A code fragment executed if there are no primary methods;
895 it should usually store a suitable (identity) value in @<retvar>. The
896 default is not to emit an effective method at all if there are no primary
897 methods.
898\item[@|first|] A code fragment to set the return value after calling the
899 first applicable direct method. The default is to use the @|each|
900 fragment.
901\item[@|each|] A code fragment to set the return value after calling a direct
902 method. If @|first| is also set, then it is used after the first direct
903 method instead of this. The default is to insert nothing, which is
904 probably not what you want.
905\item[@|after|] A code fragment inserted at the end of the effective method
906 body. The default is to insert nothing.
907\item[@|count|] An identifier naming a variable to be declared in the
908 effective method body, of type @|size_t|, holding the number of applicable
909 methods. The default is not to provide such a variable.
910\end{description}
911
912\subsubsection{Method items}
913\begin{grammar}
914<method-item> ::=
915 @<declaration-specifier>^+
916 <keyword-declarator>$[\mbox{@<dotted-name>}]$
917 <method-body>
918
919<method-body> ::= "{" <c-fragment> "}" | "extern" ";"
920\end{grammar}
921Properties:
922\begin{description}
923\item[@|method_class|] A symbol naming the Lisp class to use to represent
924 the direct method.
925\item[@|role|] A keyword naming the direct method's rôle. For the built-in
926 `simple' message classes, the acceptable rôle names are @|before|,
927 @|after|, and @|around|. By default, a primary method is constructed.
928\end{description}
929
930%%%----- That's all, folks --------------------------------------------------
931
932%%% Local variables:
933%%% mode: LaTeX
934%%% TeX-master: "sod.tex"
935%%% TeX-PDF-mode: t
936%%% End: