X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/ea08dc5602e822724b99d3ab22dc346d136061db..1ad4b33a5c4b390d728ef15c0eb85e53b0383c50:/doc/syntax.tex

diff --git a/doc/syntax.tex b/doc/syntax.tex
index 2059caf..35ab100 100644
--- a/doc/syntax.tex
+++ b/doc/syntax.tex
@@ -26,6 +26,7 @@
 \chapter{Module syntax} \label{ch:syntax}
 
 %%%--------------------------------------------------------------------------
+\section{Notation} \label{sec:syntax.notation}
 
 Fortunately, Sod is syntactically quite simple.  The notation is slightly
 unusual in order to make the presentation shorter and easier to read.
@@ -57,7 +58,7 @@ enough that they deserve special notation.
     \syntax{$x^*$ ::= <zero-or-more>$[x]$ ::=
       $\epsilon$ @! <zero-or-more>$[x]$ $x$}
   \end{quote}
-\item $x^+$ abbreviates @<one-or-more>$[x]$, denoting a sequence of zero or
+\item $x^+$ abbreviates @<one-or-more>$[x]$, denoting a sequence of one or
   more occurrences of $x$:
   \begin{quote}
     \syntax{$x^+$ ::= <one-or-more>$[x]$ ::= <zero-or-more>$[x]$ $x$}
@@ -69,8 +70,8 @@ enough that they deserve special notation.
   \end{quote}
 \end{itemize}
 
-\subsection{Lexical syntax}
-\label{sec:syntax.lex}
+%%%--------------------------------------------------------------------------
+\section{Lexical syntax} \label{sec:syntax.lex}
 
 Whitespace and comments are discarded.  The remaining characters are
 collected into tokens according to the following syntax.
@@ -87,7 +88,8 @@ This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
 munch} rule: at each stage we take the longest sequence of characters which
 could be a token.
 
-\subsubsection{Identifiers} \label{sec:syntax.lex.id}
+
+\subsection{Identifiers} \label{sec:syntax.lex.id}
 
 \begin{grammar}
 <identifier> ::= <id-start-char> @<id-body-char>^*
@@ -115,7 +117,8 @@ also an ambiguity (inherited from C) in the declaration syntax which is
 settled by distinguishing type names from other identifiers at a lexical
 level.
 
-\subsubsection{String and character literals} \label{sec:syntax.lex.string}
+
+\subsection{String and character literals} \label{sec:syntax.lex.string}
 
 \begin{grammar}
 <string-literal> ::= "\"" @<string-literal-char>^* "\""
@@ -167,13 +170,15 @@ Sod understands only integers, not floating-point numbers; its integer syntax
 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 binary.  However, length and signedness indicators are not permitted.
 
-\subsubsection{Punctuation} \label{sec:syntax.lex.punct}
+
+\subsection{Punctuation} \label{sec:syntax.lex.punct}
 
 \begin{grammar}
 <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
 \end{grammar}
 
-\subsubsection{Comments} \label{sec:lex-comment}
+
+\subsection{Comments} \label{sec:syntax.lex.comment}
 
 \begin{grammar}
 <comment> ::= <block-comment>
@@ -191,37 +196,35 @@ binary.  However, length and signedness indicators are not permitted.
 
 <not-star-or-slash> ::= any character other than "*" or  "/"
 
-<line-comment> ::= "//" @<not-newline>^* <newline>
+<line-comment> ::= "/\,/" @<not-newline>^* <newline>
 
 <newline> ::= a newline character
 
 <not-newline> ::= any character other than newline
 \end{grammar}
 
-Comments are exactly as in C99: both traditional block comments `\texttt{/*}
-\dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
-permitted and ignored.
+Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
+@|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
+ignored.
+
 
-\subsection{Special nonterminals}
-\label{sec:special-nonterminals}
+\subsection{Special nonterminals} \label{sec:syntax.lex.special}
 
-Aside from the lexical syntax presented above (\xref{sec:lexical-syntax}),
+Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 two special nonterminals occur in the module syntax.
 
-\subsubsection{S-expressions} \label{sec:syntax-sexp}
-
+\subsubsection{S-expressions}
 \begin{grammar}
 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 \end{grammar}
 
 When an S-expression is expected, the Sod parser simply calls the host Lisp
-system's \textsf{read} function.  Sod modules are permitted to modify the
-read table to extend the S-expression syntax.
+system's @|read| function.  Sod modules are permitted to modify the read
+table to extend the S-expression syntax.
 
 S-expressions are self-delimiting, so no end-marker is needed.
 
-\subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
-
+\subsubsection{C fragments}
 \begin{grammar}
 <c-fragment> ::= a sequence of C tokens, with matching brackets
 \end{grammar}
@@ -235,7 +238,8 @@ determined by the immediately surrounding context -- usually a closing brace
 or bracket.  The first such delimiter character which is not enclosed in
 brackets, braces or parenthesis ends the fragment.
 
-\subsection{Module syntax} \label{sec:syntax-module}
+%%%--------------------------------------------------------------------------
+\section{Module syntax} \label{sec:syntax.module}
 
 \begin{grammar}
 <module> ::= @<definition>^*
@@ -248,13 +252,12 @@ brackets, braces or parenthesis ends the fragment.
 \alt <class-definition>
 \end{grammar}
 
-A module is the top-level syntactic item.  A module consists of a sequence of
-definitions.
+A @<module> is the top-level syntactic item.  A module consists of a sequence
+of definitions.
 
-\subsection{Simple definitions} \label{sec:syntax.defs}
-
-\subsubsection{Importing modules} \label{sec:syntax.defs.import}
+\subsection{Simple definitions} \label{sec:syntax.module.simple}
 
+\subsubsection{Importing modules}
 \begin{grammar}
 <import-definition> ::= "import" <string> ";"
 \end{grammar}
@@ -283,8 +286,7 @@ happens.\footnote{%
 
 Recursive imports, either direct or indirect, are an error.
 
-\subsubsection{Loading extensions} \label{sec:syntax.defs.load}
-
+\subsubsection{Loading extensions}
 \begin{grammar}
 <load-definition> ::= "load" <string> ";"
 \end{grammar}
@@ -318,8 +320,7 @@ which will arrange for the extension to be compiled if necessary.
 particular system definition facility.  It's bad enough already that it
 depends on Common Lisp.)
 
-\subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
-
+\subsubsection{Lisp escapes}
 \begin{grammar}
 <lisp-definition> ::= "lisp" <s-expression> ";"
 \end{grammar}
@@ -334,8 +335,7 @@ The @<s-expression> is evaluated immediately.  It can do anything it likes.
     this isn't as unusually scary as it sounds.  But please be careful.} %
 \end{boxy}
 
-\subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
-
+\subsubsection{Declaring type names}
 \begin{grammar}
 <typename-definition> ::=
   "typename" <list>$[\mbox{@<identifier>}]$ ";"
@@ -348,16 +348,19 @@ done by distinguishing type names from other identifiers.
 Don't declare class names using @"typename"; use @"class" forward
 declarations instead.
 
-\subsection{Literal code} \label{sec:syntax-code}
+
+\subsection{Literal code} \label{sec:syntax.module.literal}
 
 \begin{grammar}
 <code-definition> ::=
-  "code" <identifier> ":" <identifier> @[<constraints>@]
+  "code" <identifier> ":" <item-name> @[<constraints>@]
   "{" <c-fragment> "}"
 
 <constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 
-<constraint> ::= @<identifier>^+
+<constraint> ::= @<item-name>^+
+
+<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 \end{grammar}
 
 The @<c-fragment> will be output unchanged to one of the output files.
@@ -366,26 +369,27 @@ The first @<identifier> is the symbolic name of an output file.  Predefined
 output file names are @"c" and @"h", which are the implementation code and
 header file respectively; other output files can be defined by extensions.
 
-The second @<identifier> provides a name for the output item.  Several C
-fragments can have the same name: they will be concatenated together in the
-order in which they were encountered.
+Output items are named with a sequence of identifiers, separated by
+whitespace, and enclosed in parentheses.  As an abbreviation, a name
+consisting of a single identifier may be written as just that identifier,
+without the parentheses.
 
 The @<constraints> provide a means for specifying where in the output file
 the output item should appear.  (Note the two kinds of square brackets shown
 in the syntax: square brackets must appear around the constraints if they are
 present, but that they may be omitted.)  Each comma-separated @<constraint>
-is a sequence of identifiers naming output items, and indicates that the
-output items must appear in the order given -- though the translator is free
-to insert additional items in between them.  (The particular output items
-needn't be defined already -- indeed, they needn't be defined ever.)
+is a sequence of names of output items, and indicates that the output items
+must appear in the order given -- though the translator is free to insert
+additional items in between them.  (The particular output items needn't be
+defined already -- indeed, they needn't be defined ever.)
 
 There is a predefined output item @"includes" in both the @"c" and @"h"
 output files which is a suitable place for inserting @"\#include"
 preprocessor directives in order to declare types and functions for use
 elsewhere in the generated output files.
 
-\subsection{Property sets} \label{sec:syntax.propset}
 
+\subsection{Property sets} \label{sec:syntax.module.properties}
 \begin{grammar}
 <properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 
@@ -401,17 +405,17 @@ A property has a name, given as an @<identifier>, and a value computed by
 evaluating an @<expression>.  The value can be one of a number of types,
 though the only operators currently defined act on integer values only.
 
-\subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
-
+\subsubsection{The expression evaluator}
 \begin{grammar}
-<expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
+<expression> ::= <term> | <expression> "+" <term> | <expression> "--" <term>
 
 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
 
-<factor> ::= <primary> | "+" <factor> | "-" <factor>
+<factor> ::= <primary> | "+" <factor> | "--" <factor>
 
 <primary> ::=
      <integer-literal> | <string-literal> | <char-literal> | <identifier>
+\alt "<" <plain-type> ">"
 \alt "?" <s-expression>
 \alt "(" <expression> ")"
 \end{grammar}
@@ -429,7 +433,8 @@ causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 function.
 %%% FIXME crossref to extension docs
 
-\subsection{C types} \label{sec:syntax.c-types}
+
+\subsection{C types} \label{sec:syntax.module.types}
 
 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
@@ -437,31 +442,55 @@ Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 `implicit @"int"' is forbidden), and storage-class specifiers are not
 recognized.
 
-\subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
-
+\subsubsection{Declaration specifiers}
 \begin{grammar}
 <declaration-specifier> ::= <type-name>
 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 \alt "void" | "char" | "int" | "float" | "double"
 \alt "short" | "long"
 \alt "signed" | "unsigned"
+\alt "bool" | "_Bool"
+\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 \alt <qualifier>
+\alt <storage-specifier>
+\alt <atomic-type>
+
+<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
+
+<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
+
+<atomic-type> ::=
+  <atomic> "(" <plain-type> ")"
 
-<qualifier> ::= "const" | "volatile" | "restrict"
+<atomic> ::= "atomic" | "_Atomic"
+
+<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
+
+<alignas> ::= "alignas" | "_Alignas"
 
 <type-name> ::= <identifier>
 \end{grammar}
 
 A @<type-name> is an identifier which has been declared as being a type name,
-using the @"typename" or @"class" definitions.
+using the @"typename" or @"class" definitions.  The following type names are
+defined in the built-in module.
+\begin{itemize}
+\item @"va_list"
+\item @"size_t"
+\item @"ptrdiff_t"
+\item @"wchar_t"
+\end{itemize}
 
 Declaration specifiers may appear in any order.  However, not all
 combinations are permitted.  A declaration specifier must consist of zero or
-more @<qualifiers>, and one of the following, up to reordering.
+more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
+following, up to reordering.
 \begin{itemize}
 \item @<type-name>
+\item @<atomic-type>
 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 \item @"void"
+\item @"_Bool", @"bool"
 \item @"char", @"unsigned char", @"signed char"
 \item @"short", @"unsigned short", @"signed short"
 \item @"short int", @"unsigned short int", @"signed short int"
@@ -471,32 +500,40 @@ more @<qualifiers>, and one of the following, up to reordering.
 \item @"long long", @"unsigned long long", @"signed long long"
 \item @"long long int", @"unsigned long long int", @"signed long long int"
 \item @"float", @"double", @"long double"
+\item @"float _Imaginary", @"double _Imaginary", @"long double _Imaginary"
+\item @"float imaginary", @"double imaginary", @"long double imaginary"
+\item @"float _Complex", @"double _Complex", @"long double _Complex"
+\item @"float complex", @"double complex", @"long double complex"
 \end{itemize}
 All of these have their usual C meanings.
 
-\subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
+\subsubsection{Declarators}
 \begin{grammar}
-<declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$
+<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
 
-<primary-declarator>$[k]$ ::= $k$
-\alt "(" <primary-declarator>$[k]$ ")"
-\alt <primary-declarator>$[k]$ @<declarator-suffix>
+<primary-declarator>$[k, a]$ ::= $k$
+\alt "(" <primary-declarator>$[k, a]$ ")"
+\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
 
 <pointer> ::= "*" @<qualifier>^*
 
-<declarator-suffix> ::= "[" <c-fragment> "]"
-\alt "(" <arguments> ")"
+<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
+\alt "(" $a$ ")"
 
-<argument-list> ::= $\epsilon$ | "..."
-\alt <list>$[\mbox{@<argument>}]$ @["," "..."@]
+<argument-list> ::= $\epsilon$ | "\dots"
+\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
 
 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 
+<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
+
 <argument-declarator> ::= <declarator>$[\mbox{@<identifier> @! $\epsilon$}]$
 
-<simple-declarator> ::= <declarator>$[\mbox{@<identifier>}]$
+<argument-declarator> ::=
+  <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
 
-<dotted-name> ::= <identifier> "." <identifier>
+<simple-declarator> ::=
+  <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
 \end{grammar}
 
 The declarator syntax is taken from C, but with some differences.
@@ -511,14 +548,33 @@ The declarator syntax is taken from C, but with some differences.
 The remaining differences are (I hope) a matter of presentation rather than
 substance.
 
-\subsection{Defining classes} \label{sec:syntax.class}
+There is additional syntax to support messages and methods which accept
+keyword arguments.
+
+\begin{grammar}
+<keyword-argument> ::= <argument> @["=" <c-fragment>@]
+
+<keyword-argument-list> ::=
+  @[<list>$[\mbox{@<argument>}]$@]
+  "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
+
+<method-argument-list> ::= <argument-list> @! <keyword-argument-list>
+
+<dotted-name> ::= <identifier> "." <identifier>
+
+<keyword-declarator>$[k]$ ::=
+  <declarator>$[k, \mbox{@<method-argument-list>}]$
+\end{grammar}
+
+
+\subsection{Class definitions} \label{sec:syntax.module.class}
 
 \begin{grammar}
 <class-definition> ::= <class-forward-declaration>
 \alt <full-class-definition>
 \end{grammar}
 
-\subsubsection{Forward declarations} \label{sec:class.class.forward}
+\subsubsection{Forward declarations}
 \begin{grammar}
 <class-forward-declaration> ::= "class" <identifier> ";"
 \end{grammar}
@@ -526,28 +582,31 @@ substance.
 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 to name a class which is currently undefined.  Forward declarations are
 necessary in order to resolve certain kinds of circularity.  For example,
-\begin{listing}
-class Sub;
+\begin{prog}
+class Sub;                                                      \\+
 
-class Super : SodObject {
-  Sub *sub;
-};
+class Super : SodObject \{                                      \\ \ind
+  Sub *sub;                                                   \-\\
+\};                                                             \\+
 
-class Sub : Super {
-  /* ... */
-};
-\end{listing}
-
-\subsubsection{Full class definitions} \label{sec:class.class.full}
+class Sub : Super \{                                            \\ \ind
+  /* \dots\ */                                                \-\\
+\};
+\end{prog}
 
+\subsubsection{Full class definitions}
 \begin{grammar}
 <full-class-definition> ::=
   @[<properties>@]
   "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
   "{" @<properties-class-item>^* "}"
 
-<class-item> ::= <slot-item> ";"
-\alt <initializer-item> ";"
+<properties-class-item> ::= @[<properties>@] <class-item>
+
+<class-item> ::= <slot-item>
+\alt <initializer-item>
+\alt <initarg-item>
+\alt <fragment-item>
 \alt <message-item>
 \alt <method-item>
 \end{grammar}
@@ -561,7 +620,10 @@ names, to distinguish them from other kinds of identifiers.
 
 The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
 class.  It is an error if any of these @<identifier>s does not name a defined
-class.
+class.  The superclass list is required, and must not be empty; listing
+@|SodObject| as your class's superclass is a good choice if nothing else
+seems suitable.  It's not possible to define a \emph{root class} in the Sod
+language: you must use Lisp to do this, and it's quite involved.
 
 The @<properties> provide additional information.  The standard class
 properties are as follows.
@@ -586,11 +648,9 @@ properties are as follows.
 The class body consists of a sequence of @<class-item>s enclosed in braces.
 These items are discussed on the following sections.
 
-\subsubsection{Slot items} \label{sec:sntax.class.slot}
-
+\subsubsection{Slot items}
 \begin{grammar}
 <slot-item> ::=
-  @[<properties>@]
   @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 
 <init-declarator> ::= <simple-declarator> @["=" <initializer>@]
@@ -608,29 +668,28 @@ functions are fine.
 An @<initializer>, if present, is treated as if a separate
 @<initializer-item> containing the slot name and initializer were present.
 For example,
-\begin{listing}
-[nick = eg]
-class Example : Super {
-  int foo = 17;
-};
-\end{listing}
+\begin{prog}
+[nick = eg]                                                     \\
+class Example : Super \{                                        \\ \ind
+  int foo = 17;                                               \-\\
+\};
+\end{prog}
 means the same as
-\begin{listing}
-[nick = eg]
-class Example : Super {
-  int foo;
-  eg.foo = 17;
-};
-\end{listing}
-
-\subsubsection{Initializer items} \label{sec:syntax.class.init}
-
+\begin{prog}
+[nick = eg]                                                     \\
+class Example : Super \{                                        \\ \ind
+  int foo;                                                      \\
+  eg.foo = 17;                                                \-\\
+\};
+\end{prog}
+
+\subsubsection{Initializer items}
 \begin{grammar}
-<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$
+<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 
-<slot-initializer> ::= <dotted-name> "=" <initializer>
+<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
 
-<initializer> :: "{" <c-fragment> "}" | <c-fragment>
+<initializer> ::= <c-fragment>
 \end{grammar}
 
 An @<initializer-item> provides an initial value for one or more slots.  If
@@ -641,30 +700,42 @@ The first component of the @<dotted-name> must be the nickname of one of the
 class's superclasses (including itself); the second must be the name of a
 slot defined in that superclass.
 
-The initializer has one of two forms.
-\begin{itemize}
-\item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
-  This is suitable for initializing structure, union or array slots.
-\item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
-  initializer, and continues until the next @`,' or @`;' which is not within
-  nested brackets.  Bare initializers are suitable for initializing scalar
-  slots, such as pointers or integers, and strings.
-\end{itemize}
+An @|initarg| property may be set on an instance slot initializer (or a
+direct slot definition).  See \xref{sec:concepts.lifecycle.birth} for the
+details.  An initializer item must have either an @|initarg| property, or an
+initializer expression, or both.
 
-\subsubsection{Message items} \label{sec:syntax.class.message}
+Each class may define at most one initializer item with an explicit
+initializer expression for a given slot.
 
+\subsubsection{Initarg items}
 \begin{grammar}
-<message-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <simple-declarator> @[<method-body>@]
+<initarg-item> ::=
+  "initarg"
+  @<declaration-specifier>^+
+  <list>$[\mbox{@<init-declarator>}]$ ";"
 \end{grammar}
 
-\subsubsection{Method items} \label{sec:syntax.class.method}
+\subsubsection{Fragment items}
+\begin{grammar}
+<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
 
+<fragment-kind> ::= "init" | "teardown"
+\end{grammar}
+
+\subsubsection{Message items}
+\begin{grammar}
+<message-item> ::=
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<identifier>}]$
+  @[<method-body>@]
+\end{grammar}
+
+\subsubsection{Method items}
 \begin{grammar}
 <method-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <declarator>$[\mbox{@<dotted-name>}]$
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<dotted-name>}]$
   <method-body>
 
 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"