X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/0a488b1c14d36537a1303e1e1f43c4cfc440b0a2..db56b1d3c3d4bc9ffb6500b1f40c27c77d868aa4:/doc/syntax.tex

diff --git a/doc/syntax.tex b/doc/syntax.tex
index 30b8b32..b8ae797 100644
--- a/doc/syntax.tex
+++ b/doc/syntax.tex
@@ -26,6 +26,7 @@
 \chapter{Module syntax} \label{ch:syntax}
 
 %%%--------------------------------------------------------------------------
+\section{Notation} \label{sec:syntax.notation}
 
 Fortunately, Sod is syntactically quite simple.  The notation is slightly
 unusual in order to make the presentation shorter and easier to read.
@@ -69,8 +70,8 @@ enough that they deserve special notation.
   \end{quote}
 \end{itemize}
 
-\subsection{Lexical syntax}
-\label{sec:syntax.lex}
+%%%--------------------------------------------------------------------------
+\section{Lexical syntax} \label{sec:syntax.lex}
 
 Whitespace and comments are discarded.  The remaining characters are
 collected into tokens according to the following syntax.
@@ -87,7 +88,8 @@ This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
 munch} rule: at each stage we take the longest sequence of characters which
 could be a token.
 
-\subsubsection{Identifiers} \label{sec:syntax.lex.id}
+
+\subsection{Identifiers} \label{sec:syntax.lex.id}
 
 \begin{grammar}
 <identifier> ::= <id-start-char> @<id-body-char>^*
@@ -115,7 +117,8 @@ also an ambiguity (inherited from C) in the declaration syntax which is
 settled by distinguishing type names from other identifiers at a lexical
 level.
 
-\subsubsection{String and character literals} \label{sec:syntax.lex.string}
+
+\subsection{String and character literals} \label{sec:syntax.lex.string}
 
 \begin{grammar}
 <string-literal> ::= "\"" @<string-literal-char>^* "\""
@@ -167,13 +170,15 @@ Sod understands only integers, not floating-point numbers; its integer syntax
 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 binary.  However, length and signedness indicators are not permitted.
 
-\subsubsection{Punctuation} \label{sec:syntax.lex.punct}
+
+\subsection{Punctuation} \label{sec:syntax.lex.punct}
 
 \begin{grammar}
 <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
 \end{grammar}
 
-\subsubsection{Comments} \label{sec:lex-comment}
+
+\subsection{Comments} \label{sec:syntax.lex.comment}
 
 \begin{grammar}
 <comment> ::= <block-comment>
@@ -202,26 +207,24 @@ Comments are exactly as in C99: both traditional block comments `\texttt{/*}
 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
 permitted and ignored.
 
-\subsection{Special nonterminals}
-\label{sec:special-nonterminals}
+
+\subsection{Special nonterminals} \label{sec:syntax.lex.special}
 
 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 two special nonterminals occur in the module syntax.
 
-\subsubsection{S-expressions} \label{sec:syntax-sexp}
-
+\subsubsection{S-expressions}
 \begin{grammar}
 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 \end{grammar}
 
 When an S-expression is expected, the Sod parser simply calls the host Lisp
-system's \textsf{read} function.  Sod modules are permitted to modify the
-read table to extend the S-expression syntax.
+system's @|read| function.  Sod modules are permitted to modify the read
+table to extend the S-expression syntax.
 
 S-expressions are self-delimiting, so no end-marker is needed.
 
-\subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
-
+\subsubsection{C fragments}
 \begin{grammar}
 <c-fragment> ::= a sequence of C tokens, with matching brackets
 \end{grammar}
@@ -235,7 +238,8 @@ determined by the immediately surrounding context -- usually a closing brace
 or bracket.  The first such delimiter character which is not enclosed in
 brackets, braces or parentheses ends the fragment.
 
-\subsection{Module syntax} \label{sec:syntax-module}
+%%%--------------------------------------------------------------------------
+\section{Module syntax} \label{sec:syntax.module}
 
 \begin{grammar}
 <module> ::= @<definition>^*
@@ -248,13 +252,12 @@ brackets, braces or parenthesis ends the fragment.
 \alt <class-definition>
 \end{grammar}
 
-A module is the top-level syntactic item.  A module consists of a sequence of
-definitions.
-
-\subsection{Simple definitions} \label{sec:syntax.defs}
+A @<module> is the top-level syntactic item.  A module consists of a sequence
+of definitions.
 
-\subsubsection{Importing modules} \label{sec:syntax.defs.import}
+\subsection{Simple definitions} \label{sec:syntax.module.simple}
 
+\subsubsection{Importing modules}
 \begin{grammar}
 <import-definition> ::= "import" <string> ";"
 \end{grammar}
@@ -283,8 +286,7 @@ happens.\footnote{%
 
 Recursive imports, either direct or indirect, are an error.
 
-\subsubsection{Loading extensions} \label{sec:syntax.defs.load}
-
+\subsubsection{Loading extensions}
 \begin{grammar}
 <load-definition> ::= "load" <string> ";"
 \end{grammar}
@@ -318,8 +320,7 @@ which will arrange for the extension to be compiled if necessary.
 particular system definition facility.  It's bad enough already that it
 depends on Common Lisp.)
 
-\subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
-
+\subsubsection{Lisp escapes}
 \begin{grammar}
 <lisp-definition> ::= "lisp" <s-expression> ";"
 \end{grammar}
@@ -334,11 +335,10 @@ The @<s-expression> is evaluated immediately.  It can do anything it likes.
     this isn't as unusually scary as it sounds.  But please be careful.} %
 \end{boxy}
 
-\subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
-
+\subsubsection{Declaring type names}
 \begin{grammar}
 <typename-definition> ::=
-  "typename" <list>@[<identifier>@] ";"
+  "typename" <list>$[\mbox{@<identifier>}]$ ";"
 \end{grammar}
 
 Each @<identifier> is declared as naming a C type.  This is important because
@@ -348,16 +348,19 @@ done by distinguishing type names from other identifiers.
 Don't declare class names using @"typename"; use @"class" forward
 declarations instead.
 
-\subsection{Literal code} \label{sec:syntax-code}
+
+\subsection{Literal code} \label{sec:syntax.module.literal}
 
 \begin{grammar}
 <code-definition> ::=
-  "code" <identifier> ":" <identifier> @[<constraints>@]
+  "code" <identifier> ":" <item-name> @[<constraints>@]
   "{" <c-fragment> "}"
 
-<constraints> ::= "[" <list>@[<constraint>@] "]"
+<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
+
+<constraint> ::= @<item-name>^+
 
-<constraint> ::= @<identifier>^+
+<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 \end{grammar}
 
 The @<c-fragment> will be output unchanged to one of the output files.
@@ -366,28 +369,29 @@ The first @<identifier> is the symbolic name of an output file.  Predefined
 output file names are @"c" and @"h", which are the implementation code and
 header file respectively; other output files can be defined by extensions.
 
-The second @<identifier> provides a name for the output item.  Several C
-fragments can have the same name: they will be concatenated together in the
-order in which they were encountered.
+Output items are named with a sequence of identifiers, separated by
+whitespace, and enclosed in parentheses.  As an abbreviation, a name
+consisting of a single identifier may be written as just that identifier,
+without the parentheses.
 
 The @<constraints> provide a means for specifying where in the output file
 the output item should appear.  (Note the two kinds of square brackets shown
 in the syntax: square brackets must appear around the constraints if they are
 present, but the constraints may be omitted.)  Each comma-separated @<constraint>
-is a sequence of identifiers naming output items, and indicates that the
-output items must appear in the order given -- though the translator is free
-to insert additional items in between them.  (The particular output items
-needn't be defined already -- indeed, they needn't be defined ever.)
+is a sequence of names of output items, and indicates that the output items
+must appear in the order given -- though the translator is free to insert
+additional items in between them.  (The particular output items needn't be
+defined already -- indeed, they needn't be defined ever.)
 
 There is a predefined output item @"includes" in both the @"c" and @"h"
 output files which is a suitable place for inserting @"\#include"
 preprocessor directives in order to declare types and functions for use
 elsewhere in the generated output files.
 
-\subsection{Property sets} \label{sec:syntax.propset}
 
+\subsection{Property sets} \label{sec:syntax.module.properties}
 \begin{grammar}
-<properties> ::= "[" <list>@[<property>@] "]"
+<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 
 <property> ::= <identifier> "=" <expression>
 \end{grammar}
@@ -401,8 +405,7 @@ A property has a name, given as an @<identifier>, and a value computed by
 evaluating an @<expression>.  The value can be one of a number of types,
 though the operators currently defined act on integer values only.
 
-\subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
-
+\subsubsection{The expression evaluator}
 \begin{grammar}
 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
 
@@ -429,7 +432,8 @@ causes the @<s-expression> to be evaluated using the Lisp @|eval|
 function.
 %%% FIXME crossref to extension docs
 
-\subsection{C types} \label{sec:syntax.c-types}
+
+\subsection{C types} \label{sec:syntax.module.types}
 
 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
@@ -437,31 +441,53 @@ Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 `implicit @"int"' is forbidden), and storage-class specifiers are not
 recognized.
 
-\subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
-
+\subsubsection{Declaration specifiers}
 \begin{grammar}
 <declaration-specifier> ::= <type-name>
 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 \alt "void" | "char" | "int" | "float" | "double"
 \alt "short" | "long"
 \alt "signed" | "unsigned"
+\alt "bool" | "_Bool"
+\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 \alt <qualifier>
+\alt <storage-specifier>
+\alt <atomic-type>
+
+<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
+
+<atomic-type> ::=
+  <atomic> "(" @<declaration-specifier>^+ <abstract-declarator> ")"
 
-<qualifier> ::= "const" | "volatile" | "restrict"
+<atomic> ::= "atomic" | "_Atomic"
+
+<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
+
+<alignas> ::= "alignas" | "_Alignas"
 
 <type-name> ::= <identifier>
 \end{grammar}
 
 A @<type-name> is an identifier which has been declared as being a type name,
-using the @"typename" or @"class" definitions.
+using the @"typename" or @"class" definitions.  The following type names are
+defined in the built-in module.
+\begin{itemize}
+\item @"va_list"
+\item @"size_t"
+\item @"ptrdiff_t"
+\item @"wchar_t"
+\end{itemize}
 
 Declaration specifiers may appear in any order.  However, not all
 combinations are permitted.  A declaration specifier must consist of zero or
-more @<qualifiers>, and one of the following, up to reordering.
+more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
+following, up to reordering.
 \begin{itemize}
 \item @<type-name>
+\item @<atomic-type>
 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 \item @"void"
+\item @"_Bool", @"bool"
 \item @"char", @"unsigned char", @"signed char"
 \item @"short", @"unsigned short", @"signed short"
 \item @"short int", @"unsigned short int", @"signed short int"
@@ -471,11 +497,14 @@ more @<qualifiers>, and one of the following, up to reordering.
 \item @"long long", @"unsigned long long", @"signed long long"
 \item @"long long int", @"unsigned long long int", @"signed long long int"
 \item @"float", @"double", @"long double"
+\item @"float _Imaginary", @"double _Imaginary", @"long double _Imaginary"
+\item @"float imaginary", @"double imaginary", @"long double imaginary"
+\item @"float _Complex", @"double _Complex", @"long double _Complex"
+\item @"float complex", @"double complex", @"long double complex"
 \end{itemize}
 All of these have their usual C meanings.
 
-\subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
-
+\subsubsection{Declarators}
 \begin{grammar}
 <declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$
 
@@ -488,18 +517,18 @@ All of these have their usual C meanings.
 <declarator-suffix> ::= "[" <c-fragment> "]"
 \alt "(" <argument-list> ")"
 
-<arguments> ::= $\epsilon$ | "..."
-\alt <list>@[<argument>@] @["," "..."@]
+<argument-list> ::= $\epsilon$ | "..."
+\alt <list>$[\mbox{@<argument>}]$ @["," "..."@]
 
 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 
-<argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@]
+<abstract-declarator> ::= <declarator>$[\epsilon]$
 
-<simple-declarator> ::= <declarator>@[<identifier>@]
+<argument-declarator> ::= <declarator>$[\mbox{@<identifier> @! $\epsilon$}]$
 
-<dotted-name> ::= <identifier> "." <identifier>
+<simple-declarator> ::= <declarator>$[\mbox{@<identifier>}]$
 
-<dotted-declarator> ::= <declarator>@[<dotted-name>@]
+<dotted-name> ::= <identifier> "." <identifier>
 \end{grammar}
 
 The declarator syntax is taken from C, but with some differences.
@@ -514,15 +543,15 @@ The declarator syntax is taken from C, but with some differences.
 The remaining differences are (I hope) a matter of presentation rather than
 substance.
 
-\subsection{Defining classes} \label{sec:syntax.class}
+
+\subsection{Class definitions} \label{sec:syntax.module.class}
 
 \begin{grammar}
 <class-definition> ::= <class-forward-declaration>
 \alt <full-class-definition>
 \end{grammar}
 
-\subsubsection{Forward declarations} \label{sec:class.class.forward}
-
+\subsubsection{Forward declarations}
 \begin{grammar}
 <class-forward-declaration> ::= "class" <identifier> ";"
 \end{grammar}
@@ -542,16 +571,17 @@ class Sub : Super {
 };
 \end{listing}
 
-\subsubsection{Full class definitions} \label{sec:class.class.full}
-
+\subsubsection{Full class definitions}
 \begin{grammar}
 <full-class-definition> ::=
   @[<properties>@]
-  "class" <identifier> ":" <list>@[<identifier>@]
-  "{" @<class-item>^* "}"
+  "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
+  "{" @<properties-class-item>^* "}"
 
-<class-item> ::= <slot-item> ";"
-\alt <initializer-item> ";"
+<properties-class-item> ::= @[<properties>@] <class-item>
+
+<class-item> ::= <slot-item>
+\alt <initializer-item>
 \alt <message-item>
 \alt <method-item>
 \end{grammar}
@@ -563,8 +593,9 @@ give the name of an existing class (other than a forward-referenced class),
 or an existing type name.  It is conventional to give classes `MixedCase'
 names, to distinguish them from other kinds of identifiers.
 
-The @<list>@[<identifier>@] names the direct superclasses for the new class.  It
-is an error if any of these @<identifier>s does not name a defined class.
+The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
+class.  It is an error if any of these @<identifier>s does not name a defined
+class.
 
 The @<properties> provide additional information.  The standard class
 properties are as follows.
@@ -589,12 +620,10 @@ properties are as follows.
 The class body consists of a sequence of @<class-item>s enclosed in braces.
 These items are discussed in the following sections.
 
-\subsubsection{Slot items} \label{sec:sntax.class.slot}
-
+\subsubsection{Slot items}
 \begin{grammar}
 <slot-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <list>@[<init-declarator>@]
+  @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 
 <init-declarator> ::= <simple-declarator> @["=" <initializer>@]
 \end{grammar}
@@ -626,10 +655,9 @@ class Example : Super {
 };
 \end{listing}
 
-\subsubsection{Initializer items} \label{sec:syntax.class.init}
-
+\subsubsection{Initializer items}
 \begin{grammar}
-<initializer-item> ::= @["class"@] <list>@[<slot-initializer>@]
+<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 
 <slot-initializer> ::= <dotted-name> "=" <initializer>
 
@@ -654,25 +682,24 @@ The initializer has one of two forms.
   slots, such as pointers or integers, and strings.
 \end{itemize}
 
-\subsubsection{Message items} \label{sec:syntax.class.message}
-
+\subsubsection{Message items}
 \begin{grammar}
 <message-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <declarator> @[<method-body>@]
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<identifier>}]$
+  @[<method-body>@]
 \end{grammar}
 
-\subsubsection{Method items} \label{sec:syntax.class.method}
-
+\subsubsection{Method items}
 \begin{grammar}
 <method-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <declarator> <method-body>
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<dotted-name>}]$
+  <method-body>
 
 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 \end{grammar}
 
-
 %%%----- That's all, folks --------------------------------------------------
 
 %%% Local variables: