X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/bb80145308ea388d7c6ed5336c061340e78f66e8..db56b1d3c3d4bc9ffb6500b1f40c27c77d868aa4:/doc/syntax.tex

diff --git a/doc/syntax.tex b/doc/syntax.tex
index 336f4bc..b8ae797 100644
--- a/doc/syntax.tex
+++ b/doc/syntax.tex
@@ -7,7 +7,7 @@
 
 %%%----- Licensing notice ---------------------------------------------------
 %%%
-%%% This file is part of the Sensble Object Design, an object system for C.
+%%% This file is part of the Sensible Object Design, an object system for C.
 %%%
 %%% SOD is free software; you can redistribute it and/or modify
 %%% it under the terms of the GNU General Public License as published by
@@ -26,36 +26,52 @@
 \chapter{Module syntax} \label{ch:syntax}
 
 %%%--------------------------------------------------------------------------
+\section{Notation} \label{sec:syntax.notation}
 
-Fortunately, Sod is syntactically quite simple.  I've used a little slightly
-unusual notation in order to make the presentation easier to read.  For any
-nonterminal $x$:
+Fortunately, Sod is syntactically quite simple.  The notation is slightly
+unusual in order to make the presentation shorter and easier to read.
+
+Anywhere a simple nonterminal name $x$ may appear in the grammar, an
+\emph{indexed} nonterminal $x[a_1, \ldots, a_n]$ may also appear.  On the
+left-hand side of a production rule, the indices $a_1$, \ldots, $a_n$ are
+variables which vary over all nonterminal and terminal symbols, and the
+variables may also appear on the right-hand side in place of a nonterminal.
+Such a rule stands for a family of rules, in which each variable is replaced by
+each possible simple nonterminal or terminal symbol.
+
+The letter $\epsilon$ denotes the empty nonterminal:
+\begin{quote}
+  \syntax{$\epsilon$ ::=}
+\end{quote}
+
+The following indexed productions are used throughout the grammar, some often
+enough that they deserve special notation.
 \begin{itemize}
-\item $\epsilon$ denotes the empty nonterminal:
-  \begin{quote}
-    $\epsilon$ ::=
-  \end{quote}
-\item @[$x$@] means an optional $x$:
+\item @[$x$@] abbreviates @<optional>$[x]$, denoting an optional occurrence
+  of $x$:
   \begin{quote}
-    \syntax{@[$x$@] ::= $\epsilon$ @! $x$}
+    \syntax{@[$x$@] ::= <optional>$[x]$ ::= $\epsilon$ @! $x$}
   \end{quote}
-\item $x^*$ means a sequence of zero or more $x$s:
+\item $x^*$ abbreviates @<zero-or-more>$[x]$, denoting a sequence of zero or
+  more occurrences of $x$:
   \begin{quote}
-    \syntax{$x^*$ ::= $\epsilon$ @! $x^*$ $x$}
+    \syntax{$x^*$ ::= <zero-or-more>$[x]$ ::=
+      $\epsilon$ @! <zero-or-more>$[x]$ $x$}
   \end{quote}
-\item $x^+$ means a sequence of one or more $x$s:
+\item $x^+$ abbreviates @<one-or-more>$[x]$, denoting a sequence of one or
+  more occurrences of $x$:
   \begin{quote}
-    \syntax{$x^+$ ::= $x$ $x^*$}
+    \syntax{$x^+$ ::= <one-or-more>$[x]$ ::= <zero-or-more>$[x]$ $x$}
   \end{quote}
-\item $x$@<-list> means a sequence of one or more $x$s separated
-  by commas:
+\item @<list>$[x]$ denotes a sequence of one or more occurrences of $x$
+  separated by commas:
   \begin{quote}
-    \syntax{$x$<-list> ::= $x$ @! $x$<-list> "," $x$}
+    \syntax{<list>$[x]$ ::= $x$ @! <list>$[x]$ "," $x$}
   \end{quote}
 \end{itemize}
 
-\subsection{Lexical syntax}
-\label{sec:syntax.lex}
+%%%--------------------------------------------------------------------------
+\section{Lexical syntax} \label{sec:syntax.lex}
 
 Whitespace and comments are discarded.  The remaining characters are
 collected into tokens according to the following syntax.
@@ -72,7 +88,8 @@ This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
 munch} rule: at each stage we take the longest sequence of characters which
 could be a token.
 
-\subsubsection{Identifiers} \label{sec:syntax.lex.id}
+
+\subsection{Identifiers} \label{sec:syntax.lex.id}
 
 \begin{grammar}
 <identifier> ::= <id-start-char> @<id-body-char>^*
@@ -100,7 +117,8 @@ also an ambiguity (inherited from C) in the declaration syntax which is
 settled by distinguishing type names from other identifiers at a lexical
 level.
 
-\subsubsection{String and character literals} \label{sec:syntax.lex.string}
+
+\subsection{String and character literals} \label{sec:syntax.lex.string}
 
 \begin{grammar}
 <string-literal> ::= "\"" @<string-literal-char>^* "\""
@@ -131,7 +149,7 @@ discouraged.
 \alt <octal-integer>
 \alt <hex-integer>
 
-<decimal-integer> ::= <nonzero-digit-char> @<digit-char>^*
+<decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
 
 <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
 
@@ -152,13 +170,15 @@ Sod understands only integers, not floating-point numbers; its integer syntax
 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 binary.  However, length and signedness indicators are not permitted.
 
-\subsubsection{Punctuation} \label{sec:syntax.lex.punct}
+
+\subsection{Punctuation} \label{sec:syntax.lex.punct}
 
 \begin{grammar}
 <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
 \end{grammar}
 
-\subsubsection{Comments} \label{sec:lex-comment}
+
+\subsection{Comments} \label{sec:syntax.lex.comment}
 
 \begin{grammar}
 <comment> ::= <block-comment>
@@ -187,26 +207,24 @@ Comments are exactly as in C99: both traditional block comments `\texttt{/*}
 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
 permitted and ignored.
 
-\subsection{Special nonterminals}
-\label{sec:special-nonterminals}
+
+\subsection{Special nonterminals} \label{sec:syntax.lex.special}
 
 Aside from the lexical syntax presented above (\xref{sec:lexical-syntax}),
 two special nonterminals occur in the module syntax.
 
-\subsubsection{S-expressions} \label{sec:syntax-sexp}
-
+\subsubsection{S-expressions}
 \begin{grammar}
 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 \end{grammar}
 
 When an S-expression is expected, the Sod parser simply calls the host Lisp
-system's \textsf{read} function.  Sod modules are permitted to modify the
-read table to extend the S-expression syntax.
+system's @|read| function.  Sod modules are permitted to modify the read
+table to extend the S-expression syntax.
 
 S-expressions are self-delimiting, so no end-marker is needed.
 
-\subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
-
+\subsubsection{C fragments}
 \begin{grammar}
 <c-fragment> ::= a sequence of C tokens, with matching brackets
 \end{grammar}
@@ -220,7 +238,8 @@ determined by the immediately surrounding context -- usually a closing brace
 or bracket.  The first such delimiter character which is not enclosed in
 brackets, braces or parenthesis ends the fragment.
 
-\subsection{Module syntax} \label{sec:syntax-module}
+%%%--------------------------------------------------------------------------
+\section{Module syntax} \label{sec:syntax.module}
 
 \begin{grammar}
 <module> ::= @<definition>^*
@@ -233,13 +252,12 @@ brackets, braces or parenthesis ends the fragment.
 \alt <class-definition>
 \end{grammar}
 
-A module is the top-level syntactic item.  A module consists of a sequence of
-definitions.
-
-\subsection{Simple definitions} \label{sec:syntax.defs}
+A @<module> is the top-level syntactic item.  A module consists of a sequence
+of definitions.
 
-\subsubsection{Importing modules} \label{sec:syntax.defs.import}
+\subsection{Simple definitions} \label{sec:syntax.module.simple}
 
+\subsubsection{Importing modules}
 \begin{grammar}
 <import-definition> ::= "import" <string> ";"
 \end{grammar}
@@ -268,8 +286,7 @@ happens.\footnote{%
 
 Recursive imports, either direct or indirect, are an error.
 
-\subsubsection{Loading extensions} \label{sec:syntax.defs.load}
-
+\subsubsection{Loading extensions}
 \begin{grammar}
 <load-definition> ::= "load" <string> ";"
 \end{grammar}
@@ -303,8 +320,7 @@ which will arrange for the extension to be compiled if necessary.
 particular system definition facility.  It's bad enough already that it
 depends on Common Lisp.)
 
-\subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
-
+\subsubsection{Lisp escapes}
 \begin{grammar}
 <lisp-definition> ::= "lisp" <s-expression> ";"
 \end{grammar}
@@ -319,11 +335,10 @@ The @<s-expression> is evaluated immediately.  It can do anything it likes.
     this isn't as unusually scary as it sounds.  But please be careful.} %
 \end{boxy}
 
-\subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
-
+\subsubsection{Declaring type names}
 \begin{grammar}
 <typename-definition> ::=
-  "typename" <identifier-list> ";"
+  "typename" <list>$[\mbox{@<identifier>}]$ ";"
 \end{grammar}
 
 Each @<identifier> is declared as naming a C type.  This is important because
@@ -333,16 +348,19 @@ done by distinguishing type names from other identifiers.
 Don't declare class names using @"typename"; use @"class" forward
 declarations instead.
 
-\subsection{Literal code} \label{sec:syntax-code}
+
+\subsection{Literal code} \label{sec:syntax.module.literal}
 
 \begin{grammar}
 <code-definition> ::=
-  "code" <identifier> ":" <identifier> @[<constraints>@]
+  "code" <identifier> ":" <item-name> @[<constraints>@]
   "{" <c-fragment> "}"
 
-<constraints> ::= "[" <constraint-list> "]"
+<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 
-<constraint> ::= @<identifier>^+
+<constraint> ::= @<item-name>^+
+
+<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 \end{grammar}
 
 The @<c-fragment> will be output unchanged to one of the output files.
@@ -351,28 +369,29 @@ The first @<identifier> is the symbolic name of an output file.  Predefined
 output file names are @"c" and @"h", which are the implementation code and
 header file respectively; other output files can be defined by extensions.
 
-The second @<identifier> provides a name for the output item.  Several C
-fragments can have the same name: they will be concatenated together in the
-order in which they were encountered.
+Output items are named with a sequence of identifiers, separated by
+whitespace, and enclosed in parentheses.  As an abbreviation, a name
+consisting of a single identifier may be written as just that identifier,
+without the parentheses.
 
 The @<constraints> provide a means for specifying where in the output file
 the output item should appear.  (Note the two kinds of square brackets shown
 in the syntax: square brackets must appear around the constraints if they are
 present, but that they may be omitted.)  Each comma-separated @<constraint>
-is a sequence of identifiers naming output items, and indicates that the
-output items must appear in the order given -- though the translator is free
-to insert additional items in between them.  (The particular output items
-needn't be defined already -- indeed, they needn't be defined ever.)
+is a sequence of names of output items, and indicates that the output items
+must appear in the order given -- though the translator is free to insert
+additional items in between them.  (The particular output items needn't be
+defined already -- indeed, they needn't be defined ever.)
 
 There is a predefined output item @"includes" in both the @"c" and @"h"
 output files which is a suitable place for inserting @"\#include"
 preprocessor directives in order to declare types and functions for use
 elsewhere in the generated output files.
 
-\subsection{Property sets} \label{sec:syntax.propset}
 
+\subsection{Property sets} \label{sec:syntax.module.properties}
 \begin{grammar}
-<properties> ::= "[" <property-list> "]"
+<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 
 <property> ::= <identifier> "=" <expression>
 \end{grammar}
@@ -386,8 +405,7 @@ A property has a name, given as an @<identifier>, and a value computed by
 evaluating an @<expression>.  The value can be one of a number of types,
 though the only operators currently defined act on integer values only.
 
-\subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
-
+\subsubsection{The expression evaluator}
 \begin{grammar}
 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
 
@@ -414,7 +432,8 @@ causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 function.
 %%% FIXME crossref to extension docs
 
-\subsection{C types} \label{sec:syntax.c-types}
+
+\subsection{C types} \label{sec:syntax.module.types}
 
 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
@@ -422,31 +441,53 @@ Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 `implicit @"int"' is forbidden), and storage-class specifiers are not
 recognized.
 
-\subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
-
+\subsubsection{Declaration specifiers}
 \begin{grammar}
 <declaration-specifier> ::= <type-name>
 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 \alt "void" | "char" | "int" | "float" | "double"
 \alt "short" | "long"
 \alt "signed" | "unsigned"
+\alt "bool" | "_Bool"
+\alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 \alt <qualifier>
+\alt <storage-specifier>
+\alt <atomic-type>
+
+<qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
+
+<atomic-type> ::=
+  <atomic> "(" @<declaration-specifier>^+ <abstract-declarator> ")"
+
+<atomic> ::= "atomic" | "_Atomic"
+
+<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
 
-<qualifier> ::= "const" | "volatile" | "restrict"
+<alignas> ::= "alignas" | "_Alignas"
 
 <type-name> ::= <identifier>
 \end{grammar}
 
 A @<type-name> is an identifier which has been declared as being a type name,
-using the @"typename" or @"class" definitions.
+using the @"typename" or @"class" definitions.  The following type names are
+defined in the built-in module.
+\begin{itemize}
+\item @"va_list"
+\item @"size_t"
+\item @"ptrdiff_t"
+\item @"wchar_t"
+\end{itemize}
 
 Declaration specifiers may appear in any order.  However, not all
 combinations are permitted.  A declaration specifier must consist of zero or
-more @<qualifiers>, and one of the following, up to reordering.
+more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
+following, up to reordering.
 \begin{itemize}
 \item @<type-name>
+\item @<atomic-type>
 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 \item @"void"
+\item @"_Bool", @"bool"
 \item @"char", @"unsigned char", @"signed char"
 \item @"short", @"unsigned short", @"signed short"
 \item @"short int", @"unsigned short int", @"signed short int"
@@ -456,35 +497,38 @@ more @<qualifiers>, and one of the following, up to reordering.
 \item @"long long", @"unsigned long long", @"signed long long"
 \item @"long long int", @"unsigned long long int", @"signed long long int"
 \item @"float", @"double", @"long double"
+\item @"float _Imaginary", @"double _Imaginary", @"long double _Imaginary"
+\item @"float imaginary", @"double imaginary", @"long double imaginary"
+\item @"float _Complex", @"double _Complex", @"long double _Complex"
+\item @"float complex", @"double complex", @"long double complex"
 \end{itemize}
 All of these have their usual C meanings.
 
-\subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
-
+\subsubsection{Declarators}
 \begin{grammar}
 <declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$
 
 <primary-declarator>$[k]$ ::= $k$
 \alt "(" <primary-declarator>$[k]$ ")"
-\alt <primary-declarator>$[k]$ @<declarator-suffix>^*
+\alt <primary-declarator>$[k]$ @<declarator-suffix>
 
 <pointer> ::= "*" @<qualifier>^*
 
 <declarator-suffix> ::= "[" <c-fragment> "]"
 \alt "(" <arguments> ")"
 
-<arguments> ::= $\epsilon$ | "..."
-\alt <argument-list> @["," "..."@]
+<argument-list> ::= $\epsilon$ | "..."
+\alt <list>$[\mbox{@<argument>}]$ @["," "..."@]
 
 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 
-<argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@]
+<abstract-declarator> ::= <declarator>$[\epsilon]$
 
-<simple-declarator> ::= <declarator>@[<identifier>@]
+<argument-declarator> ::= <declarator>$[\mbox{@<identifier> @! $\epsilon$}]$
 
-<dotted-name> ::= <identifier> "." <identifier>
+<simple-declarator> ::= <declarator>$[\mbox{@<identifier>}]$
 
-<dotted-declarator> ::= <declarator>@[<dotted-name>@]
+<dotted-name> ::= <identifier> "." <identifier>
 \end{grammar}
 
 The declarator syntax is taken from C, but with some differences.
@@ -499,15 +543,15 @@ The declarator syntax is taken from C, but with some differences.
 The remaining differences are (I hope) a matter of presentation rather than
 substance.
 
-\subsection{Defining classes} \label{sec:syntax.class}
+
+\subsection{Class definitions} \label{sec:syntax.module.class}
 
 \begin{grammar}
 <class-definition> ::= <class-forward-declaration>
 \alt <full-class-definition>
 \end{grammar}
 
-\subsubsection{Forward declarations} \label{sec:class.class.forward}
-
+\subsubsection{Forward declarations}
 \begin{grammar}
 <class-forward-declaration> ::= "class" <identifier> ";"
 \end{grammar}
@@ -527,18 +571,19 @@ class Sub : Super {
 };
 \end{listing}
 
-\subsubsection{Full class definitions} \label{sec:class.class.full}
-
+\subsubsection{Full class definitions}
 \begin{grammar}
 <full-class-definition> ::=
   @[<properties>@]
-  "class" <identifier> ":" <identifier-list>
-  "{" @<class-item>^* "}"
+  "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
+  "{" @<properties-class-item>^* "}"
+
+<properties-class-item> ::= @[<properties>@] <class-item>
 
-<class-item> ::= <slot-item> ";"
+<class-item> ::= <slot-item>
+\alt <initializer-item>
 \alt <message-item>
 \alt <method-item>
-\alt  <initializer-item> ";"
 \end{grammar}
 
 A full class definition provides a complete description of a class.
@@ -548,8 +593,9 @@ give the name of an existing class (other than a forward-referenced class),
 or an existing type name.  It is conventional to give classes `MixedCase'
 names, to distinguish them from other kinds of identifiers.
 
-The @<identifier-list> names the direct superclasses for the new class.  It
-is an error if any of these @<identifier>s does not name a defined class.
+The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
+class.  It is an error if any of these @<identifier>s does not name a defined
+class.
 
 The @<properties> provide additional information.  The standard class
 properties are as follows.
@@ -574,20 +620,18 @@ properties are as follows.
 The class body consists of a sequence of @<class-item>s enclosed in braces.
 These items are discussed on the following sections.
 
-\subsubsection{Slot items} \label{sec:sntax.class.slot}
-
+\subsubsection{Slot items}
 \begin{grammar}
 <slot-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <init-declarator-list>
+  @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 
-<init-declarator> ::= <declarator> @["=" <initializer>@]
+<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
 \end{grammar}
 
 A @<slot-item> defines one or more slots.  All instances of the class and any
 subclass will contain these slot, with the names and types given by the
 @<declaration-specifiers> and the @<declarators>.  Slot declarators may not
-contain qualified identifiers.
+contain dotted names.
 
 It is not possible to declare a slot with function type: such an item is
 interpreted as being a @<message-item> or @<method-item>.  Pointers to
@@ -611,12 +655,11 @@ class Example : Super {
 };
 \end{listing}
 
-\subsubsection{Initializer items} \label{sec:syntax.class.init}
-
+\subsubsection{Initializer items}
 \begin{grammar}
-<initializer-item> ::= @["class"@] <slot-initializer-list>
+<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 
-<slot-initializer> ::= <qualified-identifier> "=" <initializer>
+<slot-initializer> ::= <dotted-name> "=" <initializer>
 
 <initializer> :: "{" <c-fragment> "}" | <c-fragment>
 \end{grammar}
@@ -625,9 +668,9 @@ An @<initializer-item> provides an initial value for one or more slots.  If
 prefixed by @"class", then the initial values are for class slots (i.e.,
 slots of the class object itself); otherwise they are for instance slots.
 
-The first component of the @<qualified-identifier> must be the nickname of
-one of the class's superclasses (including itself); the second must be the
-name of a slot defined in that superclass.
+The first component of the @<dotted-name> must be the nickname of one of the
+class's superclasses (including itself); the second must be the name of a
+slot defined in that superclass.
 
 The initializer has one of two forms.
 \begin{itemize}
@@ -639,25 +682,24 @@ The initializer has one of two forms.
   slots, such as pointers or integers, and strings.
 \end{itemize}
 
-\subsubsection{Message items} \label{sec:syntax.class.message}
-
+\subsubsection{Message items}
 \begin{grammar}
 <message-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <declarator> @[<method-body>@]
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<identifier>}]$
+  @[<method-body>@]
 \end{grammar}
 
-\subsubsection{Method items} \label{sec:syntax.class.method}
-
+\subsubsection{Method items}
 \begin{grammar}
 <method-item> ::=
-  @[<properties>@]
-  @<declaration-specifier>^+ <declarator> <method-body>
+  @<declaration-specifier>^+
+  <keyword-declarator>$[\mbox{@<dotted-name>}]$
+  <method-body>
 
 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 \end{grammar}
 
-
 %%%----- That's all, folks --------------------------------------------------
 
 %%% Local variables: