X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/a58527f39183dd0b06f0eb1eebc86a6909f94774..e895be217c3be6769708da17c9ae87cb22db040e:/doc/syntax.tex

diff --git a/doc/syntax.tex b/doc/syntax.tex
index 18690fc..72329a4 100644
--- a/doc/syntax.tex
+++ b/doc/syntax.tex
@@ -51,20 +51,20 @@ could be a token.
 
 <id-start-char> ::= <alpha-char> | "_"
 
-<id-body-char> ::= <id-start-char> @! <digit-char>
+<id-body-char> ::= <id-start-char> | <digit-char>
 
-<alpha-char> ::= "A" | "B" | \dots\ | "Z"
-\alt "a" | "b" | \dots\ | "z"
-\alt <extended-alpha-char>
+<alpha-char> ::= "A" | "B" | $\cdots$ | "Z"
+  | "a" | "b" | $\cdots$ | "z"
+  | <extended-alpha-char>
 
 <digit-char> ::= "0" | <nonzero-digit-char>
 
-<nonzero-digit-char> ::= "1" | "2" $| \ldots |$ "9"
+<nonzero-digit-char> ::= "1" | "2" | $\cdots$ | "9"
 \end{grammar}
 
 The precise definition of @<alpha-char> is left to the function
-\textsf{alpha-char-p} in the hosting Lisp system.  For portability,
-programmers are encouraged to limit themselves to the standard ASCII letters.
+@|alpha-char-p| in the hosting Lisp system.  For portability, programmers are
+encouraged to limit themselves to the standard ASCII letters.
 
 There are no reserved words at the lexical level, but the higher-level syntax
 recognizes certain identifiers as \emph{keywords} in some contexts.  There is
@@ -80,11 +80,11 @@ level.
 
 <char-literal> ::= "'" <char-literal-char> "'"
 
-<string-literal-char> ::= any character other than "\\" or "\""
-\alt "\\" <char>
+<string-literal-char> ::= "\\" <char>
+  | any character other than "\\" or "\""
 
-<char-literal-char> ::= any character other than "\\" or "'"
-\alt "\\" <char>
+<char-literal-char> ::= "\\" <char>
+  | any character other than "\\" or "'"
 
 <char> ::= any single character
 \end{grammar}
@@ -101,25 +101,25 @@ discouraged.
 
 \begin{grammar}
 <integer-literal> ::= <decimal-integer>
-\alt <binary-integer>
-\alt <octal-integer>
-\alt <hex-integer>
+  | <binary-integer>
+  | <octal-integer>
+  | <hex-integer>
 
 <decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
 
-<binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
+<binary-integer> ::= "0" @("b" | "B"@) @<binary-digit-char>^+
 
 <binary-digit-char> ::= "0" | "1"
 
-<octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
+<octal-integer> ::= "0" @["o" | "O"@] @<octal-digit-char>^+
 
-<octal-digit-char> ::= "0" | "1" $| \ldots |$ "7"
+<octal-digit-char> ::= "0" | "1" | $\cdots$ | "7"
 
-<hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
+<hex-integer> ::= "0" @("x" | "X"@) @<hex-digit-char>^+
 
 <hex-digit-char> ::= <digit-char>
-\alt "A" | "B" | "C" | "D" | "E" | "F"
-\alt "a" | "b" | "c" | "d" | "e" | "f"
+  | "A" | "B" | "C" | "D" | "E" | "F"
+  | "a" | "b" | "c" | "d" | "e" | "f"
 \end{grammar}
 
 Sod understands only integers, not floating-point numbers; its integer syntax
@@ -130,15 +130,16 @@ binary.  However, length and signedness indicators are not permitted.
 \subsection{Punctuation} \label{sec:syntax.lex.punct}
 
 \begin{grammar}
-<punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
+<punctuation> ::= "<<" | ">>" | "||" | "&&"
+  | "<=" | ">=" | "==" | "!=" | "\dots"
+\alt any nonalphanumeric character other than "_", "\"", or "'"
 \end{grammar}
 
 
 \subsection{Comments} \label{sec:syntax.lex.comment}
 
 \begin{grammar}
-<comment> ::= <block-comment>
-\alt <line-comment>
+<comment> ::= <block-comment> | <line-comment>
 
 <block-comment> ::=
   "/*"
@@ -190,9 +191,9 @@ during translation.  They are read using a simple scanner which nonetheless
 understands C comments and string and character literals.
 
 A C fragment is terminated by one of a small number of delimiter characters
-determined by the immediately surrounding context -- usually a closing brace
-or bracket.  The first such delimiter character which is not enclosed in
-brackets, braces or parenthesis ends the fragment.
+determined by the immediately surrounding context -- usually some kind of
+bracket.  The first such delimiter character which is not enclosed in
+brackets, braces or parentheses ends the fragment.
 
 %%%--------------------------------------------------------------------------
 \section{C types} \label{sec:syntax.type}
@@ -323,7 +324,7 @@ defined in the built-in module.
 <abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
 
 <argument-declarator> ::=
-  <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
+  <declarator>$[\mbox{@<identifier> | $\epsilon$}, \mbox{@<argument-list>}]$
 
 <simple-declarator> ::=
   <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
@@ -351,7 +352,7 @@ keyword arguments.
   @[<list>$[\mbox{@<argument>}]$@]
   "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
 
-<method-argument-list> ::= <argument-list> @! <keyword-argument-list>
+<method-argument-list> ::= <argument-list> | <keyword-argument-list>
 
 <dotted-name> ::= <identifier> "." <identifier>
 
@@ -360,21 +361,207 @@ keyword arguments.
 \end{grammar}
 
 %%%--------------------------------------------------------------------------
+\section{Properties} \label{sec:syntax.prop}
+
+\begin{grammar}
+<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
+
+<property> ::= <identifier> "=" <expression>
+
+<expression> ::= <logical-or>
+
+<logical-or> ::= <logical-and>
+  | <logical-or> "||" <logical-and>
+
+<logical-and> ::= <bitwise-or>
+  | <logical-and> "&&" <bitwise-or>
+
+<bitwise-or> ::= <bitwise-xor>
+  | <bitwise-or> "|" <bitwise-xor>
+
+<bitwise-xor> ::= <bitwise-and>
+  | <bitwise-xor> "^" <bitwise-and>
+
+<bitwise-and> ::= <equality>
+  | <bitwise-and> "&" <equality>
+
+<equality> ::= <ordering>
+  | <equality> "==" <ordering>
+  | <equality> "!=" <ordering>
+
+<ordering> ::= <shift>
+  | <ordering> "<" <shift>
+  | <ordering> "<=" <shift>
+  | <ordering> ">=" <shift>
+  | <ordering> ">" <shift>
+
+<shift> ::= <additive>
+  | <shift> "<<" <additive>
+  | <shift> ">>" <additive>
+
+<additive> ::= <term>
+  | <additive> "+" <term>
+  | <additive> "--" <term>
+
+<term> ::= <factor>
+  | <term> "*" <factor>
+  | <term> "/" <factor>
+
+<factor> ::= <primary>
+  | "!" <factor> | "~" <factor>
+  | "+" <factor> | "--" <factor>
+
+<primary> ::=
+     <integer-literal> | <string-literal> | <char-literal> | <identifier>
+\alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
+  | "(" <expression> ")"
+\end{grammar}
+
+\emph{Property sets} are a means for associating miscellaneous information
+with compile-time metaobjects such as modules, classes, messages, methods,
+slots, and initializers.  By using property sets, additional information can
+be passed to extensions without the need to introduce idiosyncratic syntax.
+(That said, extensions can add additional first-class syntax, if necessary.)
+
+An error is reported if an unrecognized property is associated with an
+object.
+
+
+\subsection{Property values} \label{sec:syntax.prop.value}
+
+A property has a name, given as an @<identifier>, and a value computed by
+evaluating an @<expression>.  The value can be one of a number of types.
+
+\begin{itemize}
+
+\item An @<integer-literal> denotes a value of type @|int|.
+
+\item Similarly @<string-literal> and @<char-literal> denote @|string| and
+  @|char| values respectively.  Note that, as properties, characters are
+  quite distinct from integers, whereas in C, a character literal denotes a
+  value of type @|int|.
+
+\item There are no variables in the property-value syntax.  Rather, an
+  @<identifier> denotes that identifier, as a value of type @|id|.
+
+\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
+  between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
+  void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
+
+\item A @<c-fragment> within braces denotes the tokens between (and not
+  including) the braces, as a value of type @|c-fragment|.
+
+\end{itemize}
+
+As shown in the grammar, there are four binary operators, @"+" (addition),
+@"--" (subtraction), @"*" (multiplication), and @"/" (division);
+multiplication and division have higher precedence than addition and
+subtraction, and operators of the same precedence associate left-to-right.
+There are also unary @"+" (no effect) and @"--" (negation) operators, with
+higher precedence.  All of the above operators act only on integer operands
+and yield integer results.  (Although the unary @"+" operator yields its
+operand unchanged, an error is still reported if it is applied to a
+non-integer value.)  There are currently no bitwise, logical, or comparison
+operators.
+
+Finally, an S-expression preceded by @|?| causes the expression to be read in
+the current package (which is always @|sod-user| at the start of a module)
+and immediately evaluated (using @|eval|); the resulting value is converted
+into a property value using the \descref{gf}{decode-property}[generic
+function].
+
+
+\subsection{Property output types and coercions}
+\label{sec:syntax.prop.coerce}
+
+When a property value is inspected by the Sod translator, or an extension, it
+is \emph{coerced} so as to conform to a requested output type.  This coercion
+process is performed by the \descref{gf}{coerce-property-value}[generic
+function], and additional output types and coercions can be defined by
+extensions.  The built-in output types and coercions, from the value types
+listed above, are as follows.
+
+\begin{itemize}
+
+\item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
+  correspond to the like-named value types described above.  No coercions to
+  these output types are defined for the described value types.\footnote{%
+    There is a coercion to @|id| from the value type @|symbol|, but it is
+    only possible to generate a property value of type @|symbol| using Lisp.}
+
+\item The output type @|type| denotes a C type, as does the value type
+  @|type|.  In addition, a value of type @|id| can be coerced to a C type if
+  it is the name of a class, a type name explicitly declared by @|typename|,
+  or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
+  @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
+  or @|va_list|.
+
+\item The @|boolean| output type denotes a boolean value, which may be either
+  true or false.  A value of type @|id| is considered true if it is @|true|,
+  @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
+  @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
+  any other identifier where a boolean value is wanted.  A value of type
+  @|int| is considered true if it is nonzero, or false if it is zero.
+
+\item The @|symbol| output type denotes a Lisp symbol.
+
+  A value of type @|id| is coerced to a symbol as follows.  First, the
+  identifier name is subjected to \emph{case inversion}: if all of the
+  letters in the name have the same case, either upper or lower, then they
+  are replaced with the corresponding letters in the opposite case, lower or
+  upper; if the name contains letters of both cases, then it is not changed.
+  For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
+  remains as it is.  Second, the name is subjected to \emph{separator
+  switching}: all underscores in the name are replaced with hyphens (and
+  \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
+  first place).  Finally, the resulting name is interned in the current
+  package, which will usually be @|sod-user| unless changed explicitly by the
+  module.
+
+  A value of type @|string| is coerced to a symbol as follows.  If the string
+  contains no colons, then it is case-inverted (but not separator-switched)
+  and interned in the current package.  Otherwise, the string either has the
+  form $p @|:| q$, where $q$ does not begin with a colon (the
+  \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
+  where $p$ does not contain a colon.  Both $p$ and $q$ are case-inverted
+  (but not separator-switched).  If $p$ does not name a package, then an
+  error is reported; as a special case, if $p$ is empty, then it is
+  considered to name the @|keyword| package.  Otherwise, $q$ is looked up as
+  a symbol name in package~$p$; in the single-colon case, if the symbol is
+  not an exported symbol in package~$p$, then an error is reported; in the
+  double-colon case, $q$ is interned in package~$p$ (and so there needn't be
+  an exported symbol -- or, indeed, any symbol at all -- named $q$
+  beforehand).
+
+\item The @|keyword| output type denotes symbols within the @|keyword|
+  package.  Values of type @|id| or @|string| can be coerced to a @|keyword|
+  in the same way as to a @|symbol|, as described above, only the converted
+  name is looked up in the @|keyword| package rather than the current
+  package.  (A @|string| can override this by specifying an explicit package
+  name, but this is unlikely to be very helpful.)
+
+\end{itemize}
+
+%%%--------------------------------------------------------------------------
 \section{Module syntax} \label{sec:syntax.module}
 
 \begin{grammar}
 <module> ::= @<definition>^*
 
-<definition> ::= <import-definition>
+<definition> ::= <property-definition> \fixme{undefined}
+\alt <import-definition>
 \alt <load-definition>
 \alt <lisp-definition>
 \alt <code-definition>
 \alt <typename-definition>
 \alt <class-definition>
+\alt <other-definition> \fixme{undefined}
 \end{grammar}
 
-A @<module> is the top-level syntactic item.  A module consists of a sequence
-of definitions.
+A @<module> is the top-level syntactic item: a source file presented to Sod
+is expected to conform with the @<module> syntax.
+
+A module consists of a sequence of definitions.
 
 \fixme{describe syntax; expand}
 Properties:
@@ -399,9 +586,9 @@ A search is made for a module source file as follows.
 \begin{itemize}
 \item The module name @<string> is converted into a filename by appending
   @`.sod', if it has no extension already.\footnote{%
-    Technically, what happens is \textsf{(merge-pathnames name (make-pathname
-    :type "SOD" :case :common))}, so exactly what this means varies
-    according to the host system.} %
+    Technically, what happens is @|(merge-pathnames name (make-pathname :type
+    "SOD" :case :common))|, so exactly what this means varies according to
+    the host system.} %
 \item The file is looked for relative to the directory containing the
   importing module.
 \item If that fails, then the file is looked for in each directory on the
@@ -411,7 +598,7 @@ A search is made for a module source file as follows.
 \end{itemize}
 At this point, if the file has previously been imported, nothing further
 happens.\footnote{%
-  This check is done using \textsf{truename}, so it should see through simple
+  This check is done using @|truename|, so it should see through simple
   tricks like symbolic links.  However, it may be confused by fancy things
   like bind mounts and so on.} %
 
@@ -428,23 +615,22 @@ A search is made for a Lisp source file as follows.
 \begin{itemize}
 \item The name @<string> is converted into a filename by appending @`.lisp',
   if it has no extension already.\footnote{%
-    Technically, what happens is \textsf{(merge-pathnames name (make-pathname
-    :type "LISP" :case :common))}, so exactly what this means varies
-    according to the host system.} %
+    Technically, what happens is @|(merge-pathnames name (make-pathname :type
+    "LISP" :case :common))|, so exactly what this means varies according to
+    the host system.} %
 \item A search is then made in the same manner as for module imports
   (\xref{sec:syntax-module}).
 \end{itemize}
-If the file is found, it is loaded using the host Lisp's \textsf{load}
-function.
+If the file is found, it is loaded using the host Lisp's @|load| function.
 
 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 existing compiled files.  The right way to package a substantial extension to
 the Sod translator is to provide the extension as a standard ASDF system (or
 similar) and leave a dropping @|foo-extension.lisp| in the module path saying
 something like
-\begin{quote}
-  \textsf{(asdf:load-system :foo-extension)}
-\end{quote}
+\begin{prog}
+  (asdf:load-system :foo-extension)
+\end{prog}
 which will arrange for the extension to be compiled if necessary.
 
 (This approach means that the language doesn't need to depend on any
@@ -484,14 +670,18 @@ declarations instead.
 
 \begin{grammar}
 <code-definition> ::=
-  "code" <identifier> ":" <item-name> @[<constraints>@]
+  "code" <reason> ":" <item-name> @[<constraints>@]
   "{" <c-fragment> "}"
+\alt
+  "code" <reason> ":" <constraints> ";"
+
+<reason> ::= <identifier>
 
 <constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 
 <constraint> ::= @<item-name>^+
 
-<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
+<item-name> ::= <identifier> | "(" @<identifier>^+ ")"
 \end{grammar}
 
 The @<c-fragment> will be output unchanged to one of the output files.
@@ -520,51 +710,6 @@ preprocessor directives in order to declare types and functions for use
 elsewhere in the generated output files.
 
 
-\subsection{Property sets} \label{sec:syntax.module.properties}
-\begin{grammar}
-<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
-
-<property> ::= <identifier> "=" <expression>
-\end{grammar}
-
-Property sets are a means for associating miscellaneous information with
-classes and related items.  By using property sets, additional information
-can be passed to extensions without the need to introduce idiosyncratic
-syntax.
-
-A property has a name, given as an @<identifier>, and a value computed by
-evaluating an @<expression>.  The value can be one of a number of types,
-though the only operators currently defined act on integer values only.
-
-\subsubsection{The expression evaluator}
-\begin{grammar}
-<expression> ::= <term> | <expression> "+" <term> | <expression> "--" <term>
-
-<term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
-
-<factor> ::= <primary> | "+" <factor> | "--" <factor>
-
-<primary> ::=
-     <integer-literal> | <string-literal> | <char-literal> | <identifier>
-\alt "<" <plain-type> ">"
-\alt "?" <s-expression>
-\alt "(" <expression> ")"
-\end{grammar}
-
-The arithmetic expression syntax is simple and standard; there are currently
-no bitwise, logical, or comparison operators.
-
-A @<primary> expression may be a literal or an identifier.  Note that
-identifiers stand for themselves: they \emph{do not} denote values.  For more
-fancy expressions, the syntax
-\begin{quote}
-  @"?" @<s-expression>
-\end{quote}
-causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
-function.
-%%% FIXME crossref to extension docs
-
-
 \subsection{Class definitions} \label{sec:syntax.module.class}
 
 \begin{grammar}
@@ -607,6 +752,7 @@ class Sub: Super \{                                             \\ \ind
 \alt <fragment-item>
 \alt <message-item>
 \alt <method-item>
+\alt <other-item> \fixme{undefined}
 \end{grammar}
 
 A full class definition provides a complete description of a class.
@@ -620,8 +766,9 @@ The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
 class.  It is an error if any of these @<identifier>s does not name a defined
 class.  The superclass list is required, and must not be empty; listing
 @|SodObject| as your class's superclass is a good choice if nothing else
-seems suitable.  It's not possible to define a \emph{root class} in the Sod
-language: you must use Lisp to do this, and it's quite involved.
+seems suitable.  A class with no direct superclasses is called a \emph{root
+class}.  It is not possible to define a root class in the Sod language: you
+must use Lisp to do this, and it's quite involved.
 
 The @<properties> provide additional information.  The standard class
 properties are as follows.
@@ -756,6 +903,9 @@ Properties:
 \begin{description}
 \item[@|message_class|] A symbol naming the Lisp class to use to represent
   the message.
+\item[@|readonly|] A boolean indicating whether the message guarantees not to
+  modify its receiver.  If this is true, the receiver will be declared
+  @"const".
 \item[@|combination|] A keyword naming the aggregating method combination to
   use.
 \item[@|most_specific|] A keyword, either @`first' or @`last', according to