doc/syntax.tex: Reformat the various grammar fragments.

[sod] / doc / syntax.tex
diff --git a/doc/syntax.tex b/doc/syntax.tex

index 18690fc..fc34e4e 100644 (file)
--- a/doc/syntax.tex
+++ b/doc/syntax.tex
@@ -53,18 +53,18 @@ could be a token.
  
  <id-body-char> ::= <id-start-char> @! <digit-char>
  
-<alpha-char> ::= "A" | "B" | \dots\ | "Z"
-\alt "a" | "b" | \dots\ | "z"
-\alt <extended-alpha-char>
+<alpha-char> ::= "A" | "B" $| \cdots |$ "Z"
+  | "a" | "b" $| \cdots |$ "z"
+  | <extended-alpha-char>
  
  <digit-char> ::= "0" | <nonzero-digit-char>
  
-<nonzero-digit-char> ::= "1" | "2" $| \ldots |$ "9"
+<nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
  \end{grammar}
  
  The precise definition of @<alpha-char> is left to the function
-\textsf{alpha-char-p} in the hosting Lisp system.  For portability,
-programmers are encouraged to limit themselves to the standard ASCII letters.
+@|alpha-char-p| in the hosting Lisp system.  For portability, programmers are
+encouraged to limit themselves to the standard ASCII letters.
  
  There are no reserved words at the lexical level, but the higher-level syntax
  recognizes certain identifiers as \emph{keywords} in some contexts.  There is
@@ -80,11 +80,11 @@ level.
  
  <char-literal> ::= "'" <char-literal-char> "'"
  
-<string-literal-char> ::= any character other than "\\" or "\""
-\alt "\\" <char>
+<string-literal-char> :: "\\" <char>
+  | any character other than "\\" or "\""
  
-<char-literal-char> ::= any character other than "\\" or "'"
-\alt "\\" <char>
+<char-literal-char> :: "\\" <char>
+  | any character other than "\\" or "'"
  
  <char> ::= any single character
  \end{grammar}
@@ -101,25 +101,25 @@ discouraged.
  
  \begin{grammar}
  <integer-literal> ::= <decimal-integer>
-\alt <binary-integer>
-\alt <octal-integer>
-\alt <hex-integer>
+  | <binary-integer>
+  | <octal-integer>
+  | <hex-integer>
  
  <decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
  
-<binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
+<binary-integer> ::= "0" @("b" @! "B"@) @<binary-digit-char>^+
  
  <binary-digit-char> ::= "0" | "1"
  
-<octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
+<octal-integer> ::= "0" @["o" @! "O"@] @<octal-digit-char>^+
  
-<octal-digit-char> ::= "0" | "1" $| \ldots |$ "7"
+<octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
  
-<hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
+<hex-integer> ::= "0" @("x" @! "X"@) @<hex-digit-char>^+
  
  <hex-digit-char> ::= <digit-char>
-\alt "A" | "B" | "C" | "D" | "E" | "F"
-\alt "a" | "b" | "c" | "d" | "e" | "f"
+  | "A" | "B" | "C" | "D" | "E" | "F"
+  | "a" | "b" | "c" | "d" | "e" | "f"
  \end{grammar}
  
  Sod understands only integers, not floating-point numbers; its integer syntax
@@ -130,15 +130,15 @@ binary.  However, length and signedness indicators are not permitted.
  \subsection{Punctuation} \label{sec:syntax.lex.punct}
  
  \begin{grammar}
-<punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
+<punctuation> ::= "\dots"
+\alt any nonalphanumeric character other than "_", "\"", or "'"
  \end{grammar}
  
  
  \subsection{Comments} \label{sec:syntax.lex.comment}
  
  \begin{grammar}
-<comment> ::= <block-comment>
-\alt <line-comment>
+<comment> ::= <block-comment> | <line-comment>
  
  <block-comment> ::=
    "/*"
@@ -190,9 +190,9 @@ during translation.  They are read using a simple scanner which nonetheless
  understands C comments and string and character literals.
  
  A C fragment is terminated by one of a small number of delimiter characters
-determined by the immediately surrounding context -- usually a closing brace
-or bracket.  The first such delimiter character which is not enclosed in
-brackets, braces or parenthesis ends the fragment.
+determined by the immediately surrounding context -- usually some kind of
+bracket.  The first such delimiter character which is not enclosed in
+brackets, braces or parentheses ends the fragment.
  
  %%%--------------------------------------------------------------------------
  \section{C types} \label{sec:syntax.type}
@@ -360,21 +360,178 @@ keyword arguments.
  \end{grammar}
  
  %%%--------------------------------------------------------------------------
+\section{Properties} \label{sec:syntax.prop}
+
+\begin{grammar}
+<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
+
+<property> ::= <identifier> "=" <expression>
+
+<expression> ::= <additive>
+
+<additive> ::= <term>
+  | <additive> "+" <term>
+  | <additive> "--" <term>
+
+<term> ::= <factor>
+  | <term> "*" <factor>
+  | <term> "/" <factor>
+
+<factor> ::= <primary>
+  | "!" <factor> | "~" factor
+  | "+" <factor> | "--" <factor>
+
+<primary> ::=
+     <integer-literal> | <string-literal> | <char-literal> | <identifier>
+\alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
+  | "(" <expression> ")"
+\end{grammar}
+
+\emph{Property sets} are a means for associating miscellaneous information
+with compile-time metaobjects such as modules, classes, messages, methods,
+slots, and initializers.  By using property sets, additional information can
+be passed to extensions without the need to introduce idiosyncratic syntax.
+(That said, extensions can add additional first-class syntax, if necessary.)
+
+An error is reported if an unrecognized property is associated with an
+object.
+
+
+\subsection{Property values} \label{sec:syntax.prop.value}
+
+A property has a name, given as an @<identifier>, and a value computed by
+evaluating an @<expression>.  The value can be one of a number of types.
+
+\begin{itemize}
+
+\item An @<integer-literal> denotes a value of type @|int|.
+
+\item Similarly @<string-literal> and @<char-literal> denote @|string| and
+  @|char| values respectively.  Note that, as properties, characters are
+  quite distinct from integers, whereas in C, a character literal denotes a
+  value of type @|int|.
+
+\item There are no variables in the property-value syntax.  Rather, an
+  @<identifier> denotes that identifier, as a value of type @|id|.
+
+\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
+  between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
+  void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
+
+\item A @<c-fragment> within braces denotes the tokens between (and not
+  including) the braces, as a value of type @|c-fragment|.
+
+\end{itemize}
+
+As shown in the grammar, there are four binary operators, @"+" (addition),
+@"--" (subtraction), @"*" (multiplication), and @"/" (division);
+multiplication and division have higher precedence than addition and
+subtraction, and operators of the same precedence associate left-to-right.
+There are also unary @"+" (no effect) and @"--" (negation) operators, with
+higher precedence.  All of the above operators act only on integer operands
+and yield integer results.  (Although the unary @"+" operator yields its
+operand unchanged, an error is still reported if it is applied to a
+non-integer value.)  There are currently no bitwise, logical, or comparison
+operators.
+
+Finally, an S-expression preceded by @|?| causes the expression to be read in
+the current package (which is always @|sod-user| at the start of a module)
+and immediately evaluated (using @|eval|); the resulting value is converted
+into a property value using the \descref{gf}{decode-property}[generic
+function].
+
+
+\subsection{Property output types and coercions}
+\label{sec:syntax.prop.coerce}
+
+When a property value is inspected by the Sod translator, or an extension, it
+is \emph{coerced} so as to conform to a requested output type.  This coercion
+process is performed by the \descref{gf}{coerce-property-value}[generic
+function], and additional output types and coercions can be defined by
+extensions.  The built-in output types coercions, from the value types listed
+above, are as follows.
+
+\begin{itemize}
+
+\item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
+  correspond to the like-named value types described above.  No coercions to
+  these output types are defined for the described value types.\footnote{%
+    There is a coercion to @|id| from the value type @|symbol|, but it is
+    only possible to generate a property value of type @|symbol| using Lisp.}
+
+\item The output type @|type| denotes a C type, as does the value type
+  @|type|.  In addition, a value of type @|id| can be coerced to a C type if
+  it is the name of a class, a type name explicitly declared by @|typename|,
+  or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
+  @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
+  or @|va_list|.
+
+\item The @|boolean| output type denotes a boolean value, which may be either
+  true or false.  A value of type @|id| is considered true if it is @|true|,
+  @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
+  @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
+  any other identifier where a boolean value is wanted.  A value of type
+  @|int| is considered true if it is nonzero, or false if it is zero.
+
+\item The @|symbol| output type denotes a Lisp symbol.
+
+  A value of type @|id| is coerced to a symbol as follows.  First, the
+  identifier name is subjected to \emph{case inversion}: if all of the
+  letters in the name have the same case, either upper or lower, then they
+  are replaced with the corresponding letters in the opposite case, lower or
+  upper; if the name contains letters of both cases, then it is not changed.
+  For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
+  remains as it is.  Second, the name is subjected to \emph{separator
+  switching}: all underscores in the name are replaced with hyphens (and
+  \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
+  first place).  Finally, the resulting name is interned in the current
+  package, which will usually be @|sod-user| unless changed explicitly by the
+  module.
+
+  A value of type @|string| is coerced to a symbol as follows.  If the string
+  contains no colons, then it is case-inverted (but not separator-switched)
+  and interned in the current package.  Otherwise, the string either has the
+  form $p @|:| q$, where $q$ does not begin with a colon (the
+  \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
+  where $p$ does not contain a colon.  Both $p$ and $q$ are case-inverted
+  (but not separator-switched).  If $p$ does not name a package, then an
+  error is reported; as a special case, if $p$ is empty, then it is
+  considered to name the @|keyword| package.  Otherwise, $q$ is looked up as
+  a symbol name in package~$p$; in the single-colon case, if the symbol is
+  not an exported symbol in package~$p$, then an error is reported; in the
+  double-colon case, $q$ is interned in package~$p$ (and so there needn't be
+  an exported symbol -- or, indeed, and symbol at all -- named $q$
+  beforehand).
+
+\item The @|keyword| output type denotes symbols within the @|keyword|
+  package.  Value of type @|id| or @|string| can be coerced to a @|keyword|
+  in the same way as to a @|symbol|, as described above, only the converted
+  name is looked up in the @|keyword| package rather than the current
+  package.  (A @|string| can override this by specifying an explicit package
+  name, but this is unlikely to be very helpful.)
+
+\end{itemize}
+
+%%%--------------------------------------------------------------------------
  \section{Module syntax} \label{sec:syntax.module}
  
  \begin{grammar}
  <module> ::= @<definition>^*
  
-<definition> ::= <import-definition>
+<definition> ::= <property-definition> \fixme{undefined}
+\alt <import-definition>
  \alt <load-definition>
  \alt <lisp-definition>
  \alt <code-definition>
  \alt <typename-definition>
  \alt <class-definition>
+\alt <other-definition> \fixme{undefined}
  \end{grammar}
  
-A @<module> is the top-level syntactic item.  A module consists of a sequence
-of definitions.
+A @<module> is the top-level syntactic item: a source file presented to Sod
+is expected to conform with the @<module> syntax.
+
+A module consists of a sequence of definitions.
  
  \fixme{describe syntax; expand}
  Properties:
@@ -399,9 +556,9 @@ A search is made for a module source file as follows.
  \begin{itemize}
  \item The module name @<string> is converted into a filename by appending
    @`.sod', if it has no extension already.\footnote{%
-    Technically, what happens is \textsf{(merge-pathnames name (make-pathname
-    :type "SOD" :case :common))}, so exactly what this means varies
-    according to the host system.} %
+    Technically, what happens is @|(merge-pathnames name (make-pathname :type
+    "SOD" :case :common))|, so exactly what this means varies according to
+    the host system.} %
  \item The file is looked for relative to the directory containing the
    importing module.
  \item If that fails, then the file is looked for in each directory on the
@@ -411,7 +568,7 @@ A search is made for a module source file as follows.
  \end{itemize}
  At this point, if the file has previously been imported, nothing further
  happens.\footnote{%
-  This check is done using \textsf{truename}, so it should see through simple
+  This check is done using @|truename|, so it should see through simple
    tricks like symbolic links.  However, it may be confused by fancy things
    like bind mounts and so on.} %
  
@@ -428,23 +585,22 @@ A search is made for a Lisp source file as follows.
  \begin{itemize}
  \item The name @<string> is converted into a filename by appending @`.lisp',
    if it has no extension already.\footnote{%
-    Technically, what happens is \textsf{(merge-pathnames name (make-pathname
-    :type "LISP" :case :common))}, so exactly what this means varies
-    according to the host system.} %
+    Technically, what happens is @|(merge-pathnames name (make-pathname :type
+    "LISP" :case :common))|, so exactly what this means varies according to
+    the host system.} %
  \item A search is then made in the same manner as for module imports
    (\xref{sec:syntax-module}).
  \end{itemize}
-If the file is found, it is loaded using the host Lisp's \textsf{load}
-function.
+If the file is found, it is loaded using the host Lisp's @|load| function.
  
  Note that Sod doesn't attempt to compile Lisp files, or even to look for
  existing compiled files.  The right way to package a substantial extension to
  the Sod translator is to provide the extension as a standard ASDF system (or
  similar) and leave a dropping @|foo-extension.lisp| in the module path saying
  something like
-\begin{quote}
-  \textsf{(asdf:load-system :foo-extension)}
-\end{quote}
+\begin{prog}
+  (asdf:load-system :foo-extension)
+\end{prog}
  which will arrange for the extension to be compiled if necessary.
  
  (This approach means that the language doesn't need to depend on any
@@ -520,51 +676,6 @@ preprocessor directives in order to declare types and functions for use
  elsewhere in the generated output files.
  
  
-\subsection{Property sets} \label{sec:syntax.module.properties}
-\begin{grammar}
-<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
-
-<property> ::= <identifier> "=" <expression>
-\end{grammar}
-
-Property sets are a means for associating miscellaneous information with
-classes and related items.  By using property sets, additional information
-can be passed to extensions without the need to introduce idiosyncratic
-syntax.
-
-A property has a name, given as an @<identifier>, and a value computed by
-evaluating an @<expression>.  The value can be one of a number of types,
-though the only operators currently defined act on integer values only.
-
-\subsubsection{The expression evaluator}
-\begin{grammar}
-<expression> ::= <term> | <expression> "+" <term> | <expression> "--" <term>
-
-<term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
-
-<factor> ::= <primary> | "+" <factor> | "--" <factor>
-
-<primary> ::=
-     <integer-literal> | <string-literal> | <char-literal> | <identifier>
-\alt "<" <plain-type> ">"
-\alt "?" <s-expression>
-\alt "(" <expression> ")"
-\end{grammar}
-
-The arithmetic expression syntax is simple and standard; there are currently
-no bitwise, logical, or comparison operators.
-
-A @<primary> expression may be a literal or an identifier.  Note that
-identifiers stand for themselves: they \emph{do not} denote values.  For more
-fancy expressions, the syntax
-\begin{quote}
-  @"?" @<s-expression>
-\end{quote}
-causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
-function.
-%%% FIXME crossref to extension docs
-
-
  \subsection{Class definitions} \label{sec:syntax.module.class}
  
  \begin{grammar}
@@ -607,6 +718,7 @@ class Sub: Super \{                                             \\ \ind
  \alt <fragment-item>
  \alt <message-item>
  \alt <method-item>
+\alt <other-item> \fixme{undefined}
  \end{grammar}
  
  A full class definition provides a complete description of a class.
@@ -620,8 +732,9 @@ The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
  class.  It is an error if any of these @<identifier>s does not name a defined
  class.  The superclass list is required, and must not be empty; listing
  @|SodObject| as your class's superclass is a good choice if nothing else
-seems suitable.  It's not possible to define a \emph{root class} in the Sod
-language: you must use Lisp to do this, and it's quite involved.
+seems suitable.  A class with no direct superclasses is called a \emph{root
+class}.  It is not possible to define a root class in the Sod language: you
+must use Lisp to do this, and it's quite involved.
  
  The @<properties> provide additional information.  The standard class
  properties are as follows.