mdw@git.distorted.org.uk Git - sod/blame_incremental

... / ...

Commit	Line	Data
	1	%%% --latex--
	2	%%%
	3	%%% Module syntax
	4	%%%
	5	%%% (c) 2015 Straylight/Edgeware
	6	%%%
	7
	8	%%%----- Licensing notice ---------------------------------------------------
	9	%%%
	10	%%% This file is part of the Sensible Object Design, an object system for C.
	11	%%%
	12	%%% SOD is free software; you can redistribute it and/or modify
	13	%%% it under the terms of the GNU General Public License as published by
	14	%%% the Free Software Foundation; either version 2 of the License, or
	15	%%% (at your option) any later version.
	16	%%%
	17	%%% SOD is distributed in the hope that it will be useful,
	18	%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	%%% GNU General Public License for more details.
	21	%%%
	22	%%% You should have received a copy of the GNU General Public License
	23	%%% along with SOD; if not, write to the Free Software Foundation,
	24	%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	25
	26	\chapter{Module syntax} \label{ch:syntax}
	27
	28	%%%--------------------------------------------------------------------------
	29	\section{Lexical syntax} \label{sec:syntax.lex}
	30
	31	Whitespace and comments are discarded. The remaining characters are
	32	collected into tokens according to the following syntax.
	33
	34	\begin{grammar}
	35	<token> ::= <identifier>
	36	\alt <string-literal>
	37	\alt <char-literal>
	38	\alt <integer-literal>
	39	\alt <punctuation>
	40	\end{grammar}
	41
	42	This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
	43	munch} rule: at each stage we take the longest sequence of characters which
	44	could be a token.
	45
	46
	47	\subsection{Identifiers} \label{sec:syntax.lex.id}
	48
	49	\begin{grammar}
	50	<identifier> ::= <id-start-char> @<id-body-char>^*
	51
	52	<id-start-char> ::= <alpha-char> \| "_"
	53
	54	<id-body-char> ::= <id-start-char> @! <digit-char>
	55
	56	<alpha-char> ::= "A" \| "B" $\| \cdots \|$ "Z"
	57	\| "a" \| "b" $\| \cdots \|$ "z"
	58	\| <extended-alpha-char>
	59
	60	<digit-char> ::= "0" \| <nonzero-digit-char>
	61
	62	<nonzero-digit-char> ::= "1" \| "2" $\| \cdots \|$ "9"
	63	\end{grammar}
	64
	65	The precise definition of @<alpha-char> is left to the function
	66	@\|alpha-char-p\| in the hosting Lisp system. For portability, programmers are
	67	encouraged to limit themselves to the standard ASCII letters.
	68
	69	There are no reserved words at the lexical level, but the higher-level syntax
	70	recognizes certain identifiers as \emph{keywords} in some contexts. There is
	71	also an ambiguity (inherited from C) in the declaration syntax which is
	72	settled by distinguishing type names from other identifiers at a lexical
	73	level.
	74
	75
	76	\subsection{String and character literals} \label{sec:syntax.lex.string}
	77
	78	\begin{grammar}
	79	<string-literal> ::= "\"" @<string-literal-char>^* "\""
	80
	81	<char-literal> ::= "'" <char-literal-char> "'"
	82
	83	<string-literal-char> ::= "\\" <char>
	84	\| any character other than "\\" or "\""
	85
	86	<char-literal-char> ::= "\\" <char>
	87	\| any character other than "\\" or "'"
	88
	89	<char> ::= any single character
	90	\end{grammar}
	91
	92	The syntax for string and character literals differs from~C. In particular,
	93	escape sequences such as @`\textbackslash n' are not recognized. The use
	94	of string and character literals in Sod, outside of C~fragments, is limited,
	95	and the simple syntax seems adequate. For the sake of future compatibility,
	96	the use of character sequences which resemble C escape sequences is
	97	discouraged.
	98
	99
	100	\subsection{Integer literals} \label{sec:syntax.lex.int}
	101
	102	\begin{grammar}
	103	<integer-literal> ::= <decimal-integer>
	104	\| <binary-integer>
	105	\| <octal-integer>
	106	\| <hex-integer>
	107
	108	<decimal-integer> ::= "0" \| <nonzero-digit-char> @<digit-char>^*
	109
	110	<binary-integer> ::= "0" @("b" @! "B"@) @<binary-digit-char>^+
	111
	112	<binary-digit-char> ::= "0" \| "1"
	113
	114	<octal-integer> ::= "0" @["o" @! "O"@] @<octal-digit-char>^+
	115
	116	<octal-digit-char> ::= "0" \| "1" $\| \cdots \|$ "7"
	117
	118	<hex-integer> ::= "0" @("x" @! "X"@) @<hex-digit-char>^+
	119
	120	<hex-digit-char> ::= <digit-char>
	121	\| "A" \| "B" \| "C" \| "D" \| "E" \| "F"
	122	\| "a" \| "b" \| "c" \| "d" \| "e" \| "f"
	123	\end{grammar}
	124
	125	Sod understands only integers, not floating-point numbers; its integer syntax
	126	goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
	127	binary. However, length and signedness indicators are not permitted.
	128
	129
	130	\subsection{Punctuation} \label{sec:syntax.lex.punct}
	131
	132	\begin{grammar}
	133	<punctuation> ::= "\dots"
	134	\alt any nonalphanumeric character other than "_", "\"", or "'"
	135	\end{grammar}
	136
	137
	138	\subsection{Comments} \label{sec:syntax.lex.comment}
	139
	140	\begin{grammar}
	141	<comment> ::= <block-comment> \| <line-comment>
	142
	143	<block-comment> ::=
	144	"/*"
	145	@<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
	146	@<star>^*
	147	"*/"
	148
	149	<star> ::= "*"
	150
	151	<not-star> ::= any character other than "*"
	152
	153	<not-star-or-slash> ::= any character other than "*" or "/"
	154
	155	<line-comment> ::= "/\,/" @<not-newline>^* <newline>
	156
	157	<newline> ::= a newline character
	158
	159	<not-newline> ::= any character other than newline
	160	\end{grammar}
	161
	162	Comments are exactly as in C99: both traditional block comments `@\|/*\| \dots\
	163	@\|*/\|' and \Cplusplus-style `@\|/\,/\| \dots' comments are permitted and
	164	ignored.
	165
	166
	167	\subsection{Special nonterminals} \label{sec:syntax.lex.special}
	168
	169	Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
	170	two special nonterminals occur in the module syntax.
	171
	172	\subsubsection{S-expressions}
	173	\begin{grammar}
	174	<s-expression> ::= an S-expression, as parsed by the Lisp reader
	175	\end{grammar}
	176
	177	When an S-expression is expected, the Sod parser simply calls the host Lisp
	178	system's @\|read\| function. Sod modules are permitted to modify the read
	179	table to extend the S-expression syntax.
	180
	181	S-expressions are self-delimiting, so no end-marker is needed.
	182
	183	\subsubsection{C fragments}
	184	\begin{grammar}
	185	<c-fragment> ::= a sequence of C tokens, with matching brackets
	186	\end{grammar}
	187
	188	Sequences of C code are simply stored and written to the output unchanged
	189	during translation. They are read using a simple scanner which nonetheless
	190	understands C comments and string and character literals.
	191
	192	A C fragment is terminated by one of a small number of delimiter characters
	193	determined by the immediately surrounding context -- usually some kind of
	194	bracket. The first such delimiter character which is not enclosed in
	195	brackets, braces or parentheses ends the fragment.
	196
	197	%%%--------------------------------------------------------------------------
	198	\section{C types} \label{sec:syntax.type}
	199
	200	Sod's syntax for C types closely mirrors the standard C syntax. A C type has
	201	two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
	202	Sod, a type must contain at least one @<declaration-specifier> (i.e.,
	203	`implicit @\|int\|' is forbidden), and storage-class specifiers are not
	204	recognized.
	205
	206
	207	\subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
	208
	209	\begin{grammar}
	210	<declaration-specifier> ::= <type-name>
	211	\alt "struct" <identifier> \| "union" <identifier> \| "enum" <identifier>
	212	\alt "void" \| "char" \| "int" \| "float" \| "double"
	213	\alt "short" \| "long"
	214	\alt "signed" \| "unsigned"
	215	\alt "bool" \| "_Bool"
	216	\alt "imaginary" \| "_Imaginary" \| "complex" \| "_Complex"
	217	\alt <qualifier>
	218	\alt <storage-specifier>
	219	\alt <atomic-type>
	220	\alt <other-declspec>
	221
	222	<qualifier> ::= <atomic> \| "const" \| "volatile" \| "restrict"
	223
	224	<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
	225
	226	<atomic-type> ::= <atomic> "(" <plain-type> ")"
	227
	228	<atomic> ::= "atomic" \| "_Atomic"
	229
	230	<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
	231
	232	<alignas> ::= "alignas" \| "_Alignas"
	233
	234	<type-name> ::= <identifier>
	235	\end{grammar}
	236
	237	Declaration specifiers may appear in any order. However, not all
	238	combinations are permitted. The declaration specifiers must consist of zero or
	239	more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
	240	following, up to reordering:
	241	\begin{itemize}
	242	\item @<type-name>;
	243	\item @<atomic-type>;
	244	\item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
	245	\item @"void";
	246	\item @"_Bool", @"bool";
	247	\item @"char"; @"unsigned char"; @"signed char";
	248	\item @"short", @"signed short", @"short int", @"signed short int";
	249	@"unsigned short", @"unsigned short int";
	250	\item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
	251	\item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
	252	long", @"unsigned long int";
	253	\item @"long long", @"signed long long", @"long long int", @"signed long long
	254	int"; @"unsigned long long", @"unsigned long long int";
	255	\item @"float"; @"double"; @"long double";
	256	\item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
	257	imaginary"; @"long double _Imaginary", @"long double imaginary";
	258	\item @"float _Complex", @"float complex"; @"double _Complex", @"double
	259	complex"; @"long double _Complex", @"long double complex".
	260	\end{itemize}
	261	All of these have their usual C meanings. Groups separated by commas mean
	262	the same thing, and Sod will not preserve the distinction.
	263
	264	Almost all of these mean the same as they do in C. There are some minor
	265	differences:
	266	\begin{itemize}
	267	\item In C, the `tag' namespace is shared between @\|struct\|, @\|union\|, and
	268	@\|enum\|; Sod has three distinct namespaces for tags. This may be fixed in
	269	the future.
	270	\item The @<other-declspec> production is a syntactic extension point, where
	271	extensions can introduce their own additions to the type system.
	272	\end{itemize}
	273
	274	C standards from C99 onwards have tended to introduce new keywords beginning
	275	with an underscore followed by an uppercase letter, so as to avoid conflicts
	276	with existing code. More conventional spellings are then provided by macros
	277	in new header files. For example, C99 introduced @"_Bool", and a header file
	278	@\|<stdbool.h>\| which defines the macro @\|bool\|. Sod recognizes both the ugly
	279	underscore names and the more conventional macro names on input, but always
	280	emits the ugly names. This doesn't cause a compatibility problem in Sod,
	281	because Sod's parser recognizes keywords only in the appropriate context.
	282	For example, the (ill-advised) slot declaration
	283	\begin{prog}
	284	bool bool;
	285	\end{prog}
	286	is completely acceptable, and will cause the C structure member
	287	\begin{prog}
	288	\_Bool bool;
	289	\end{prog}
	290	to be emitted on output, which will be acceptable to C as long as
	291	@\|<stdbool.h>\| is not included.
	292
	293	A @<type-name> is an identifier which has been declared as being a type name,
	294	using the @"typename" or @"class" definitions. The following type names are
	295	defined in the built-in module.
	296	\begin{itemize}
	297	\item @\|va_list\|
	298	\item @\|size_t\|
	299	\item @\|ptrdiff_t\|
	300	\item @\|wchar_t\|
	301	\end{itemize}
	302
	303
	304	\subsection{Declarators} \label{sec:syntax.type.declarator}
	305
	306	\begin{grammar}
	307	<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
	308
	309	<primary-declarator>$[k, a]$ ::= $k$
	310	\alt "(" <primary-declarator>$[k, a]$ ")"
	311	\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
	312
	313	<pointer> ::= "*" @<qualifier>^*
	314
	315	<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
	316	\alt "(" $a$ ")"
	317
	318	<argument-list> ::= $\epsilon$ \| "\dots"
	319	\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
	320
	321	<argument> ::= @<declaration-specifier>^+ <argument-declarator>
	322
	323	<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
	324
	325	<argument-declarator> ::=
	326	<declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
	327
	328	<simple-declarator> ::=
	329	<declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
	330	\end{grammar}
	331
	332	The declarator syntax is taken from C, but with some differences.
	333	\begin{itemize}
	334	\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
	335	closing square bracket. This allows array dimensions to contain arbitrary
	336	constant expressions.
	337	\item A declarator may have either a single @<identifier> at its centre or a
	338	pair of @<identifier>s separated by a @`.'; this is used to refer to
	339	slots or messages defined in superclasses.
	340	\end{itemize}
	341	The remaining differences are (I hope) a matter of presentation rather than
	342	substance.
	343
	344	There is additional syntax to support messages and methods which accept
	345	keyword arguments.
	346
	347	\begin{grammar}
	348	<keyword-argument> ::= <argument> @["=" <c-fragment>@]
	349
	350	<keyword-argument-list> ::=
	351	@[<list>$[\mbox{@<argument>}]$@]
	352	"?" @[<list>$[\mbox{@<keyword-argument>}]$@]
	353
	354	<method-argument-list> ::= <argument-list> @! <keyword-argument-list>
	355
	356	<dotted-name> ::= <identifier> "." <identifier>
	357
	358	<keyword-declarator>$[k]$ ::=
	359	<declarator>$[k, \mbox{@<method-argument-list>}]$
	360	\end{grammar}
	361
	362	%%%--------------------------------------------------------------------------
	363	\section{Properties} \label{sec:syntax.prop}
	364
	365	\begin{grammar}
	366	<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
	367
	368	<property> ::= <identifier> "=" <expression>
	369
	370	<expression> ::= <additive>
	371
	372	<additive> ::= <term>
	373	\| <additive> "+" <term>
	374	\| <additive> "--" <term>
	375
	376	<term> ::= <factor>
	377	\| <term> "*" <factor>
	378	\| <term> "/" <factor>
	379
	380	<factor> ::= <primary>
	381	\| "!" <factor> \| "~" <factor>
	382	\| "+" <factor> \| "--" <factor>
	383
	384	<primary> ::=
	385	<integer-literal> \| <string-literal> \| <char-literal> \| <identifier>
	386	\alt "<" <plain-type> ">" \| "{" <c-fragment> "}" \| "?" <s-expression>
	387	\| "(" <expression> ")"
	388	\end{grammar}
	389
	390	\emph{Property sets} are a means for associating miscellaneous information
	391	with compile-time metaobjects such as modules, classes, messages, methods,
	392	slots, and initializers. By using property sets, additional information can
	393	be passed to extensions without the need to introduce idiosyncratic syntax.
	394	(That said, extensions can add additional first-class syntax, if necessary.)
	395
	396	An error is reported if an unrecognized property is associated with an
	397	object.
	398
	399
	400	\subsection{Property values} \label{sec:syntax.prop.value}
	401
	402	A property has a name, given as an @<identifier>, and a value computed by
	403	evaluating an @<expression>. The value can be one of a number of types.
	404
	405	\begin{itemize}
	406
	407	\item An @<integer-literal> denotes a value of type @\|int\|.
	408
	409	\item Similarly @<string-literal> and @<char-literal> denote @\|string\| and
	410	@\|char\| values respectively. Note that, as properties, characters are
	411	quite distinct from integers, whereas in C, a character literal denotes a
	412	value of type @\|int\|.
	413
	414	\item There are no variables in the property-value syntax. Rather, an
	415	@<identifier> denotes that identifier, as a value of type @\|id\|.
	416
	417	\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
	418	between angle brackets, e.g., @\|<int>\|, or @\|<char *>\|, or @\|<void (*(int,
	419	void (*)(int)))(int)>\|, denotes that C type, as a value of type @\|type\|.
	420
	421	\item A @<c-fragment> within braces denotes the tokens between (and not
	422	including) the braces, as a value of type @\|c-fragment\|.
	423
	424	\end{itemize}
	425
	426	As shown in the grammar, there are four binary operators, @"+" (addition),
	427	@"--" (subtraction), @"*" (multiplication), and @"/" (division);
	428	multiplication and division have higher precedence than addition and
	429	subtraction, and operators of the same precedence associate left-to-right.
	430	There are also unary @"+" (no effect) and @"--" (negation) operators, with
	431	higher precedence. All of the above operators act only on integer operands
	432	and yield integer results. (Although the unary @"+" operator yields its
	433	operand unchanged, an error is still reported if it is applied to a
	434	non-integer value.) There are currently no bitwise, logical, or comparison
	435	operators.
	436
	437	Finally, an S-expression preceded by @\|?\| causes the expression to be read in
	438	the current package (which is always @\|sod-user\| at the start of a module)
	439	and immediately evaluated (using @\|eval\|); the resulting value is converted
	440	into a property value using the \descref{gf}{decode-property}[generic
	441	function].
	442
	443
	444	\subsection{Property output types and coercions}
	445	\label{sec:syntax.prop.coerce}
	446
	447	When a property value is inspected by the Sod translator, or an extension, it
	448	is \emph{coerced} so as to conform to a requested output type. This coercion
	449	process is performed by the \descref{gf}{coerce-property-value}[generic
	450	function], and additional output types and coercions can be defined by
	451	extensions. The built-in output types and coercions, from the value types listed
	452	above, are as follows.
	453
	454	\begin{itemize}
	455
	456	\item The output types @\|int\|, @\|string\|, @\|char\|, @\|id\|, and @\|c-fragment\|
	457	correspond to the like-named value types described above. No coercions to
	458	these output types are defined for the described value types.\footnote{%
	459	There is a coercion to @\|id\| from the value type @\|symbol\|, but it is
	460	only possible to generate a property value of type @\|symbol\| using Lisp.}
	461
	462	\item The output type @\|type\| denotes a C type, as does the value type
	463	@\|type\|. In addition, a value of type @\|id\| can be coerced to a C type if
	464	it is the name of a class, a type name explicitly declared by @\|typename\|,
	465	or it is one of: @\|bool\|, @\|_Bool\|, @\|void\|, @\|char\|, @\|short\|, @\|int\|,
	466	@\|signed\|, @\|unsigned\|, @\|long\|, @\|size_t\|, @\|ptrdiff_t\|, @\|wchar_t\|,
	467	or @\|va_list\|.
	468
	469	\item The @\|boolean\| output type denotes a boolean value, which may be either
	470	true or false. A value of type @\|id\| is considered true if it is @\|true\|,
	471	@\|t\|, @\|yes\|, @\|on\|, @\|yup\|, or @\|verily\|; or false if it is @\|false\|,
	472	@\|nil\|, @\|no\|, @\|off\|, @\|nope\|, or @\|nowise\|; it is erroneous to provide
	473	any other identifier where a boolean value is wanted. A value of type
	474	@\|int\| is considered true if it is nonzero, or false if it is zero.
	475
	476	\item The @\|symbol\| output type denotes a Lisp symbol.
	477
	478	A value of type @\|id\| is coerced to a symbol as follows. First, the
	479	identifier name is subjected to \emph{case inversion}: if all of the
	480	letters in the name have the same case, either upper or lower, then they
	481	are replaced with the corresponding letters in the opposite case, lower or
	482	upper; if the name contains letters of both cases, then it is not changed.
	483	For example, @\|foo45\| becomes @\|FOO45\|, or \emph{vice-versa}; but @\|Splat\|
	484	remains as it is. Second, the name is subjected to \emph{separator
	485	switching}: all underscores in the name are replaced with hyphens (and
	486	\emph{vice-versa}, though hyphens aren't permitted in identifiers in the
	487	first place). Finally, the resulting name is interned in the current
	488	package, which will usually be @\|sod-user\| unless changed explicitly by the
	489	module.
	490
	491	A value of type @\|string\| is coerced to a symbol as follows. If the string
	492	contains no colons, then it is case-inverted (but not separator-switched)
	493	and interned in the current package. Otherwise, the string either has the
	494	form $p @\|:\| q$, where $q$ does not begin with a colon (the
	495	\emph{single-colon} case) or $p @\|::\| q$ (the \emph{double-colon} case);
	496	where $p$ does not contain a colon. Both $p$ and $q$ are case-inverted
	497	(but not separator-switched). If $p$ does not name a package, then an
	498	error is reported; as a special case, if $p$ is empty, then it is
	499	considered to name the @\|keyword\| package. Otherwise, $q$ is looked up as
	500	a symbol name in package~$p$; in the single-colon case, if the symbol is
	501	not an exported symbol in package~$p$, then an error is reported; in the
	502	double-colon case, $q$ is interned in package~$p$ (and so there needn't be
	503	an exported symbol -- or, indeed, any symbol at all -- named $q$
	504	beforehand).
	505
	506	\item The @\|keyword\| output type denotes symbols within the @\|keyword\|
	507	package. Values of type @\|id\| or @\|string\| can be coerced to a @\|keyword\|
	508	in the same way as to a @\|symbol\|, as described above, only the converted
	509	name is looked up in the @\|keyword\| package rather than the current
	510	package. (A @\|string\| can override this by specifying an explicit package
	511	name, but this is unlikely to be very helpful.)
	512
	513	\end{itemize}
	514
	515	%%%--------------------------------------------------------------------------
	516	\section{Module syntax} \label{sec:syntax.module}
	517
	518	\begin{grammar}
	519	<module> ::= @<definition>^*
	520
	521	<definition> ::= <property-definition> \fixme{undefined}
	522	\alt <import-definition>
	523	\alt <load-definition>
	524	\alt <lisp-definition>
	525	\alt <code-definition>
	526	\alt <typename-definition>
	527	\alt <class-definition>
	528	\alt <other-definition> \fixme{undefined}
	529	\end{grammar}
	530
	531	A @<module> is the top-level syntactic item: a source file presented to Sod
	532	is expected to conform with the @<module> syntax.
	533
	534	A module consists of a sequence of definitions.
	535
	536	\fixme{describe syntax; expand}
	537	Properties:
	538	\begin{description}
	539	\item[@\|module_class\|] A symbol naming the Lisp class to use to
	540	represent the module.
	541	\item[@\|guard\|] An identifier to use as the guard symbol used to prevent
	542	multiple inclusion in the header file.
	543	\end{description}
	544
	545
	546	\subsection{Simple definitions} \label{sec:syntax.module.simple}
	547
	548	\subsubsection{Importing modules}
	549	\begin{grammar}
	550	<import-definition> ::= "import" <string> ";"
	551	\end{grammar}
	552
	553	The module named @<string> is processed and its definitions made available.
	554
	555	A search is made for a module source file as follows.
	556	\begin{itemize}
	557	\item The module name @<string> is converted into a filename by appending
	558	@`.sod', if it has no extension already.\footnote{%
	559	Technically, what happens is @\|(merge-pathnames name (make-pathname :type
	560	"SOD" :case :common))\|, so exactly what this means varies according to
	561	the host system.} %
	562	\item The file is looked for relative to the directory containing the
	563	importing module.
	564	\item If that fails, then the file is looked for in each directory on the
	565	module search path in turn.
	566	\item If the file still isn't found, an error is reported and the import
	567	fails.
	568	\end{itemize}
	569	At this point, if the file has previously been imported, nothing further
	570	happens.\footnote{%
	571	This check is done using @\|truename\|, so it should see through simple
	572	tricks like symbolic links. However, it may be confused by fancy things
	573	like bind mounts and so on.} %
	574
	575	Recursive imports, either direct or indirect, are an error.
	576
	577	\subsubsection{Loading extensions}
	578	\begin{grammar}
	579	<load-definition> ::= "load" <string> ";"
	580	\end{grammar}
	581
	582	The Lisp file named @<string> is loaded and evaluated.
	583
	584	A search is made for a Lisp source file as follows.
	585	\begin{itemize}
	586	\item The name @<string> is converted into a filename by appending @`.lisp',
	587	if it has no extension already.\footnote{%
	588	Technically, what happens is @\|(merge-pathnames name (make-pathname :type
	589	"LISP" :case :common))\|, so exactly what this means varies according to
	590	the host system.} %
	591	\item A search is then made in the same manner as for module imports
	592	(\xref{sec:syntax.module.simple}).
	593	\end{itemize}
	594	If the file is found, it is loaded using the host Lisp's @\|load\| function.
	595
	596	Note that Sod doesn't attempt to compile Lisp files, or even to look for
	597	existing compiled files. The right way to package a substantial extension to
	598	the Sod translator is to provide the extension as a standard ASDF system (or
	599	similar) and leave a dropping @\|foo-extension.lisp\| in the module path saying
	600	something like
	601	\begin{prog}
	602	(asdf:load-system :foo-extension)
	603	\end{prog}
	604	which will arrange for the extension to be compiled if necessary.
	605
	606	(This approach means that the language doesn't need to depend on any
	607	particular system definition facility. It's bad enough already that it
	608	depends on Common Lisp.)
	609
	610	\subsubsection{Lisp escapes}
	611	\begin{grammar}
	612	<lisp-definition> ::= "lisp" <s-expression> ";"
	613	\end{grammar}
	614
	615	The @<s-expression> is evaluated immediately. It can do anything it likes.
	616
	617	\begin{boxy}[Warning!]
	618	This means that hostile Sod modules are a security hazard. Lisp code can
	619	read and write files, start other programs, and make network connections.
	620	Don't install Sod modules from sources that you don't trust.\footnote{%
	621	Presumably you were going to run the corresponding code at some point, so
	622	this isn't as unusually scary as it sounds. But please be careful.} %
	623	\end{boxy}
	624
	625	\subsubsection{Declaring type names}
	626	\begin{grammar}
	627	<typename-definition> ::=
	628	"typename" <list>$[\mbox{@<identifier>}]$ ";"
	629	\end{grammar}
	630
	631	Each @<identifier> is declared as naming a C type. This is important because
	632	the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
	633	done by distinguishing type names from other identifiers.
	634
	635	Don't declare class names using @"typename"; use @"class" forward
	636	declarations instead.
	637
	638
	639	\subsection{Literal code} \label{sec:syntax.module.literal}
	640
	641	\begin{grammar}
	642	<code-definition> ::=
	643	"code" <identifier> ":" <item-name> @[<constraints>@]
	644	"{" <c-fragment> "}"
	645
	646	<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
	647
	648	<constraint> ::= @<item-name>^+
	649
	650	<item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
	651	\end{grammar}
	652
	653	The @<c-fragment> will be output unchanged to one of the output files.
	654
	655	The first @<identifier> is the symbolic name of an output file. Predefined
	656	output file names are @\|c\| and @\|h\|, which are the implementation code and
	657	header file respectively; other output files can be defined by extensions.
	658
	659	Output items are named with a sequence of identifiers, separated by
	660	whitespace, and enclosed in parentheses. As an abbreviation, a name
	661	consisting of a single identifier may be written as just that identifier,
	662	without the parentheses.
	663
	664	The @<constraints> provide a means for specifying where in the output file
	665	the output item should appear. (Note the two kinds of square brackets shown
	666	in the syntax: square brackets must appear around the constraints if they are
	667	present, but the constraints may be omitted entirely.) Each comma-separated @<constraint>
	668	is a sequence of names of output items, and indicates that the output items
	669	must appear in the order given -- though the translator is free to insert
	670	additional items in between them. (The particular output items needn't be
	671	defined already -- indeed, they needn't be defined ever.)
	672
	673	There is a predefined output item @\|includes\| in both the @\|c\| and @\|h\|
	674	output files which is a suitable place for inserting @\|\#include\|
	675	preprocessor directives in order to declare types and functions for use
	676	elsewhere in the generated output files.
	677
	678
	679	\subsection{Class definitions} \label{sec:syntax.module.class}
	680
	681	\begin{grammar}
	682	<class-definition> ::= <class-forward-declaration>
	683	\alt <full-class-definition>
	684	\end{grammar}
	685
	686	\subsubsection{Forward declarations}
	687	\begin{grammar}
	688	<class-forward-declaration> ::= "class" <identifier> ";"
	689	\end{grammar}
	690
	691	A @<class-forward-declaration> informs Sod that an @<identifier> will be used
	692	to name a class which is currently undefined. Forward declarations are
	693	necessary in order to resolve certain kinds of circularity. For example,
	694	\begin{prog}
	695	class Sub; \\+
	696
	697	class Super: SodObject \{ \\ \ind
	698	Sub *sub; \-\\
	699	\}; \\+
	700
	701	class Sub: Super \{ \\ \ind
	702	/* \dots\ */ \-\\
	703	\};
	704	\end{prog}
	705
	706	\subsubsection{Full class definitions}
	707	\begin{grammar}
	708	<full-class-definition> ::=
	709	@[<properties>@]
	710	"class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
	711	"{" @<properties-class-item>^* "}"
	712
	713	<properties-class-item> ::= @[<properties>@] <class-item>
	714
	715	<class-item> ::= <slot-item>
	716	\alt <initializer-item>
	717	\alt <initarg-item>
	718	\alt <fragment-item>
	719	\alt <message-item>
	720	\alt <method-item>
	721	\alt <other-item> \fixme{undefined}
	722	\end{grammar}
	723
	724	A full class definition provides a complete description of a class.
	725
	726	The first @<identifier> gives the name of the class. It is an error to
	727	give the name of an existing class (other than a forward-referenced class),
	728	or an existing type name. It is conventional to give classes `MixedCase'
	729	names, to distinguish them from other kinds of identifiers.
	730
	731	The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
	732	class. It is an error if any of these @<identifier>s does not name a defined
	733	class. The superclass list is required, and must not be empty; listing
	734	@\|SodObject\| as your class's superclass is a good choice if nothing else
	735	seems suitable. A class with no direct superclasses is called a \emph{root
	736	class}. It is not possible to define a root class in the Sod language: you
	737	must use Lisp to do this, and it's quite involved.
	738
	739	The @<properties> provide additional information. The standard class
	740	properties are as follows.
	741	\begin{description}
	742	\item[@\|lisp_class\|] The name of the Lisp class to use within the translator
	743	to represent this class. The property value must be an identifier; the
	744	default is @\|sod_class\|. Extensions may define classes with additional
	745	behaviour, and may recognize additional class properties.
	746	\item[@\|metaclass\|] The name of the Sod metaclass for this class. In the
	747	generated code, a class is itself an instance of another class -- its
	748	\emph{metaclass}. The metaclass defines which slots the class will have,
	749	which messages it will respond to, and what its behaviour will be when it
	750	receives them. The property value must be an identifier naming a defined
	751	subclass of @\|SodClass\|. The default metaclass is @\|SodClass\|.
	752	See \xref{sec:concepts.metaclasses} for more details.
	753	\item[@\|nick\|] A nickname for the class, to be used to distinguish it from
	754	other classes in various limited contexts. The property value must be an
	755	identifier; the default is constructed by forcing the class name to
	756	lower-case.
	757	\end{description}
	758
	759	The class body consists of a sequence of @<class-item>s enclosed in braces.
	760	These items are discussed in the following sections.
	761
	762	\subsubsection{Slot items}
	763	\begin{grammar}
	764	<slot-item> ::=
	765	@<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
	766
	767	<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
	768	\end{grammar}
	769
	770	A @<slot-item> defines one or more slots. All instances of the class and any
	771	subclass will contain these slots, with the names and types given by the
	772	@<declaration-specifiers> and the @<declarators>. Slot declarators may not
	773	contain dotted names.
	774
	775	It is not possible to declare a slot with function type: such an item is
	776	interpreted as being a @<message-item> or @<method-item>. Pointers to
	777	functions are fine.
	778
	779	Properties:
	780	\begin{description}
	781	\item[@\|slot_class\|] A symbol naming the Lisp class to use to represent the
	782	direct slot.
	783	\item[@\|initarg\|] An identifier naming an initialization argument which can
	784	be used to provide a value for the slot. See
	785	\xref{sec:concepts.lifecycle.birth} for the details.
	786	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	787	the initarg. Only permitted if @\|initarg\| is also set.
	788	\end{description}
	789
	790	An @<initializer>, if present, is treated as if a separate
	791	@<initializer-item> containing the slot name and initializer were present.
	792	For example,
	793	\begin{prog}
	794	[nick = eg] \\
	795	class Example: Super \{ \\ \ind
	796	int foo = 17; \-\\
	797	\};
	798	\end{prog}
	799	means the same as
	800	\begin{prog}
	801	[nick = eg] \\
	802	class Example: Super \{ \\ \ind
	803	int foo; \\
	804	eg.foo = 17; \-\\
	805	\};
	806	\end{prog}
	807
	808	\subsubsection{Initializer items}
	809	\begin{grammar}
	810	<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
	811
	812	<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
	813
	814	<initializer> ::= <c-fragment>
	815	\end{grammar}
	816
	817	An @<initializer-item> provides an initial value for one or more slots. If
	818	prefixed by @\|class\|, then the initial values are for class slots (i.e.,
	819	slots of the class object itself); otherwise they are for instance slots.
	820
	821	The first component of the @<dotted-name> must be the nickname of one of the
	822	class's superclasses (including itself); the second must be the name of a
	823	slot defined in that superclass.
	824
	825	Properties:
	826	\begin{description}
	827	\item[@\|initializer_class\|] A symbol naming the Lisp class to use to
	828	represent the initializer.
	829	\item[@\|initarg\|] An identifier naming an initialization argument which can
	830	be used to provide a value for the slot. See
	831	\xref{sec:concepts.lifecycle.birth} for the details. An initializer item
	832	must have either an @\|initarg\| property, or an initializer expression, or
	833	both.
	834	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	835	the initarg. Only permitted if @\|initarg\| is also set.
	836	\end{description}
	837
	838	Each class may define at most one initializer item with an explicit
	839	initializer expression for a given slot.
	840
	841	\subsubsection{Initarg items}
	842	\begin{grammar}
	843	<initarg-item> ::=
	844	"initarg"
	845	@<declaration-specifier>^+
	846	<list>$[\mbox{@<init-declarator>}]$ ";"
	847	\end{grammar}
	848	Properties:
	849	\begin{description}
	850	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	851	the initarg.
	852	\end{description}
	853
	854	\subsubsection{Fragment items}
	855	\begin{grammar}
	856	<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
	857
	858	<fragment-kind> ::= "init" \| "teardown"
	859	\end{grammar}
	860
	861	\subsubsection{Message items}
	862	\begin{grammar}
	863	<message-item> ::=
	864	@<declaration-specifier>^+
	865	<keyword-declarator>$[\mbox{@<identifier>}]$
	866	@[<method-body>@]
	867	\end{grammar}
	868	Properties:
	869	\begin{description}
	870	\item[@\|message_class\|] A symbol naming the Lisp class to use to represent
	871	the message.
	872	\item[@\|combination\|] A keyword naming the aggregating method combination to
	873	use.
	874	\item[@\|most_specific\|] A keyword, either @`first' or @`last', according to
	875	whether the most specific applicable method should be invoked first or
	876	last.
	877	\end{description}
	878
	879	Properties for the @\|custom\| aggregating method combination:
	880	\begin{description}
	881	\item[@\|retvar\|] An identifier for the return value from the effective
	882	method. The default is @\|sod__ret\|. Only permitted if the message return
	883	type is not @\|void\|.
	884	\item[@\|valvar\|] An identifier holding each return value from a direct method
	885	in the effective method. The default is @\|sod__val\|. Only permitted if
	886	the method return type (see @\|methty\| below) is not @\|void\|.
	887	\item[@\|methty\|] A C type, which is the return type for direct methods of
	888	this message. The default is the return type of the message.
	889	\item[@\|decls\|] A code fragment containing declarations to be inserted at the
	890	head of the effective method body. The default is to insert nothing.
	891	\item[@\|before\|] A code fragment containing initialization to be performed at
	892	the beginning of the effective method body. The default is to insert
	893	nothing.
	894	\item[@\|empty\|] A code fragment executed if there are no primary methods;
	895	it should usually store a suitable (identity) value in @<retvar>. The
	896	default is not to emit an effective method at all if there are no primary
	897	methods.
	898	\item[@\|first\|] A code fragment to set the return value after calling the
	899	first applicable direct method. The default is to use the @\|each\|
	900	fragment.
	901	\item[@\|each\|] A code fragment to set the return value after calling a direct
	902	method. If @\|first\| is also set, then it is used after the first direct
	903	method instead of this. The default is to insert nothing, which is
	904	probably not what you want.
	905	\item[@\|after\|] A code fragment inserted at the end of the effective method
	906	body. The default is to insert nothing.
	907	\item[@\|count\|] An identifier naming a variable to be declared in the
	908	effective method body, of type @\|size_t\|, holding the number of applicable
	909	methods. The default is not to provide such a variable.
	910	\end{description}
	911
	912	\subsubsection{Method items}
	913	\begin{grammar}
	914	<method-item> ::=
	915	@<declaration-specifier>^+
	916	<keyword-declarator>$[\mbox{@<dotted-name>}]$
	917	<method-body>
	918
	919	<method-body> ::= "{" <c-fragment> "}" \| "extern" ";"
	920	\end{grammar}
	921	Properties:
	922	\begin{description}
	923	\item[@\|method_class\|] A symbol naming the Lisp class to use to represent
	924	the direct method.
	925	\item[@\|role\|] A keyword naming the direct method's rôle. For the built-in
	926	`simple' message classes, the acceptable rôle names are @\|before\|,
	927	@\|after\|, and @\|around\|. By default, a primary method is constructed.
	928	\end{description}
	929
	930	%%%----- That's all, folks --------------------------------------------------
	931
	932	%%% Local variables:
	933	%%% mode: LaTeX
	934	%%% TeX-master: "sod.tex"
	935	%%% TeX-PDF-mode: t
	936	%%% End: