mdw@git.distorted.org.uk Git - sod/blame_incremental

... / ...

Commit	Line	Data
	1	%%% --latex--
	2	%%%
	3	%%% Module syntax
	4	%%%
	5	%%% (c) 2015 Straylight/Edgeware
	6	%%%
	7
	8	%%%----- Licensing notice ---------------------------------------------------
	9	%%%
	10	%%% This file is part of the Sensible Object Design, an object system for C.
	11	%%%
	12	%%% SOD is free software; you can redistribute it and/or modify
	13	%%% it under the terms of the GNU General Public License as published by
	14	%%% the Free Software Foundation; either version 2 of the License, or
	15	%%% (at your option) any later version.
	16	%%%
	17	%%% SOD is distributed in the hope that it will be useful,
	18	%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	%%% GNU General Public License for more details.
	21	%%%
	22	%%% You should have received a copy of the GNU General Public License
	23	%%% along with SOD; if not, write to the Free Software Foundation,
	24	%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	25
	26	\chapter{Module syntax} \label{ch:syntax}
	27
	28	%%%--------------------------------------------------------------------------
	29	\section{Lexical syntax} \label{sec:syntax.lex}
	30
	31	Whitespace and comments are discarded. The remaining characters are
	32	collected into tokens according to the following syntax.
	33
	34	\begin{grammar}
	35	<token> ::= <identifier>
	36	\alt <string-literal>
	37	\alt <char-literal>
	38	\alt <integer-literal>
	39	\alt <punctuation>
	40	\end{grammar}
	41
	42	This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
	43	munch} rule: at each stage we take the longest sequence of characters which
	44	could be a token.
	45
	46
	47	\subsection{Identifiers} \label{sec:syntax.lex.id}
	48
	49	\begin{grammar}
	50	<identifier> ::= <id-start-char> @<id-body-char>^*
	51
	52	<id-start-char> ::= <alpha-char> \| "_"
	53
	54	<id-body-char> ::= <id-start-char> \| <digit-char>
	55
	56	<alpha-char> ::= "A" \| "B" \| $\cdots$ \| "Z"
	57	\| "a" \| "b" \| $\cdots$ \| "z"
	58	\| <extended-alpha-char>
	59
	60	<digit-char> ::= "0" \| <nonzero-digit-char>
	61
	62	<nonzero-digit-char> ::= "1" \| "2" \| $\cdots$ \| "9"
	63	\end{grammar}
	64
	65	The precise definition of @<alpha-char> is left to the function
	66	@\|alpha-char-p\| in the hosting Lisp system. For portability, programmers are
	67	encouraged to limit themselves to the standard ASCII letters.
	68
	69	There are no reserved words at the lexical level, but the higher-level syntax
	70	recognizes certain identifiers as \emph{keywords} in some contexts. There is
	71	also an ambiguity (inherited from C) in the declaration syntax which is
	72	settled by distinguishing type names from other identifiers at a lexical
	73	level.
	74
	75
	76	\subsection{String and character literals} \label{sec:syntax.lex.string}
	77
	78	\begin{grammar}
	79	<string-literal> ::= "\"" @<string-literal-char>^* "\""
	80
	81	<char-literal> ::= "'" <char-literal-char> "'"
	82
	83	<string-literal-char> ::= "\\" <char>
	84	\| any character other than "\\" or "\""
	85
	86	<char-literal-char> ::= "\\" <char>
	87	\| any character other than "\\" or "'"
	88
	89	<char> ::= any single character
	90	\end{grammar}
	91
	92	The syntax for string and character literals differs from~C. In particular,
	93	escape sequences such as @`\textbackslash n' are not recognized. The use
	94	of string and character literals in Sod, outside of C~fragments, is limited,
	95	and the simple syntax seems adequate. For the sake of future compatibility,
	96	the use of character sequences which resemble C escape sequences is
	97	discouraged.
	98
	99
	100	\subsection{Integer literals} \label{sec:syntax.lex.int}
	101
	102	\begin{grammar}
	103	<integer-literal> ::= <decimal-integer>
	104	\| <binary-integer>
	105	\| <octal-integer>
	106	\| <hex-integer>
	107
	108	<decimal-integer> ::= "0" \| <nonzero-digit-char> @<digit-char>^*
	109
	110	<binary-integer> ::= "0" @("b" \| "B"@) @<binary-digit-char>^+
	111
	112	<binary-digit-char> ::= "0" \| "1"
	113
	114	<octal-integer> ::= "0" @["o" \| "O"@] @<octal-digit-char>^+
	115
	116	<octal-digit-char> ::= "0" \| "1" \| $\cdots$ \| "7"
	117
	118	<hex-integer> ::= "0" @("x" \| "X"@) @<hex-digit-char>^+
	119
	120	<hex-digit-char> ::= <digit-char>
	121	\| "A" \| "B" \| "C" \| "D" \| "E" \| "F"
	122	\| "a" \| "b" \| "c" \| "d" \| "e" \| "f"
	123	\end{grammar}
	124
	125	Sod understands only integers, not floating-point numbers; its integer syntax
	126	goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
	127	binary. However, length and signedness indicators are not permitted.
	128
	129
	130	\subsection{Punctuation} \label{sec:syntax.lex.punct}
	131
	132	\begin{grammar}
	133	<punctuation> ::= "<<" \| ">>" \| "\|\|" \| "&&"
	134	\| "<=" \| ">=" \| "==" \| "!=" \| "\dots"
	135	\alt any nonalphanumeric character other than "_", "\"", or "'"
	136	\end{grammar}
	137
	138
	139	\subsection{Comments} \label{sec:syntax.lex.comment}
	140
	141	\begin{grammar}
	142	<comment> ::= <block-comment> \| <line-comment>
	143
	144	<block-comment> ::=
	145	"/*"
	146	@<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
	147	@<star>^*
	148	"*/"
	149
	150	<star> ::= "*"
	151
	152	<not-star> ::= any character other than "*"
	153
	154	<not-star-or-slash> ::= any character other than "*" or "/"
	155
	156	<line-comment> ::= "/\,/" @<not-newline>^* <newline>
	157
	158	<newline> ::= a newline character
	159
	160	<not-newline> ::= any character other than newline
	161	\end{grammar}
	162
	163	Comments are exactly as in C99: both traditional block comments `@\|/*\| \dots\
	164	@\|*/\|' and \Cplusplus-style `@\|/\,/\| \dots' comments are permitted and
	165	ignored.
	166
	167
	168	\subsection{Special nonterminals} \label{sec:syntax.lex.special}
	169
	170	Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
	171	two special nonterminals occur in the module syntax.
	172
	173	\subsubsection{S-expressions}
	174	\begin{grammar}
	175	<s-expression> ::= an S-expression, as parsed by the Lisp reader
	176	\end{grammar}
	177
	178	When an S-expression is expected, the Sod parser simply calls the host Lisp
	179	system's @\|read\| function. Sod modules are permitted to modify the read
	180	table to extend the S-expression syntax.
	181
	182	S-expressions are self-delimiting, so no end-marker is needed.
	183
	184	\subsubsection{C fragments}
	185	\begin{grammar}
	186	<c-fragment> ::= a sequence of C tokens, with matching brackets
	187	\end{grammar}
	188
	189	Sequences of C code are simply stored and written to the output unchanged
	190	during translation. They are read using a simple scanner which nonetheless
	191	understands C comments and string and character literals.
	192
	193	A C fragment is terminated by one of a small number of delimiter characters
	194	determined by the immediately surrounding context -- usually some kind of
	195	bracket. The first such delimiter character which is not enclosed in
	196	brackets, braces or parentheses ends the fragment.
	197
	198	%%%--------------------------------------------------------------------------
	199	\section{C types} \label{sec:syntax.type}
	200
	201	Sod's syntax for C types closely mirrors the standard C syntax. A C type has
	202	two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
	203	Sod, a type must contain at least one @<declaration-specifier> (i.e.,
	204	`implicit @\|int\|' is forbidden), and storage-class specifiers are not
	205	recognized.
	206
	207
	208	\subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
	209
	210	\begin{grammar}
	211	<declaration-specifier> ::= <type-name>
	212	\alt "struct" <identifier> \| "union" <identifier> \| "enum" <identifier>
	213	\alt "void" \| "char" \| "int" \| "float" \| "double"
	214	\alt "short" \| "long"
	215	\alt "signed" \| "unsigned"
	216	\alt "bool" \| "_Bool"
	217	\alt "imaginary" \| "_Imaginary" \| "complex" \| "_Complex"
	218	\alt <qualifier>
	219	\alt <storage-specifier>
	220	\alt <atomic-type>
	221	\alt <other-declspec>
	222
	223	<qualifier> ::= <atomic> \| "const" \| "volatile" \| "restrict"
	224
	225	<plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
	226
	227	<atomic-type> ::= <atomic> "(" <plain-type> ")"
	228
	229	<atomic> ::= "atomic" \| "_Atomic"
	230
	231	<storage-specifier> ::= <alignas> "(" <c-fragment> ")"
	232
	233	<alignas> ::= "alignas" \| "_Alignas"
	234
	235	<type-name> ::= <identifier>
	236	\end{grammar}
	237
	238	Declaration specifiers may appear in any order. However, not all
	239	combinations are permitted. A sequence of declaration specifiers must consist of zero or
	240	more @<qualifier>s, zero or more @<storage-specifier>s, and one of the
	241	following, up to reordering:
	242	\begin{itemize}
	243	\item @<type-name>;
	244	\item @<atomic-type>;
	245	\item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
	246	\item @"void";
	247	\item @"_Bool", @"bool";
	248	\item @"char"; @"unsigned char"; @"signed char";
	249	\item @"short", @"signed short", @"short int", @"signed short int";
	250	@"unsigned short", @"unsigned short int";
	251	\item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
	252	\item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
	253	long", @"unsigned long int";
	254	\item @"long long", @"signed long long", @"long long int", @"signed long long
	255	int"; @"unsigned long long", @"unsigned long long int";
	256	\item @"float"; @"double"; @"long double";
	257	\item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
	258	imaginary"; @"long double _Imaginary", @"long double imaginary";
	259	\item @"float _Complex", @"float complex"; @"double _Complex", @"double
	260	complex"; @"long double _Complex", @"long double complex".
	261	\end{itemize}
	262	All of these have their usual C meanings. Groups separated by commas mean
	263	the same thing, and Sod will not preserve the distinction.
	264
	265	Almost all of these mean the same as they do in C. There are some minor
	266	differences:
	267	\begin{itemize}
	268	\item In C, the `tag' namespace is shared between @\|struct\|, @\|union\|, and
	269	@\|enum\|; Sod has three distinct namespaces for tags. This may be fixed in
	270	the future.
	271	\item The @<other-declspec> production is a syntactic extension point, where
	272	extensions can introduce their own additions to the type system.
	273	\end{itemize}
	274
	275	C standards from C99 onwards have tended to introduce new keywords beginning
	276	with an underscore followed by an uppercase letter, so as to avoid conflicts
	277	with existing code. More conventional spellings are then provided by macros
	278	in new header files. For example, C99 introduced @"_Bool", and a header file
	279	@\|<stdbool.h>\| which defines the macro @\|bool\|. Sod recognizes both the ugly
	280	underscore names and the more conventional macro names on input, but always
	281	emits the ugly names. This doesn't cause a compatibility problem in Sod,
	282	because Sod's parser recognizes keywords only in the appropriate context.
	283	For example, the (ill-advised) slot declaration
	284	\begin{prog}
	285	bool bool;
	286	\end{prog}
	287	is completely acceptable, and will cause the C structure member
	288	\begin{prog}
	289	\_Bool bool;
	290	\end{prog}
	291	to be emitted on output, which will be acceptable to C as long as
	292	@\|<stdbool.h>\| is not included.
	293
	294	A @<type-name> is an identifier which has been declared as being a type name,
	295	using the @"typename" or @"class" definitions. The following type names are
	296	defined in the built-in module.
	297	\begin{itemize}
	298	\item @\|va_list\|
	299	\item @\|size_t\|
	300	\item @\|ptrdiff_t\|
	301	\item @\|wchar_t\|
	302	\end{itemize}
	303
	304
	305	\subsection{Declarators} \label{sec:syntax.type.declarator}
	306
	307	\begin{grammar}
	308	<declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
	309
	310	<primary-declarator>$[k, a]$ ::= $k$
	311	\alt "(" <primary-declarator>$[k, a]$ ")"
	312	\alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
	313
	314	<pointer> ::= "*" @<qualifier>^*
	315
	316	<declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
	317	\alt "(" $a$ ")"
	318
	319	<argument-list> ::= $\epsilon$ \| "\dots"
	320	\alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
	321
	322	<argument> ::= @<declaration-specifier>^+ <argument-declarator>
	323
	324	<abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
	325
	326	<argument-declarator> ::=
	327	<declarator>$[\mbox{@<identifier> \| $\epsilon$}, \mbox{@<argument-list>}]$
	328
	329	<simple-declarator> ::=
	330	<declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
	331	\end{grammar}
	332
	333	The declarator syntax is taken from C, but with some differences.
	334	\begin{itemize}
	335	\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
	336	closing square bracket. This allows array dimensions to contain arbitrary
	337	constant expressions.
	338	\item A declarator may have either a single @<identifier> at its centre or a
	339	pair of @<identifier>s separated by a @`.'; this is used to refer to
	340	slots or messages defined in superclasses.
	341	\end{itemize}
	342	The remaining differences are (I hope) a matter of presentation rather than
	343	substance.
	344
	345	There is additional syntax to support messages and methods which accept
	346	keyword arguments.
	347
	348	\begin{grammar}
	349	<keyword-argument> ::= <argument> @["=" <c-fragment>@]
	350
	351	<keyword-argument-list> ::=
	352	@[<list>$[\mbox{@<argument>}]$@]
	353	"?" @[<list>$[\mbox{@<keyword-argument>}]$@]
	354
	355	<method-argument-list> ::= <argument-list> \| <keyword-argument-list>
	356
	357	<dotted-name> ::= <identifier> "." <identifier>
	358
	359	<keyword-declarator>$[k]$ ::=
	360	<declarator>$[k, \mbox{@<method-argument-list>}]$
	361	\end{grammar}
	362
	363	%%%--------------------------------------------------------------------------
	364	\section{Properties} \label{sec:syntax.prop}
	365
	366	\begin{grammar}
	367	<properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
	368
	369	<property> ::= <identifier> "=" <expression>
	370
	371	<expression> ::= <logical-or>
	372
	373	<logical-or> ::= <logical-and>
	374	\| <logical-or> "\|\|" <logical-and>
	375
	376	<logical-and> ::= <bitwise-or>
	377	\| <logical-and> "&&" <bitwise-or>
	378
	379	<bitwise-or> ::= <bitwise-xor>
	380	\| <bitwise-or> "\|" <bitwise-xor>
	381
	382	<bitwise-xor> ::= <bitwise-and>
	383	\| <bitwise-xor> "^" <bitwise-and>
	384
	385	<bitwise-and> ::= <equality>
	386	\| <bitwise-and> "&" <equality>
	387
	388	<equality> ::= <ordering>
	389	\| <equality> "==" <ordering>
	390	\| <equality> "!=" <ordering>
	391
	392	<ordering> ::= <shift>
	393	\| <ordering> "<" <shift>
	394	\| <ordering> "<=" <shift>
	395	\| <ordering> ">=" <shift>
	396	\| <ordering> ">" <shift>
	397
	398	<shift> ::= <additive>
	399	\| <shift> "<<" <additive>
	400	\| <shift> ">>" <additive>
	401
	402	<additive> ::= <term>
	403	\| <additive> "+" <term>
	404	\| <additive> "--" <term>
	405
	406	<term> ::= <factor>
	407	\| <term> "*" <factor>
	408	\| <term> "/" <factor>
	409
	410	<factor> ::= <primary>
	411	\| "!" <factor> \| "~" <factor>
	412	\| "+" <factor> \| "--" <factor>
	413
	414	<primary> ::=
	415	<integer-literal> \| <string-literal> \| <char-literal> \| <identifier>
	416	\alt "<" <plain-type> ">" \| "{" <c-fragment> "}" \| "?" <s-expression>
	417	\| "(" <expression> ")"
	418	\end{grammar}
	419
	420	\emph{Property sets} are a means for associating miscellaneous information
	421	with compile-time metaobjects such as modules, classes, messages, methods,
	422	slots, and initializers. By using property sets, additional information can
	423	be passed to extensions without the need to introduce idiosyncratic syntax.
	424	(That said, extensions can add additional first-class syntax, if necessary.)
	425
	426	An error is reported if an unrecognized property is associated with an
	427	object.
	428
	429
	430	\subsection{Property values} \label{sec:syntax.prop.value}
	431
	432	A property has a name, given as an @<identifier>, and a value computed by
	433	evaluating an @<expression>. The value can be one of a number of types.
	434
	435	\begin{itemize}
	436
	437	\item An @<integer-literal> denotes a value of type @\|int\|.
	438
	439	\item Similarly @<string-literal> and @<char-literal> denote @\|string\| and
	440	@\|char\| values respectively. Note that, as properties, characters are
	441	quite distinct from integers, whereas in C, a character literal denotes a
	442	value of type @\|int\|.
	443
	444	\item There are no variables in the property-value syntax. Rather, an
	445	@<identifier> denotes that identifier, as a value of type @\|id\|.
	446
	447	\item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
	448	between angle brackets, e.g., @\|<int>\|, or @\|<char *>\|, or @\|<void (*(int,
	449	void (*)(int)))(int)>\|, denotes that C type, as a value of type @\|type\|.
	450
	451	\item A @<c-fragment> within braces denotes the tokens between (and not
	452	including) the braces, as a value of type @\|c-fragment\|.
	453
	454	\end{itemize}
	455
	456	As shown in the grammar, there are four binary operators, @"+" (addition),
	457	@"--" (subtraction), @"*" (multiplication), and @"/" (division);
	458	multiplication and division have higher precedence than addition and
	459	subtraction, and operators of the same precedence associate left-to-right.
	460	There are also unary @"+" (no effect) and @"--" (negation) operators, with
	461	higher precedence. All of the above operators act only on integer operands
	462	and yield integer results. (Although the unary @"+" operator yields its
	463	operand unchanged, an error is still reported if it is applied to a
	464	non-integer value.) There are currently no bitwise, logical, or comparison
	465	operators.
	466
	467	Finally, an S-expression preceded by @\|?\| causes the expression to be read in
	468	the current package (which is always @\|sod-user\| at the start of a module)
	469	and immediately evaluated (using @\|eval\|); the resulting value is converted
	470	into a property value using the \descref{gf}{decode-property}[generic
	471	function].
	472
	473
	474	\subsection{Property output types and coercions}
	475	\label{sec:syntax.prop.coerce}
	476
	477	When a property value is inspected by the Sod translator, or an extension, it
	478	is \emph{coerced} so as to conform to a requested output type. This coercion
	479	process is performed by the \descref{gf}{coerce-property-value}[generic
	480	function], and additional output types and coercions can be defined by
	481	extensions. The built-in output types and coercions, from the value types listed
	482	above, are as follows.
	483
	484	\begin{itemize}
	485
	486	\item The output types @\|int\|, @\|string\|, @\|char\|, @\|id\|, and @\|c-fragment\|
	487	correspond to the like-named value types described above. No coercions to
	488	these output types are defined for the described value types.\footnote{%
	489	There is a coercion to @\|id\| from the value type @\|symbol\|, but it is
	490	only possible to generate a property value of type @\|symbol\| using Lisp.}
	491
	492	\item The output type @\|type\| denotes a C type, as does the value type
	493	@\|type\|. In addition, a value of type @\|id\| can be coerced to a C type if
	494	it is the name of a class, a type name explicitly declared by @\|typename\|,
	495	or it is one of: @\|bool\|, @\|_Bool\|, @\|void\|, @\|char\|, @\|short\|, @\|int\|,
	496	@\|signed\|, @\|unsigned\|, @\|long\|, @\|size_t\|, @\|ptrdiff_t\|, @\|wchar_t\|,
	497	or @\|va_list\|.
	498
	499	\item The @\|boolean\| output type denotes a boolean value, which may be either
	500	true or false. A value of type @\|id\| is considered true if it is @\|true\|,
	501	@\|t\|, @\|yes\|, @\|on\|, @\|yup\|, or @\|verily\|; or false if it is @\|false\|,
	502	@\|nil\|, @\|no\|, @\|off\|, @\|nope\|, or @\|nowise\|; it is erroneous to provide
	503	any other identifier where a boolean value is wanted. A value of type
	504	@\|int\| is considered true if it is nonzero, or false if it is zero.
	505
	506	\item The @\|symbol\| output type denotes a Lisp symbol.
	507
	508	A value of type @\|id\| is coerced to a symbol as follows. First, the
	509	identifier name is subjected to \emph{case inversion}: if all of the
	510	letters in the name have the same case, either upper or lower, then they
	511	are replaced with the corresponding letters in the opposite case, lower or
	512	upper; if the name contains letters of both cases, then it is not changed.
	513	For example, @\|foo45\| becomes @\|FOO45\|, or \emph{vice-versa}; but @\|Splat\|
	514	remains as it is. Second, the name is subjected to \emph{separator
	515	switching}: all underscores in the name are replaced with hyphens (and
	516	\emph{vice-versa}, though hyphens aren't permitted in identifiers in the
	517	first place). Finally, the resulting name is interned in the current
	518	package, which will usually be @\|sod-user\| unless changed explicitly by the
	519	module.
	520
	521	A value of type @\|string\| is coerced to a symbol as follows. If the string
	522	contains no colons, then it is case-inverted (but not separator-switched)
	523	and interned in the current package. Otherwise, the string either has the
	524	form $p @\|:\| q$, where $q$ does not begin with a colon (the
	525	\emph{single-colon} case) or $p @\|::\| q$ (the \emph{double-colon} case);
	526	where $p$ does not contain a colon. Both $p$ and $q$ are case-inverted
	527	(but not separator-switched). If $p$ does not name a package, then an
	528	error is reported; as a special case, if $p$ is empty, then it is
	529	considered to name the @\|keyword\| package. Otherwise, $q$ is looked up as
	530	a symbol name in package~$p$; in the single-colon case, if the symbol is
	531	not an exported symbol in package~$p$, then an error is reported; in the
	532	double-colon case, $q$ is interned in package~$p$ (and so there needn't be
	533	an exported symbol -- or, indeed, any symbol at all -- named $q$
	534	beforehand).
	535
	536	\item The @\|keyword\| output type denotes symbols within the @\|keyword\|
	537	package. Values of type @\|id\| or @\|string\| can be coerced to a @\|keyword\|
	538	in the same way as to a @\|symbol\|, as described above, only the converted
	539	name is looked up in the @\|keyword\| package rather than the current
	540	package. (A @\|string\| can override this by specifying an explicit package
	541	name, but this is unlikely to be very helpful.)
	542
	543	\end{itemize}
	544
	545	%%%--------------------------------------------------------------------------
	546	\section{Module syntax} \label{sec:syntax.module}
	547
	548	\begin{grammar}
	549	<module> ::= @<definition>^*
	550
	551	<definition> ::= <property-definition> \fixme{undefined}
	552	\alt <import-definition>
	553	\alt <load-definition>
	554	\alt <lisp-definition>
	555	\alt <code-definition>
	556	\alt <typename-definition>
	557	\alt <class-definition>
	558	\alt <other-definition> \fixme{undefined}
	559	\end{grammar}
	560
	561	A @<module> is the top-level syntactic item: a source file presented to Sod
	562	is expected to conform with the @<module> syntax.
	563
	564	A module consists of a sequence of definitions.
	565
	566	\fixme{describe syntax; expand}
	567	Properties:
	568	\begin{description}
	569	\item[@\|module_class\|] A symbol naming the Lisp class to use to
	570	represent the module.
	571	\item[@\|guard\|] An identifier to use as the guard symbol used to prevent
	572	multiple inclusion in the header file.
	573	\end{description}
	574
	575
	576	\subsection{Simple definitions} \label{sec:syntax.module.simple}
	577
	578	\subsubsection{Importing modules}
	579	\begin{grammar}
	580	<import-definition> ::= "import" <string> ";"
	581	\end{grammar}
	582
	583	The module named @<string> is processed and its definitions made available.
	584
	585	A search is made for a module source file as follows.
	586	\begin{itemize}
	587	\item The module name @<string> is converted into a filename by appending
	588	@`.sod', if it has no extension already.\footnote{%
	589	Technically, what happens is @\|(merge-pathnames name (make-pathname :type
	590	"SOD" :case :common))\|, so exactly what this means varies according to
	591	the host system.} %
	592	\item The file is looked for relative to the directory containing the
	593	importing module.
	594	\item If that fails, then the file is looked for in each directory on the
	595	module search path in turn.
	596	\item If the file still isn't found, an error is reported and the import
	597	fails.
	598	\end{itemize}
	599	At this point, if the file has previously been imported, nothing further
	600	happens.\footnote{%
	601	This check is done using @\|truename\|, so it should see through simple
	602	tricks like symbolic links. However, it may be confused by fancy things
	603	like bind mounts and so on.} %
	604
	605	Recursive imports, either direct or indirect, are an error.
	606
	607	\subsubsection{Loading extensions}
	608	\begin{grammar}
	609	<load-definition> ::= "load" <string> ";"
	610	\end{grammar}
	611
	612	The Lisp file named @<string> is loaded and evaluated.
	613
	614	A search is made for a Lisp source file as follows.
	615	\begin{itemize}
	616	\item The name @<string> is converted into a filename by appending @`.lisp',
	617	if it has no extension already.\footnote{%
	618	Technically, what happens is @\|(merge-pathnames name (make-pathname :type
	619	"LISP" :case :common))\|, so exactly what this means varies according to
	620	the host system.} %
	621	\item A search is then made in the same manner as for module imports
	622	(\xref{sec:syntax.module.simple}).
	623	\end{itemize}
	624	If the file is found, it is loaded using the host Lisp's @\|load\| function.
	625
	626	Note that Sod doesn't attempt to compile Lisp files, or even to look for
	627	existing compiled files. The right way to package a substantial extension to
	628	the Sod translator is to provide the extension as a standard ASDF system (or
	629	similar) and leave a dropping @\|foo-extension.lisp\| in the module path saying
	630	something like
	631	\begin{prog}
	632	(asdf:load-system :foo-extension)
	633	\end{prog}
	634	which will arrange for the extension to be compiled if necessary.
	635
	636	(This approach means that the language doesn't need to depend on any
	637	particular system definition facility. It's bad enough already that it
	638	depends on Common Lisp.)
	639
	640	\subsubsection{Lisp escapes}
	641	\begin{grammar}
	642	<lisp-definition> ::= "lisp" <s-expression> ";"
	643	\end{grammar}
	644
	645	The @<s-expression> is evaluated immediately. It can do anything it likes.
	646
	647	\begin{boxy}[Warning!]
	648	This means that hostile Sod modules are a security hazard. Lisp code can
	649	read and write files, start other programs, and make network connections.
	650	Don't install Sod modules from sources that you don't trust.\footnote{%
	651	Presumably you were going to run the corresponding code at some point, so
	652	this isn't as unusually scary as it sounds. But please be careful.} %
	653	\end{boxy}
	654
	655	\subsubsection{Declaring type names}
	656	\begin{grammar}
	657	<typename-definition> ::=
	658	"typename" <list>$[\mbox{@<identifier>}]$ ";"
	659	\end{grammar}
	660
	661	Each @<identifier> is declared as naming a C type. This is important because
	662	the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
	663	done by distinguishing type names from other identifiers.
	664
	665	Don't declare class names using @"typename"; use @"class" forward
	666	declarations instead.
	667
	668
	669	\subsection{Literal code} \label{sec:syntax.module.literal}
	670
	671	\begin{grammar}
	672	<code-definition> ::=
	673	"code" <reason> ":" <item-name> @[<constraints>@]
	674	"{" <c-fragment> "}"
	675	\alt
	676	"code" <reason> ":" <constraints> ";"
	677
	678	<reason> ::= <identifier>
	679
	680	<constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
	681
	682	<constraint> ::= @<item-name>^+
	683
	684	<item-name> ::= <identifier> \| "(" @<identifier>^+ ")"
	685	\end{grammar}
	686
	687	The @<c-fragment> will be output unchanged to one of the output files.
	688
	689	The first @<identifier> is the symbolic name of an output file. Predefined
	690	output file names are @\|c\| and @\|h\|, which are the implementation code and
	691	header file respectively; other output files can be defined by extensions.
	692
	693	Output items are named with a sequence of identifiers, separated by
	694	whitespace, and enclosed in parentheses. As an abbreviation, a name
	695	consisting of a single identifier may be written as just that identifier,
	696	without the parentheses.
	697
	698	The @<constraints> provide a means for specifying where in the output file
	699	the output item should appear. (Note the two kinds of square brackets shown
	700	in the syntax: square brackets must appear around the constraints if they are
	701	present, but the constraints may be omitted altogether.) Each comma-separated @<constraint>
	702	is a sequence of names of output items, and indicates that the output items
	703	must appear in the order given -- though the translator is free to insert
	704	additional items in between them. (The particular output items needn't be
	705	defined already -- indeed, they needn't be defined ever.)
	706
	707	There is a predefined output item @\|includes\| in both the @\|c\| and @\|h\|
	708	output files which is a suitable place for inserting @\|\#include\|
	709	preprocessor directives in order to declare types and functions for use
	710	elsewhere in the generated output files.
	711
	712
	713	\subsection{Class definitions} \label{sec:syntax.module.class}
	714
	715	\begin{grammar}
	716	<class-definition> ::= <class-forward-declaration>
	717	\alt <full-class-definition>
	718	\end{grammar}
	719
	720	\subsubsection{Forward declarations}
	721	\begin{grammar}
	722	<class-forward-declaration> ::= "class" <identifier> ";"
	723	\end{grammar}
	724
	725	A @<class-forward-declaration> informs Sod that an @<identifier> will be used
	726	to name a class which is currently undefined. Forward declarations are
	727	necessary in order to resolve certain kinds of circularity. For example,
	728	\begin{prog}
	729	class Sub; \\+
	730
	731	class Super: SodObject \{ \\ \ind
	732	Sub *sub; \-\\
	733	\}; \\+
	734
	735	class Sub: Super \{ \\ \ind
	736	/* \dots\ */ \-\\
	737	\};
	738	\end{prog}
	739
	740	\subsubsection{Full class definitions}
	741	\begin{grammar}
	742	<full-class-definition> ::=
	743	@[<properties>@]
	744	"class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
	745	"{" @<properties-class-item>^* "}"
	746
	747	<properties-class-item> ::= @[<properties>@] <class-item>
	748
	749	<class-item> ::= <slot-item>
	750	\alt <initializer-item>
	751	\alt <initarg-item>
	752	\alt <fragment-item>
	753	\alt <message-item>
	754	\alt <method-item>
	755	\alt <other-item> \fixme{undefined}
	756	\end{grammar}
	757
	758	A full class definition provides a complete description of a class.
	759
	760	The first @<identifier> gives the name of the class. It is an error to
	761	give the name of an existing class (other than a forward-referenced class),
	762	or an existing type name. It is conventional to give classes `MixedCase'
	763	names, to distinguish them from other kinds of identifiers.
	764
	765	The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
	766	class. It is an error if any of these @<identifier>s does not name a defined
	767	class. The superclass list is required, and must not be empty; listing
	768	@\|SodObject\| as your class's superclass is a good choice if nothing else
	769	seems suitable. A class with no direct superclasses is called a \emph{root
	770	class}. It is not possible to define a root class in the Sod language: you
	771	must use Lisp to do this, and it's quite involved.
	772
	773	The @<properties> provide additional information. The standard class
	774	properties are as follows.
	775	\begin{description}
	776	\item[@\|lisp_class\|] The name of the Lisp class to use within the translator
	777	to represent this class. The property value must be an identifier; the
	778	default is @\|sod_class\|. Extensions may define classes with additional
	779	behaviour, and may recognize additional class properties.
	780	\item[@\|metaclass\|] The name of the Sod metaclass for this class. In the
	781	generated code, a class is itself an instance of another class -- its
	782	\emph{metaclass}. The metaclass defines which slots the class will have,
	783	which messages it will respond to, and what its behaviour will be when it
	784	receives them. The property value must be an identifier naming a defined
	785	subclass of @\|SodClass\|. The default metaclass is @\|SodClass\|.
	786	See \xref{sec:concepts.metaclasses} for more details.
	787	\item[@\|nick\|] A nickname for the class, to be used to distinguish it from
	788	other classes in various limited contexts. The property value must be an
	789	identifier; the default is constructed by forcing the class name to
	790	lower-case.
	791	\end{description}
	792
	793	The class body consists of a sequence of @<class-item>s enclosed in braces.
	794	These items are discussed in the following sections.
	795
	796	\subsubsection{Slot items}
	797	\begin{grammar}
	798	<slot-item> ::=
	799	@<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
	800
	801	<init-declarator> ::= <simple-declarator> @["=" <initializer>@]
	802	\end{grammar}
	803
	804	A @<slot-item> defines one or more slots. All instances of the class and any
	805	subclass will contain these slots, with the names and types given by the
	806	@<declaration-specifier>s and the @<declarator>s. Slot declarators may not
	807	contain dotted names.
	808
	809	It is not possible to declare a slot with function type: such an item is
	810	interpreted as being a @<message-item> or @<method-item>. Pointers to
	811	functions are fine.
	812
	813	Properties:
	814	\begin{description}
	815	\item[@\|slot_class\|] A symbol naming the Lisp class to use to represent the
	816	direct slot.
	817	\item[@\|initarg\|] An identifier naming an initialization argument which can
	818	be used to provide a value for the slot. See
	819	\xref{sec:concepts.lifecycle.birth} for the details.
	820	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	821	the initarg. Only permitted if @\|initarg\| is also set.
	822	\end{description}
	823
	824	An @<initializer>, if present, is treated as if a separate
	825	@<initializer-item> containing the slot name and initializer were present.
	826	For example,
	827	\begin{prog}
	828	[nick = eg] \\
	829	class Example: Super \{ \\ \ind
	830	int foo = 17; \-\\
	831	\};
	832	\end{prog}
	833	means the same as
	834	\begin{prog}
	835	[nick = eg] \\
	836	class Example: Super \{ \\ \ind
	837	int foo; \\
	838	eg.foo = 17; \-\\
	839	\};
	840	\end{prog}
	841
	842	\subsubsection{Initializer items}
	843	\begin{grammar}
	844	<initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
	845
	846	<slot-initializer> ::= <dotted-name> @["=" <initializer>@]
	847
	848	<initializer> ::= <c-fragment>
	849	\end{grammar}
	850
	851	An @<initializer-item> provides an initial value for one or more slots. If
	852	prefixed by @\|class\|, then the initial values are for class slots (i.e.,
	853	slots of the class object itself); otherwise they are for instance slots.
	854
	855	The first component of the @<dotted-name> must be the nickname of one of the
	856	class's superclasses (including itself); the second must be the name of a
	857	slot defined in that superclass.
	858
	859	Properties:
	860	\begin{description}
	861	\item[@\|initializer_class\|] A symbol naming the Lisp class to use to
	862	represent the initializer.
	863	\item[@\|initarg\|] An identifier naming an initialization argument which can
	864	be used to provide a value for the slot. See
	865	\xref{sec:concepts.lifecycle.birth} for the details. An initializer item
	866	must have either an @\|initarg\| property, or an initializer expression, or
	867	both.
	868	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	869	the initarg. Only permitted if @\|initarg\| is also set.
	870	\end{description}
	871
	872	Each class may define at most one initializer item with an explicit
	873	initializer expression for a given slot.
	874
	875	\subsubsection{Initarg items}
	876	\begin{grammar}
	877	<initarg-item> ::=
	878	"initarg"
	879	@<declaration-specifier>^+
	880	<list>$[\mbox{@<init-declarator>}]$ ";"
	881	\end{grammar}
	882	Properties:
	883	\begin{description}
	884	\item[@\|initarg_class\|] A symbol naming the Lisp class to use to represent
	885	the initarg.
	886	\end{description}
	887
	888	\subsubsection{Fragment items}
	889	\begin{grammar}
	890	<fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
	891
	892	<fragment-kind> ::= "init" \| "teardown"
	893	\end{grammar}
	894
	895	\subsubsection{Message items}
	896	\begin{grammar}
	897	<message-item> ::=
	898	@<declaration-specifier>^+
	899	<keyword-declarator>$[\mbox{@<identifier>}]$
	900	@[<method-body>@]
	901	\end{grammar}
	902	Properties:
	903	\begin{description}
	904	\item[@\|message_class\|] A symbol naming the Lisp class to use to represent
	905	the message.
	906	\item[@\|readonly\|] A boolean indicating whether the message guarantees not to
	907	modify its receiver. If this is true, the receiver will be declared
	908	@"const".
	909	\item[@\|combination\|] A keyword naming the aggregating method combination to
	910	use.
	911	\item[@\|most_specific\|] A keyword, either @\|first\| or @\|last\|, according to
	912	whether the most specific applicable method should be invoked first or
	913	last.
	914	\end{description}
	915
	916	Properties for the @\|custom\| aggregating method combination:
	917	\begin{description}
	918	\item[@\|retvar\|] An identifier for the return value from the effective
	919	method. The default is @\|sod__ret\|. Only permitted if the message return
	920	type is not @\|void\|.
	921	\item[@\|valvar\|] An identifier holding each return value from a direct method
	922	in the effective method. The default is @\|sod__val\|. Only permitted if
	923	the method return type (see @\|methty\| below) is not @\|void\|.
	924	\item[@\|methty\|] A C type, which is the return type for direct methods of
	925	this message. The default is the return type of the message.
	926	\item[@\|decls\|] A code fragment containing declarations to be inserted at the
	927	head of the effective method body. The default is to insert nothing.
	928	\item[@\|before\|] A code fragment containing initialization to be performed at
	929	the beginning of the effective method body. The default is to insert
	930	nothing.
	931	\item[@\|empty\|] A code fragment executed if there are no primary methods;
	932	it should usually store a suitable (identity) value in @<retvar>. The
	933	default is not to emit an effective method at all if there are no primary
	934	methods.
	935	\item[@\|first\|] A code fragment to set the return value after calling the
	936	first applicable direct method. The default is to use the @\|each\|
	937	fragment.
	938	\item[@\|each\|] A code fragment to set the return value after calling a direct
	939	method. If @\|first\| is also set, then it is used after the first direct
	940	method instead of this. The default is to insert nothing, which is
	941	probably not what you want.
	942	\item[@\|after\|] A code fragment inserted at the end of the effective method
	943	body. The default is to insert nothing.
	944	\item[@\|count\|] An identifier naming a variable to be declared in the
	945	effective method body, of type @\|size_t\|, holding the number of applicable
	946	methods. The default is not to provide such a variable.
	947	\end{description}
	948
	949	\subsubsection{Method items}
	950	\begin{grammar}
	951	<method-item> ::=
	952	@<declaration-specifier>^+
	953	<keyword-declarator>$[\mbox{@<dotted-name>}]$
	954	<method-body>
	955
	956	<method-body> ::= "{" <c-fragment> "}" \| "extern" ";"
	957	\end{grammar}
	958	Properties:
	959	\begin{description}
	960	\item[@\|method_class\|] A symbol naming the Lisp class to use to represent
	961	the direct method.
	962	\item[@\|role\|] A keyword naming the direct method's rôle. For the built-in
	963	`simple' message classes, the acceptable rôle names are @\|before\|,
	964	@\|after\|, and @\|around\|. By default, a primary method is constructed.
	965	\end{description}
	966
	967	%%%----- That's all, folks --------------------------------------------------
	968
	969	%%% Local variables:
	970	%%% mode: LaTeX
	971	%%% TeX-master: "sod.tex"
	972	%%% TeX-PDF-mode: t
	973	%%% End: