lib/sod.[ch]: Name instance arguments `obj', not `p'.
[sod] / doc / sod.tex
CommitLineData
1f1d88f5
MW
1\documentclass[noarticle]{strayman}
2
3\usepackage[T1]{fontenc}
4\usepackage[utf8]{inputenc}
5\usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
6\usepackage{syntax}
7\usepackage{sverb}
dea4d055
MW
8\usepackage{mdwtab}
9\usepackage{footnote}
1f1d88f5
MW
10\usepackage{at}
11\usepackage{mdwref}
12
13\title{A Sensible Object Design for C}
14\author{Mark Wooding}
15
dea4d055
MW
% Allow `@' in control-sequence names for the private macros defined below.
16\makeatletter
17
% Ask TeX to show (up to 999 lines of) macro context in error messages.
18\errorcontextlines999
19
1f1d88f5
MW
% \syntleft / \syntright: italic opening and empty closing material.
% NOTE(review): presumably the hooks the `syntax' package wraps around
% nonterminals -- confirm against that package.
20\def\syntleft{\normalfont\itshape}
21\let\syntright\empty
22
dea4d055
MW
% \codeface: the font used for code; referenced by \ulitleft, `prog' and
% `describe' below.
23\let\codeface\sffamily
24
% \ulitleft / \ulitright: code-face opening and empty closing material.
% NOTE(review): presumably the `syntax' package's hooks for literals -- confirm.
25\def\ulitleft{\normalfont\codeface}
1f1d88f5
MW
26\let\ulitright\empty
27
% Make \listingsize a no-op.
% NOTE(review): presumably the size hook for `listing' environments -- confirm.
28\let\listingsize\relax
29
% Use the \varepsilon glyph wherever \epsilon is written.
30\let\epsilon\varepsilon
31
dea4d055
MW
% `@' shorthands used throughout the text (declared with \atdef):
%   @<x>   nonterminal via \synt        @"x"   literal via \lit*
%   @`x'   literal via \lit             @|x|   sans-serif text
% Each one finishes with \@scripts so that a following sub/superscript is
% picked up.
32\atdef <#1>{\synt{#1}\@scripts}
33\atdef "#1"{\lit*{#1}\@scripts}
34\atdef `#1'{\lit{#1}\@scripts}
35\atdef |#1|{\textsf{#1}\@scripts}
% \dbl@maybe<bracket>: remember the bracket in \@tempa and peek at the next
% token.  If the next token is the same bracket, \dbl@maybe@i typesets the
% bracket twice with a \! between and uses \@firstoftwo to discard the
% duplicate token; otherwise it typesets the bracket once.  Either way
% control passes to \@scripts.
36\def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i}
37\def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa%
38 \expandafter\@firstoftwo\expandafter\@scripts%
39 \else\@tempa\expandafter\@scripts\fi}
% Metasyntactic brackets: @[ @] (doubled if written twice), @{ @}, @( @),
% and @! for an alternation bar.
40\atdef [{\dbl@maybe[}
41\atdef ]{\dbl@maybe]}
42\atdef {{\m@maybe\{\@scripts}
43\atdef }{\m@maybe\}\@scripts}
44\atdef ({\m@maybe(\@scripts}
45\atdef ){\m@maybe)\@scripts}
46\atdef !{\m@maybe|\@scripts}
% @to: a long right arrow with a quad of space on either side.
47\atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad}
% \m@maybe enters math mode if not already in it, and in that case sets
% \m@maybe@end to the closing `$'; \m@maybe@end is otherwise \relax.
48\let\m@maybe@end\relax
49\def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi}
% \@scripts: peek at the next token and let \@scripts@i decide whether it is
% a sub/superscript to be attached.
50\def\@scripts{\futurelet\@ch\@scripts@i}
1f1d88f5 51
% @;...\\ typesets the text from the semicolon up to \\ in italics.
dea4d055 52\atdef ;#1\\{\normalfont\itshape;#1\\}
239fa5bd
MW
% Wrap the existing `grammar' environment: save its opening command as
% \@@grammar, then redefine \grammar so that \textbar is a math-mode `|' in
% an \hbox before the saved command runs.
53\let\@@grammar\grammar
54\def\grammar{\def\textbar{\hbox{$|$}}\@@grammar}
dea4d055
MW
55
% \@scripts@i: called by \@scripts after \futurelet has stored the next token
% in \@ch.  If that token is a subscript or superscript character, hand over
% to \@scripts@ii; otherwise run \m@maybe@end (closing any math mode opened
% by \m@maybe).
% The \lccode/\lowercase wrapper rewrites the `~' in the definition to a
% token with the character code of `_' (its category code stays that of `~').
% \@scripts@ii#1#2: typeset script character #1 with argument #2 in math
% mode, then look for further scripts.
56\begingroup\lccode`\~=`\_\lowercase{\endgroup
57\def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
58 \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}}
59\def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts}
60
% \Cplusplus: `C++' with the first plus sign kerned 1pt (\p@) towards the C.
61\def\Cplusplus{C\kern-\p@++}
1f1d88f5
MW
% \Csharp: `C#'.
62\def\Csharp{C\#}
% \man{name}{section}: manual-page reference, e.g. name(section) with the
% name in bold.
63\def\man#1#2{\textbf{#1}(#2)}
64
% `prog' environment: a code display set in \codeface inside `quote' and
% `tabbing', with \obeylines in force.
% The \lccode/\lowercase wrapper rewrites each `~' in the definitions below
% to a token with the end-of-line character code, so `\global\let~\\' gives
% the end-of-line character the meaning \\ has at that point (after \tabbing).
% `\\' itself is then globally redefined as \textbackslash, so a backslash
% can be written as `\\' in the body.
% \endprog closes `tabbing', globally restores \\ from the copy saved in
% \old@nl, and closes `quote'.
% NOTE(review): the \global assignments to the end-of-line character are not
% undone by \endprog -- confirm this is harmless outside \obeylines.
65\begingroup\lccode`\~=`\
66\lowercase{
67\endgroup
68\def\prog{%
dea4d055 69 \codeface%
1f1d88f5 70 \quote%
dea4d055 71 \let\old@nl\\%
1f1d88f5
MW
72 \obeylines%
73 \tabbing%
74 \global\let~\\%
75 \global\let\\\textbackslash%
76}
77\def\endprog{%
78 \endtabbing%
dea4d055 79 \global\let\\\old@nl%
1f1d88f5
MW
80 \endquote%
81}}
82
dea4d055
MW
% `boxy' environment: sets its body in a minipage surrounded by a ruled
% frame (\hrule above and below, \vrule left and right) with 1ex of padding
% on each side, and \medskip before and after.
% Optional argument: a heading, set in bold \headfam followed by a quad at
% the start of the box.  The default is the \q@ sentinel, in which case no
% heading is produced.
% The minipage width is \linewidth - 1.2pt - 2ex: 2ex for the horizontal
% padding; NOTE(review): the 1.2pt presumably allows for the two rules --
% confirm.  \headfam is not defined in this part of the file.
83\newenvironment{boxy}[1][\q@]{%
84 \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex%
85 \medskip%
86 \vbox\bgroup\hrule\hbox\bgroup\vrule%
87 \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@%
88 \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi%
89}{%
90 \endminipage\hskip1ex\egroup\vskip1ex\egroup%
91 \vrule\egroup\hrule\egroup%
92 \medskip%
93}
94
% Registry of category names for the `describe' environment.
% \definedescribecategory{tag}{text} stores <text> in the control sequence
% named `cat!<tag>'.
% \describecategoryname{tag} produces the stored text, or <tag> itself when
% no such category has been defined (the \csname lookup then yields \relax).
95\def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}}
96\def\describecategoryname#1{%
97 \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname%
98 \ifx\@tempa\relax#1\else\@tempa\fi}
% Predefined category tags.
99\definedescribecategory{fun}{function}
100\definedescribecategory{gf}{generic function}
101\definedescribecategory{var}{variable}
102\definedescribecategory{const}{constant}
103\definedescribecategory{meth}{primary method}
104\definedescribecategory{ar-meth}{around-method}
105\definedescribecategory{be-meth}{before-method}
106\definedescribecategory{af-meth}{after-method}
107\definedescribecategory{cls}{class}
108\definedescribecategory{ty}{type}
109\definedescribecategory{mac}{macro}
110
% \q@: a sentinel.  It serves as the `no optional argument' default of
% `boxy' and `describe' (detected with \ifx) and as the terminator of the
% delimited helper \@temp below.
111\def\q@{\q@}
% `describe' environment: \begin{describe}[label]{category}{signature}
%   [label]     optional; the name to use in the label instead of the first
%               word of the signature
%   {category}  a tag registered with \definedescribecategory (or any text)
%   {signature} the thing being described, typeset in \codeface
% It emits \label{<category>:<name>} (and a \message naming it), where <name>
% is the optional argument if given, otherwise the signature up to its first
% space.  The heading is a \linewidth-wide tabular* with the signature on
% the left and the bracketed category name, in bold, flush right; the
% description body follows as a single-item list with no right margin.
% The helper \@temp takes `<name> <rest>\q@', so BOTH branches of the \ifx
% must end their argument with \q@; without it TeX reads on past \fi looking
% for the delimiter.
112\newenvironment{describe}[3][\q@]{%
113 \normalfont%
114 \par\goodbreak%
115 \vspace{\bigskipamount}%
116 \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}%
117 \dimen@\linewidth\advance\dimen@-\wd\z@%
118 \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}%
119 \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \q@\fi%
120 \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}%
121 @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}%
122 \noindent\csname tabular*\expandafter\endcsname\@temp%
123 \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular%
124% \@afterheading%
125 \list{}{\rightmargin\z@}\item%
126}{%
127 \endlist%
128}
129
% \push: for use inside a tabbing environment -- a template line that sets a
% tab stop one quad in (\=), moves the left margin to it (\+), and is then
% discarded (\kill), so that following lines are indented one more level.
130\def\push{\quad\=\+\kill}
131
1f1d88f5
MW
132\begin{document}
133
134\maketitle
135
136\include{sod-tut}
137
138%%%--------------------------------------------------------------------------
139\chapter{Internals}
140
141\section{Generated names}
142
143The generated names for functions and objects related to a class are
144constructed systematically so as not to interfere with each other. The rules
145on class, slot and message naming exist so as to ensure that the generated
146names don't collide with each other.
147
148The following notation is used in this section.
149\begin{description}
150\item[@<class>] The full name of the `focus' class: the one for which we are
151 generating names.
152\item[@<super-nick>] The nickname of a superclass.
153\item[@<head-nick>] The nickname of the chain-head class of the chain
154 in question.
155\end{description}
156
157\subsection{Instance layout}
158
159%%%--------------------------------------------------------------------------
160\section{Syntax}
161\label{sec:syntax}
162
163Fortunately, Sod is syntactically quite simple. I've used a little slightly
164unusual notation in order to make the presentation easier to read.
165\begin{itemize}
166\item $\epsilon$ denotes the empty nonterminal:
167 \begin{quote}
168 $\epsilon$ ::=
169 \end{quote}
239fa5bd 170\item @[@<item>@] means an optional @<item>:
1f1d88f5 171 \begin{quote}
ea578bb4 172 \syntax{@[<item>@] ::= $\epsilon$ @! <item>}
1f1d88f5 173 \end{quote}
239fa5bd 174\item @<item>^* means a sequence of zero or more @<item>s:
1f1d88f5 175 \begin{quote}
ea578bb4 176 \syntax{@<item>^* ::= $\epsilon$ @! @<item>^* <item>}
1f1d88f5 177 \end{quote}
239fa5bd 178\item @<item>^+ means a sequence of one or more @<item>s:
1f1d88f5 179 \begin{quote}
239fa5bd 180 \syntax{@<item>^+ ::= <item> @<item>^*}
1f1d88f5
MW
181 \end{quote}
182\item @<item-list> means a sequence of one or more @<item>s separated
183 by commas:
184 \begin{quote}
ea578bb4 185 \syntax{<item-list> ::= <item> @! <item-list> "," <item>}
1f1d88f5
MW
186 \end{quote}
187\end{itemize}
188
189\subsection{Lexical syntax}
190\label{sec:syntax.lex}
191
192Whitespace and comments are discarded. The remaining characters are
193collected into tokens according to the following syntax.
194
195\begin{grammar}
196<token> ::= <identifier>
197\alt <reserved-word>
198\alt <string-literal>
199\alt <char-literal>
200\alt <integer-literal>
201\alt <punctuation>
202\end{grammar}
203
204This syntax is slightly ambiguous. The following two rules serve to
205disambiguate:
206\begin{enumerate}
207\item Reserved words take precedence. All @<reserved-word>s are
208 syntactically @<identifier>s; Sod resolves the ambiguity in favour of
209 @<reserved-word>.
210\item `Maximal munch'. In other cases, at each stage we take the longest
211 sequence of characters which could be a token.
212\end{enumerate}
213
214\subsubsection{Identifiers} \label{sec:syntax.lex.id}
215
216\begin{grammar}
239fa5bd 217<identifier> ::= <id-start-char> @<id-body-char>^*
1f1d88f5 218
239fa5bd 219<id-start-char> ::= <alpha-char> | "_"
1f1d88f5 220
239fa5bd 221<id-body-char> ::= <id-start-char> @! <digit-char>
1f1d88f5 222
239fa5bd
MW
223<alpha-char> ::= "A" | "B" | \dots\ | "Z"
224\alt "a" | "b" | \dots\ | "z"
1f1d88f5
MW
225\alt <extended-alpha-char>
226
239fa5bd 227<digit-char> ::= "0" | <nonzero-digit-char>
1f1d88f5 228
239fa5bd 229<nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
1f1d88f5
MW
230\end{grammar}
231
232The precise definition of @<alpha-char> is left to the function
233\textsf{alpha-char-p} in the hosting Lisp system. For portability,
234programmers are encouraged to limit themselves to the standard ASCII letters.
235
236\subsubsection{Reserved words} \label{sec:syntax.lex.reserved}
237
238\begin{grammar}
239<reserved-word> ::=
239fa5bd
MW
240"char" | "class" | "code" | "const" | "double" | "enum" |
241"extern" | "float" | "import" | "int" | "lisp" | "load" | "long"
242| "restrict" | "short" | "signed" | "struct" | "typename" |
243"union" | "unsigned" | "void" | "volatile"
1f1d88f5
MW
244\end{grammar}
245
246Many of these are borrowed from~C; however, some (e.g., @"import" and
247@"lisp") are not, and some C reserved words are not reserved (e.g.,
248@"static").
249
250\subsubsection{String and character literals} \label{sec:syntax.lex.string}
251
252\begin{grammar}
239fa5bd 253<string-literal> ::= "\"" @<string-literal-char>^* "\""
1f1d88f5
MW
254
255<char-literal> ::= "'" <char-literal-char> "'"
256
257<string-literal-char> ::= any character other than "\\" or "\""
258\alt "\\" <char>
259
260<char-literal-char> ::= any character other than "\\" or "'"
261\alt "\\" <char>
262
263<char> ::= any single character
264\end{grammar}
265
266The syntax for string and character literals differs from~C. In particular,
267escape sequences such as @`\textbackslash n' are not recognized. The use
268of string and character literals in Sod, outside of C~fragments, is limited,
269and the simple syntax seems adequate. For the sake of future compatibility,
270the use of character sequences which resemble C escape sequences is
271discouraged.
272
273\subsubsection{Integer literals} \label{sec:syntax.lex.int}
274
275\begin{grammar}
276<integer-literal> ::= <decimal-integer>
277\alt <binary-integer>
278\alt <octal-integer>
279\alt <hex-integer>
280
239fa5bd 281<decimal-integer> ::= <nonzero-digit-char> @<digit-char>^*
1f1d88f5 282
239fa5bd 283<binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
1f1d88f5 284
239fa5bd 285<binary-digit-char> ::= "0" | "1"
1f1d88f5 286
239fa5bd 287<octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
1f1d88f5 288
239fa5bd 289<octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
1f1d88f5 290
239fa5bd 291<hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
1f1d88f5
MW
292
293<hex-digit-char> ::= <digit-char>
239fa5bd
MW
294\alt "A" | "B" | "C" | "D" | "E" | "F"
295\alt "a" | "b" | "c" | "d" | "e" | "f"
1f1d88f5
MW
296\end{grammar}
297
298Sod understands only integers, not floating-point numbers; its integer syntax
299goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
300binary. However, length and signedness indicators are not permitted.
301
302\subsubsection{Punctuation} \label{sec:syntax.lex.punct}
303
304\begin{grammar}
305<punctuation> ::= any character other than "\"" or "'"
306\end{grammar}
307
308Due to the `maximal munch' rule, @<punctuation> tokens cannot be
309alphanumeric.
310
311\subsubsection{Comments} \label{sec:lex-comment}
312
313\begin{grammar}
314<comment> ::= <block-comment>
315\alt <line-comment>
316
317<block-comment> ::=
318 "/*"
239fa5bd
MW
319 @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
320 @<star>^*
1f1d88f5
MW
321 "*/"
322
323<star> ::= "*"
324
325<not-star> ::= any character other than "*"
326
327<not-star-or-slash> ::= any character other than "*" or "/"
328
239fa5bd 329<line-comment> ::= "//" @<not-newline>^* <newline>
1f1d88f5
MW
330
331<newline> ::= a newline character
332
333<not-newline> ::= any character other than newline
334\end{grammar}
335
336Comments are exactly as in C99: both traditional block comments `\texttt{/*}
337\dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
338permitted and ignored.
339
340\subsection{Special nonterminals}
341\label{sec:special-nonterminals}
342
343Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
344two special nonterminals occur in the module syntax.
345
346\subsubsection{S-expressions} \label{sec:syntax-sexp}
347
348\begin{grammar}
349<s-expression> ::= an S-expression, as parsed by the Lisp reader
350\end{grammar}
351
352When an S-expression is expected, the Sod parser simply calls the host Lisp
353system's \textsf{read} function. Sod modules are permitted to modify the
354read table to extend the S-expression syntax.
355
356S-expressions are self-delimiting, so no end-marker is needed.
357
358\subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
359
360\begin{grammar}
361<c-fragment> ::= a sequence of C tokens, with matching brackets
362\end{grammar}
363
364Sequences of C code are simply stored and written to the output unchanged
365during translation. They are read using a simple scanner which nonetheless
366understands C comments and string and character literals.
367
368A C fragment is terminated by one of a small number of delimiter characters
369determined by the immediately surrounding context -- usually a closing brace
370or bracket. The first such delimiter character which is not enclosed in
371brackets, braces or parentheses ends the fragment.
372
373\subsection{Module syntax} \label{sec:syntax-module}
374
375\begin{grammar}
239fa5bd 376<module> ::= @<definition>^*
1f1d88f5
MW
377
378<definition> ::= <import-definition>
379\alt <load-definition>
380\alt <lisp-definition>
381\alt <code-definition>
382\alt <typename-definition>
383\alt <class-definition>
384\end{grammar}
385
386A module is the top-level syntactic item. A module consists of a sequence of
387definitions.
388
389\subsection{Simple definitions} \label{sec:syntax.defs}
390
391\subsubsection{Importing modules} \label{sec:syntax.defs.import}
392
393\begin{grammar}
394<import-definition> ::= "import" <string> ";"
395\end{grammar}
396
397The module named @<string> is processed and its definitions made available.
398
399A search is made for a module source file as follows.
400\begin{itemize}
401\item The module name @<string> is converted into a filename by appending
402 @`.sod', if it has no extension already.\footnote{%
403 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
404 :type "SOD" :case :common))}, so exactly what this means varies
405 according to the host system.} %
406\item The file is looked for relative to the directory containing the
407 importing module.
408\item If that fails, then the file is looked for in each directory on the
409 module search path in turn.
410\item If the file still isn't found, an error is reported and the import
411 fails.
412\end{itemize}
413At this point, if the file has previously been imported, nothing further
414happens.\footnote{%
415 This check is done using \textsf{truename}, so it should see through simple
416 tricks like symbolic links. However, it may be confused by fancy things
417 like bind mounts and so on.} %
418
419Recursive imports, either direct or indirect, are an error.
420
421\subsubsection{Loading extensions} \label{sec:syntax.defs.load}
422
423\begin{grammar}
424<load-definition> ::= "load" <string> ";"
425\end{grammar}
426
427The Lisp file named @<string> is loaded and evaluated.
428
429A search is made for a Lisp source file as follows.
430\begin{itemize}
431\item The name @<string> is converted into a filename by appending @`.lisp',
432 if it has no extension already.\footnote{%
433 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
434 :type "LISP" :case :common))}, so exactly what this means varies
435 according to the host system.} %
436\item A search is then made in the same manner as for module imports
437 (\xref{sec:syntax.defs.import}).
438\end{itemize}
439If the file is found, it is loaded using the host Lisp's \textsf{load}
440function.
441
442Note that Sod doesn't attempt to compile Lisp files, or even to look for
443existing compiled files. The right way to package a substantial extension to
444the Sod translator is to provide the extension as a standard ASDF system (or
445similar) and leave a dropping @"foo-extension.lisp" in the module path saying
446something like
447\begin{listing}
448(asdf:operate 'asdf:load-op :foo-extension)
449\end{listing}
450which will arrange for the extension to be compiled if necessary.
451
452(This approach means that the language doesn't need to depend on any
453particular system definition facility. It's bad enough already that it
454depends on Common Lisp.)
455
456\subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
457
458\begin{grammar}
459<lisp-definition> ::= "lisp" <s-expression> ";"
460\end{grammar}
461
462The @<s-expression> is evaluated immediately. It can do anything it likes.
463
464\textbf{Warning!} This means that hostile Sod modules are a security hazard.
465Lisp code can read and write files, start other programs, and make network
466connections. Don't install Sod modules from sources that you don't
467trust.\footnote{%
468 Presumably you were going to run the corresponding code at some point, so
469 this isn't as unusually scary as it sounds. But please be careful.} %
470
471\subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
472
473\begin{grammar}
474<typename-definition> ::=
475 "typename" <identifier-list> ";"
476\end{grammar}
477
478Each @<identifier> is declared as naming a C type. This is important because
479the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
480done by distinguishing type names from other identifiers.
481
482Don't declare class names using @"typename"; use @"class" forward
483declarations instead.
484
485\subsection{Literal code} \label{sec:syntax-code}
486
487\begin{grammar}
488<code-definition> ::=
239fa5bd 489 "code" <identifier> ":" <identifier> @[<constraints>@]
1f1d88f5
MW
490 "{" <c-fragment> "}"
491
492<constraints> ::= "[" <constraint-list> "]"
493
239fa5bd 494<constraint> ::= @<identifier>^+
1f1d88f5
MW
495\end{grammar}
496
497The @<c-fragment> will be output unchanged to one of the output files.
498
499The first @<identifier> is the symbolic name of an output file. Predefined
500output file names are @"c" and @"h", which are the implementation code and
501header file respectively; other output files can be defined by extensions.
502
503The second @<identifier> provides a name for the output item. Several C
504fragments can have the same name: they will be concatenated together in the
505order in which they were encountered.
506
507The @<constraints> provide a means for specifying where in the output file
508the output item should appear. (Note the two kinds of square brackets shown
509in the syntax: literal square brackets must appear around the constraints if
510they are present, but the constraints may be omitted entirely.) Each comma-separated @<constraint>
511is a sequence of identifiers naming output items, and indicates that the
512output items must appear in the order given -- though the translator is free
513to insert additional items in between them. (The particular output items
514needn't be defined already -- indeed, they needn't be defined ever.)
515
516There is a predefined output item @"includes" in both the @"c" and @"h"
517output files which is a suitable place for inserting @"\#include"
518preprocessor directives in order to declare types and functions for use
519elsewhere in the generated output files.
520
521\subsection{Property sets} \label{sec:syntax.propset}
522
523\begin{grammar}
524<properties> ::= "[" <property-list> "]"
525
526<property> ::= <identifier> "=" <expression>
527\end{grammar}
528
529Property sets are a means for associating miscellaneous information with
530classes and related items. By using property sets, additional information
531can be passed to extensions without the need to introduce idiosyncratic
532syntax.
533
534A property has a name, given as an @<identifier>, and a value computed by
535evaluating an @<expression>. The value can be one of a number of types,
536though the only operators currently defined act on integer values only.
537
538\subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
539
540\begin{grammar}
541<expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
542
543<term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
544
545<factor> ::= <primary> | "+" <factor> | "-" <factor>
546
547<primary> ::=
548 <integer-literal> | <string-literal> | <char-literal> | <identifier>
549\alt "?" <s-expression>
550\alt "(" <expression> ")"
551\end{grammar}
552
553The arithmetic expression syntax is simple and standard; there are currently
554no bitwise, logical, or comparison operators.
555
556A @<primary> expression may be a literal or an identifier. Note that
557identifiers stand for themselves: they \emph{do not} denote values. For more
558fancy expressions, the syntax
559\begin{quote}
560 @"?" @<s-expression>
561\end{quote}
562causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
563function.
564%%% FIXME crossref to extension docs
565
566\subsection{C types} \label{sec:syntax.c-types}
567
568Sod's syntax for C types closely mirrors the standard C syntax. A C type has
569two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
570Sod, a type must contain at least one @<declaration-specifier> (i.e.,
571`implicit @"int"' is forbidden), and storage-class specifiers are not
572recognized.
573
574\subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
575
576\begin{grammar}
577<declaration-specifier> ::= <type-name>
578\alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
579\alt "void" | "char" | "int" | "float" | "double"
580\alt "short" | "long"
581\alt "signed" | "unsigned"
582\alt <qualifier>
583
584<qualifier> ::= "const" | "volatile" | "restrict"
585
586<type-name> ::= <identifier>
587\end{grammar}
588
589A @<type-name> is an identifier which has been declared as being a type name,
590using the @"typename" or @"class" definitions.
591
592Declaration specifiers may appear in any order. However, not all
593combinations are permitted. The declaration specifiers must consist of zero or
594more @<qualifier>s, and one of the following, up to reordering.
595\begin{itemize}
596\item @<type-name>
239fa5bd 597\item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
1f1d88f5
MW
598\item @"void"
599\item @"char", @"unsigned char", @"signed char"
600\item @"short", @"unsigned short", @"signed short"
601\item @"short int", @"unsigned short int", @"signed short int"
602\item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
603\item @"long", @"unsigned long", @"signed long"
604\item @"long int", @"unsigned long int", @"signed long int"
605\item @"long long", @"unsigned long long", @"signed long long"
606\item @"long long int", @"unsigned long long int", @"signed long long int"
607\item @"float", @"double", @"long double"
608\end{itemize}
609All of these have their usual C meanings.
610
611\subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
612
613\begin{grammar}
ea578bb4 614<declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$
1f1d88f5 615
ea578bb4
MW
616<primary-declarator>$[k]$ ::= $k$
617\alt "(" <primary-declarator>$[k]$ ")"
618\alt <primary-declarator>$[k]$ @<declarator-suffix>^*
1f1d88f5 619
239fa5bd 620<pointer> ::= "*" @<qualifier>^*
1f1d88f5
MW
621
622<declarator-suffix> ::= "[" <c-fragment> "]"
623\alt "(" <arguments> ")"
624
ea578bb4 625<arguments> ::= $\epsilon$ | "..."
239fa5bd 626\alt <argument-list> @["," "..."@]
1f1d88f5 627
239fa5bd 628<argument> ::= @<declaration-specifier>^+ <argument-declarator>
1f1d88f5 629
ea578bb4
MW
630<argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@]
631
632<simple-declarator> ::= <declarator>@[<identifier>@]
1f1d88f5 633
ea578bb4 634<dotted-name> ::= <identifier> "." <identifier>
1f1d88f5 635
ea578bb4 636<dotted-declarator> ::= <declarator>@[<dotted-name>@]
1f1d88f5
MW
637\end{grammar}
638
639The declarator syntax is taken from C, but with some differences.
640\begin{itemize}
641\item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
642 closing square bracket. This allows array dimensions to contain arbitrary
643 constant expressions.
644\item A declarator may have either a single @<identifier> at its centre or a
645 pair of @<identifier>s separated by a @`.'; this is used to refer to
646 slots or messages defined in superclasses.
647\end{itemize}
648The remaining differences are (I hope) a matter of presentation rather than
649substance.
650
651\subsection{Defining classes} \label{sec:syntax.class}
652
653\begin{grammar}
654<class-definition> ::= <class-forward-declaration>
655\alt <full-class-definition>
656\end{grammar}
657
658\subsubsection{Forward declarations} \label{sec:class.class.forward}
659
660\begin{grammar}
661<class-forward-declaration> ::= "class" <identifier> ";"
662\end{grammar}
663
664A @<class-forward-declaration> informs Sod that an @<identifier> will be used
665to name a class which is currently undefined. Forward declarations are
666necessary in order to resolve certain kinds of circularity. For example,
667\begin{listing}
668class Sub;
669
670class Super : SodObject {
671 Sub *sub;
672};
673
674class Sub : Super {
675 /* ... */
676};
677\end{listing}
678
679\subsubsection{Full class definitions} \label{sec:class.class.full}
680
681\begin{grammar}
682<full-class-definition> ::=
239fa5bd 683 @[<properties>@]
1f1d88f5 684 "class" <identifier> ":" <identifier-list>
239fa5bd 685 "{" @<class-item>^* "}"
1f1d88f5
MW
686
687<class-item> ::= <slot-item> ";"
688\alt <message-item>
689\alt <method-item>
690\alt <initializer-item> ";"
691\end{grammar}
692
693A full class definition provides a complete description of a class.
694
695The first @<identifier> gives the name of the class. It is an error to
696give the name of an existing class (other than a forward-referenced class),
697or an existing type name. It is conventional to give classes `MixedCase'
698names, to distinguish them from other kinds of identifiers.
699
700The @<identifier-list> names the direct superclasses for the new class. It
701is an error if any of these @<identifier>s does not name a defined class.
702
703The @<properties> provide additional information. The standard class
704properties are as follows.
705\begin{description}
706\item[@"lisp_class"] The name of the Lisp class to use within the translator
707 to represent this class. The property value must be an identifier; the
708 default is @"sod_class". Extensions may define classes with additional
709 behaviour, and may recognize additional class properties.
710\item[@"metaclass"] The name of the Sod metaclass for this class. In the
711 generated code, a class is itself an instance of another class -- its
712 \emph{metaclass}. The metaclass defines which slots the class will have,
713 which messages it will respond to, and what its behaviour will be when it
714 receives them. The property value must be an identifier naming a defined
715 subclass of @"SodClass". The default metaclass is @"SodClass".
716 %%% FIXME xref to theory
717\item[@"nick"] A nickname for the class, to be used to distinguish it from
718 other classes in various limited contexts. The property value must be an
719 identifier; the default is constructed by forcing the class name to
720 lower-case.
721\end{description}
722
723The class body consists of a sequence of @<class-item>s enclosed in braces.
724These items are discussed in the following sections.
725
726\subsubsection{Slot items} \label{sec:sntax.class.slot}
727
728\begin{grammar}
729<slot-item> ::=
239fa5bd
MW
730 @[<properties>@]
731 @<declaration-specifier>^+ <init-declarator-list>
1f1d88f5 732
239fa5bd 733<init-declarator> ::= <declarator> @["=" <initializer>@]
1f1d88f5
MW
734\end{grammar}
735
736A @<slot-item> defines one or more slots. All instances of the class and any
737subclass will contain these slots, with the names and types given by the
738@<declaration-specifier>s and the @<declarator>s. Slot declarators may not
739contain qualified identifiers.
740
741It is not possible to declare a slot with function type: such an item is
742interpreted as being a @<message-item> or @<method-item>. Pointers to
743functions are fine.
744
745An @<initializer>, if present, is treated as if a separate
746@<initializer-item> containing the slot name and initializer were present.
747For example,
748\begin{listing}
749[nick = eg]
750class Example : Super {
751 int foo = 17;
752};
753\end{listing}
754means the same as
755\begin{listing}
756[nick = eg]
757class Example : Super {
758 int foo;
759 eg.foo = 17;
760};
761\end{listing}
762
763\subsubsection{Initializer items} \label{sec:syntax.class.init}
764
765\begin{grammar}
239fa5bd 766<initializer-item> ::= @["class"@] <slot-initializer-list>
1f1d88f5
MW
767
768<slot-initializer> ::= <qualified-identifier> "=" <initializer>
769
770<initializer> ::= "{" <c-fragment> "}" | <c-fragment>
771\end{grammar}
772
773An @<initializer-item> provides an initial value for one or more slots. If
774prefixed by @"class", then the initial values are for class slots (i.e.,
775slots of the class object itself); otherwise they are for instance slots.
776
777The first component of the @<qualified-identifier> must be the nickname of
778one of the class's superclasses (including itself); the second must be the
779name of a slot defined in that superclass.
780
781The initializer has one of two forms.
782\begin{itemize}
783\item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
784 This is suitable for initializing structure, union or array slots.
785\item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
786 initializer, and continues until the next @`,' or @`;' which is not within
787 nested brackets. Bare initializers are suitable for initializing scalar
788 slots, such as pointers or integers, and strings.
789\end{itemize}
790
791\subsubsection{Message items} \label{sec:syntax.class.message}
792
793\begin{grammar}
794<message-item> ::=
239fa5bd
MW
795 @[<properties>@]
796 @<declaration-specifier>^+ <declarator> @[<method-body>@]
1f1d88f5
MW
797\end{grammar}
798
799\subsubsection{Method items} \label{sec:syntax.class.method}
800
801\begin{grammar}
802<method-item> ::=
239fa5bd
MW
803 @[<properties>@]
804 @<declaration-specifier>^+ <declarator> <method-body>
1f1d88f5
MW
805
806<method-body> ::= "{" <c-fragment> "}" | "extern" ";"
807\end{grammar}
808
809%%%--------------------------------------------------------------------------
810\section{Class objects}
811
812\begin{listing}
813typedef struct SodClass__ichain_obj SodClass;
814
815struct sod_chain {
816 size_t n_classes; /* Number of classes in chain */
817 const SodClass *const *classes; /* Vector of classes, head first */
818 size_t off_ichain; /* Offset of ichain from instance base */
819 const struct sod_vtable *vt; /* Vtable pointer for chain */
820 size_t ichainsz; /* Size of the ichain structure */
821};
822
823struct sod_vtable {
824 SodClass *_class; /* Pointer to instance's class */
825 size_t _base; /* Offset to instance base */
826};
827
828struct SodClass__islots {
829
830 /* Basic information */
831 const char *name; /* The class's name as a string */
832 const char *nick; /* The nickname as a string */
833
834 /* Instance allocation and initialization */
835 size_t instsz; /* Instance layout size in bytes */
836 void *(*imprint)(void *); /* Stamp instance with vtable ptrs */
837 void *(*init)(void *); /* Initialize instance */
838
839 /* Superclass structure */
840 size_t n_supers; /* Number of direct superclasses */
841 const SodClass *const *supers; /* Vector of direct superclasses */
842 size_t n_cpl; /* Length of class precedence list */
843 const SodClass *const *cpl; /* Vector for class precedence list */
844
845 /* Chain structure */
846 const SodClass *link; /* Link to next class in chain */
847 const SodClass *head; /* Pointer to head of chain */
848 size_t level; /* Index of class in its chain */
849 size_t n_chains; /* Number of superclass chains */
/* `sod_chain' is only a struct tag (no typedef), so it needs `struct' */
850 const struct sod_chain *chains; /* Vector of chain structures */
851
852 /* Layout */
853 size_t off_islots; /* Offset of islots from ichain base */
854 size_t islotsz; /* Size of instance slots */
855};
856
857struct SodClass__ichain_obj {
858 const SodClass__vt_obj *_vt;
859 struct SodClass__islots cls;
860};
861
862struct sod_instance {
863 struct sod_vtable *_vt;
864};
865\end{listing}
866
867\begin{listing}
868void *sod_convert(const SodClass *cls, const void *obj)
869{
870 const struct sod_instance *inst = obj;
871 const SodClass *real = inst->_vt->_cls;
872 const struct sod_chain *chain;
873 size_t i, index;
874
875 for (i = 0; i < real->cls.n_chains; i++) {
876 chain = &real->cls.chains[i];
877 if (chain->classes[0] == cls->cls.head) {
878 index = cls->cls.index;
879 if (index < chain->n_classes && chain->classes[index] == cls)
880 return ((char *)cls - inst->_vt._base + chain->off_ichain);
881 else
882 return (0);
883 }
884 }
885 return (0);
886}
887\end{listing}
888
889%%%--------------------------------------------------------------------------
890\section{Classes}
891
892\subsection{Classes and superclasses}
893
894A @<full-class-definition> must list one or more existing classes to be the
895\emph{direct superclasses} for the new class being defined. We make the
896following definitions.
897\begin{itemize}
898\item The \emph{superclasses} of a class consist of the class itself together
899 with the superclasses of its direct superclasses.
900\item The \emph{proper superclasses} of a class are its superclasses other
901 than itself.
902\item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
903 \emph{subclass} of $C$.
904\end{itemize}
905The predefined class @|SodObject| has no direct superclasses; it is unique in
906this respect. All classes are subclasses of @|SodObject|.
907
908\subsection{The class precedence list}
909
910Let $C$ be a class. The superclasses of $C$ form a directed graph, with an
911edge from each class to each of its direct superclasses. This is the
912\emph{superclass graph of $C$}.
913
In order to resolve inheritance of items, we define a \emph{class precedence
  list} (or CPL) for each class, which imposes a total order on that class's
superclasses. The default algorithm for computing the CPL is the \emph{C3}
algorithm~\cite{fixme-c3}, though extensions may implement other algorithms.
918
919The default algorithm works as follows. Let $C$ be the class whose CPL we
920are to compute. Let $X$ and $Y$ be two of $C$'s superclasses.
921\begin{itemize}
922\item $C$ must appear first in the CPL.
\item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
  superclasses, then $X$ appears before $Y$ in $C$'s CPL.
925\item If the above rules don't suffice to order $X$ and $Y$, then whichever
926 of $X$ and $Y$ has a subclass which appears further left in the list of
927 $C$'s direct superclasses will appear earlier in the CPL.
928\end{itemize}
929This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
930superclasses of the same direct superclass of $C$ then that direct
931superclass's CPL will order $X$ and $Y$.
932
933We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
934$X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from
935context then we omit it, saying simply that $X$ is more specific than $Y$.
936
937\subsection{Instances and metaclasses}
938
939A class defines the structure and behaviour of its \emph{instances}: run-time
940objects created (possibly) dynamically. An instance is an instance of only
941one class, though structurally it may be used in place of an instance of any
942of that class's superclasses. It is possible, with care, to change the class
943of an instance at run-time.
944
945Classes are themselves represented as instances -- called \emph{class
946 objects} -- in the running program. Being instances, they have a class,
947called the \emph{metaclass}. The metaclass defines the structure and
948behaviour of the class object.
949
950The predefined class @|SodClass| is the default metaclass for new classes.
951@|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is
952its own metaclass.
953
954\subsection{Items and inheritance}
955
956A class definition also declares \emph{slots}, \emph{messages},
957\emph{initializers} and \emph{methods} -- collectively referred to as
958\emph{items}. In addition to the items declared in the class definition --
959the class's \emph{direct items} -- a class also \emph{inherits} items from
960its superclasses.
961
962The precise rules for item inheritance vary according to the kinds of items
963involved.
964
965Some object systems have a notion of `repeated inheritance': if there are
966multiple paths in the superclass graph from a class to one of its
967superclasses then items defined in that superclass may appear duplicated in
968the subclass. Sod does not have this notion.
969
970\subsubsection{Slots}
971A \emph{slot} is a unit of state. In other object systems, slots may be
972called `fields', `member variables', or `instance variables'.
973
974A slot has a \emph{name} and a \emph{type}. The name serves only to
975distinguish the slot from other direct slots defined by the same class. A
976class inherits all of its proper superclasses' slots. Slots inherited from
977superclasses do not conflict with each other or with direct slots, even if
978they have the same names.
979
980At run-time, each instance of the class holds a separate value for each slot,
981whether direct or inherited. Changing the value of an instance's slot
982doesn't affect other instances.
983
984\subsubsection{Initializers}
985Mumble.
986
987\subsubsection{Messages}
A \emph{message} is the stimulus for behaviour. In Sod, a class must define,
statically, the name and format of the messages it is able to receive and the
values it will return in reply. In this respect, a message is similar to an
`abstract member function' or `interface member function' in other object
systems.
993
994Like slots, a message has a \emph{name} and a \emph{type}. Again, the name
995serves only to distinguish the message from other direct messages defined by
996the same class. Messages inherited from superclasses do not conflict with
997each other or with direct messages, even if they have the same name.
998
At run-time, one sends a message to an instance by invoking a function
obtained from the instance's \emph{vtable}; see \xref{sec:fixme-vtable}.
1001
1002\subsubsection{Methods}
1003A \emph{method} is a unit of behaviour. In other object systems, methods may
1004be called `member functions'.
1005
1006A method is associated with a message. When a message is received by an
1007instance, all of the methods associated with that message on the instance's
1008class or any of its superclasses are \emph{applicable}. The details of how
1009the applicable methods are invoked are described fully in
1010\xref{sec:fixme-method-combination}.
1011
1012\subsection{Chains and instance layout}
1013
\include{sod-backg}
\include{sod-protocol}

\end{document}
1018\f
1019%%% Local variables:
1020%%% mode: LaTeX
1021%%% TeX-PDF-mode: t
1022%%% End: