doc/: Work in progress.
[sod] / doc / sod.tex
1 \documentclass[noarticle]{strayman}
2
3 \usepackage[T1]{fontenc}
4 \usepackage[utf8]{inputenc}
5 \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
6 \usepackage{tikz}
7 \usepackage{syntax}
8 \usepackage{sverb}
9 \usepackage{mdwtab}
10 \usepackage[mdwmargin]{mdwthm}
11 \usepackage{amssymb}
12 \usepackage{footnote}
13 \usepackage{at}
14 \usepackage{mdwref}
15
16 \title{A Sensible Object Design for C}
17 \author{Mark Wooding}
18
19 \makeatletter
20
21 \errorcontextlines999
22
23 \def\syntleft{\normalfont\itshape}
24 \let\syntright\empty
25
26 \let\codeface\sffamily
27
28 \def\ulitleft{\normalfont\codeface}
29 \let\ulitright\empty
30
31 \let\listingsize\relax
32
33 \let\epsilon\varepsilon
34
35 \atdef <#1>{\synt{#1}\@scripts}
36 \atdef "#1"{\lit*{#1}\@scripts}
37 \atdef `#1'{\lit{#1}\@scripts}
38 \atdef |#1|{\textsf{#1}\@scripts}
39 \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i}
40 \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa%
41 \expandafter\@firstoftwo\expandafter\@scripts%
42 \else\@tempa\expandafter\@scripts\fi}
43 \atdef [{\dbl@maybe[}
44 \atdef ]{\dbl@maybe]}
45 \atdef {{\m@maybe\{\@scripts}
46 \atdef }{\m@maybe\}\@scripts}
47 \atdef ({\m@maybe(\@scripts}
48 \atdef ){\m@maybe)\@scripts}
49 \atdef !{\m@maybe|\@scripts}
50 \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad}
51 \let\m@maybe@end\relax
52 \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi}
53 \def\@scripts{\futurelet\@ch\@scripts@i}
54
55 \def\chain#1#2{\mathsf{ch}_{#1}(#2)}
56 \def\chainhead#1#2{\mathsf{hd}_{#1}(#2)}
57 \def\chaintail#1#2{\mathsf{tl}_{#1}(#2)}
58
59 \let\implies\Rightarrow
60
61 \atdef ;#1\\{\normalfont\itshape;#1\\}
62 \let\@@grammar\grammar
63 \def\grammar{\def\textbar{\hbox{$|$}}\@@grammar}
64
65 \begingroup\lccode`\~=`\_\lowercase{\endgroup
66 \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
67 \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}}
68 \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts}
69
70 \def\Cplusplus{C\kern-\p@++}
71 \def\Csharp{C\#}
72 \def\man#1#2{\textbf{#1}(#2)}
73
74 \begingroup\lccode`\~=`\
75 \lowercase{
76 \endgroup
77 \def\prog{%
78 \codeface%
79 \quote%
80 \let\old@nl\\%
81 \obeylines%
82 \tabbing%
83 \global\let~\\%
84 \global\let\\\textbackslash%
85 }
86 \def\endprog{%
87 \endtabbing%
88 \global\let\\\old@nl%
89 \endquote%
90 }}
91
92 \newenvironment{boxy}[1][\q@]{%
93 \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex%
94 \medskip%
95 \vbox\bgroup\hrule\hbox\bgroup\vrule%
96 \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@%
97 \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi%
98 }{%
99 \endminipage\hskip1ex\egroup\vskip1ex\egroup%
100 \vrule\egroup\hrule\egroup%
101 \medskip%
102 }
103
104 \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}}
105 \def\describecategoryname#1{%
106 \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname%
107 \ifx\@tempa\relax#1\else\@tempa\fi}
108 \definedescribecategory{fun}{function}
109 \definedescribecategory{gf}{generic function}
110 \definedescribecategory{var}{variable}
111 \definedescribecategory{const}{constant}
112 \definedescribecategory{meth}{primary method}
113 \definedescribecategory{ar-meth}{around-method}
114 \definedescribecategory{be-meth}{before-method}
115 \definedescribecategory{af-meth}{after-method}
116 \definedescribecategory{cls}{class}
117 \definedescribecategory{ty}{type}
118 \definedescribecategory{mac}{macro}
119
120 \def\q@{\q@}
121 \newenvironment{describe}[3][\q@]{%
122 \normalfont%
123 \par\goodbreak%
124 \vspace{\bigskipamount}%
125 \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}%
126 \dimen@\linewidth\advance\dimen@-\wd\z@%
127 \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}%
128 \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \\\fi%
129 \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}%
130 @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}%
131 \noindent\csname tabular*\expandafter\endcsname\@temp%
132 \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular%
133 % \@afterheading%
134 \list{}{\rightmargin\z@}\item%
135 }{%
136 \endlist%
137 }
138
139 \def\push{\quad\=\+\kill}
140
141 \begin{document}
142
143 \maketitle
144
145 \include{sod-tut}
146
147 %%%--------------------------------------------------------------------------
148 \chapter{Internals}
149
150 \section{Generated names}
151
152 The generated names for functions and objects related to a class are
153 constructed systematically so as not to interfere with each other. The rules
154 on class, slot and message naming exist so as to ensure that the generated
155 names don't collide with each other.
156
157 The following notation is used in this section.
158 \begin{description}
159 \item[@<class>] The full name of the `focus' class: the one for which we are
160 generating name.
161 \item[@<super-nick>] The nickname of a superclass.
162 \item[@<head-nick>] The nickname of the chain-head class of the chain
163 in question.
164 \end{description}
165
166 \subsection{Instance layout}
167
168 %%%--------------------------------------------------------------------------
169 \section{Syntax}
170 \label{sec:syntax}
171
172 Fortunately, Sod is syntactically quite simple. I've used a little slightly
173 unusual notation in order to make the presentation easier to read. For any
174 nonterminal $x$:
175 \begin{itemize}
176 \item $\epsilon$ denotes the empty nonterminal:
177 \begin{quote}
178 $\epsilon$ ::=
179 \end{quote}
180 \item @[$x$@] means an optional $x$:
181 \begin{quote}
182 \syntax{@[$x$@] ::= $\epsilon$ @! $x$}
183 \end{quote}
184 \item $x^*$ means a sequence of zero or more $x$s:
185 \begin{quote}
186 \syntax{$x^*$ ::= $\epsilon$ @! $x^*$ $x$}
187 \end{quote}
188 \item $x^+$ means a sequence of one or more $x$s:
189 \begin{quote}
190 \syntax{$x^+$ ::= $x$ $x^*$}
191 \end{quote}
192 \item $x$@<-list> means a sequence of one or more $x$s separated
193 by commas:
194 \begin{quote}
195 \syntax{$x$<-list> ::= $x$ @! $x$<-list> "," $x$}
196 \end{quote}
197 \end{itemize}
198
199 \subsection{Lexical syntax}
200 \label{sec:syntax.lex}
201
202 Whitespace and comments are discarded. The remaining characters are
203 collected into tokens according to the following syntax.
204
205 \begin{grammar}
206 <token> ::= <identifier>
207 \alt <string-literal>
208 \alt <char-literal>
209 \alt <integer-literal>
210 \alt <punctuation>
211 \end{grammar}
212
213 This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
214 munch} rule: at each stage we take the longest sequence of characters which
215 could be a token.
216
217 \subsubsection{Identifiers} \label{sec:syntax.lex.id}
218
219 \begin{grammar}
220 <identifier> ::= <id-start-char> @<id-body-char>^*
221
222 <id-start-char> ::= <alpha-char> | "_"
223
224 <id-body-char> ::= <id-start-char> @! <digit-char>
225
226 <alpha-char> ::= "A" | "B" | \dots\ | "Z"
227 \alt "a" | "b" | \dots\ | "z"
228 \alt <extended-alpha-char>
229
230 <digit-char> ::= "0" | <nonzero-digit-char>
231
232 <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
233 \end{grammar}
234
235 The precise definition of @<alpha-char> is left to the function
236 \textsf{alpha-char-p} in the hosting Lisp system. For portability,
237 programmers are encouraged to limit themselves to the standard ASCII letters.
238
239 There are no reserved words at the lexical level, but the higher-level syntax
240 recognizes certain identifiers as \emph{keywords} in some contexts. There is
241 also an ambiguity (inherited from C) in the declaration syntax which is
242 settled by distinguishing type names from other identifiers at a lexical
243 level.
244
245 \subsubsection{String and character literals} \label{sec:syntax.lex.string}
246
247 \begin{grammar}
248 <string-literal> ::= "\"" @<string-literal-char>^* "\""
249
250 <char-literal> ::= "'" <char-literal-char> "'"
251
252 <string-literal-char> ::= any character other than "\\" or "\""
253 \alt "\\" <char>
254
255 <char-literal-char> ::= any character other than "\\" or "'"
256 \alt "\\" <char>
257
258 <char> ::= any single character
259 \end{grammar}
260
261 The syntax for string and character literals differs from~C. In particular,
262 escape sequences such as @`\textbackslash n' are not recognized. The use
263 of string and character literals in Sod, outside of C~fragments, is limited,
264 and the simple syntax seems adequate. For the sake of future compatibility,
265 the use of character sequences which resemble C escape sequences is
266 discouraged.
267
268 \subsubsection{Integer literals} \label{sec:syntax.lex.int}
269
270 \begin{grammar}
271 <integer-literal> ::= <decimal-integer>
272 \alt <binary-integer>
273 \alt <octal-integer>
274 \alt <hex-integer>
275
276 <decimal-integer> ::= <nonzero-digit-char> @<digit-char>^*
277
278 <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
279
280 <binary-digit-char> ::= "0" | "1"
281
282 <octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
283
284 <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
285
286 <hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
287
288 <hex-digit-char> ::= <digit-char>
289 \alt "A" | "B" | "C" | "D" | "E" | "F"
290 \alt "a" | "b" | "c" | "d" | "e" | "f"
291 \end{grammar}
292
293 Sod understands only integers, not floating-point numbers; its integer syntax
294 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
295 binary. However, length and signedness indicators are not permitted.
296
297 \subsubsection{Punctuation} \label{sec:syntax.lex.punct}
298
299 \begin{grammar}
300 <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
301 \end{grammar}
302
303 \subsubsection{Comments} \label{sec:lex-comment}
304
305 \begin{grammar}
306 <comment> ::= <block-comment>
307 \alt <line-comment>
308
309 <block-comment> ::=
310 "/*"
311 @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
312 @<star>^*
313 "*/"
314
315 <star> ::= "*"
316
317 <not-star> ::= any character other than "*"
318
319 <not-star-or-slash> ::= any character other than "*" or "/"
320
321 <line-comment> ::= "//" @<not-newline>^* <newline>
322
323 <newline> ::= a newline character
324
325 <not-newline> ::= any character other than newline
326 \end{grammar}
327
328 Comments are exactly as in C99: both traditional block comments `\texttt{/*}
329 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
330 permitted and ignored.
331
332 \subsection{Special nonterminals}
333 \label{sec:special-nonterminals}
334
335 Aside from the lexical syntax presented above (\xref{sec:lexical-syntax}),
336 two special nonterminals occur in the module syntax.
337
338 \subsubsection{S-expressions} \label{sec:syntax-sexp}
339
340 \begin{grammar}
341 <s-expression> ::= an S-expression, as parsed by the Lisp reader
342 \end{grammar}
343
344 When an S-expression is expected, the Sod parser simply calls the host Lisp
345 system's \textsf{read} function. Sod modules are permitted to modify the
346 read table to extend the S-expression syntax.
347
348 S-expressions are self-delimiting, so no end-marker is needed.
349
350 \subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
351
352 \begin{grammar}
353 <c-fragment> ::= a sequence of C tokens, with matching brackets
354 \end{grammar}
355
356 Sequences of C code are simply stored and written to the output unchanged
357 during translation. They are read using a simple scanner which nonetheless
358 understands C comments and string and character literals.
359
360 A C fragment is terminated by one of a small number of delimiter characters
361 determined by the immediately surrounding context -- usually a closing brace
362 or bracket. The first such delimiter character which is not enclosed in
363 brackets, braces or parenthesis ends the fragment.
364
365 \subsection{Module syntax} \label{sec:syntax-module}
366
367 \begin{grammar}
368 <module> ::= @<definition>^*
369
370 <definition> ::= <import-definition>
371 \alt <load-definition>
372 \alt <lisp-definition>
373 \alt <code-definition>
374 \alt <typename-definition>
375 \alt <class-definition>
376 \end{grammar}
377
378 A module is the top-level syntactic item. A module consists of a sequence of
379 definitions.
380
381 \subsection{Simple definitions} \label{sec:syntax.defs}
382
383 \subsubsection{Importing modules} \label{sec:syntax.defs.import}
384
385 \begin{grammar}
386 <import-definition> ::= "import" <string> ";"
387 \end{grammar}
388
389 The module named @<string> is processed and its definitions made available.
390
391 A search is made for a module source file as follows.
392 \begin{itemize}
393 \item The module name @<string> is converted into a filename by appending
394 @`.sod', if it has no extension already.\footnote{%
395 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
396 :type "SOD" :case :common))}, so exactly what this means varies
397 according to the host system.} %
398 \item The file is looked for relative to the directory containing the
399 importing module.
400 \item If that fails, then the file is looked for in each directory on the
401 module search path in turn.
402 \item If the file still isn't found, an error is reported and the import
403 fails.
404 \end{itemize}
405 At this point, if the file has previously been imported, nothing further
406 happens.\footnote{%
407 This check is done using \textsf{truename}, so it should see through simple
408 tricks like symbolic links. However, it may be confused by fancy things
409 like bind mounts and so on.} %
410
411 Recursive imports, either direct or indirect, are an error.
412
413 \subsubsection{Loading extensions} \label{sec:syntax.defs.load}
414
415 \begin{grammar}
416 <load-definition> ::= "load" <string> ";"
417 \end{grammar}
418
419 The Lisp file named @<string> is loaded and evaluated.
420
421 A search is made for a Lisp source file as follows.
422 \begin{itemize}
423 \item The name @<string> is converted into a filename by appending @`.lisp',
424 if it has no extension already.\footnote{%
425 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
426 :type "LISP" :case :common))}, so exactly what this means varies
427 according to the host system.} %
428 \item A search is then made in the same manner as for module imports
429 (\xref{sec:syntax-module}).
430 \end{itemize}
431 If the file is found, it is loaded using the host Lisp's \textsf{load}
432 function.
433
434 Note that Sod doesn't attempt to compile Lisp files, or even to look for
435 existing compiled files. The right way to package a substantial extension to
436 the Sod translator is to provide the extension as a standard ASDF system (or
437 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
438 something like
439 \begin{quote}
440 \textsf{(asdf:load-system :foo-extension)}
441 \end{quote}
442 which will arrange for the extension to be compiled if necessary.
443
444 (This approach means that the language doesn't need to depend on any
445 particular system definition facility. It's bad enough already that it
446 depends on Common Lisp.)
447
448 \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
449
450 \begin{grammar}
451 <lisp-definition> ::= "lisp" <s-expression> ";"
452 \end{grammar}
453
454 The @<s-expression> is evaluated immediately. It can do anything it likes.
455
456 \textbf{Warning!} This means that hostile Sod modules are a security hazard.
457 Lisp code can read and write files, start other programs, and make network
458 connections. Don't install Sod modules from sources that you don't
459 trust.\footnote{%
460 Presumably you were going to run the corresponding code at some point, so
461 this isn't as unusually scary as it sounds. But please be careful.} %
462
463 \subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
464
465 \begin{grammar}
466 <typename-definition> ::=
467 "typename" <identifier-list> ";"
468 \end{grammar}
469
470 Each @<identifier> is declared as naming a C type. This is important because
471 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
472 done by distinguishing type names from other identifiers.
473
474 Don't declare class names using @"typename"; use @"class" forward
475 declarations instead.
476
477 \subsection{Literal code} \label{sec:syntax-code}
478
479 \begin{grammar}
480 <code-definition> ::=
481 "code" <identifier> ":" <identifier> @[<constraints>@]
482 "{" <c-fragment> "}"
483
484 <constraints> ::= "[" <constraint-list> "]"
485
486 <constraint> ::= @<identifier>^+
487 \end{grammar}
488
489 The @<c-fragment> will be output unchanged to one of the output files.
490
491 The first @<identifier> is the symbolic name of an output file. Predefined
492 output file names are @"c" and @"h", which are the implementation code and
493 header file respectively; other output files can be defined by extensions.
494
495 The second @<identifier> provides a name for the output item. Several C
496 fragments can have the same name: they will be concatenated together in the
497 order in which they were encountered.
498
499 The @<constraints> provide a means for specifying where in the output file
500 the output item should appear. (Note the two kinds of square brackets shown
501 in the syntax: square brackets must appear around the constraints if they are
502 present, but that they may be omitted.) Each comma-separated @<constraint>
503 is a sequence of identifiers naming output items, and indicates that the
504 output items must appear in the order given -- though the translator is free
505 to insert additional items in between them. (The particular output items
506 needn't be defined already -- indeed, they needn't be defined ever.)
507
508 There is a predefined output item @"includes" in both the @"c" and @"h"
509 output files which is a suitable place for inserting @"\#include"
510 preprocessor directives in order to declare types and functions for use
511 elsewhere in the generated output files.
512
513 \subsection{Property sets} \label{sec:syntax.propset}
514
515 \begin{grammar}
516 <properties> ::= "[" <property-list> "]"
517
518 <property> ::= <identifier> "=" <expression>
519 \end{grammar}
520
521 Property sets are a means for associating miscellaneous information with
522 classes and related items. By using property sets, additional information
523 can be passed to extensions without the need to introduce idiosyncratic
524 syntax.
525
526 A property has a name, given as an @<identifier>, and a value computed by
527 evaluating an @<expression>. The value can be one of a number of types,
528 though the only operators currently defined act on integer values only.
529
530 \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
531
532 \begin{grammar}
533 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
534
535 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
536
537 <factor> ::= <primary> | "+" <factor> | "-" <factor>
538
539 <primary> ::=
540 <integer-literal> | <string-literal> | <char-literal> | <identifier>
541 \alt "?" <s-expression>
542 \alt "(" <expression> ")"
543 \end{grammar}
544
545 The arithmetic expression syntax is simple and standard; there are currently
546 no bitwise, logical, or comparison operators.
547
548 A @<primary> expression may be a literal or an identifier. Note that
549 identifiers stand for themselves: they \emph{do not} denote values. For more
550 fancy expressions, the syntax
551 \begin{quote}
552 @"?" @<s-expression>
553 \end{quote}
554 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
555 function.
556 %%% FIXME crossref to extension docs
557
558 \subsection{C types} \label{sec:syntax.c-types}
559
560 Sod's syntax for C types closely mirrors the standard C syntax. A C type has
561 two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
562 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
563 `implicit @"int"' is forbidden), and storage-class specifiers are not
564 recognized.
565
566 \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
567
568 \begin{grammar}
569 <declaration-specifier> ::= <type-name>
570 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
571 \alt "void" | "char" | "int" | "float" | "double"
572 \alt "short" | "long"
573 \alt "signed" | "unsigned"
574 \alt <qualifier>
575
576 <qualifier> ::= "const" | "volatile" | "restrict"
577
578 <type-name> ::= <identifier>
579 \end{grammar}
580
581 A @<type-name> is an identifier which has been declared as being a type name,
582 using the @"typename" or @"class" definitions.
583
584 Declaration specifiers may appear in any order. However, not all
585 combinations are permitted. A declaration specifier must consist of zero or
586 more @<qualifiers>, and one of the following, up to reordering.
587 \begin{itemize}
588 \item @<type-name>
589 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
590 \item @"void"
591 \item @"char", @"unsigned char", @"signed char"
592 \item @"short", @"unsigned short", @"signed short"
593 \item @"short int", @"unsigned short int", @"signed short int"
594 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
595 \item @"long", @"unsigned long", @"signed long"
596 \item @"long int", @"unsigned long int", @"signed long int"
597 \item @"long long", @"unsigned long long", @"signed long long"
598 \item @"long long int", @"unsigned long long int", @"signed long long int"
599 \item @"float", @"double", @"long double"
600 \end{itemize}
601 All of these have their usual C meanings.
602
603 \subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
604
605 \begin{grammar}
606 <declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$
607
608 <primary-declarator>$[k]$ ::= $k$
609 \alt "(" <primary-declarator>$[k]$ ")"
610 \alt <primary-declarator>$[k]$ @<declarator-suffix>^*
611
612 <pointer> ::= "*" @<qualifier>^*
613
614 <declarator-suffix> ::= "[" <c-fragment> "]"
615 \alt "(" <arguments> ")"
616
617 <arguments> ::= $\epsilon$ | "..."
618 \alt <argument-list> @["," "..."@]
619
620 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
621
622 <argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@]
623
624 <simple-declarator> ::= <declarator>@[<identifier>@]
625
626 <dotted-name> ::= <identifier> "." <identifier>
627
628 <dotted-declarator> ::= <declarator>@[<dotted-name>@]
629 \end{grammar}
630
631 The declarator syntax is taken from C, but with some differences.
632 \begin{itemize}
633 \item Array dimensions are uninterpreted @<c-fragments>, terminated by a
634 closing square bracket. This allows array dimensions to contain arbitrary
635 constant expressions.
636 \item A declarator may have either a single @<identifier> at its centre or a
637 pair of @<identifier>s separated by a @`.'; this is used to refer to
638 slots or messages defined in superclasses.
639 \end{itemize}
640 The remaining differences are (I hope) a matter of presentation rather than
641 substance.
642
643 \subsection{Defining classes} \label{sec:syntax.class}
644
645 \begin{grammar}
646 <class-definition> ::= <class-forward-declaration>
647 \alt <full-class-definition>
648 \end{grammar}
649
650 \subsubsection{Forward declarations} \label{sec:class.class.forward}
651
652 \begin{grammar}
653 <class-forward-declaration> ::= "class" <identifier> ";"
654 \end{grammar}
655
656 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
657 to name a class which is currently undefined. Forward declarations are
658 necessary in order to resolve certain kinds of circularity. For example,
659 \begin{listing}
660 class Sub;
661
662 class Super : SodObject {
663 Sub *sub;
664 };
665
666 class Sub : Super {
667 /* ... */
668 };
669 \end{listing}
670
671 \subsubsection{Full class definitions} \label{sec:class.class.full}
672
673 \begin{grammar}
674 <full-class-definition> ::=
675 @[<properties>@]
676 "class" <identifier> ":" <identifier-list>
677 "{" @<class-item>^* "}"
678
679 <class-item> ::= <slot-item> ";"
680 \alt <message-item>
681 \alt <method-item>
682 \alt <initializer-item> ";"
683 \end{grammar}
684
685 A full class definition provides a complete description of a class.
686
687 The first @<identifier> gives the name of the class. It is an error to
688 give the name of an existing class (other than a forward-referenced class),
689 or an existing type name. It is conventional to give classes `MixedCase'
690 names, to distinguish them from other kinds of identifiers.
691
692 The @<identifier-list> names the direct superclasses for the new class. It
693 is an error if any of these @<identifier>s does not name a defined class.
694
695 The @<properties> provide additional information. The standard class
696 properties are as follows.
697 \begin{description}
698 \item[@"lisp_class"] The name of the Lisp class to use within the translator
699 to represent this class. The property value must be an identifier; the
700 default is @"sod_class". Extensions may define classes with additional
701 behaviour, and may recognize additional class properties.
702 \item[@"metaclass"] The name of the Sod metaclass for this class. In the
703 generated code, a class is itself an instance of another class -- its
704 \emph{metaclass}. The metaclass defines which slots the class will have,
705 which messages it will respond to, and what its behaviour will be when it
706 receives them. The property value must be an identifier naming a defined
707 subclass of @"SodClass". The default metaclass is @"SodClass".
708 %%% FIXME xref to theory
709 \item[@"nick"] A nickname for the class, to be used to distinguish it from
710 other classes in various limited contexts. The property value must be an
711 identifier; the default is constructed by forcing the class name to
712 lower-case.
713 \end{description}
714
715 The class body consists of a sequence of @<class-item>s enclosed in braces.
716 These items are discussed on the following sections.
717
718 \subsubsection{Slot items} \label{sec:sntax.class.slot}
719
720 \begin{grammar}
721 <slot-item> ::=
722 @[<properties>@]
723 @<declaration-specifier>^+ <init-declarator-list>
724
725 <init-declarator> ::= <declarator> @["=" <initializer>@]
726 \end{grammar}
727
728 A @<slot-item> defines one or more slots. All instances of the class and any
729 subclass will contain these slot, with the names and types given by the
730 @<declaration-specifiers> and the @<declarators>. Slot declarators may not
731 contain qualified identifiers.
732
733 It is not possible to declare a slot with function type: such an item is
734 interpreted as being a @<message-item> or @<method-item>. Pointers to
735 functions are fine.
736
737 An @<initializer>, if present, is treated as if a separate
738 @<initializer-item> containing the slot name and initializer were present.
739 For example,
740 \begin{listing}
741 [nick = eg]
742 class Example : Super {
743 int foo = 17;
744 };
745 \end{listing}
746 means the same as
747 \begin{listing}
748 [nick = eg]
749 class Example : Super {
750 int foo;
751 eg.foo = 17;
752 };
753 \end{listing}
754
755 \subsubsection{Initializer items} \label{sec:syntax.class.init}
756
757 \begin{grammar}
758 <initializer-item> ::= @["class"@] <slot-initializer-list>
759
760 <slot-initializer> ::= <qualified-identifier> "=" <initializer>
761
762 <initializer> :: "{" <c-fragment> "}" | <c-fragment>
763 \end{grammar}
764
765 An @<initializer-item> provides an initial value for one or more slots. If
766 prefixed by @"class", then the initial values are for class slots (i.e.,
767 slots of the class object itself); otherwise they are for instance slots.
768
769 The first component of the @<qualified-identifier> must be the nickname of
770 one of the class's superclasses (including itself); the second must be the
771 name of a slot defined in that superclass.
772
773 The initializer has one of two forms.
774 \begin{itemize}
775 \item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
776 This is suitable for initializing structure, union or array slots.
777 \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
778 initializer, and continues until the next @`,' or @`;' which is not within
779 nested brackets. Bare initializers are suitable for initializing scalar
780 slots, such as pointers or integers, and strings.
781 \end{itemize}
782
783 \subsubsection{Message items} \label{sec:syntax.class.message}
784
785 \begin{grammar}
786 <message-item> ::=
787 @[<properties>@]
788 @<declaration-specifier>^+ <declarator> @[<method-body>@]
789 \end{grammar}
790
791 \subsubsection{Method items} \label{sec:syntax.class.method}
792
793 \begin{grammar}
794 <method-item> ::=
795 @[<properties>@]
796 @<declaration-specifier>^+ <declarator> <method-body>
797
798 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
799 \end{grammar}
800
801 %%%--------------------------------------------------------------------------
802 \section{Class objects}
803
804 \begin{listing}
805 typedef struct SodClass__ichain_obj SodClass;
806
807 struct sod_chain {
808 size_t n_classes; /* Number of classes in chain */
809 const SodClass *const *classes; /* Vector of classes, head first */
810 size_t off_ichain; /* Offset of ichain from instance base */
811 const struct sod_vtable *vt; /* Vtable pointer for chain */
812 size_t ichainsz; /* Size of the ichain structure */
813 };
814
815 struct sod_vtable {
816 SodClass *_class; /* Pointer to instance's class */
817 size_t _base; /* Offset to instance base */
818 };
819
820 struct SodClass__islots {
821
822 /* Basic information */
823 const char *name; /* The class's name as a string */
824 const char *nick; /* The nickname as a string */
825
826 /* Instance allocation and initialization */
827 size_t instsz; /* Instance layout size in bytes */
828 void *(*imprint)(void *); /* Stamp instance with vtable ptrs */
829 void *(*init)(void *); /* Initialize instance */
830
831 /* Superclass structure */
832 size_t n_supers; /* Number of direct superclasses */
833 const SodClass *const *supers; /* Vector of direct superclasses */
834 size_t n_cpl; /* Length of class precedence list */
835 const SodClass *const *cpl; /* Vector for class precedence list */
836
837 /* Chain structure */
838 const SodClass *link; /* Link to next class in chain */
839 const SodClass *head; /* Pointer to head of chain */
840 size_t level; /* Index of class in its chain */
841 size_t n_chains; /* Number of superclass chains */
842 const sod_chain *chains; /* Vector of chain structures */
843
844 /* Layout */
845 size_t off_islots; /* Offset of islots from ichain base */
846 size_t islotsz; /* Size of instance slots */
847 };
848
849 struct SodClass__ichain_obj {
850 const SodClass__vt_obj *_vt;
851 struct SodClass__islots cls;
852 };
853
854 struct sod_instance {
855 struct sod_vtable *_vt;
856 };
857 \end{listing}
858
859 \begin{listing}
860 void *sod_convert(const SodClass *cls, const void *obj)
861 {
862 const struct sod_instance *inst = obj;
863 const SodClass *real = inst->_vt->_cls;
864 const struct sod_chain *chain;
865 size_t i, index;
866
867 for (i = 0; i < real->cls.n_chains; i++) {
868 chain = &real->cls.chains[i];
869 if (chain->classes[0] == cls->cls.head) {
870 index = cls->cls.index;
871 if (index < chain->n_classes && chain->classes[index] == cls)
872 return ((char *)cls - inst->_vt._base + chain->off_ichain);
873 else
874 return (0);
875 }
876 }
877 return (0);
878 }
879 \end{listing}
880
881 %%%--------------------------------------------------------------------------
882 \section{Classes}
883 \label{sec:class}
884
885 \subsection{Classes and superclasses} \label{sec:class.defs}
886
887 A @<full-class-definition> must list one or more existing classes to be the
888 \emph{direct superclasses} for the new class being defined. We make the
889 following definitions.
890 \begin{itemize}
891 \item The \emph{superclasses} of a class consist of the class itself together
892 with the superclasses of its direct superclasses.
893 \item The \emph{proper superclasses} of a class are its superclasses other
894 than itself.
895 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
896 \emph{subclass} of $C$.
897 \end{itemize}
898 The predefined class @|SodObject| has no direct superclasses; it is unique in
899 this respect. All classes are subclasses of @|SodObject|.
900
901 \subsection{The class precedence list} \label{sec:class.cpl}
902
903 Let $C$ be a class. The superclasses of $C$ form a directed graph, with an
904 edge from each class to each of its direct superclasses. This is the
905 \emph{superclass graph of $C$}.
906
907 In order to resolve inheritance of items, we define a \emph{class precedence
908 list} (or CPL) for each class, which imposes a total order on that class's
909 superclasses. The default algorithm for computing the CPL is the \emph{C3}
910 algorithm \cite{fixme-c3}, though extensions may implement other algorithms.
911
912 The default algorithm works as follows. Let $C$ be the class whose CPL we
913 are to compute. Let $X$ and $Y$ be two of $C$'s superclasses.
914 \begin{itemize}
915 \item $C$ must appear first in the CPL.
916 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
917 superclasses, then $X$ appears before $Y$ in the $C$'s CPL.
918 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
919 of $X$ and $Y$ has a subclass which appears further left in the list of
920 $C$'s direct superclasses will appear earlier in the CPL.
921 \end{itemize}
922 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
923 superclasses of the same direct superclass of $C$ then that direct
924 superclass's CPL will order $X$ and $Y$.
925
926 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
927 $X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from
928 context then we omit it, saying simply that $X$ is more specific than $Y$.
929
930 \subsection{Instances and metaclasses} \label{sec:class.meta}
931
932 A class defines the structure and behaviour of its \emph{instances}: run-time
933 objects created (possibly) dynamically. An instance is an instance of only
934 one class, though structurally it may be used in place of an instance of any
935 of that class's superclasses. It is possible, with care, to change the class
936 of an instance at run-time.
937
938 Classes are themselves represented as instances -- called \emph{class
939 objects} -- in the running program. Being instances, they have a class,
940 called the \emph{metaclass}. The metaclass defines the structure and
941 behaviour of the class object.
942
943 The predefined class @|SodClass| is the default metaclass for new classes.
944 @|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is
945 its own metaclass.
946
947 To make matters more complicated, Sod has \emph{two} distinct metalevels: as
948 well as the runtime metalevel, as discussed above, there's a compile-time
949 metalevel hosted in the Sod translator. Since Sod is written in Common Lisp,
950 a Sod class's compile-time metaclass is a CLOS class. The usual compile-time
951 metaclass is @|sod-class|. The compile-time metalevel is the subject of
952 \xref{ch:api}.
953
954 \subsection{Items and inheritance} \label{sec:class.inherit}
955
956 A class definition also declares \emph{slots}, \emph{messages},
957 \emph{initializers} and \emph{methods} -- collectively referred to as
958 \emph{items}. In addition to the items declared in the class definition --
959 the class's \emph{direct items} -- a class also \emph{inherits} items from
960 its superclasses.
961
962 The precise rules for item inheritance vary according to the kinds of items
963 involved.
964
965 Some object systems have a notion of `repeated inheritance': if there are
966 multiple paths in the superclass graph from a class to one of its
967 superclasses then items defined in that superclass may appear duplicated in
968 the subclass. Sod does not have this notion.
969
970 \subsubsection{Slots} \label{sec:class.inherit.slots}
971 A \emph{slot} is a unit of state. In other object systems, slots may be
972 called `fields', `member variables', or `instance variables'.
973
974 A slot has a \emph{name} and a \emph{type}. The name serves only to
975 distinguish the slot from other direct slots defined by the same class. A
976 class inherits all of its proper superclasses' slots. Slots inherited from
977 superclasses do not conflict with each other or with direct slots, even if
978 they have the same names.
979
980 At run-time, each instance of the class holds a separate value for each slot,
981 whether direct or inherited. Changing the value of an instance's slot
982 doesn't affect other instances.
983
984 \subsubsection{Initializers} \label{sec:class.inherit.init}
985 Mumble.
986
987 \subsubsection{Messages} \label{sec:class.inherit.messages}
988 A \emph{message} is the stimulus for behaviour. In Sod, a class must define,
989 statically, the name and format of the messages it is able to receive and the
990 values it will return in reply. In this respect, a message is similar to
991 `abstract member functions' or `interface member functions' in other object
992 systems.
993
994 Like slots, a message has a \emph{name} and a \emph{type}. Again, the name
995 serves only to distinguish the message from other direct messages defined by
996 the same class. Messages inherited from superclasses do not conflict with
997 each other or with direct messages, even if they have the same name.
998
999 At run-time, one sends a message to an instance by invoking a function
1000 obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
1001
1002 \subsubsection{Methods} \label{sec:class.inherit.methods}
1003 A \emph{method} is a unit of behaviour. In other object systems, methods may
1004 be called `member functions'.
1005
1006 A method is associated with a message. When a message is received by an
1007 instance, all of the methods associated with that message on the instance's
1008 class or any of its superclasses are \emph{applicable}. The details of how
1009 the applicable methods are invoked are described fully in
1010 \xref{sec:fixme-method-combination}.
1011
1012 \subsection{Chains and instance layout} \label{sec:class.layout}
1013
1014 C is a rather low-level language, and in particular it exposes details of the
1015 way data is laid out in memory. Since an instance of a class~$C$ should be
1016 (at least in principle) usable anywhere an instance of some superclass $B
1017 \succeq C$ is expected, this implies that an instance of the subclass $C$
1018 needs to contain within it a complete instance of each superclass $B$, laid
1019 out according to the rules of instances of $B$, so that if we have (the
1020 address of) an instance of $C$, we can easily construct a pointer to a thing
1021 which looks like an instance of $B$ contained within it.
1022
1023 Specifically, the information we need to retain for an instance of a
1024 class~$C$ is:
1025 \begin{itemize}
1026 \item the values of each of the slots defined by $C$, including those defined
1027 by superclasses;
1028 \item information which will let us convert a pointer to $C$ into a pointer
1029 to any superclass $B \succeq C$;
1030 \item information which will let us call the appropriate effective method for
1031 each message defined by $C$, including those defined by superclasses; and
1032 \item some additional meta-level information, such as how to find the class
1033 object for $C$ given (the address of) one of its instances.
1034 \end{itemize}
1035
1036 Observe that, while each distinct instance must clearly have its own storage
1037 for slots, all instances of $C$ can share a single copy of the remaining
1038 information. The individual instance only needs to keep a pointer to this
1039 shared table, which, inspired by the similar structure in many \Cplusplus\
1040 ABIs, are called a \emph{vtable}.
1041
1042 The easiest approach would be to decide that instances of $C$ are exactly
1043 like instances of $B$, only with extra space at the end for the extra slots
1044 which $C$ defines over and above those already existing in $B$. Conversion
1045 is then trivial: a pointer to an instance of $C$ can be converted to a
1046 pointer to an instance of some superclass $B$ simply by casting. Even though
1047 the root class @|SodObject| doesn't have any slots at all, its instances will
1048 still need a vtable so that you can find its class object: the address of the
1049 vtable therefore needs to be at the very start of the instance structure.
1050 Again, a vtable for a superclass would have a vtable for each of its
1051 superclasses as a prefix, with new items added afterwards.
1052
1053 This appealing approach works well for an object system which only permits
1054 single inheritance of both state and behaviour. Alas, it breaks down when
1055 multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$,
1056 even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in
1057 general, $B$'s instance structure will not be a prefix of $B'$'s, nor will
1058 $B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$
1059 as a prefix.
1060
1061 A (non-root) class may -- though need not -- have a distinguished \emph{link}
1062 superclass, which need not be a direct superclass. Furthermore, each
1063 class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of
1064 $C$, there can be at most one other superclass of $C$ whose link superclass
1065 is $A$.\footnote{%
1066 That is, it's permitted for two classes $B$ and $B'$ to have the same link
1067 superclass $A$, but $B$ and $B'$ can't then both be superclasses of the
1068 same class $C$.} %
1069 Therefore, the links partition the superclasses of~$C$ into nice linear
1070 \emph{chains}, such that each superclass is a member of exactly one chain.
1071 If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more
1072 than that of $A$; otherwise $B$ is called a \emph{chain head} and its level
1073 is zero. If the classes in a chain are written in a list, chain head first,
1074 then the level of each class gives its index in the list.
1075
1076 Chains therefore allow us to recover some of the linearity properties which
1077 made layout simple in the case of single inheritance. The instance structure
1078 for a class $C$ contains a substructure for each of $C$'s superclass chains;
1079 a pointer to an object of class $C$ actually points to the substructure for
1080 the chain containing $C$. The order of these substructures is unimportant
1081 for now.\footnote{%
1082 The chains appear in the order in which their most specific classes appear
1083 in $C$'s class precedence list. This guarantees that the chain containing
1084 $C$ itself appears first, so that a pointer to $C$'s instance structure is
1085 actually a pointer to $C$'s chain substructure. Apart from that, it's a
1086 simple, stable, but basically arbitrary choice which can't be changed
1087 without breaking the ABI.} %
1088 The substructure for each chain begins with a pointer to a vtable, followed
1089 by a structure for each superclass in the chain containing the slots defined
1090 by that superclass, with the chain head (least specific class) first.
1091
1092 Suppose we have a pointer to (static) type $C$, and want to convert it into a
1093 pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{%
1094 In the more general case, we have a pointer to static type $C$, which
1095 actually points to an object of some subclass $D$ of $C$, and want to
1096 convert it into a pointer to type $B$. Such a conversion is called a
1097 \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast}
1098 otherwise. Downcasts and cross-casts require complicated run-time
1099 checking, and can will fail unless $B$ is a superclass of $D$.} %
1100 If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the
1101 pointer value is already correct and it's only necessary to cast it
1102 appropriately. Otherwise -- a \emph{cross-chain upcast} -- the pointer needs
1103 to be adjusted to point to a different chain substructure. Since the lengths
1104 and relative positions of the chain substructures vary between classes, the
1105 adjustments are stored in the vtable. Cross-chain upcasts are therefore a
1106 bit slower than in-chain upcasts.
1107
1108 Each chain has its own separate vtable, because much of the metadata stored
1109 in the vtable is specific to a particular chain. For example:
1110 \begin{itemize}
1111 \item offsets to other chains' substructures will vary depending on which
1112 chain we start from; and
1113 \item entry points to methods {
1114
1115 %%%--------------------------------------------------------------------------
1116 \chapter{The Lisp programming interface} \label{ch:api}
1117
1118 %% output for `h' files
1119 %%
1120 %% prologue
1121 %% guard start
1122 %% typedefs start
1123 %% typedefs
1124 %% typedefs end
1125 %% includes start
1126 %% includes
1127 %% includes end
1128 %% classes start
1129 %% CLASS banner
1130 %% CLASS islots start
1131 %% CLASS islots slots
1132 %% CLASS islots end
1133 %% CLASS vtmsgs start
1134 %% CLASS vtmsgs CLASS start
1135 %% CLASS vtmsgs CLASS slots
1136 %% CLASS vtmsgs CLASS end
1137 %% CLASS vtmsgs end
1138 %% CLASS vtables start
1139 %% CLASS vtables CHAIN-HEAD start
1140 %% CLASS vtables CHAIN-HEAD slots
1141 %% CLASS vtables CHAIN-HEAD end
1142 %% CLASS vtables end
1143 %% CLASS vtable-externs
1144 %% CLASS vtable-externs-after
1145 %% CLASS methods start
1146 %% CLASS methods
1147 %% CLASS methods end
1148 %% CLASS ichains start
1149 %% CLASS ichains CHAIN-HEAD start
1150 %% CLASS ichains CHAIN-HEAD slots
1151 %% CLASS ichains CHAIN-HEAD end
1152 %% CLASS ichains end
1153 %% CLASS ilayout start
1154 %% CLASS ilayout slots
1155 %% CLASS ilayout end
1156 %% CLASS conversions
1157 %% CLASS object
1158 %% classes end
1159 %% guard end
1160 %% epilogue
1161
1162 %% output for `c' files
1163 %%
1164 %% prologue
1165 %% includes start
1166 %% includes
1167 %% includes end
1168 %% classes start
1169 %% CLASS banner
1170 %% CLASS direct-methods start
1171 %% CLASS direct-methods METHOD start
1172 %% CLASS direct-methods METHOD body
1173 %% CLASS direct-methods METHOD end
1174 %% CLASS direct-methods end
1175 %% CLASS effective-methods
1176 %% CLASS vtables start
1177 %% CLASS vtables CHAIN-HEAD start
1178 %% CLASS vtables CHAIN-HEAD class-pointer METACLASS
1179 %% CLASS vtables CHAIN-HEAD base-offset
1180 %% CLASS vtables CHAIN-HEAD chain-offset TARGET-HEAD
1181 %% CLASS vtables CHAIN-HEAD vtmsgs CLASS start
1182 %% CLASS vtables CHAIN-HEAD vtmsgs CLASS slots
1183 %% CLASS vtables CHAIN-HEAD vtmsgs CLASS end
1184 %% CLASS vtables CHAIN-HEAD end
1185 %% CLASS vtables end
1186 %% CLASS object prepare
1187 %% CLASS object start
1188 %% CLASS object CHAIN-HEAD ichain start
1189 %% CLASS object SUPER slots start
1190 %% CLASS object SUPER slots
1191 %% CLASS object SUPER vtable
1192 %% CLASS object SUPER slots end
1193 %% CLASS object CHAIN-HEAD ichain end
1194 %% CLASS object end
1195 %% classes end
1196 %% epilogue
1197
1198 %%%--------------------------------------------------------------------------
1199
1200 \include{sod-backg}
1201 \include{sod-protocol}
1202
1203 \end{document}
1204 \f
1205 %%% Local variables:
1206 %%% mode: LaTeX
1207 %%% TeX-PDF-mode: t
1208 %%% End: