Massive reorganization in progress.
[sod] / doc / sod.tex
1 \documentclass[noarticle]{strayman}
2
3 \usepackage[T1]{fontenc}
4 \usepackage[utf8]{inputenc}
5 \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
6 \usepackage{syntax}
7 \usepackage{sverb}
8 \usepackage{mdwtab}
9 \usepackage{footnote}
10 \usepackage{at}
11 \usepackage{mdwref}
12
13 \title{A Sensible Object Design for C}
14 \author{Mark Wooding}
15
16 \makeatletter
17
%%% Show plenty of macro-expansion context in TeX error messages.
18 \errorcontextlines999
19
%%% Nonterminals are set in italics, with nothing added afterwards.
%%% NOTE(review): \syntleft and \syntright are presumably the hooks which the
%%% `syntax' package wraps around \synt material -- confirm against its docs.
20 \def\syntleft{\normalfont\itshape}
21 \let\syntright\empty
22
%%% The typeface used for code throughout the document is sans serif.
23 \let\codeface\sffamily
24
%%% Literal text is set in \codeface, again with nothing added afterwards.
%%% NOTE(review): \ulitleft and \ulitright are presumably the corresponding
%%% hooks for \lit material -- confirm.
25 \def\ulitleft{\normalfont\codeface}
26 \let\ulitright\empty
27
%%% Make \listingsize do nothing.
%%% NOTE(review): presumably this stops listings changing font size -- confirm.
28 \let\listingsize\relax
29
%%% Use the variant epsilon glyph wherever \epsilon is written.
30 \let\epsilon\varepsilon
31
%%% Shortcuts defined with \atdef (the `at' package is loaded above).  In the
%%% body text these are written @<nonterminal>, @"literal", @`literal' and
%%% @|name|.  Each one finishes by calling \@scripts, so that a subscript or
%%% superscript which follows immediately is picked up as well.
32 \atdef <#1>{\synt{#1}\@scripts}
33 \atdef "#1"{\lit*{#1}\@scripts}
34 \atdef `#1'{\lit{#1}\@scripts}
35 \atdef |#1|{\textsf{#1}\@scripts}
%%% \dbl@maybe CHAR: remember CHAR in \@tempa and peek at the next token.
%%% \dbl@maybe@i then enters math mode if necessary and compares the two.  If
%%% the next token is the same character, CHAR is typeset twice with a
%%% negative thin space (\!) in between, and the duplicate is removed from the
%%% input: after the \expandafter chain has disposed of the \else branch,
%%% \@firstoftwo keeps \@scripts and drops the token which follows it.
%%% Otherwise CHAR is typeset once.  Either way \@scripts runs next.
36 \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i}
37 \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa%
38 \expandafter\@firstoftwo\expandafter\@scripts%
39 \else\@tempa\expandafter\@scripts\fi}
%%% @[ and @] use the doubling logic above.  The brace and parenthesis
%%% shortcuts, and @! (which typesets a vertical bar), simply switch to math
%%% mode if necessary, typeset the delimiter, and then look for scripts.
40 \atdef [{\dbl@maybe[}
41 \atdef ]{\dbl@maybe]}
42 \atdef {{\m@maybe\{\@scripts}
43 \atdef }{\m@maybe\}\@scripts}
44 \atdef ({\m@maybe(\@scripts}
45 \atdef ){\m@maybe)\@scripts}
46 \atdef !{\m@maybe|\@scripts}
%%% @to: remove any preceding space and typeset a long right arrow with a
%%% quad of space on either side.
47 \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad}
%%% \m@maybe enters math mode unless we are already in it; when it does, it
%%% also makes \m@maybe@end a math-shift token so that the math can be closed
%%% again later.  Otherwise \m@maybe@end keeps the harmless meaning given
%%% here.
48 \let\m@maybe@end\relax
49 \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi}
%%% \@scripts peeks at the next token and hands over to \@scripts@i.
50 \def\@scripts{\futurelet\@ch\@scripts@i}
51
%%% @; ... \\ : set the text from the semicolon up to the \\ in italics.
%%% NOTE(review): presumably intended for comments in code displays -- confirm.
52 \atdef ;#1\\{\normalfont\itshape;#1\\}
53
%%% \@scripts@i: if the token seen by \@scripts is a subscript or superscript
%%% character then pass control to \@scripts@ii; otherwise call \m@maybe@end
%%% to close any math mode which \m@maybe opened.  The \lowercase wrapper
%%% turns the ~ in the definition into a token with the character code of _
%%% (the category code of ~ is kept) for use in the \ifx comparison.
54 \begingroup\lccode`\~=`\_\lowercase{\endgroup
55 \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
56 \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}}
%%% \@scripts@ii SCRIPT-CHAR ARG: typeset the script in math mode, then look
%%% for a further script.
57 \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts}
58
%%% Language names: `C++' with the plus signs pulled back by \p@ (1pt in the
%%% LaTeX kernel), and `C#'.
59 \def\Cplusplus{C\kern-\p@++}
60 \def\Csharp{C\#}
%%% \man NAME SECTION: typeset NAME in bold followed by SECTION in
%%% parentheses.
%%% NOTE(review): presumably for Unix manual-page references -- confirm.
61 \def\man#1#2{\textbf{#1}(#2)}
62
%%% The `prog' environment: a display set in \codeface, built from `quote'
%%% and `tabbing'.
%%%
%%% The \lccode assignment ends with a backslash at the very end of its line,
%%% so the character it names is the end-of-line character; \lowercase then
%%% turns each ~ in the definitions below into an active end-of-line
%%% character.  Do not insert anything between those first two lines.
%%%
%%% \prog saves the current meaning of \\ in \old@nl, makes line ends active
%%% with \obeylines, starts the tabbing environment, and then globally makes
%%% the active line end behave as \\ does at that point (inside tabbing) and
%%% makes \\ itself typeset a backslash.  \endprog closes the tabbing
%%% environment, globally restores \\ from \old@nl and closes the quote.
63 \begingroup\lccode`\~=`\
64 \lowercase{
65 \endgroup
66 \def\prog{%
67 \codeface%
68 \quote%
69 \let\old@nl\\%
70 \obeylines%
71 \tabbing%
72 \global\let~\\%
73 \global\let\\\textbackslash%
74 }
75 \def\endprog{%
76 \endtabbing%
77 \global\let\\\old@nl%
78 \endquote%
79 }}
80
%%% The `boxy' environment: set its body in a minipage surrounded by rules,
%%% with 1ex of padding on every side and a \medskip above and below.
%%%
%%% The optional argument is a title, set in bold in the \headfam family at
%%% the start of the box and followed by a quad; if it is omitted (detected
%%% by comparing against the sentinel \q@) then no title is produced.
%%%
%%% The minipage width is \linewidth less 2ex for the horizontal padding and
%%% a further 1.2pt.
%%% NOTE(review): the 1.2pt is presumably an allowance for the vertical
%%% rules -- confirm.
81 \newenvironment{boxy}[1][\q@]{%
82 \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex%
83 \medskip%
84 \vbox\bgroup\hrule\hbox\bgroup\vrule%
85 \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@%
86 \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi%
87 }{%
88 \endminipage\hskip1ex\egroup\vskip1ex\egroup%
89 \vrule\egroup\hrule\egroup%
90 \medskip%
91 }
92
%%% Registry of category names for the `describe' environment.
%%%
%%% \definedescribecategory TAG NAME records NAME as the printed form of the
%%% short TAG, storing it in the control sequence named `cat!TAG'.
%%%
%%% \describecategoryname TAG produces the recorded name, or TAG itself if
%%% no name has been recorded for it.
93 \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}}
94 \def\describecategoryname#1{%
95 \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname%
96 \ifx\@tempa\relax#1\else\@tempa\fi}
%%% The predefined categories.
97 \definedescribecategory{fun}{function}
98 \definedescribecategory{gf}{generic function}
99 \definedescribecategory{var}{variable}
100 \definedescribecategory{const}{constant}
101 \definedescribecategory{meth}{primary method}
102 \definedescribecategory{ar-meth}{around-method}
103 \definedescribecategory{be-meth}{before-method}
104 \definedescribecategory{af-meth}{after-method}
105 \definedescribecategory{cls}{class}
106 \definedescribecategory{ty}{type}
107 \definedescribecategory{mac}{macro}
108
%%% \q@ is a sentinel: it expands to itself, so it must never be executed,
%%% only compared against with \ifx or used as an argument delimiter.
109 \def\q@{\q@}
%%% \begin{describe}[LABEL]{CATEGORY}{SYNOPSIS} ... \end{describe}
%%%
%%% Typesets a heading line with SYNOPSIS (set in \codeface inside a tabbing
%%% environment) on the left and the category name, bold and in square
%%% brackets, on the right; the body follows as the single item of a list
%%% with no right margin.
%%%
%%% A \label of the form CATEGORY:NAME is also set (and echoed to the
%%% terminal with \message).  NAME is the optional LABEL argument if one is
%%% given, and otherwise the text of SYNOPSIS up to its first space.
110 \newenvironment{describe}[3][\q@]{%
111 \normalfont%
112 \par\goodbreak%
113 \vspace{\bigskipamount}%
114 \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}%
115 \dimen@\linewidth\advance\dimen@-\wd\z@%
116 \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}%
%%% Both calls to \@temp must end with its \q@ delimiter; otherwise the
%%% second argument runs away into the rest of the definition.
117 \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \q@\fi%
118 \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}%
119 @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}%
120 \noindent\csname tabular*\expandafter\endcsname\@temp%
121 \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular%
122 % \@afterheading%
123 \list{}{\rightmargin\z@}\item%
124 }{%
125 \endlist%
126 }
127
%%% \push: a tabbing-environment helper.  It makes a line containing a quad
%%% of space, sets a tab stop after it (\=), moves the left margin one tab
%%% stop to the right for the following lines (\+), and discards the line
%%% itself (\kill).
128 \def\push{\quad\=\+\kill}
129
130 \begin{document}
131
132 \maketitle
133
134 \include{sod-tut}
135
136 %%%--------------------------------------------------------------------------
137 \chapter{Internals}
138
139 \section{Generated names}
140
141 The generated names for functions and objects related to a class are
142 constructed systematically so as not to interfere with each other. The rules
143 on class, slot and message naming exist so as to ensure that the generated
144 names don't collide with each other.
145
146 The following notation is used in this section.
147 \begin{description}
148 \item[@<class>] The full name of the `focus' class: the one for which we are
149 generating names.
150 \item[@<super-nick>] The nickname of a superclass.
151 \item[@<head-nick>] The nickname of the chain-head class of the chain
152 in question.
153 \end{description}
154
155 \subsection{Instance layout}
156
157 %%%--------------------------------------------------------------------------
158 \section{Syntax}
159 \label{sec:syntax}
160
161 Fortunately, Sod is syntactically quite simple. I've used some slightly
162 unusual notation in order to make the presentation easier to read.
163 \begin{itemize}
164 \item $\epsilon$ denotes the empty nonterminal:
165 \begin{quote}
166 $\epsilon$ ::=
167 \end{quote}
168 \item $[$@<item>$]$ means an optional @<item>:
169 \begin{quote}
170 \syntax{$[$<item>$]$ ::= $\epsilon$ | <item>}
171 \end{quote}
172 \item @<item>$^*$ means a sequence of zero or more @<item>s:
173 \begin{quote}
174 \syntax{<item>$^*$ ::= $\epsilon$ | <item>$^*$ <item>}
175 \end{quote}
176 \item @<item>$^+$ means a sequence of one or more @<item>s:
177 \begin{quote}
178 \syntax{<item>$^+$ ::= <item> <item>$^*$}
179 \end{quote}
180 \item @<item-list> means a sequence of one or more @<item>s separated
181 by commas:
182 \begin{quote}
183 \syntax{<item-list> ::= <item> | <item-list> "," <item>}
184 \end{quote}
185 \end{itemize}
186
187 \subsection{Lexical syntax}
188 \label{sec:syntax.lex}
189
190 Whitespace and comments are discarded. The remaining characters are
191 collected into tokens according to the following syntax.
192
193 \begin{grammar}
194 <token> ::= <identifier>
195 \alt <reserved-word>
196 \alt <string-literal>
197 \alt <char-literal>
198 \alt <integer-literal>
199 \alt <punctuation>
200 \end{grammar}
201
202 This syntax is slightly ambiguous. The following two rules serve to
203 disambiguate:
204 \begin{enumerate}
205 \item Reserved words take precedence. All @<reserved-word>s are
206 syntactically @<identifier>s; Sod resolves the ambiguity in favour of
207 @<reserved-word>.
208 \item `Maximal munch'. In other cases, at each stage we take the longest
209 sequence of characters which could be a token.
210 \end{enumerate}
211
212 \subsubsection{Identifiers} \label{sec:syntax.lex.id}
213
214 \begin{grammar}
215 <identifier> ::= <id-start-char> <id-body-char>$^*$
216
217 <id-start-char> ::= <alpha-char> $|$ "_"
218
219 <id-body-char> ::= <id-start-char> $|$ <digit-char>
220
221 <alpha-char> ::= "A" $|$ "B" $|$ \dots\ $|$ "Z"
222 \alt "a" $|$ "b" $|$ \dots\ $|$ "z"
223 \alt <extended-alpha-char>
224
225 <digit-char> ::= "0" $|$ <nonzero-digit-char>
226
227 <nonzero-digit-char> ::= "1" $|$ "2" $| \cdots |$ "9"
228 \end{grammar}
229
230 The precise definition of @<alpha-char> is left to the function
231 \textsf{alpha-char-p} in the hosting Lisp system. For portability,
232 programmers are encouraged to limit themselves to the standard ASCII letters.
233
234 \subsubsection{Reserved words} \label{sec:syntax.lex.reserved}
235
236 \begin{grammar}
237 <reserved-word> ::=
238 "char" $|$ "class" $|$ "code" $|$ "const" $|$ "double" $|$ "enum" $|$
239 "extern" $|$ "float" $|$ "import" $|$ "int" $|$ "lisp" $|$ "load" $|$ "long"
240 $|$ "restrict" $|$ "short" $|$ "signed" $|$ "struct" $|$ "typename" $|$
241 "union" $|$ "unsigned" $|$ "void" $|$ "volatile"
242 \end{grammar}
243
244 Many of these are borrowed from~C; however, some (e.g., @"import" and
245 @"lisp") are not, and some C reserved words are not reserved (e.g.,
246 @"static").
247
248 \subsubsection{String and character literals} \label{sec:syntax.lex.string}
249
250 \begin{grammar}
251 <string-literal> ::= "\"" <string-literal-char>$^*$ "\""
252
253 <char-literal> ::= "'" <char-literal-char> "'"
254
255 <string-literal-char> ::= any character other than "\\" or "\""
256 \alt "\\" <char>
257
258 <char-literal-char> ::= any character other than "\\" or "'"
259 \alt "\\" <char>
260
261 <char> ::= any single character
262 \end{grammar}
263
264 The syntax for string and character literals differs from~C. In particular,
265 escape sequences such as @`\textbackslash n' are not recognized. The use
266 of string and character literals in Sod, outside of C~fragments, is limited,
267 and the simple syntax seems adequate. For the sake of future compatibility,
268 the use of character sequences which resemble C escape sequences is
269 discouraged.
270
271 \subsubsection{Integer literals} \label{sec:syntax.lex.int}
272
273 \begin{grammar}
274 <integer-literal> ::= <decimal-integer>
275 \alt <binary-integer>
276 \alt <octal-integer>
277 \alt <hex-integer>
278
279 <decimal-integer> ::= <nonzero-digit-char> <digit-char>$^*$
280
281 <binary-integer> ::= "0" $($"b"$|$"B"$)$ <binary-digit-char>$^+$
282
283 <binary-digit-char> ::= "0" $|$ "1"
284
285 <octal-integer> ::= "0" $[$"o"$|$"O"$]$ <octal-digit-char>$^+$
286
287 <octal-digit-char> ::= "0" $|$ "1" $| \cdots |$ "7"
288
289 <hex-integer> ::= "0" $($"x"$|$"X"$)$ <hex-digit-char>$^+$
290
291 <hex-digit-char> ::= <digit-char>
292 \alt "A" $|$ "B" $|$ "C" $|$ "D" $|$ "E" $|$ "F"
293 \alt "a" $|$ "b" $|$ "c" $|$ "d" $|$ "e" $|$ "f"
294 \end{grammar}
295
296 Sod understands only integers, not floating-point numbers; its integer syntax
297 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
298 binary. However, length and signedness indicators are not permitted.
299
300 \subsubsection{Punctuation} \label{sec:syntax.lex.punct}
301
302 \begin{grammar}
303 <punctuation> ::= any character other than "\"" or "'"
304 \end{grammar}
305
306 Due to the `maximal munch' rule, @<punctuation> tokens cannot be
307 alphanumeric.
308
309 \subsubsection{Comments} \label{sec:lex-comment}
310
311 \begin{grammar}
312 <comment> ::= <block-comment>
313 \alt <line-comment>
314
315 <block-comment> ::=
316 "/*"
317 <not-star>$^*$ $($<star>$^+$ <not-star-or-slash> <not-star>$^*)^*$
318 <star>$^*$
319 "*/"
320
321 <star> ::= "*"
322
323 <not-star> ::= any character other than "*"
324
325 <not-star-or-slash> ::= any character other than "*" or "/"
326
327 <line-comment> ::= "//" <not-newline>$^*$ <newline>
328
329 <newline> ::= a newline character
330
331 <not-newline> ::= any character other than newline
332 \end{grammar}
333
334 Comments are exactly as in C99: both traditional block comments `\texttt{/*}
335 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
336 permitted and ignored.
337
338 \subsection{Special nonterminals}
339 \label{sec:special-nonterminals}
340
341 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
342 two special nonterminals occur in the module syntax.
343
344 \subsubsection{S-expressions} \label{sec:syntax-sexp}
345
346 \begin{grammar}
347 <s-expression> ::= an S-expression, as parsed by the Lisp reader
348 \end{grammar}
349
350 When an S-expression is expected, the Sod parser simply calls the host Lisp
351 system's \textsf{read} function. Sod modules are permitted to modify the
352 read table to extend the S-expression syntax.
353
354 S-expressions are self-delimiting, so no end-marker is needed.
355
356 \subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
357
358 \begin{grammar}
359 <c-fragment> ::= a sequence of C tokens, with matching brackets
360 \end{grammar}
361
362 Sequences of C code are simply stored and written to the output unchanged
363 during translation. They are read using a simple scanner which nonetheless
364 understands C comments and string and character literals.
365
366 A C fragment is terminated by one of a small number of delimiter characters
367 determined by the immediately surrounding context -- usually a closing brace
368 or bracket. The first such delimiter character which is not enclosed in
369 brackets, braces or parentheses ends the fragment.
370
371 \subsection{Module syntax} \label{sec:syntax-module}
372
373 \begin{grammar}
374 <module> ::= <definition>$^*$
375
376 <definition> ::= <import-definition>
377 \alt <load-definition>
378 \alt <lisp-definition>
379 \alt <code-definition>
380 \alt <typename-definition>
381 \alt <class-definition>
382 \end{grammar}
383
384 A module is the top-level syntactic item. A module consists of a sequence of
385 definitions.
386
387 \subsection{Simple definitions} \label{sec:syntax.defs}
388
389 \subsubsection{Importing modules} \label{sec:syntax.defs.import}
390
391 \begin{grammar}
392 <import-definition> ::= "import" <string> ";"
393 \end{grammar}
394
395 The module named @<string> is processed and its definitions made available.
396
397 A search is made for a module source file as follows.
398 \begin{itemize}
399 \item The module name @<string> is converted into a filename by appending
400 @`.sod', if it has no extension already.\footnote{%
401 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
402 :type "SOD" :case :common))}, so exactly what this means varies
403 according to the host system.} %
404 \item The file is looked for relative to the directory containing the
405 importing module.
406 \item If that fails, then the file is looked for in each directory on the
407 module search path in turn.
408 \item If the file still isn't found, an error is reported and the import
409 fails.
410 \end{itemize}
411 At this point, if the file has previously been imported, nothing further
412 happens.\footnote{%
413 This check is done using \textsf{truename}, so it should see through simple
414 tricks like symbolic links. However, it may be confused by fancy things
415 like bind mounts and so on.} %
416
417 Recursive imports, either direct or indirect, are an error.
418
419 \subsubsection{Loading extensions} \label{sec:syntax.defs.load}
420
421 \begin{grammar}
422 <load-definition> ::= "load" <string> ";"
423 \end{grammar}
424
425 The Lisp file named @<string> is loaded and evaluated.
426
427 A search is made for a Lisp source file as follows.
428 \begin{itemize}
429 \item The name @<string> is converted into a filename by appending @`.lisp',
430 if it has no extension already.\footnote{%
431 Technically, what happens is \textsf{(merge-pathnames name (make-pathname
432 :type "LISP" :case :common))}, so exactly what this means varies
433 according to the host system.} %
434 \item A search is then made in the same manner as for module imports
435 (\xref{sec:syntax.defs.import}).
436 \end{itemize}
437 If the file is found, it is loaded using the host Lisp's \textsf{load}
438 function.
439
440 Note that Sod doesn't attempt to compile Lisp files, or even to look for
441 existing compiled files. The right way to package a substantial extension to
442 the Sod translator is to provide the extension as a standard ASDF system (or
443 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
444 something like
445 \begin{listing}
446 (asdf:operate 'asdf:load-op :foo-extension)
447 \end{listing}
448 which will arrange for the extension to be compiled if necessary.
449
450 (This approach means that the language doesn't need to depend on any
451 particular system definition facility. It's bad enough already that it
452 depends on Common Lisp.)
453
454 \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
455
456 \begin{grammar}
457 <lisp-definition> ::= "lisp" <s-expression> ";"
458 \end{grammar}
459
460 The @<s-expression> is evaluated immediately. It can do anything it likes.
461
462 \textbf{Warning!} This means that hostile Sod modules are a security hazard.
463 Lisp code can read and write files, start other programs, and make network
464 connections. Don't install Sod modules from sources that you don't
465 trust.\footnote{%
466 Presumably you were going to run the corresponding code at some point, so
467 this isn't as unusually scary as it sounds. But please be careful.} %
468
469 \subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
470
471 \begin{grammar}
472 <typename-definition> ::=
473 "typename" <identifier-list> ";"
474 \end{grammar}
475
476 Each @<identifier> is declared as naming a C type. This is important because
477 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
478 done by distinguishing type names from other identifiers.
479
480 Don't declare class names using @"typename"; use @"class" forward
481 declarations instead.
482
483 \subsection{Literal code} \label{sec:syntax-code}
484
485 \begin{grammar}
486 <code-definition> ::=
487 "code" <identifier> ":" <identifier> $[$<constraints>$]$
488 "{" <c-fragment> "}"
489
490 <constraints> ::= "[" <constraint-list> "]"
491
492 <constraint> ::= <identifier>$^+$
493 \end{grammar}
494
495 The @<c-fragment> will be output unchanged to one of the output files.
496
497 The first @<identifier> is the symbolic name of an output file. Predefined
498 output file names are @"c" and @"h", which are the implementation code and
499 header file respectively; other output files can be defined by extensions.
500
501 The second @<identifier> provides a name for the output item. Several C
502 fragments can have the same name: they will be concatenated together in the
503 order in which they were encountered.
504
505 The @<constraints> provide a means for specifying where in the output file
506 the output item should appear. (Note the two kinds of square brackets shown
507 in the syntax: square brackets must appear around the constraints if they are
508 present, but the constraints may be omitted entirely.) Each comma-separated @<constraint>
509 is a sequence of identifiers naming output items, and indicates that the
510 output items must appear in the order given -- though the translator is free
511 to insert additional items in between them. (The particular output items
512 needn't be defined already -- indeed, they needn't be defined ever.)
513
514 There is a predefined output item @"includes" in both the @"c" and @"h"
515 output files which is a suitable place for inserting @"\#include"
516 preprocessor directives in order to declare types and functions for use
517 elsewhere in the generated output files.
518
519 \subsection{Property sets} \label{sec:syntax.propset}
520
521 \begin{grammar}
522 <properties> ::= "[" <property-list> "]"
523
524 <property> ::= <identifier> "=" <expression>
525 \end{grammar}
526
527 Property sets are a means for associating miscellaneous information with
528 classes and related items. By using property sets, additional information
529 can be passed to extensions without the need to introduce idiosyncratic
530 syntax.
531
532 A property has a name, given as an @<identifier>, and a value computed by
533 evaluating an @<expression>. The value can be one of a number of types,
534 though the only operators currently defined act on integer values only.
535
536 \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
537
538 \begin{grammar}
539 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
540
541 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
542
543 <factor> ::= <primary> | "+" <factor> | "-" <factor>
544
545 <primary> ::=
546 <integer-literal> | <string-literal> | <char-literal> | <identifier>
547 \alt "?" <s-expression>
548 \alt "(" <expression> ")"
549 \end{grammar}
550
551 The arithmetic expression syntax is simple and standard; there are currently
552 no bitwise, logical, or comparison operators.
553
554 A @<primary> expression may be a literal or an identifier. Note that
555 identifiers stand for themselves: they \emph{do not} denote values. For more
556 fancy expressions, the syntax
557 \begin{quote}
558 @"?" @<s-expression>
559 \end{quote}
560 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
561 function.
562 %%% FIXME crossref to extension docs
563
564 \subsection{C types} \label{sec:syntax.c-types}
565
566 Sod's syntax for C types closely mirrors the standard C syntax. A C type has
567 two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In
568 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
569 `implicit @"int"' is forbidden), and storage-class specifiers are not
570 recognized.
571
572 \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
573
574 \begin{grammar}
575 <declaration-specifier> ::= <type-name>
576 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
577 \alt "void" | "char" | "int" | "float" | "double"
578 \alt "short" | "long"
579 \alt "signed" | "unsigned"
580 \alt <qualifier>
581
582 <qualifier> ::= "const" | "volatile" | "restrict"
583
584 <type-name> ::= <identifier>
585 \end{grammar}
586
587 A @<type-name> is an identifier which has been declared as being a type name,
588 using the @"typename" or @"class" definitions.
589
590 Declaration specifiers may appear in any order. However, not all
591 combinations are permitted. A sequence of declaration specifiers must consist
592 of zero or more @<qualifier>s, and one of the following, up to reordering.
593 \begin{itemize}
594 \item @<type-name>
595 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
596 \item @"void"
597 \item @"char", @"unsigned char", @"signed char"
598 \item @"short", @"unsigned short", @"signed short"
599 \item @"short int", @"unsigned short int", @"signed short int"
600 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
601 \item @"long", @"unsigned long", @"signed long"
602 \item @"long int", @"unsigned long int", @"signed long int"
603 \item @"long long", @"unsigned long long", @"signed long long"
604 \item @"long long int", @"unsigned long long int", @"signed long long int"
605 \item @"float", @"double", @"long double"
606 \end{itemize}
607 All of these have their usual C meanings.
608
609 \subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
610
611 \begin{grammar}
612 <declarator> ::=
613 <pointer>$^*$ <inner-declarator> <declarator-suffix>$^*$
614
615 <inner-declarator> ::= <identifier> | <qualified-identifier>
616 \alt "(" <declarator> ")"
617
618 <qualified-identifier> ::= <identifier> "." <identifier>
619
620 <pointer> ::= "*" <qualifier>$^*$
621
622 <declarator-suffix> ::= "[" <c-fragment> "]"
623 \alt "(" <arguments> ")"
624
625 <arguments> ::= <empty> | "..."
626 \alt <argument-list> $[$"," "..."$]$
627
628 <argument> ::= <declaration-specifier>$^+$ <argument-declarator>
629
630 <argument-declarator> ::= <declarator> | $[$<abstract-declarator>$]$
631
632 <abstract-declarator> ::=
633 <pointer>$^+$ | <pointer>$^*$ <inner-abstract-declarator>
634
635 <inner-abstract-declarator> ::= "(" <abstract-declarator> ")"
636 \alt $[$<inner-abstract-declarator>$]$ <declarator-suffix>$^+$
637 \end{grammar}
638
639 The declarator syntax is taken from C, but with some differences.
640 \begin{itemize}
641 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
642 closing square bracket. This allows array dimensions to contain arbitrary
643 constant expressions.
644 \item A declarator may have either a single @<identifier> at its centre or a
645 pair of @<identifier>s separated by a @`.'; this is used to refer to
646 slots or messages defined in superclasses.
647 \end{itemize}
648 The remaining differences are (I hope) a matter of presentation rather than
649 substance.
650
651 \subsection{Defining classes} \label{sec:syntax.class}
652
653 \begin{grammar}
654 <class-definition> ::= <class-forward-declaration>
655 \alt <full-class-definition>
656 \end{grammar}
657
658 \subsubsection{Forward declarations} \label{sec:class.class.forward}
659
660 \begin{grammar}
661 <class-forward-declaration> ::= "class" <identifier> ";"
662 \end{grammar}
663
664 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
665 to name a class which is currently undefined. Forward declarations are
666 necessary in order to resolve certain kinds of circularity. For example,
667 \begin{listing}
668 class Sub;
669
670 class Super : SodObject {
671 Sub *sub;
672 };
673
674 class Sub : Super {
675 /* ... */
676 };
677 \end{listing}
678
679 \subsubsection{Full class definitions} \label{sec:class.class.full}
680
681 \begin{grammar}
682 <full-class-definition> ::=
683 $[$<properties>$]$
684 "class" <identifier> ":" <identifier-list>
685 "{" <class-item>$^*$ "}"
686
687 <class-item> ::= <slot-item> ";"
688 \alt <message-item>
689 \alt <method-item>
690 \alt <initializer-item> ";"
691 \end{grammar}
692
693 A full class definition provides a complete description of a class.
694
695 The first @<identifier> gives the name of the class. It is an error to
696 give the name of an existing class (other than a forward-referenced class),
697 or an existing type name. It is conventional to give classes `MixedCase'
698 names, to distinguish them from other kinds of identifiers.
699
700 The @<identifier-list> names the direct superclasses for the new class. It
701 is an error if any of these @<identifier>s does not name a defined class.
702
703 The @<properties> provide additional information. The standard class
704 properties are as follows.
705 \begin{description}
706 \item[@"lisp_class"] The name of the Lisp class to use within the translator
707 to represent this class. The property value must be an identifier; the
708 default is @"sod_class". Extensions may define classes with additional
709 behaviour, and may recognize additional class properties.
710 \item[@"metaclass"] The name of the Sod metaclass for this class. In the
711 generated code, a class is itself an instance of another class -- its
712 \emph{metaclass}. The metaclass defines which slots the class will have,
713 which messages it will respond to, and what its behaviour will be when it
714 receives them. The property value must be an identifier naming a defined
715 subclass of @"SodClass". The default metaclass is @"SodClass".
716 %%% FIXME xref to theory
717 \item[@"nick"] A nickname for the class, to be used to distinguish it from
718 other classes in various limited contexts. The property value must be an
719 identifier; the default is constructed by forcing the class name to
720 lower-case.
721 \end{description}
722
723 The class body consists of a sequence of @<class-item>s enclosed in braces.
724 These items are discussed in the following sections.
725
726 \subsubsection{Slot items} \label{sec:sntax.class.slot}
727
728 \begin{grammar}
729 <slot-item> ::=
730 $[$<properties>$]$
731 <declaration-specifier>$^+$ <init-declarator-list>
732
733 <init-declarator> ::= <declarator> $[$"=" <initializer>$]$
734 \end{grammar}
735
736 A @<slot-item> defines one or more slots. All instances of the class and any
737 subclass will contain these slots, with the names and types given by the
738 @<declaration-specifier>s and the @<declarator>s. Slot declarators may not
739 contain qualified identifiers.
740
741 It is not possible to declare a slot with function type: such an item is
742 interpreted as being a @<message-item> or @<method-item>. Pointers to
743 functions are fine.
744
745 An @<initializer>, if present, is treated as if a separate
746 @<initializer-item> containing the slot name and initializer were present.
747 For example,
748 \begin{listing}
749 [nick = eg]
750 class Example : Super {
751 int foo = 17;
752 };
753 \end{listing}
754 means the same as
755 \begin{listing}
756 [nick = eg]
757 class Example : Super {
758 int foo;
759 eg.foo = 17;
760 };
761 \end{listing}
762
763 \subsubsection{Initializer items} \label{sec:syntax.class.init}
764
765 \begin{grammar}
766 <initializer-item> ::= $[$"class"$]$ <slot-initializer-list>
767
768 <slot-initializer> ::= <qualified-identifier> "=" <initializer>
769
770 <initializer> ::= "{" <c-fragment> "}" | <c-fragment>
771 \end{grammar}
772
773 An @<initializer-item> provides an initial value for one or more slots. If
774 prefixed by @"class", then the initial values are for class slots (i.e.,
775 slots of the class object itself); otherwise they are for instance slots.
776
777 The first component of the @<qualified-identifier> must be the nickname of
778 one of the class's superclasses (including itself); the second must be the
779 name of a slot defined in that superclass.
780
781 The initializer has one of two forms.
782 \begin{itemize}
783 \item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
784 This is suitable for initializing structure, union or array slots.
785 \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
786 initializer, and continues until the next @`,' or @`;' which is not within
787 nested brackets. Bare initializers are suitable for initializing scalar
788 slots, such as pointers or integers, and strings.
789 \end{itemize}
790
791 \subsubsection{Message items} \label{sec:syntax.class.message}
792
793 \begin{grammar}
794 <message-item> ::=
795 $[$<properties>$]$
796 <declaration-specifier>$^+$ <declarator> $[$<method-body>$]$
797 \end{grammar}
798
799 \subsubsection{Method items} \label{sec:syntax.class.method}
800
801 \begin{grammar}
802 <method-item> ::=
803 $[$<properties>$]$
804 <declaration-specifier>$^+$ <declarator> <method-body>
805
806 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
807 \end{grammar}
808
809 %%%--------------------------------------------------------------------------
810 \section{Class objects}
811
812 \begin{listing}
813 typedef struct SodClass__ichain_obj SodClass;
814
815 struct sod_chain {
816 size_t n_classes; /* Number of classes in chain */
817 const SodClass *const *classes; /* Vector of classes, head first */
818 size_t off_ichain; /* Offset of ichain from instance base */
819 const struct sod_vtable *vt; /* Vtable pointer for chain */
820 size_t ichainsz; /* Size of the ichain structure */
821 };
822
823 struct sod_vtable {
824 SodClass *_class; /* Pointer to instance's class */
825 size_t _base; /* Offset to instance base */
826 };
827
/* Instance slots of a class object, grouped by purpose */
828 struct SodClass__islots {
829
830 /* Basic information */
831 const char *name; /* The class's name as a string */
832 const char *nick; /* The nickname as a string */
833
834 /* Instance allocation and initialization */
835 size_t instsz; /* Instance layout size in bytes */
836 void *(*imprint)(void *); /* Stamp instance with vtable ptrs */
837 void *(*init)(void *); /* Initialize instance */
838
839 /* Superclass structure */
840 size_t n_supers; /* Number of direct superclasses */
841 const SodClass *const *supers; /* Vector of direct superclasses */
842 size_t n_cpl; /* Length of class precedence list */
843 const SodClass *const *cpl; /* Vector for class precedence list */
844
845 /* Chain structure */
846 const SodClass *link; /* Link to next class in chain */
847 const SodClass *head; /* Pointer to head of chain */
848 size_t level; /* Index of class in its chain */
849 size_t n_chains; /* Number of superclass chains */
850 const struct sod_chain *chains; /* Vector of chain structures */
851
852 /* Layout */
853 size_t off_islots; /* Offset of islots from ichain base */
854 size_t islotsz; /* Size of instance slots */
855 };
856
857 struct SodClass__ichain_obj {
858 const SodClass__vt_obj *_vt;
859 struct SodClass__islots cls;
860 };
861
862 struct sod_instance {
863 struct sod_vtable *_vt;
864 };
865 \end{listing}
866
867 \begin{listing}
/* Convert OBJ, a pointer to (some ichain of) an instance, into a pointer to
 * the ichain containing class CLS.  Returns null if the instance's class has
 * no chain headed by CLS's chain head with CLS at its proper level.
 */
868 void *sod_convert(const SodClass *cls, const void *obj)
869 {
870 const struct sod_instance *inst = obj;
871 const SodClass *real = inst->_vt->_class;
872 const struct sod_chain *chain;
873 size_t i, index;
874
875 for (i = 0; i < real->cls.n_chains; i++) {
876 chain = &real->cls.chains[i];
877 if (chain->classes[0] == cls->cls.head) {
878 index = cls->cls.level;
879 if (index < chain->n_classes && chain->classes[index] == cls)
880 return ((char *)obj - inst->_vt->_base + chain->off_ichain);
881 else
882 return (0);
883 }
884 }
885 return (0);
886 }
887 \end{listing}
888
889 %%%--------------------------------------------------------------------------
890 \section{Classes}
891
892 \subsection{Classes and superclasses}
893
894 A @<full-class-definition> must list one or more existing classes to be the
895 \emph{direct superclasses} for the new class being defined. We make the
896 following definitions.
897 \begin{itemize}
898 \item The \emph{superclasses} of a class consist of the class itself together
899 with the superclasses of its direct superclasses.
900 \item The \emph{proper superclasses} of a class are its superclasses other
901 than itself.
902 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
903 \emph{subclass} of $C$.
904 \end{itemize}
905 The predefined class @|SodObject| has no direct superclasses; it is unique in
906 this respect. All classes are subclasses of @|SodObject|.
907
908 \subsection{The class precedence list}
909
910 Let $C$ be a class. The superclasses of $C$ form a directed graph, with an
911 edge from each class to each of its direct superclasses. This is the
912 \emph{superclass graph of $C$}.
913
914 In order to resolve inheritance of items, we define a \emph{class precedence
915 list} (or CPL) for each class, which imposes a total order on that class's
916 superclasses. The default algorithm for computing the CPL is the \emph{C3}
917 algorithm~\cite{fixme-c3}, though extensions may implement other algorithms.
918
919 The default algorithm works as follows. Let $C$ be the class whose CPL we
920 are to compute. Let $X$ and $Y$ be two of $C$'s superclasses.
921 \begin{itemize}
922 \item $C$ must appear first in the CPL.
923 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
924 superclasses, then $X$ appears before $Y$ in $C$'s CPL.
925 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
926 of $X$ and $Y$ has a subclass which appears further left in the list of
927 $C$'s direct superclasses will appear earlier in the CPL.
928 \end{itemize}
929 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
930 superclasses of the same direct superclass of $C$ then that direct
931 superclass's CPL will order $X$ and $Y$.
932
933 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
934 $X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from
935 context then we omit it, saying simply that $X$ is more specific than $Y$.
936
937 \subsection{Instances and metaclasses}
938
939 A class defines the structure and behaviour of its \emph{instances}: run-time
940 objects created (possibly) dynamically. An instance is an instance of only
941 one class, though structurally it may be used in place of an instance of any
942 of that class's superclasses. It is possible, with care, to change the class
943 of an instance at run-time.
944
945 Classes are themselves represented as instances -- called \emph{class
946 objects} -- in the running program. Being instances, they have a class,
947 called the \emph{metaclass}. The metaclass defines the structure and
948 behaviour of the class object.
949
950 The predefined class @|SodClass| is the default metaclass for new classes.
951 @|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is
952 its own metaclass.
953
954 \subsection{Items and inheritance}
955
956 A class definition also declares \emph{slots}, \emph{messages},
957 \emph{initializers} and \emph{methods} -- collectively referred to as
958 \emph{items}. In addition to the items declared in the class definition --
959 the class's \emph{direct items} -- a class also \emph{inherits} items from
960 its superclasses.
961
962 The precise rules for item inheritance vary according to the kinds of items
963 involved.
964
965 Some object systems have a notion of `repeated inheritance': if there are
966 multiple paths in the superclass graph from a class to one of its
967 superclasses then items defined in that superclass may appear duplicated in
968 the subclass. Sod does not have this notion.
969
970 \subsubsection{Slots}
971 A \emph{slot} is a unit of state. In other object systems, slots may be
972 called `fields', `member variables', or `instance variables'.
973
974 A slot has a \emph{name} and a \emph{type}. The name serves only to
975 distinguish the slot from other direct slots defined by the same class. A
976 class inherits all of its proper superclasses' slots. Slots inherited from
977 superclasses do not conflict with each other or with direct slots, even if
978 they have the same names.
979
980 At run-time, each instance of the class holds a separate value for each slot,
981 whether direct or inherited. Changing the value of an instance's slot
982 doesn't affect other instances.
983
984 \subsubsection{Initializers}
985 Mumble.
986
987 \subsubsection{Messages}
988 A \emph{message} is the stimulus for behaviour. In Sod, a class must define,
989 statically, the name and format of the messages it is able to receive and the
990 values it will return in reply. In this respect, a message is similar to an
991 `abstract member function' or `interface member function' in other object
992 systems.
993
994 Like slots, a message has a \emph{name} and a \emph{type}. Again, the name
995 serves only to distinguish the message from other direct messages defined by
996 the same class. Messages inherited from superclasses do not conflict with
997 each other or with direct messages, even if they have the same name.
998
999 At run-time, one sends a message to an instance by invoking a function
1000 obtained from the instance's \emph{vtable}; see \xref{sec:fixme-vtable}.
1001
1002 \subsubsection{Methods}
1003 A \emph{method} is a unit of behaviour. In other object systems, methods may
1004 be called `member functions'.
1005
1006 A method is associated with a message. When a message is received by an
1007 instance, all of the methods associated with that message on the instance's
1008 class or any of its superclasses are \emph{applicable}. The details of how
1009 the applicable methods are invoked are described fully in
1010 \xref{sec:fixme-method-combination}.
1011
1012 \subsection{Chains and instance layout}
1013
1014 \include{sod-backg}
1015 \include{sod-protocol}
1016
1017 \end{document}
1018 \f
1019 %%% Local variables:
1020 %%% mode: LaTeX
1021 %%% TeX-PDF-mode: t
1022 %%% End: