Commit | Line | Data |
---|---|---|
1f1d88f5 MW |
1 | \documentclass[noarticle]{strayman} |
2 | ||
3 | \usepackage[T1]{fontenc} | |
4 | \usepackage[utf8]{inputenc} | |
5 | \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts} | |
fca95fc1 | 6 | \usepackage{tikz} |
1f1d88f5 MW |
7 | \usepackage{syntax} |
8 | \usepackage{sverb} | |
dea4d055 | 9 | \usepackage{mdwtab} |
fca95fc1 MW |
10 | \usepackage[mdwmargin]{mdwthm} |
11 | \usepackage{amssymb} | |
dea4d055 | 12 | \usepackage{footnote} |
1f1d88f5 MW |
13 | \usepackage{at} |
14 | \usepackage{mdwref} | |
15 | ||
16 | \title{A Sensible Object Design for C} | |
17 | \author{Mark Wooding} | |
18 | ||
dea4d055 MW |
19 | \makeatletter |
20 | ||
21 | \errorcontextlines999 | |
22 | ||
1f1d88f5 MW |
23 | \def\syntleft{\normalfont\itshape} |
24 | \let\syntright\empty | |
25 | ||
dea4d055 MW |
26 | \let\codeface\sffamily |
27 | ||
28 | \def\ulitleft{\normalfont\codeface} | |
1f1d88f5 MW |
29 | \let\ulitright\empty |
30 | ||
31 | \let\listingsize\relax | |
32 | ||
33 | \let\epsilon\varepsilon | |
34 | ||
dea4d055 MW |
35 | \atdef <#1>{\synt{#1}\@scripts} |
36 | \atdef "#1"{\lit*{#1}\@scripts} | |
37 | \atdef `#1'{\lit{#1}\@scripts} | |
38 | \atdef |#1|{\textsf{#1}\@scripts} | |
39 | \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i} | |
40 | \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa% | |
41 | \expandafter\@firstoftwo\expandafter\@scripts% | |
42 | \else\@tempa\expandafter\@scripts\fi} | |
43 | \atdef [{\dbl@maybe[} | |
44 | \atdef ]{\dbl@maybe]} | |
45 | \atdef {{\m@maybe\{\@scripts} | |
46 | \atdef }{\m@maybe\}\@scripts} | |
47 | \atdef ({\m@maybe(\@scripts} | |
48 | \atdef ){\m@maybe)\@scripts} | |
49 | \atdef !{\m@maybe|\@scripts} | |
50 | \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad} | |
51 | \let\m@maybe@end\relax | |
52 | \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi} | |
53 | \def\@scripts{\futurelet\@ch\@scripts@i} | |
1f1d88f5 | 54 | |
fca95fc1 MW |
55 | \def\chain#1#2{\mathsf{ch}_{#1}(#2)} |
56 | \def\chainhead#1#2{\mathsf{hd}_{#1}(#2)} | |
57 | \def\chaintail#1#2{\mathsf{tl}_{#1}(#2)} | |
58 | ||
59 | \let\implies\Rightarrow | |
60 | ||
dea4d055 | 61 | \atdef ;#1\\{\normalfont\itshape;#1\\} |
239fa5bd MW |
62 | \let\@@grammar\grammar |
63 | \def\grammar{\def\textbar{\hbox{$|$}}\@@grammar} | |
dea4d055 MW |
64 | |
65 | \begingroup\lccode`\~=`\_\lowercase{\endgroup | |
66 | \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi% | |
67 | \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}} | |
68 | \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts} | |
69 | ||
70 | \def\Cplusplus{C\kern-\p@++} | |
1f1d88f5 MW |
71 | \def\Csharp{C\#} |
72 | \def\man#1#2{\textbf{#1}(#2)} | |
73 | ||
74 | \begingroup\lccode`\~=`\ | |
75 | \lowercase{ | |
76 | \endgroup | |
77 | \def\prog{% | |
dea4d055 | 78 | \codeface% |
1f1d88f5 | 79 | \quote% |
dea4d055 | 80 | \let\old@nl\\% |
1f1d88f5 MW |
81 | \obeylines% |
82 | \tabbing% | |
83 | \global\let~\\% | |
84 | \global\let\\\textbackslash% | |
85 | } | |
86 | \def\endprog{% | |
87 | \endtabbing% | |
dea4d055 | 88 | \global\let\\\old@nl% |
1f1d88f5 MW |
89 | \endquote% |
90 | }} | |
91 | ||
dea4d055 MW |
92 | \newenvironment{boxy}[1][\q@]{% |
93 | \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex% | |
94 | \medskip% | |
95 | \vbox\bgroup\hrule\hbox\bgroup\vrule% | |
96 | \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@% | |
97 | \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi% | |
98 | }{% | |
99 | \endminipage\hskip1ex\egroup\vskip1ex\egroup% | |
100 | \vrule\egroup\hrule\egroup% | |
101 | \medskip% | |
102 | } | |
103 | ||
104 | \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}} | |
105 | \def\describecategoryname#1{% | |
106 | \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname% | |
107 | \ifx\@tempa\relax#1\else\@tempa\fi} | |
108 | \definedescribecategory{fun}{function} | |
109 | \definedescribecategory{gf}{generic function} | |
110 | \definedescribecategory{var}{variable} | |
111 | \definedescribecategory{const}{constant} | |
112 | \definedescribecategory{meth}{primary method} | |
113 | \definedescribecategory{ar-meth}{around-method} | |
114 | \definedescribecategory{be-meth}{before-method} | |
115 | \definedescribecategory{af-meth}{after-method} | |
116 | \definedescribecategory{cls}{class} | |
117 | \definedescribecategory{ty}{type} | |
118 | \definedescribecategory{mac}{macro} | |
119 | ||
120 | \def\q@{\q@} | |
121 | \newenvironment{describe}[3][\q@]{% | |
122 | \normalfont% | |
123 | \par\goodbreak% | |
124 | \vspace{\bigskipamount}% | |
125 | \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}% | |
126 | \dimen@\linewidth\advance\dimen@-\wd\z@% | |
127 | \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}% | |
128 | \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \\\fi% | |
129 | \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}% | |
130 | @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}% | |
131 | \noindent\csname tabular*\expandafter\endcsname\@temp% | |
132 | \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular% | |
133 | % \@afterheading% | |
134 | \list{}{\rightmargin\z@}\item% | |
135 | }{% | |
136 | \endlist% | |
137 | } | |
138 | ||
139 | \def\push{\quad\=\+\kill} | |
140 | ||
1f1d88f5 MW |
141 | \begin{document} |
142 | ||
143 | \maketitle | |
144 | ||
145 | \include{sod-tut} | |
146 | ||
147 | %%%-------------------------------------------------------------------------- | |
148 | \chapter{Internals} | |
149 | ||
150 | \section{Generated names} | |
151 | ||
152 | The generated names for functions and objects related to a class are | |
153 | constructed systematically so as not to interfere with each other. The rules | |
154 | on class, slot and message naming exist so as to ensure that the generated | |
155 | names don't collide with each other. | |
156 | ||
157 | The following notation is used in this section. | |
158 | \begin{description} | |
159 | \item[@<class>] The full name of the `focus' class: the one for which we are | |
160 | generating name. | |
161 | \item[@<super-nick>] The nickname of a superclass. | |
162 | \item[@<head-nick>] The nickname of the chain-head class of the chain | |
163 | in question. | |
164 | \end{description} | |
165 | ||
166 | \subsection{Instance layout} | |
167 | ||
168 | %%%-------------------------------------------------------------------------- | |
169 | \section{Syntax} | |
170 | \label{sec:syntax} | |
171 | ||
172 | Fortunately, Sod is syntactically quite simple. I've used a little slightly | |
fca95fc1 MW |
173 | unusual notation in order to make the presentation easier to read. For any |
174 | nonterminal $x$: | |
1f1d88f5 MW |
175 | \begin{itemize} |
176 | \item $\epsilon$ denotes the empty nonterminal: | |
177 | \begin{quote} | |
178 | $\epsilon$ ::= | |
179 | \end{quote} | |
fca95fc1 | 180 | \item @[$x$@] means an optional $x$: |
1f1d88f5 | 181 | \begin{quote} |
fca95fc1 | 182 | \syntax{@[$x$@] ::= $\epsilon$ @! $x$} |
1f1d88f5 | 183 | \end{quote} |
fca95fc1 | 184 | \item $x^*$ means a sequence of zero or more $x$s: |
1f1d88f5 | 185 | \begin{quote} |
fca95fc1 | 186 | \syntax{$x^*$ ::= $\epsilon$ @! $x^*$ $x$} |
1f1d88f5 | 187 | \end{quote} |
fca95fc1 | 188 | \item $x^+$ means a sequence of one or more $x$s: |
1f1d88f5 | 189 | \begin{quote} |
fca95fc1 | 190 | \syntax{$x^+$ ::= $x$ $x^*$} |
1f1d88f5 | 191 | \end{quote} |
fca95fc1 | 192 | \item $x$@<-list> means a sequence of one or more $x$s separated |
1f1d88f5 MW |
193 | by commas: |
194 | \begin{quote} | |
fca95fc1 | 195 | \syntax{$x$<-list> ::= $x$ @! $x$<-list> "," $x$} |
1f1d88f5 MW |
196 | \end{quote} |
197 | \end{itemize} | |
198 | ||
199 | \subsection{Lexical syntax} | |
200 | \label{sec:syntax.lex} | |
201 | ||
202 | Whitespace and comments are discarded. The remaining characters are | |
203 | collected into tokens according to the following syntax. | |
204 | ||
205 | \begin{grammar} | |
206 | <token> ::= <identifier> | |
1f1d88f5 MW |
207 | \alt <string-literal> |
208 | \alt <char-literal> | |
209 | \alt <integer-literal> | |
210 | \alt <punctuation> | |
211 | \end{grammar} | |
212 | ||
fca95fc1 MW |
213 | This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal |
214 | munch} rule: at each stage we take the longest sequence of characters which | |
215 | could be a token. | |
1f1d88f5 MW |
216 | |
217 | \subsubsection{Identifiers} \label{sec:syntax.lex.id} | |
218 | ||
219 | \begin{grammar} | |
239fa5bd | 220 | <identifier> ::= <id-start-char> @<id-body-char>^* |
1f1d88f5 | 221 | |
239fa5bd | 222 | <id-start-char> ::= <alpha-char> | "_" |
1f1d88f5 | 223 | |
239fa5bd | 224 | <id-body-char> ::= <id-start-char> @! <digit-char> |
1f1d88f5 | 225 | |
239fa5bd MW |
226 | <alpha-char> ::= "A" | "B" | \dots\ | "Z" |
227 | \alt "a" | "b" | \dots\ | "z" | |
1f1d88f5 MW |
228 | \alt <extended-alpha-char> |
229 | ||
239fa5bd | 230 | <digit-char> ::= "0" | <nonzero-digit-char> |
1f1d88f5 | 231 | |
239fa5bd | 232 | <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9" |
1f1d88f5 MW |
233 | \end{grammar} |
234 | ||
235 | The precise definition of @<alpha-char> is left to the function | |
236 | \textsf{alpha-char-p} in the hosting Lisp system. For portability, | |
237 | programmers are encouraged to limit themselves to the standard ASCII letters. | |
238 | ||
fca95fc1 MW |
239 | There are no reserved words at the lexical level, but the higher-level syntax |
240 | recognizes certain identifiers as \emph{keywords} in some contexts. There is | |
241 | also an ambiguity (inherited from C) in the declaration syntax which is | |
242 | settled by distinguishing type names from other identifiers at a lexical | |
243 | level. | |
1f1d88f5 MW |
244 | |
245 | \subsubsection{String and character literals} \label{sec:syntax.lex.string} | |
246 | ||
247 | \begin{grammar} | |
239fa5bd | 248 | <string-literal> ::= "\"" @<string-literal-char>^* "\"" |
1f1d88f5 MW |
249 | |
250 | <char-literal> ::= "'" <char-literal-char> "'" | |
251 | ||
252 | <string-literal-char> ::= any character other than "\\" or "\"" | |
253 | \alt "\\" <char> | |
254 | ||
255 | <char-literal-char> ::= any character other than "\\" or "'" | |
256 | \alt "\\" <char> | |
257 | ||
258 | <char> ::= any single character | |
259 | \end{grammar} | |
260 | ||
261 | The syntax for string and character literals differs from~C. In particular, | |
262 | escape sequences such as @`\textbackslash n' are not recognized. The use | |
263 | of string and character literals in Sod, outside of C~fragments, is limited, | |
264 | and the simple syntax seems adequate. For the sake of future compatibility, | |
265 | the use of character sequences which resemble C escape sequences is | |
266 | discouraged. | |
267 | ||
268 | \subsubsection{Integer literals} \label{sec:syntax.lex.int} | |
269 | ||
270 | \begin{grammar} | |
271 | <integer-literal> ::= <decimal-integer> | |
272 | \alt <binary-integer> | |
273 | \alt <octal-integer> | |
274 | \alt <hex-integer> | |
275 | ||
239fa5bd | 276 | <decimal-integer> ::= <nonzero-digit-char> @<digit-char>^* |
1f1d88f5 | 277 | |
239fa5bd | 278 | <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+ |
1f1d88f5 | 279 | |
239fa5bd | 280 | <binary-digit-char> ::= "0" | "1" |
1f1d88f5 | 281 | |
239fa5bd | 282 | <octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+ |
1f1d88f5 | 283 | |
239fa5bd | 284 | <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7" |
1f1d88f5 | 285 | |
239fa5bd | 286 | <hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+ |
1f1d88f5 MW |
287 | |
288 | <hex-digit-char> ::= <digit-char> | |
239fa5bd MW |
289 | \alt "A" | "B" | "C" | "D" | "E" | "F" |
290 | \alt "a" | "b" | "c" | "d" | "e" | "f" | |
1f1d88f5 MW |
291 | \end{grammar} |
292 | ||
293 | Sod understands only integers, not floating-point numbers; its integer syntax | |
294 | goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for | |
295 | binary. However, length and signedness indicators are not permitted. | |
296 | ||
297 | \subsubsection{Punctuation} \label{sec:syntax.lex.punct} | |
298 | ||
299 | \begin{grammar} | |
fca95fc1 | 300 | <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'" |
1f1d88f5 MW |
301 | \end{grammar} |
302 | ||
1f1d88f5 MW |
303 | \subsubsection{Comments} \label{sec:lex-comment} |
304 | ||
305 | \begin{grammar} | |
306 | <comment> ::= <block-comment> | |
307 | \alt <line-comment> | |
308 | ||
309 | <block-comment> ::= | |
310 | "/*" | |
239fa5bd MW |
311 | @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^* |
312 | @<star>^* | |
1f1d88f5 MW |
313 | "*/" |
314 | ||
315 | <star> ::= "*" | |
316 | ||
317 | <not-star> ::= any character other than "*" | |
318 | ||
319 | <not-star-or-slash> ::= any character other than "*" or "/" | |
320 | ||
239fa5bd | 321 | <line-comment> ::= "//" @<not-newline>^* <newline> |
1f1d88f5 MW |
322 | |
323 | <newline> ::= a newline character | |
324 | ||
325 | <not-newline> ::= any character other than newline | |
326 | \end{grammar} | |
327 | ||
328 | Comments are exactly as in C99: both traditional block comments `\texttt{/*} | |
329 | \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are | |
330 | permitted and ignored. | |
331 | ||
332 | \subsection{Special nonterminals} | |
333 | \label{sec:special-nonterminals} | |
334 | ||
335 | Aside from the lexical syntax presented above (\xref{sec:lexical-syntax}), | |
336 | two special nonterminals occur in the module syntax. | |
337 | ||
338 | \subsubsection{S-expressions} \label{sec:syntax-sexp} | |
339 | ||
340 | \begin{grammar} | |
341 | <s-expression> ::= an S-expression, as parsed by the Lisp reader | |
342 | \end{grammar} | |
343 | ||
344 | When an S-expression is expected, the Sod parser simply calls the host Lisp | |
345 | system's \textsf{read} function. Sod modules are permitted to modify the | |
346 | read table to extend the S-expression syntax. | |
347 | ||
348 | S-expressions are self-delimiting, so no end-marker is needed. | |
349 | ||
350 | \subsubsection{C fragments} \label{sec:syntax.lex.cfrag} | |
351 | ||
352 | \begin{grammar} | |
353 | <c-fragment> ::= a sequence of C tokens, with matching brackets | |
354 | \end{grammar} | |
355 | ||
356 | Sequences of C code are simply stored and written to the output unchanged | |
357 | during translation. They are read using a simple scanner which nonetheless | |
358 | understands C comments and string and character literals. | |
359 | ||
360 | A C fragment is terminated by one of a small number of delimiter characters | |
361 | determined by the immediately surrounding context -- usually a closing brace | |
362 | or bracket. The first such delimiter character which is not enclosed in | |
363 | brackets, braces or parenthesis ends the fragment. | |
364 | ||
365 | \subsection{Module syntax} \label{sec:syntax-module} | |
366 | ||
367 | \begin{grammar} | |
239fa5bd | 368 | <module> ::= @<definition>^* |
1f1d88f5 MW |
369 | |
370 | <definition> ::= <import-definition> | |
371 | \alt <load-definition> | |
372 | \alt <lisp-definition> | |
373 | \alt <code-definition> | |
374 | \alt <typename-definition> | |
375 | \alt <class-definition> | |
376 | \end{grammar} | |
377 | ||
378 | A module is the top-level syntactic item. A module consists of a sequence of | |
379 | definitions. | |
380 | ||
381 | \subsection{Simple definitions} \label{sec:syntax.defs} | |
382 | ||
383 | \subsubsection{Importing modules} \label{sec:syntax.defs.import} | |
384 | ||
385 | \begin{grammar} | |
386 | <import-definition> ::= "import" <string> ";" | |
387 | \end{grammar} | |
388 | ||
389 | The module named @<string> is processed and its definitions made available. | |
390 | ||
391 | A search is made for a module source file as follows. | |
392 | \begin{itemize} | |
393 | \item The module name @<string> is converted into a filename by appending | |
394 | @`.sod', if it has no extension already.\footnote{% | |
395 | Technically, what happens is \textsf{(merge-pathnames name (make-pathname | |
396 | :type "SOD" :case :common))}, so exactly what this means varies | |
397 | according to the host system.} % | |
398 | \item The file is looked for relative to the directory containing the | |
399 | importing module. | |
400 | \item If that fails, then the file is looked for in each directory on the | |
401 | module search path in turn. | |
402 | \item If the file still isn't found, an error is reported and the import | |
403 | fails. | |
404 | \end{itemize} | |
405 | At this point, if the file has previously been imported, nothing further | |
406 | happens.\footnote{% | |
407 | This check is done using \textsf{truename}, so it should see through simple | |
408 | tricks like symbolic links. However, it may be confused by fancy things | |
409 | like bind mounts and so on.} % | |
410 | ||
411 | Recursive imports, either direct or indirect, are an error. | |
412 | ||
413 | \subsubsection{Loading extensions} \label{sec:syntax.defs.load} | |
414 | ||
415 | \begin{grammar} | |
416 | <load-definition> ::= "load" <string> ";" | |
417 | \end{grammar} | |
418 | ||
419 | The Lisp file named @<string> is loaded and evaluated. | |
420 | ||
421 | A search is made for a Lisp source file as follows. | |
422 | \begin{itemize} | |
423 | \item The name @<string> is converted into a filename by appending @`.lisp', | |
424 | if it has no extension already.\footnote{% | |
425 | Technically, what happens is \textsf{(merge-pathnames name (make-pathname | |
426 | :type "LISP" :case :common))}, so exactly what this means varies | |
427 | according to the host system.} % | |
428 | \item A search is then made in the same manner as for module imports | |
429 | (\xref{sec:syntax-module}). | |
430 | \end{itemize} | |
431 | If the file is found, it is loaded using the host Lisp's \textsf{load} | |
432 | function. | |
433 | ||
434 | Note that Sod doesn't attempt to compile Lisp files, or even to look for | |
435 | existing compiled files. The right way to package a substantial extension to | |
436 | the Sod translator is to provide the extension as a standard ASDF system (or | |
437 | similar) and leave a dropping @"foo-extension.lisp" in the module path saying | |
438 | something like | |
fca95fc1 MW |
439 | \begin{quote} |
440 | \textsf{(asdf:load-system :foo-extension)} | |
441 | \end{quote} | |
1f1d88f5 MW |
442 | which will arrange for the extension to be compiled if necessary. |
443 | ||
444 | (This approach means that the language doesn't need to depend on any | |
445 | particular system definition facility. It's bad enough already that it | |
446 | depends on Common Lisp.) | |
447 | ||
448 | \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp} | |
449 | ||
450 | \begin{grammar} | |
451 | <lisp-definition> ::= "lisp" <s-expression> ";" | |
452 | \end{grammar} | |
453 | ||
454 | The @<s-expression> is evaluated immediately. It can do anything it likes. | |
455 | ||
456 | \textbf{Warning!} This means that hostile Sod modules are a security hazard. | |
457 | Lisp code can read and write files, start other programs, and make network | |
458 | connections. Don't install Sod modules from sources that you don't | |
459 | trust.\footnote{% | |
460 | Presumably you were going to run the corresponding code at some point, so | |
461 | this isn't as unusually scary as it sounds. But please be careful.} % | |
462 | ||
463 | \subsubsection{Declaring type names} \label{sec:syntax.defs.typename} | |
464 | ||
465 | \begin{grammar} | |
466 | <typename-definition> ::= | |
467 | "typename" <identifier-list> ";" | |
468 | \end{grammar} | |
469 | ||
470 | Each @<identifier> is declared as naming a C type. This is important because | |
471 | the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is | |
472 | done by distinguishing type names from other identifiers. | |
473 | ||
474 | Don't declare class names using @"typename"; use @"class" forward | |
475 | declarations instead. | |
476 | ||
477 | \subsection{Literal code} \label{sec:syntax-code} | |
478 | ||
479 | \begin{grammar} | |
480 | <code-definition> ::= | |
239fa5bd | 481 | "code" <identifier> ":" <identifier> @[<constraints>@] |
1f1d88f5 MW |
482 | "{" <c-fragment> "}" |
483 | ||
484 | <constraints> ::= "[" <constraint-list> "]" | |
485 | ||
239fa5bd | 486 | <constraint> ::= @<identifier>^+ |
1f1d88f5 MW |
487 | \end{grammar} |
488 | ||
489 | The @<c-fragment> will be output unchanged to one of the output files. | |
490 | ||
491 | The first @<identifier> is the symbolic name of an output file. Predefined | |
492 | output file names are @"c" and @"h", which are the implementation code and | |
493 | header file respectively; other output files can be defined by extensions. | |
494 | ||
495 | The second @<identifier> provides a name for the output item. Several C | |
496 | fragments can have the same name: they will be concatenated together in the | |
497 | order in which they were encountered. | |
498 | ||
499 | The @<constraints> provide a means for specifying where in the output file | |
500 | the output item should appear. (Note the two kinds of square brackets shown | |
501 | in the syntax: square brackets must appear around the constraints if they are | |
502 | present, but that they may be omitted.) Each comma-separated @<constraint> | |
503 | is a sequence of identifiers naming output items, and indicates that the | |
504 | output items must appear in the order given -- though the translator is free | |
505 | to insert additional items in between them. (The particular output items | |
506 | needn't be defined already -- indeed, they needn't be defined ever.) | |
507 | ||
508 | There is a predefined output item @"includes" in both the @"c" and @"h" | |
509 | output files which is a suitable place for inserting @"\#include" | |
510 | preprocessor directives in order to declare types and functions for use | |
511 | elsewhere in the generated output files. | |
512 | ||
513 | \subsection{Property sets} \label{sec:syntax.propset} | |
514 | ||
515 | \begin{grammar} | |
516 | <properties> ::= "[" <property-list> "]" | |
517 | ||
518 | <property> ::= <identifier> "=" <expression> | |
519 | \end{grammar} | |
520 | ||
521 | Property sets are a means for associating miscellaneous information with | |
522 | classes and related items. By using property sets, additional information | |
523 | can be passed to extensions without the need to introduce idiosyncratic | |
524 | syntax. | |
525 | ||
526 | A property has a name, given as an @<identifier>, and a value computed by | |
527 | evaluating an @<expression>. The value can be one of a number of types, | |
528 | though the only operators currently defined act on integer values only. | |
529 | ||
530 | \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr} | |
531 | ||
532 | \begin{grammar} | |
533 | <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term> | |
534 | ||
535 | <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor> | |
536 | ||
537 | <factor> ::= <primary> | "+" <factor> | "-" <factor> | |
538 | ||
539 | <primary> ::= | |
540 | <integer-literal> | <string-literal> | <char-literal> | <identifier> | |
541 | \alt "?" <s-expression> | |
542 | \alt "(" <expression> ")" | |
543 | \end{grammar} | |
544 | ||
545 | The arithmetic expression syntax is simple and standard; there are currently | |
546 | no bitwise, logical, or comparison operators. | |
547 | ||
548 | A @<primary> expression may be a literal or an identifier. Note that | |
549 | identifiers stand for themselves: they \emph{do not} denote values. For more | |
550 | fancy expressions, the syntax | |
551 | \begin{quote} | |
552 | @"?" @<s-expression> | |
553 | \end{quote} | |
554 | causes the @<s-expression> to be evaluated using the Lisp \textsf{eval} | |
555 | function. | |
556 | %%% FIXME crossref to extension docs | |
557 | ||
558 | \subsection{C types} \label{sec:syntax.c-types} | |
559 | ||
560 | Sod's syntax for C types closely mirrors the standard C syntax. A C type has | |
561 | two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In | |
562 | Sod, a type must contain at least one @<declaration-specifier> (i.e., | |
563 | `implicit @"int"' is forbidden), and storage-class specifiers are not | |
564 | recognized. | |
565 | ||
566 | \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec} | |
567 | ||
568 | \begin{grammar} | |
569 | <declaration-specifier> ::= <type-name> | |
570 | \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier> | |
571 | \alt "void" | "char" | "int" | "float" | "double" | |
572 | \alt "short" | "long" | |
573 | \alt "signed" | "unsigned" | |
574 | \alt <qualifier> | |
575 | ||
576 | <qualifier> ::= "const" | "volatile" | "restrict" | |
577 | ||
578 | <type-name> ::= <identifier> | |
579 | \end{grammar} | |
580 | ||
581 | A @<type-name> is an identifier which has been declared as being a type name, | |
582 | using the @"typename" or @"class" definitions. | |
583 | ||
584 | Declaration specifiers may appear in any order. However, not all | |
585 | combinations are permitted. A declaration specifier must consist of zero or | |
586 | more @<qualifiers>, and one of the following, up to reordering. | |
587 | \begin{itemize} | |
588 | \item @<type-name> | |
239fa5bd | 589 | \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier> |
1f1d88f5 MW |
590 | \item @"void" |
591 | \item @"char", @"unsigned char", @"signed char" | |
592 | \item @"short", @"unsigned short", @"signed short" | |
593 | \item @"short int", @"unsigned short int", @"signed short int" | |
594 | \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed" | |
595 | \item @"long", @"unsigned long", @"signed long" | |
596 | \item @"long int", @"unsigned long int", @"signed long int" | |
597 | \item @"long long", @"unsigned long long", @"signed long long" | |
598 | \item @"long long int", @"unsigned long long int", @"signed long long int" | |
599 | \item @"float", @"double", @"long double" | |
600 | \end{itemize} | |
601 | All of these have their usual C meanings. | |
602 | ||
603 | \subsubsection{Declarators} \label{sec:syntax.c-types.declarator} | |
604 | ||
605 | \begin{grammar} | |
ea578bb4 | 606 | <declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$ |
1f1d88f5 | 607 | |
ea578bb4 MW |
608 | <primary-declarator>$[k]$ ::= $k$ |
609 | \alt "(" <primary-declarator>$[k]$ ")" | |
610 | \alt <primary-declarator>$[k]$ @<declarator-suffix>^* | |
1f1d88f5 | 611 | |
239fa5bd | 612 | <pointer> ::= "*" @<qualifier>^* |
1f1d88f5 MW |
613 | |
614 | <declarator-suffix> ::= "[" <c-fragment> "]" | |
615 | \alt "(" <arguments> ")" | |
616 | ||
ea578bb4 | 617 | <arguments> ::= $\epsilon$ | "..." |
239fa5bd | 618 | \alt <argument-list> @["," "..."@] |
1f1d88f5 | 619 | |
239fa5bd | 620 | <argument> ::= @<declaration-specifier>^+ <argument-declarator> |
1f1d88f5 | 621 | |
ea578bb4 MW |
622 | <argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@] |
623 | ||
624 | <simple-declarator> ::= <declarator>@[<identifier>@] | |
1f1d88f5 | 625 | |
ea578bb4 | 626 | <dotted-name> ::= <identifier> "." <identifier> |
1f1d88f5 | 627 | |
ea578bb4 | 628 | <dotted-declarator> ::= <declarator>@[<dotted-name>@] |
1f1d88f5 MW |
629 | \end{grammar} |
630 | ||
631 | The declarator syntax is taken from C, but with some differences. | |
632 | \begin{itemize} | |
633 | \item Array dimensions are uninterpreted @<c-fragments>, terminated by a | |
634 | closing square bracket. This allows array dimensions to contain arbitrary | |
635 | constant expressions. | |
636 | \item A declarator may have either a single @<identifier> at its centre or a | |
637 | pair of @<identifier>s separated by a @`.'; this is used to refer to | |
638 | slots or messages defined in superclasses. | |
639 | \end{itemize} | |
640 | The remaining differences are (I hope) a matter of presentation rather than | |
641 | substance. | |
642 | ||
643 | \subsection{Defining classes} \label{sec:syntax.class} | |
644 | ||
645 | \begin{grammar} | |
646 | <class-definition> ::= <class-forward-declaration> | |
647 | \alt <full-class-definition> | |
648 | \end{grammar} | |
649 | ||
650 | \subsubsection{Forward declarations} \label{sec:class.class.forward} | |
651 | ||
652 | \begin{grammar} | |
653 | <class-forward-declaration> ::= "class" <identifier> ";" | |
654 | \end{grammar} | |
655 | ||
656 | A @<class-forward-declaration> informs Sod that an @<identifier> will be used | |
657 | to name a class which is currently undefined. Forward declarations are | |
658 | necessary in order to resolve certain kinds of circularity. For example, | |
659 | \begin{listing} | |
660 | class Sub; | |
661 | ||
662 | class Super : SodObject { | |
663 | Sub *sub; | |
664 | }; | |
665 | ||
666 | class Sub : Super { | |
667 | /* ... */ | |
668 | }; | |
669 | \end{listing} | |
670 | ||
671 | \subsubsection{Full class definitions} \label{sec:class.class.full} | |
672 | ||
673 | \begin{grammar} | |
674 | <full-class-definition> ::= | |
239fa5bd | 675 | @[<properties>@] |
1f1d88f5 | 676 | "class" <identifier> ":" <identifier-list> |
239fa5bd | 677 | "{" @<class-item>^* "}" |
1f1d88f5 MW |
678 | |
679 | <class-item> ::= <slot-item> ";" | |
680 | \alt <message-item> | |
681 | \alt <method-item> | |
682 | \alt <initializer-item> ";" | |
683 | \end{grammar} | |
684 | ||
685 | A full class definition provides a complete description of a class. | |
686 | ||
687 | The first @<identifier> gives the name of the class. It is an error to | |
688 | give the name of an existing class (other than a forward-referenced class), | |
689 | or an existing type name. It is conventional to give classes `MixedCase' | |
690 | names, to distinguish them from other kinds of identifiers. | |
691 | ||
692 | The @<identifier-list> names the direct superclasses for the new class. It | |
693 | is an error if any of these @<identifier>s does not name a defined class. | |
694 | ||
695 | The @<properties> provide additional information. The standard class | |
696 | properties are as follows. | |
697 | \begin{description} | |
698 | \item[@"lisp_class"] The name of the Lisp class to use within the translator | |
699 | to represent this class. The property value must be an identifier; the | |
700 | default is @"sod_class". Extensions may define classes with additional | |
701 | behaviour, and may recognize additional class properties. | |
702 | \item[@"metaclass"] The name of the Sod metaclass for this class. In the | |
703 | generated code, a class is itself an instance of another class -- its | |
704 | \emph{metaclass}. The metaclass defines which slots the class will have, | |
705 | which messages it will respond to, and what its behaviour will be when it | |
706 | receives them. The property value must be an identifier naming a defined | |
707 | subclass of @"SodClass". The default metaclass is @"SodClass". | |
708 | %%% FIXME xref to theory | |
709 | \item[@"nick"] A nickname for the class, to be used to distinguish it from | |
710 | other classes in various limited contexts. The property value must be an | |
711 | identifier; the default is constructed by forcing the class name to | |
712 | lower-case. | |
713 | \end{description} | |
714 | ||
715 | The class body consists of a sequence of @<class-item>s enclosed in braces. | |
716 | These items are discussed on the following sections. | |
717 | ||
718 | \subsubsection{Slot items} \label{sec:sntax.class.slot} | |
719 | ||
720 | \begin{grammar} | |
721 | <slot-item> ::= | |
239fa5bd MW |
722 | @[<properties>@] |
723 | @<declaration-specifier>^+ <init-declarator-list> | |
1f1d88f5 | 724 | |
239fa5bd | 725 | <init-declarator> ::= <declarator> @["=" <initializer>@] |
1f1d88f5 MW |
726 | \end{grammar} |
727 | ||
728 | A @<slot-item> defines one or more slots. All instances of the class and any | |
729 | subclass will contain these slot, with the names and types given by the | |
730 | @<declaration-specifiers> and the @<declarators>. Slot declarators may not | |
731 | contain qualified identifiers. | |
732 | ||
733 | It is not possible to declare a slot with function type: such an item is | |
734 | interpreted as being a @<message-item> or @<method-item>. Pointers to | |
735 | functions are fine. | |
736 | ||
737 | An @<initializer>, if present, is treated as if a separate | |
738 | @<initializer-item> containing the slot name and initializer were present. | |
739 | For example, | |
740 | \begin{listing} | |
741 | [nick = eg] | |
742 | class Example : Super { | |
743 | int foo = 17; | |
744 | }; | |
745 | \end{listing} | |
746 | means the same as | |
747 | \begin{listing} | |
748 | [nick = eg] | |
749 | class Example : Super { | |
750 | int foo; | |
751 | eg.foo = 17; | |
752 | }; | |
753 | \end{listing} | |
754 | ||
755 | \subsubsection{Initializer items} \label{sec:syntax.class.init} | |
756 | ||
757 | \begin{grammar} | |
239fa5bd | 758 | <initializer-item> ::= @["class"@] <slot-initializer-list> |
1f1d88f5 MW |
759 | |
760 | <slot-initializer> ::= <qualified-identifier> "=" <initializer> | |
761 | ||
762 | <initializer> :: "{" <c-fragment> "}" | <c-fragment> | |
763 | \end{grammar} | |
764 | ||
765 | An @<initializer-item> provides an initial value for one or more slots. If | |
766 | prefixed by @"class", then the initial values are for class slots (i.e., | |
767 | slots of the class object itself); otherwise they are for instance slots. | |
768 | ||
769 | The first component of the @<qualified-identifier> must be the nickname of | |
770 | one of the class's superclasses (including itself); the second must be the | |
771 | name of a slot defined in that superclass. | |
772 | ||
773 | The initializer has one of two forms. | |
774 | \begin{itemize} | |
775 | \item A @<c-fragment> enclosed in braces denotes an aggregate initializer. | |
776 | This is suitable for initializing structure, union or array slots. | |
777 | \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare' | |
778 | initializer, and continues until the next @`,' or @`;' which is not within | |
779 | nested brackets. Bare initializers are suitable for initializing scalar | |
780 | slots, such as pointers or integers, and strings. | |
781 | \end{itemize} | |
782 | ||
783 | \subsubsection{Message items} \label{sec:syntax.class.message} | |
784 | ||
785 | \begin{grammar} | |
786 | <message-item> ::= | |
239fa5bd MW |
787 | @[<properties>@] |
788 | @<declaration-specifier>^+ <declarator> @[<method-body>@] | |
1f1d88f5 MW |
789 | \end{grammar} |
790 | ||
791 | \subsubsection{Method items} \label{sec:syntax.class.method} | |
792 | ||
793 | \begin{grammar} | |
794 | <method-item> ::= | |
239fa5bd MW |
795 | @[<properties>@] |
796 | @<declaration-specifier>^+ <declarator> <method-body> | |
1f1d88f5 MW |
797 | |
798 | <method-body> ::= "{" <c-fragment> "}" | "extern" ";" | |
799 | \end{grammar} | |
800 | ||
801 | %%%-------------------------------------------------------------------------- | |
802 | \section{Class objects} | |
803 | ||
804 | \begin{listing} | |
805 | typedef struct SodClass__ichain_obj SodClass; | |
806 | ||
807 | struct sod_chain { | |
808 | size_t n_classes; /* Number of classes in chain */ | |
809 | const SodClass *const *classes; /* Vector of classes, head first */ | |
810 | size_t off_ichain; /* Offset of ichain from instance base */ | |
811 | const struct sod_vtable *vt; /* Vtable pointer for chain */ | |
812 | size_t ichainsz; /* Size of the ichain structure */ | |
813 | }; | |
814 | ||
815 | struct sod_vtable { | |
816 | SodClass *_class; /* Pointer to instance's class */ | |
817 | size_t _base; /* Offset to instance base */ | |
818 | }; | |
819 | ||
820 | struct SodClass__islots { | |
821 | ||
822 | /* Basic information */ | |
823 | const char *name; /* The class's name as a string */ | |
824 | const char *nick; /* The nickname as a string */ | |
825 | ||
826 | /* Instance allocation and initialization */ | |
827 | size_t instsz; /* Instance layout size in bytes */ | |
828 | void *(*imprint)(void *); /* Stamp instance with vtable ptrs */ | |
829 | void *(*init)(void *); /* Initialize instance */ | |
830 | ||
831 | /* Superclass structure */ | |
832 | size_t n_supers; /* Number of direct superclasses */ | |
833 | const SodClass *const *supers; /* Vector of direct superclasses */ | |
834 | size_t n_cpl; /* Length of class precedence list */ | |
835 | const SodClass *const *cpl; /* Vector for class precedence list */ | |
836 | ||
837 | /* Chain structure */ | |
838 | const SodClass *link; /* Link to next class in chain */ | |
839 | const SodClass *head; /* Pointer to head of chain */ | |
840 | size_t level; /* Index of class in its chain */ | |
841 | size_t n_chains; /* Number of superclass chains */ | |
842 | const sod_chain *chains; /* Vector of chain structures */ | |
843 | ||
844 | /* Layout */ | |
845 | size_t off_islots; /* Offset of islots from ichain base */ | |
846 | size_t islotsz; /* Size of instance slots */ | |
847 | }; | |
848 | ||
849 | struct SodClass__ichain_obj { | |
850 | const SodClass__vt_obj *_vt; | |
851 | struct SodClass__islots cls; | |
852 | }; | |
853 | ||
854 | struct sod_instance { | |
855 | struct sod_vtable *_vt; | |
856 | }; | |
857 | \end{listing} | |
858 | ||
859 | \begin{listing} | |
860 | void *sod_convert(const SodClass *cls, const void *obj) | |
861 | { | |
862 | const struct sod_instance *inst = obj; | |
863 | const SodClass *real = inst->_vt->_cls; | |
864 | const struct sod_chain *chain; | |
865 | size_t i, index; | |
866 | ||
867 | for (i = 0; i < real->cls.n_chains; i++) { | |
868 | chain = &real->cls.chains[i]; | |
869 | if (chain->classes[0] == cls->cls.head) { | |
870 | index = cls->cls.index; | |
871 | if (index < chain->n_classes && chain->classes[index] == cls) | |
872 | return ((char *)cls - inst->_vt._base + chain->off_ichain); | |
873 | else | |
874 | return (0); | |
875 | } | |
876 | } | |
877 | return (0); | |
878 | } | |
879 | \end{listing} | |
880 | ||
881 | %%%-------------------------------------------------------------------------- | |
882 | \section{Classes} | |
fca95fc1 | 883 | \label{sec:class} |
1f1d88f5 | 884 | |
fca95fc1 | 885 | \subsection{Classes and superclasses} \label{sec:class.defs} |
1f1d88f5 MW |
886 | |
887 | A @<full-class-definition> must list one or more existing classes to be the | |
888 | \emph{direct superclasses} for the new class being defined. We make the | |
889 | following definitions. | |
890 | \begin{itemize} | |
891 | \item The \emph{superclasses} of a class consist of the class itself together | |
892 | with the superclasses of its direct superclasses. | |
893 | \item The \emph{proper superclasses} of a class are its superclasses other | |
894 | than itself. | |
895 | \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper}) | |
896 | \emph{subclass} of $C$. | |
897 | \end{itemize} | |
898 | The predefined class @|SodObject| has no direct superclasses; it is unique in | |
899 | this respect. All classes are subclasses of @|SodObject|. | |
900 | ||
fca95fc1 | 901 | \subsection{The class precedence list} \label{sec:class.cpl} |
1f1d88f5 MW |
902 | |
903 | Let $C$ be a class. The superclasses of $C$ form a directed graph, with an | |
904 | edge from each class to each of its direct superclasses. This is the | |
905 | \emph{superclass graph of $C$}. | |
906 | ||
907 | In order to resolve inheritance of items, we define a \emph{class precedence | |
908 | list} (or CPL) for each class, which imposes a total order on that class's | |
909 | superclasses. The default algorithm for computing the CPL is the \emph{C3} | |
910 | algorithm \cite{fixme-c3}, though extensions may implement other algorithms. | |
911 | ||
912 | The default algorithm works as follows. Let $C$ be the class whose CPL we | |
913 | are to compute. Let $X$ and $Y$ be two of $C$'s superclasses. | |
914 | \begin{itemize} | |
915 | \item $C$ must appear first in the CPL. | |
916 | \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct | |
917 | superclasses, then $X$ appears before $Y$ in the $C$'s CPL. | |
918 | \item If the above rules don't suffice to order $X$ and $Y$, then whichever | |
919 | of $X$ and $Y$ has a subclass which appears further left in the list of | |
920 | $C$'s direct superclasses will appear earlier in the CPL. | |
921 | \end{itemize} | |
922 | This last rule is sufficient to disambiguate because if both $X$ and $Y$ are | |
923 | superclasses of the same direct superclass of $C$ then that direct | |
924 | superclass's CPL will order $X$ and $Y$. | |
925 | ||
926 | We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if | |
927 | $X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from | |
928 | context then we omit it, saying simply that $X$ is more specific than $Y$. | |
929 | ||
fca95fc1 | 930 | \subsection{Instances and metaclasses} \label{sec:class.meta} |
1f1d88f5 MW |
931 | |
932 | A class defines the structure and behaviour of its \emph{instances}: run-time | |
933 | objects created (possibly) dynamically. An instance is an instance of only | |
934 | one class, though structurally it may be used in place of an instance of any | |
935 | of that class's superclasses. It is possible, with care, to change the class | |
936 | of an instance at run-time. | |
937 | ||
938 | Classes are themselves represented as instances -- called \emph{class | |
939 | objects} -- in the running program. Being instances, they have a class, | |
940 | called the \emph{metaclass}. The metaclass defines the structure and | |
941 | behaviour of the class object. | |
942 | ||
943 | The predefined class @|SodClass| is the default metaclass for new classes. | |
944 | @|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is | |
945 | its own metaclass. | |
946 | ||
fca95fc1 MW |
947 | To make matters more complicated, Sod has \emph{two} distinct metalevels: as |
948 | well as the runtime metalevel, as discussed above, there's a compile-time | |
949 | metalevel hosted in the Sod translator. Since Sod is written in Common Lisp, | |
950 | a Sod class's compile-time metaclass is a CLOS class. The usual compile-time | |
951 | metaclass is @|sod-class|. The compile-time metalevel is the subject of | |
952 | \xref{ch:api}. | |
953 | ||
954 | \subsection{Items and inheritance} \label{sec:class.inherit} | |
1f1d88f5 MW |
955 | |
956 | A class definition also declares \emph{slots}, \emph{messages}, | |
957 | \emph{initializers} and \emph{methods} -- collectively referred to as | |
958 | \emph{items}. In addition to the items declared in the class definition -- | |
959 | the class's \emph{direct items} -- a class also \emph{inherits} items from | |
960 | its superclasses. | |
961 | ||
962 | The precise rules for item inheritance vary according to the kinds of items | |
963 | involved. | |
964 | ||
965 | Some object systems have a notion of `repeated inheritance': if there are | |
966 | multiple paths in the superclass graph from a class to one of its | |
967 | superclasses then items defined in that superclass may appear duplicated in | |
968 | the subclass. Sod does not have this notion. | |
969 | ||
fca95fc1 | 970 | \subsubsection{Slots} \label{sec:class.inherit.slots} |
1f1d88f5 MW |
971 | A \emph{slot} is a unit of state. In other object systems, slots may be |
972 | called `fields', `member variables', or `instance variables'. | |
973 | ||
974 | A slot has a \emph{name} and a \emph{type}. The name serves only to | |
975 | distinguish the slot from other direct slots defined by the same class. A | |
976 | class inherits all of its proper superclasses' slots. Slots inherited from | |
977 | superclasses do not conflict with each other or with direct slots, even if | |
978 | they have the same names. | |
979 | ||
980 | At run-time, each instance of the class holds a separate value for each slot, | |
981 | whether direct or inherited. Changing the value of an instance's slot | |
982 | doesn't affect other instances. | |
983 | ||
fca95fc1 | 984 | \subsubsection{Initializers} \label{sec:class.inherit.init} |
1f1d88f5 MW |
985 | Mumble. |
986 | ||
fca95fc1 | 987 | \subsubsection{Messages} \label{sec:class.inherit.messages} |
1f1d88f5 MW |
988 | A \emph{message} is the stimulus for behaviour. In Sod, a class must define, |
989 | statically, the name and format of the messages it is able to receive and the | |
990 | values it will return in reply. In this respect, a message is similar to | |
991 | `abstract member functions' or `interface member functions' in other object | |
992 | systems. | |
993 | ||
994 | Like slots, a message has a \emph{name} and a \emph{type}. Again, the name | |
995 | serves only to distinguish the message from other direct messages defined by | |
996 | the same class. Messages inherited from superclasses do not conflict with | |
997 | each other or with direct messages, even if they have the same name. | |
998 | ||
999 | At run-time, one sends a message to an instance by invoking a function | |
1000 | obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}. | |
1001 | ||
fca95fc1 | 1002 | \subsubsection{Methods} \label{sec:class.inherit.methods} |
1f1d88f5 MW |
1003 | A \emph{method} is a unit of behaviour. In other object systems, methods may |
1004 | be called `member functions'. | |
1005 | ||
1006 | A method is associated with a message. When a message is received by an | |
1007 | instance, all of the methods associated with that message on the instance's | |
1008 | class or any of its superclasses are \emph{applicable}. The details of how | |
1009 | the applicable methods are invoked are described fully in | |
1010 | \xref{sec:fixme-method-combination}. | |
1011 | ||
fca95fc1 MW |
1012 | \subsection{Chains and instance layout} \label{sec:class.layout} |
1013 | ||
1014 | C is a rather low-level language, and in particular it exposes details of the | |
1015 | way data is laid out in memory. Since an instance of a class~$C$ should be | |
1016 | (at least in principle) usable anywhere an instance of some superclass $B | |
1017 | \succeq C$ is expected, this implies that an instance of the subclass $C$ | |
1018 | needs to contain within it a complete instance of each superclass $B$, laid | |
1019 | out according to the rules of instances of $B$, so that if we have (the | |
1020 | address of) an instance of $C$, we can easily construct a pointer to a thing | |
1021 | which looks like an instance of $B$ contained within it. | |
1022 | ||
1023 | Specifically, the information we need to retain for an instance of a | |
1024 | class~$C$ is: | |
1025 | \begin{itemize} | |
1026 | \item the values of each of the slots defined by $C$, including those defined | |
1027 | by superclasses; | |
1028 | \item information which will let us convert a pointer to $C$ into a pointer | |
1029 | to any superclass $B \succeq C$; | |
1030 | \item information which will let us call the appropriate effective method for | |
1031 | each message defined by $C$, including those defined by superclasses; and | |
1032 | \item some additional meta-level information, such as how to find the class | |
1033 | object for $C$ given (the address of) one of its instances. | |
1034 | \end{itemize} | |
1035 | ||
1036 | Observe that, while each distinct instance must clearly have its own storage | |
1037 | for slots, all instances of $C$ can share a single copy of the remaining | |
1038 | information. The individual instance only needs to keep a pointer to this | |
1039 | shared table, which, inspired by the similar structure in many \Cplusplus\ | |
1040 | ABIs, are called a \emph{vtable}. | |
1041 | ||
1042 | The easiest approach would be to decide that instances of $C$ are exactly | |
1043 | like instances of $B$, only with extra space at the end for the extra slots | |
1044 | which $C$ defines over and above those already existing in $B$. Conversion | |
1045 | is then trivial: a pointer to an instance of $C$ can be converted to a | |
1046 | pointer to an instance of some superclass $B$ simply by casting. Even though | |
1047 | the root class @|SodObject| doesn't have any slots at all, its instances will | |
1048 | still need a vtable so that you can find its class object: the address of the | |
1049 | vtable therefore needs to be at the very start of the instance structure. | |
1050 | Again, a vtable for a superclass would have a vtable for each of its | |
1051 | superclasses as a prefix, with new items added afterwards. | |
1052 | ||
1053 | This appealing approach works well for an object system which only permits | |
1054 | single inheritance of both state and behaviour. Alas, it breaks down when | |
1055 | multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$, | |
1056 | even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in | |
1057 | general, $B$'s instance structure will not be a prefix of $B'$'s, nor will | |
1058 | $B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$ | |
1059 | as a prefix. | |
1060 | ||
1061 | A (non-root) class may -- though need not -- have a distinguished \emph{link} | |
1062 | superclass, which need not be a direct superclass. Furthermore, each | |
1063 | class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of | |
1064 | $C$, there can be at most one other superclass of $C$ whose link superclass | |
1065 | is $A$.\footnote{% | |
1066 | That is, it's permitted for two classes $B$ and $B'$ to have the same link | |
1067 | superclass $A$, but $B$ and $B'$ can't then both be superclasses of the | |
1068 | same class $C$.} % | |
1069 | Therefore, the links partition the superclasses of~$C$ into nice linear | |
1070 | \emph{chains}, such that each superclass is a member of exactly one chain. | |
1071 | If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more | |
1072 | than that of $A$; otherwise $B$ is called a \emph{chain head} and its level | |
1073 | is zero. If the classes in a chain are written in a list, chain head first, | |
1074 | then the level of each class gives its index in the list. | |
1075 | ||
1076 | Chains therefore allow us to recover some of the linearity properties which | |
1077 | made layout simple in the case of single inheritance. The instance structure | |
1078 | for a class $C$ contains a substructure for each of $C$'s superclass chains; | |
1079 | a pointer to an object of class $C$ actually points to the substructure for | |
1080 | the chain containing $C$. The order of these substructures is unimportant | |
1081 | for now.\footnote{% | |
1082 | The chains appear in the order in which their most specific classes appear | |
1083 | in $C$'s class precedence list. This guarantees that the chain containing | |
1084 | $C$ itself appears first, so that a pointer to $C$'s instance structure is | |
1085 | actually a pointer to $C$'s chain substructure. Apart from that, it's a | |
1086 | simple, stable, but basically arbitrary choice which can't be changed | |
1087 | without breaking the ABI.} % | |
1088 | The substructure for each chain begins with a pointer to a vtable, followed | |
1089 | by a structure for each superclass in the chain containing the slots defined | |
1090 | by that superclass, with the chain head (least specific class) first. | |
1091 | ||
1092 | Suppose we have a pointer to (static) type $C$, and want to convert it into a | |
1093 | pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{% | |
1094 | In the more general case, we have a pointer to static type $C$, which | |
1095 | actually points to an object of some subclass $D$ of $C$, and want to | |
1096 | convert it into a pointer to type $B$. Such a conversion is called a | |
1097 | \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast} | |
1098 | otherwise. Downcasts and cross-casts require complicated run-time | |
1099 | checking, and can will fail unless $B$ is a superclass of $D$.} % | |
1100 | If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the | |
1101 | pointer value is already correct and it's only necessary to cast it | |
1102 | appropriately. Otherwise -- a \emph{cross-chain upcast} -- the pointer needs | |
1103 | to be adjusted to point to a different chain substructure. Since the lengths | |
1104 | and relative positions of the chain substructures vary between classes, the | |
1105 | adjustments are stored in the vtable. Cross-chain upcasts are therefore a | |
1106 | bit slower than in-chain upcasts. | |
1107 | ||
1108 | Each chain has its own separate vtable, because much of the metadata stored | |
1109 | in the vtable is specific to a particular chain. For example: | |
1110 | \begin{itemize} | |
1111 | \item offsets to other chains' substructures will vary depending on which | |
1112 | chain we start from; and | |
1113 | \item entry points to methods { | |
1114 | ||
1115 | %%%-------------------------------------------------------------------------- | |
1116 | \chapter{The Lisp programming interface} \label{ch:api} | |
1117 | ||
1118 | %% output for `h' files | |
1119 | %% | |
1120 | %% prologue | |
1121 | %% guard start | |
1122 | %% typedefs start | |
1123 | %% typedefs | |
1124 | %% typedefs end | |
1125 | %% includes start | |
1126 | %% includes | |
1127 | %% includes end | |
1128 | %% classes start | |
1129 | %% CLASS banner | |
1130 | %% CLASS islots start | |
1131 | %% CLASS islots slots | |
1132 | %% CLASS islots end | |
1133 | %% CLASS vtmsgs start | |
1134 | %% CLASS vtmsgs CLASS start | |
1135 | %% CLASS vtmsgs CLASS slots | |
1136 | %% CLASS vtmsgs CLASS end | |
1137 | %% CLASS vtmsgs end | |
1138 | %% CLASS vtables start | |
1139 | %% CLASS vtables CHAIN-HEAD start | |
1140 | %% CLASS vtables CHAIN-HEAD slots | |
1141 | %% CLASS vtables CHAIN-HEAD end | |
1142 | %% CLASS vtables end | |
1143 | %% CLASS vtable-externs | |
1144 | %% CLASS vtable-externs-after | |
1145 | %% CLASS methods start | |
1146 | %% CLASS methods | |
1147 | %% CLASS methods end | |
1148 | %% CLASS ichains start | |
1149 | %% CLASS ichains CHAIN-HEAD start | |
1150 | %% CLASS ichains CHAIN-HEAD slots | |
1151 | %% CLASS ichains CHAIN-HEAD end | |
1152 | %% CLASS ichains end | |
1153 | %% CLASS ilayout start | |
1154 | %% CLASS ilayout slots | |
1155 | %% CLASS ilayout end | |
1156 | %% CLASS conversions | |
1157 | %% CLASS object | |
1158 | %% classes end | |
1159 | %% guard end | |
1160 | %% epilogue | |
1161 | ||
1162 | %% output for `c' files | |
1163 | %% | |
1164 | %% prologue | |
1165 | %% includes start | |
1166 | %% includes | |
1167 | %% includes end | |
1168 | %% classes start | |
1169 | %% CLASS banner | |
1170 | %% CLASS direct-methods start | |
1171 | %% CLASS direct-methods METHOD start | |
1172 | %% CLASS direct-methods METHOD body | |
1173 | %% CLASS direct-methods METHOD end | |
1174 | %% CLASS direct-methods end | |
1175 | %% CLASS effective-methods | |
1176 | %% CLASS vtables start | |
1177 | %% CLASS vtables CHAIN-HEAD start | |
1178 | %% CLASS vtables CHAIN-HEAD class-pointer METACLASS | |
1179 | %% CLASS vtables CHAIN-HEAD base-offset | |
1180 | %% CLASS vtables CHAIN-HEAD chain-offset TARGET-HEAD | |
1181 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS start | |
1182 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS slots | |
1183 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS end | |
1184 | %% CLASS vtables CHAIN-HEAD end | |
1185 | %% CLASS vtables end | |
1186 | %% CLASS object prepare | |
1187 | %% CLASS object start | |
1188 | %% CLASS object CHAIN-HEAD ichain start | |
1189 | %% CLASS object SUPER slots start | |
1190 | %% CLASS object SUPER slots | |
1191 | %% CLASS object SUPER vtable | |
1192 | %% CLASS object SUPER slots end | |
1193 | %% CLASS object CHAIN-HEAD ichain end | |
1194 | %% CLASS object end | |
1195 | %% classes end | |
1196 | %% epilogue | |
1197 | ||
1198 | %%%-------------------------------------------------------------------------- | |
1f1d88f5 | 1199 | |
3be8c2bf | 1200 | \include{sod-backg} |
a07d8d00 | 1201 | \include{sod-protocol} |
1f1d88f5 MW |
1202 | |
1203 | \end{document} | |
1204 | \f | |
1205 | %%% Local variables: | |
1206 | %%% mode: LaTeX | |
1207 | %%% TeX-PDF-mode: t | |
1208 | %%% End: |