| 1 | \documentclass[noarticle]{strayman} |
| 2 | |
| 3 | \usepackage[T1]{fontenc} |
| 4 | \usepackage[utf8]{inputenc} |
| 5 | \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts} |
| 6 | \usepackage{tikz} |
| 7 | \usepackage{syntax} |
| 8 | \usepackage{sverb} |
| 9 | \usepackage{mdwtab} |
| 10 | \usepackage[mdwmargin]{mdwthm} |
| 11 | \usepackage{amssymb} |
| 12 | \usepackage{footnote} |
| 13 | \usepackage{at} |
| 14 | \usepackage{mdwref} |
| 15 | |
| 16 | \title{A Sensible Object Design for C} |
| 17 | \author{Mark Wooding} |
| 18 | |
| 19 | \makeatletter |
| 20 | |
| 21 | \errorcontextlines999 |
| 22 | |
| 23 | \def\syntleft{\normalfont\itshape} |
| 24 | \let\syntright\empty |
| 25 | |
| 26 | \let\codeface\sffamily |
| 27 | |
| 28 | \def\ulitleft{\normalfont\codeface} |
| 29 | \let\ulitright\empty |
| 30 | |
| 31 | \let\listingsize\relax |
| 32 | |
| 33 | \let\epsilon\varepsilon |
| 34 | |
| 35 | \atdef <#1>{\synt{#1}\@scripts} |
| 36 | \atdef "#1"{\lit*{#1}\@scripts} |
| 37 | \atdef `#1'{\lit{#1}\@scripts} |
| 38 | \atdef |#1|{\textsf{#1}\@scripts} |
| 39 | \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i} |
| 40 | \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa% |
| 41 | \expandafter\@firstoftwo\expandafter\@scripts% |
| 42 | \else\@tempa\expandafter\@scripts\fi} |
| 43 | \atdef [{\dbl@maybe[} |
| 44 | \atdef ]{\dbl@maybe]} |
| 45 | \atdef {{\m@maybe\{\@scripts} |
| 46 | \atdef }{\m@maybe\}\@scripts} |
| 47 | \atdef ({\m@maybe(\@scripts} |
| 48 | \atdef ){\m@maybe)\@scripts} |
| 49 | \atdef !{\m@maybe|\@scripts} |
| 50 | \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad} |
| 51 | \let\m@maybe@end\relax |
| 52 | \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi} |
| 53 | \def\@scripts{\futurelet\@ch\@scripts@i} |
| 54 | |
| 55 | \def\chain#1#2{\mathsf{ch}_{#1}(#2)} |
| 56 | \def\chainhead#1#2{\mathsf{hd}_{#1}(#2)} |
| 57 | \def\chaintail#1#2{\mathsf{tl}_{#1}(#2)} |
| 58 | |
| 59 | \let\implies\Rightarrow |
| 60 | |
| 61 | \atdef ;#1\\{\normalfont\itshape;#1\\} |
| 62 | \let\@@grammar\grammar |
| 63 | \def\grammar{\def\textbar{\hbox{$|$}}\@@grammar} |
| 64 | |
| 65 | \begingroup\lccode`\~=`\_\lowercase{\endgroup |
| 66 | \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi% |
| 67 | \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}} |
| 68 | \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts} |
| 69 | |
| 70 | \def\Cplusplus{C\kern-\p@++} |
| 71 | \def\Csharp{C\#} |
| 72 | \def\man#1#2{\textbf{#1}(#2)} |
| 73 | |
| 74 | \begingroup\lccode`\~=`\ |
| 75 | \lowercase{ |
| 76 | \endgroup |
| 77 | \def\prog{% |
| 78 | \codeface% |
| 79 | \quote% |
| 80 | \let\old@nl\\% |
| 81 | \obeylines% |
| 82 | \tabbing% |
| 83 | \global\let~\\% |
| 84 | \global\let\\\textbackslash% |
| 85 | } |
| 86 | \def\endprog{% |
| 87 | \endtabbing% |
| 88 | \global\let\\\old@nl% |
| 89 | \endquote% |
| 90 | }} |
| 91 | |
| 92 | \newenvironment{boxy}[1][\q@]{% |
| 93 | \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex% |
| 94 | \medskip% |
| 95 | \vbox\bgroup\hrule\hbox\bgroup\vrule% |
| 96 | \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@% |
| 97 | \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi% |
| 98 | }{% |
| 99 | \endminipage\hskip1ex\egroup\vskip1ex\egroup% |
| 100 | \vrule\egroup\hrule\egroup% |
| 101 | \medskip% |
| 102 | } |
| 103 | |
| 104 | \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}} |
| 105 | \def\describecategoryname#1{% |
| 106 | \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname% |
| 107 | \ifx\@tempa\relax#1\else\@tempa\fi} |
| 108 | \definedescribecategory{fun}{function} |
| 109 | \definedescribecategory{gf}{generic function} |
| 110 | \definedescribecategory{var}{variable} |
| 111 | \definedescribecategory{const}{constant} |
| 112 | \definedescribecategory{meth}{primary method} |
| 113 | \definedescribecategory{ar-meth}{around-method} |
| 114 | \definedescribecategory{be-meth}{before-method} |
| 115 | \definedescribecategory{af-meth}{after-method} |
| 116 | \definedescribecategory{cls}{class} |
| 117 | \definedescribecategory{ty}{type} |
| 118 | \definedescribecategory{mac}{macro} |
| 119 | |
| 120 | \def\q@{\q@} |
| 121 | \newenvironment{describe}[3][\q@]{% |
| 122 | \normalfont% |
| 123 | \par\goodbreak% |
| 124 | \vspace{\bigskipamount}% |
| 125 | \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}% |
| 126 | \dimen@\linewidth\advance\dimen@-\wd\z@% |
| 127 | \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}% |
| 128 | \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \\\fi% |
| 129 | \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}% |
| 130 | @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}% |
| 131 | \noindent\csname tabular*\expandafter\endcsname\@temp% |
| 132 | \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular% |
| 133 | % \@afterheading% |
| 134 | \list{}{\rightmargin\z@}\item% |
| 135 | }{% |
| 136 | \endlist% |
| 137 | } |
| 138 | |
| 139 | \def\push{\quad\=\+\kill} |
| 140 | |
| 141 | \begin{document} |
| 142 | |
| 143 | \maketitle |
| 144 | |
| 145 | \include{sod-tut} |
| 146 | |
| 147 | %%%-------------------------------------------------------------------------- |
| 148 | \chapter{Internals} |
| 149 | |
| 150 | \section{Generated names} |
| 151 | |
| 152 | The generated names for functions and objects related to a class are |
| 153 | constructed systematically so as not to interfere with each other. The rules |
| 154 | on class, slot and message naming exist so as to ensure that the generated |
| 155 | names don't collide with each other. |
| 156 | |
| 157 | The following notation is used in this section. |
| 158 | \begin{description} |
| 159 | \item[@<class>] The full name of the `focus' class: the one for which we are |
| 160 | generating name. |
| 161 | \item[@<super-nick>] The nickname of a superclass. |
| 162 | \item[@<head-nick>] The nickname of the chain-head class of the chain |
| 163 | in question. |
| 164 | \end{description} |
| 165 | |
| 166 | \subsection{Instance layout} |
| 167 | |
| 168 | %%%-------------------------------------------------------------------------- |
| 169 | \section{Syntax} |
| 170 | \label{sec:syntax} |
| 171 | |
| 172 | Fortunately, Sod is syntactically quite simple. I've used a little slightly |
| 173 | unusual notation in order to make the presentation easier to read. For any |
| 174 | nonterminal $x$: |
| 175 | \begin{itemize} |
| 176 | \item $\epsilon$ denotes the empty nonterminal: |
| 177 | \begin{quote} |
| 178 | $\epsilon$ ::= |
| 179 | \end{quote} |
| 180 | \item @[$x$@] means an optional $x$: |
| 181 | \begin{quote} |
| 182 | \syntax{@[$x$@] ::= $\epsilon$ @! $x$} |
| 183 | \end{quote} |
| 184 | \item $x^*$ means a sequence of zero or more $x$s: |
| 185 | \begin{quote} |
| 186 | \syntax{$x^*$ ::= $\epsilon$ @! $x^*$ $x$} |
| 187 | \end{quote} |
| 188 | \item $x^+$ means a sequence of one or more $x$s: |
| 189 | \begin{quote} |
| 190 | \syntax{$x^+$ ::= $x$ $x^*$} |
| 191 | \end{quote} |
| 192 | \item $x$@<-list> means a sequence of one or more $x$s separated |
| 193 | by commas: |
| 194 | \begin{quote} |
| 195 | \syntax{$x$<-list> ::= $x$ @! $x$<-list> "," $x$} |
| 196 | \end{quote} |
| 197 | \end{itemize} |
| 198 | |
| 199 | \subsection{Lexical syntax} |
| 200 | \label{sec:syntax.lex} |
| 201 | |
| 202 | Whitespace and comments are discarded. The remaining characters are |
| 203 | collected into tokens according to the following syntax. |
| 204 | |
| 205 | \begin{grammar} |
| 206 | <token> ::= <identifier> |
| 207 | \alt <string-literal> |
| 208 | \alt <char-literal> |
| 209 | \alt <integer-literal> |
| 210 | \alt <punctuation> |
| 211 | \end{grammar} |
| 212 | |
| 213 | This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal |
| 214 | munch} rule: at each stage we take the longest sequence of characters which |
| 215 | could be a token. |
| 216 | |
| 217 | \subsubsection{Identifiers} \label{sec:syntax.lex.id} |
| 218 | |
| 219 | \begin{grammar} |
| 220 | <identifier> ::= <id-start-char> @<id-body-char>^* |
| 221 | |
| 222 | <id-start-char> ::= <alpha-char> | "_" |
| 223 | |
| 224 | <id-body-char> ::= <id-start-char> @! <digit-char> |
| 225 | |
| 226 | <alpha-char> ::= "A" | "B" | \dots\ | "Z" |
| 227 | \alt "a" | "b" | \dots\ | "z" |
| 228 | \alt <extended-alpha-char> |
| 229 | |
| 230 | <digit-char> ::= "0" | <nonzero-digit-char> |
| 231 | |
| 232 | <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9" |
| 233 | \end{grammar} |
| 234 | |
| 235 | The precise definition of @<alpha-char> is left to the function |
| 236 | \textsf{alpha-char-p} in the hosting Lisp system. For portability, |
| 237 | programmers are encouraged to limit themselves to the standard ASCII letters. |
| 238 | |
| 239 | There are no reserved words at the lexical level, but the higher-level syntax |
| 240 | recognizes certain identifiers as \emph{keywords} in some contexts. There is |
| 241 | also an ambiguity (inherited from C) in the declaration syntax which is |
| 242 | settled by distinguishing type names from other identifiers at a lexical |
| 243 | level. |
| 244 | |
| 245 | \subsubsection{String and character literals} \label{sec:syntax.lex.string} |
| 246 | |
| 247 | \begin{grammar} |
| 248 | <string-literal> ::= "\"" @<string-literal-char>^* "\"" |
| 249 | |
| 250 | <char-literal> ::= "'" <char-literal-char> "'" |
| 251 | |
| 252 | <string-literal-char> ::= any character other than "\\" or "\"" |
| 253 | \alt "\\" <char> |
| 254 | |
| 255 | <char-literal-char> ::= any character other than "\\" or "'" |
| 256 | \alt "\\" <char> |
| 257 | |
| 258 | <char> ::= any single character |
| 259 | \end{grammar} |
| 260 | |
| 261 | The syntax for string and character literals differs from~C. In particular, |
| 262 | escape sequences such as @`\textbackslash n' are not recognized. The use |
| 263 | of string and character literals in Sod, outside of C~fragments, is limited, |
| 264 | and the simple syntax seems adequate. For the sake of future compatibility, |
| 265 | the use of character sequences which resemble C escape sequences is |
| 266 | discouraged. |
| 267 | |
| 268 | \subsubsection{Integer literals} \label{sec:syntax.lex.int} |
| 269 | |
| 270 | \begin{grammar} |
| 271 | <integer-literal> ::= <decimal-integer> |
| 272 | \alt <binary-integer> |
| 273 | \alt <octal-integer> |
| 274 | \alt <hex-integer> |
| 275 | |
| 276 | <decimal-integer> ::= <nonzero-digit-char> @<digit-char>^* |
| 277 | |
| 278 | <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+ |
| 279 | |
| 280 | <binary-digit-char> ::= "0" | "1" |
| 281 | |
| 282 | <octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+ |
| 283 | |
| 284 | <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7" |
| 285 | |
| 286 | <hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+ |
| 287 | |
| 288 | <hex-digit-char> ::= <digit-char> |
| 289 | \alt "A" | "B" | "C" | "D" | "E" | "F" |
| 290 | \alt "a" | "b" | "c" | "d" | "e" | "f" |
| 291 | \end{grammar} |
| 292 | |
| 293 | Sod understands only integers, not floating-point numbers; its integer syntax |
| 294 | goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for |
| 295 | binary. However, length and signedness indicators are not permitted. |
| 296 | |
| 297 | \subsubsection{Punctuation} \label{sec:syntax.lex.punct} |
| 298 | |
| 299 | \begin{grammar} |
| 300 | <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'" |
| 301 | \end{grammar} |
| 302 | |
| 303 | \subsubsection{Comments} \label{sec:lex-comment} |
| 304 | |
| 305 | \begin{grammar} |
| 306 | <comment> ::= <block-comment> |
| 307 | \alt <line-comment> |
| 308 | |
| 309 | <block-comment> ::= |
| 310 | "/*" |
| 311 | @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^* |
| 312 | @<star>^* |
| 313 | "*/" |
| 314 | |
| 315 | <star> ::= "*" |
| 316 | |
| 317 | <not-star> ::= any character other than "*" |
| 318 | |
| 319 | <not-star-or-slash> ::= any character other than "*" or "/" |
| 320 | |
| 321 | <line-comment> ::= "//" @<not-newline>^* <newline> |
| 322 | |
| 323 | <newline> ::= a newline character |
| 324 | |
| 325 | <not-newline> ::= any character other than newline |
| 326 | \end{grammar} |
| 327 | |
| 328 | Comments are exactly as in C99: both traditional block comments `\texttt{/*} |
| 329 | \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are |
| 330 | permitted and ignored. |
| 331 | |
| 332 | \subsection{Special nonterminals} |
| 333 | \label{sec:special-nonterminals} |
| 334 | |
| 335 | Aside from the lexical syntax presented above (\xref{sec:lexical-syntax}), |
| 336 | two special nonterminals occur in the module syntax. |
| 337 | |
| 338 | \subsubsection{S-expressions} \label{sec:syntax-sexp} |
| 339 | |
| 340 | \begin{grammar} |
| 341 | <s-expression> ::= an S-expression, as parsed by the Lisp reader |
| 342 | \end{grammar} |
| 343 | |
| 344 | When an S-expression is expected, the Sod parser simply calls the host Lisp |
| 345 | system's \textsf{read} function. Sod modules are permitted to modify the |
| 346 | read table to extend the S-expression syntax. |
| 347 | |
| 348 | S-expressions are self-delimiting, so no end-marker is needed. |
| 349 | |
| 350 | \subsubsection{C fragments} \label{sec:syntax.lex.cfrag} |
| 351 | |
| 352 | \begin{grammar} |
| 353 | <c-fragment> ::= a sequence of C tokens, with matching brackets |
| 354 | \end{grammar} |
| 355 | |
| 356 | Sequences of C code are simply stored and written to the output unchanged |
| 357 | during translation. They are read using a simple scanner which nonetheless |
| 358 | understands C comments and string and character literals. |
| 359 | |
| 360 | A C fragment is terminated by one of a small number of delimiter characters |
| 361 | determined by the immediately surrounding context -- usually a closing brace |
| 362 | or bracket. The first such delimiter character which is not enclosed in |
| 363 | brackets, braces or parenthesis ends the fragment. |
| 364 | |
| 365 | \subsection{Module syntax} \label{sec:syntax-module} |
| 366 | |
| 367 | \begin{grammar} |
| 368 | <module> ::= @<definition>^* |
| 369 | |
| 370 | <definition> ::= <import-definition> |
| 371 | \alt <load-definition> |
| 372 | \alt <lisp-definition> |
| 373 | \alt <code-definition> |
| 374 | \alt <typename-definition> |
| 375 | \alt <class-definition> |
| 376 | \end{grammar} |
| 377 | |
| 378 | A module is the top-level syntactic item. A module consists of a sequence of |
| 379 | definitions. |
| 380 | |
| 381 | \subsection{Simple definitions} \label{sec:syntax.defs} |
| 382 | |
| 383 | \subsubsection{Importing modules} \label{sec:syntax.defs.import} |
| 384 | |
| 385 | \begin{grammar} |
| 386 | <import-definition> ::= "import" <string> ";" |
| 387 | \end{grammar} |
| 388 | |
| 389 | The module named @<string> is processed and its definitions made available. |
| 390 | |
| 391 | A search is made for a module source file as follows. |
| 392 | \begin{itemize} |
| 393 | \item The module name @<string> is converted into a filename by appending |
| 394 | @`.sod', if it has no extension already.\footnote{% |
| 395 | Technically, what happens is \textsf{(merge-pathnames name (make-pathname |
| 396 | :type "SOD" :case :common))}, so exactly what this means varies |
| 397 | according to the host system.} % |
| 398 | \item The file is looked for relative to the directory containing the |
| 399 | importing module. |
| 400 | \item If that fails, then the file is looked for in each directory on the |
| 401 | module search path in turn. |
| 402 | \item If the file still isn't found, an error is reported and the import |
| 403 | fails. |
| 404 | \end{itemize} |
| 405 | At this point, if the file has previously been imported, nothing further |
| 406 | happens.\footnote{% |
| 407 | This check is done using \textsf{truename}, so it should see through simple |
| 408 | tricks like symbolic links. However, it may be confused by fancy things |
| 409 | like bind mounts and so on.} % |
| 410 | |
| 411 | Recursive imports, either direct or indirect, are an error. |
| 412 | |
| 413 | \subsubsection{Loading extensions} \label{sec:syntax.defs.load} |
| 414 | |
| 415 | \begin{grammar} |
| 416 | <load-definition> ::= "load" <string> ";" |
| 417 | \end{grammar} |
| 418 | |
| 419 | The Lisp file named @<string> is loaded and evaluated. |
| 420 | |
| 421 | A search is made for a Lisp source file as follows. |
| 422 | \begin{itemize} |
| 423 | \item The name @<string> is converted into a filename by appending @`.lisp', |
| 424 | if it has no extension already.\footnote{% |
| 425 | Technically, what happens is \textsf{(merge-pathnames name (make-pathname |
| 426 | :type "LISP" :case :common))}, so exactly what this means varies |
| 427 | according to the host system.} % |
| 428 | \item A search is then made in the same manner as for module imports |
| 429 | (\xref{sec:syntax-module}). |
| 430 | \end{itemize} |
| 431 | If the file is found, it is loaded using the host Lisp's \textsf{load} |
| 432 | function. |
| 433 | |
| 434 | Note that Sod doesn't attempt to compile Lisp files, or even to look for |
| 435 | existing compiled files. The right way to package a substantial extension to |
| 436 | the Sod translator is to provide the extension as a standard ASDF system (or |
| 437 | similar) and leave a dropping @"foo-extension.lisp" in the module path saying |
| 438 | something like |
| 439 | \begin{quote} |
| 440 | \textsf{(asdf:load-system :foo-extension)} |
| 441 | \end{quote} |
| 442 | which will arrange for the extension to be compiled if necessary. |
| 443 | |
| 444 | (This approach means that the language doesn't need to depend on any |
| 445 | particular system definition facility. It's bad enough already that it |
| 446 | depends on Common Lisp.) |
| 447 | |
| 448 | \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp} |
| 449 | |
| 450 | \begin{grammar} |
| 451 | <lisp-definition> ::= "lisp" <s-expression> ";" |
| 452 | \end{grammar} |
| 453 | |
| 454 | The @<s-expression> is evaluated immediately. It can do anything it likes. |
| 455 | |
| 456 | \textbf{Warning!} This means that hostile Sod modules are a security hazard. |
| 457 | Lisp code can read and write files, start other programs, and make network |
| 458 | connections. Don't install Sod modules from sources that you don't |
| 459 | trust.\footnote{% |
| 460 | Presumably you were going to run the corresponding code at some point, so |
| 461 | this isn't as unusually scary as it sounds. But please be careful.} % |
| 462 | |
| 463 | \subsubsection{Declaring type names} \label{sec:syntax.defs.typename} |
| 464 | |
| 465 | \begin{grammar} |
| 466 | <typename-definition> ::= |
| 467 | "typename" <identifier-list> ";" |
| 468 | \end{grammar} |
| 469 | |
| 470 | Each @<identifier> is declared as naming a C type. This is important because |
| 471 | the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is |
| 472 | done by distinguishing type names from other identifiers. |
| 473 | |
| 474 | Don't declare class names using @"typename"; use @"class" forward |
| 475 | declarations instead. |
| 476 | |
| 477 | \subsection{Literal code} \label{sec:syntax-code} |
| 478 | |
| 479 | \begin{grammar} |
| 480 | <code-definition> ::= |
| 481 | "code" <identifier> ":" <identifier> @[<constraints>@] |
| 482 | "{" <c-fragment> "}" |
| 483 | |
| 484 | <constraints> ::= "[" <constraint-list> "]" |
| 485 | |
| 486 | <constraint> ::= @<identifier>^+ |
| 487 | \end{grammar} |
| 488 | |
| 489 | The @<c-fragment> will be output unchanged to one of the output files. |
| 490 | |
| 491 | The first @<identifier> is the symbolic name of an output file. Predefined |
| 492 | output file names are @"c" and @"h", which are the implementation code and |
| 493 | header file respectively; other output files can be defined by extensions. |
| 494 | |
| 495 | The second @<identifier> provides a name for the output item. Several C |
| 496 | fragments can have the same name: they will be concatenated together in the |
| 497 | order in which they were encountered. |
| 498 | |
| 499 | The @<constraints> provide a means for specifying where in the output file |
| 500 | the output item should appear. (Note the two kinds of square brackets shown |
| 501 | in the syntax: square brackets must appear around the constraints if they are |
| 502 | present, but that they may be omitted.) Each comma-separated @<constraint> |
| 503 | is a sequence of identifiers naming output items, and indicates that the |
| 504 | output items must appear in the order given -- though the translator is free |
| 505 | to insert additional items in between them. (The particular output items |
| 506 | needn't be defined already -- indeed, they needn't be defined ever.) |
| 507 | |
| 508 | There is a predefined output item @"includes" in both the @"c" and @"h" |
| 509 | output files which is a suitable place for inserting @"\#include" |
| 510 | preprocessor directives in order to declare types and functions for use |
| 511 | elsewhere in the generated output files. |
| 512 | |
| 513 | \subsection{Property sets} \label{sec:syntax.propset} |
| 514 | |
| 515 | \begin{grammar} |
| 516 | <properties> ::= "[" <property-list> "]" |
| 517 | |
| 518 | <property> ::= <identifier> "=" <expression> |
| 519 | \end{grammar} |
| 520 | |
| 521 | Property sets are a means for associating miscellaneous information with |
| 522 | classes and related items. By using property sets, additional information |
| 523 | can be passed to extensions without the need to introduce idiosyncratic |
| 524 | syntax. |
| 525 | |
| 526 | A property has a name, given as an @<identifier>, and a value computed by |
| 527 | evaluating an @<expression>. The value can be one of a number of types, |
| 528 | though the only operators currently defined act on integer values only. |
| 529 | |
| 530 | \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr} |
| 531 | |
| 532 | \begin{grammar} |
| 533 | <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term> |
| 534 | |
| 535 | <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor> |
| 536 | |
| 537 | <factor> ::= <primary> | "+" <factor> | "-" <factor> |
| 538 | |
| 539 | <primary> ::= |
| 540 | <integer-literal> | <string-literal> | <char-literal> | <identifier> |
| 541 | \alt "?" <s-expression> |
| 542 | \alt "(" <expression> ")" |
| 543 | \end{grammar} |
| 544 | |
| 545 | The arithmetic expression syntax is simple and standard; there are currently |
| 546 | no bitwise, logical, or comparison operators. |
| 547 | |
| 548 | A @<primary> expression may be a literal or an identifier. Note that |
| 549 | identifiers stand for themselves: they \emph{do not} denote values. For more |
| 550 | fancy expressions, the syntax |
| 551 | \begin{quote} |
| 552 | @"?" @<s-expression> |
| 553 | \end{quote} |
| 554 | causes the @<s-expression> to be evaluated using the Lisp \textsf{eval} |
| 555 | function. |
| 556 | %%% FIXME crossref to extension docs |
| 557 | |
| 558 | \subsection{C types} \label{sec:syntax.c-types} |
| 559 | |
| 560 | Sod's syntax for C types closely mirrors the standard C syntax. A C type has |
| 561 | two parts: a sequence of @<declaration-specifier>s and a @<declarator>. In |
| 562 | Sod, a type must contain at least one @<declaration-specifier> (i.e., |
| 563 | `implicit @"int"' is forbidden), and storage-class specifiers are not |
| 564 | recognized. |
| 565 | |
| 566 | \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec} |
| 567 | |
| 568 | \begin{grammar} |
| 569 | <declaration-specifier> ::= <type-name> |
| 570 | \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier> |
| 571 | \alt "void" | "char" | "int" | "float" | "double" |
| 572 | \alt "short" | "long" |
| 573 | \alt "signed" | "unsigned" |
| 574 | \alt <qualifier> |
| 575 | |
| 576 | <qualifier> ::= "const" | "volatile" | "restrict" |
| 577 | |
| 578 | <type-name> ::= <identifier> |
| 579 | \end{grammar} |
| 580 | |
| 581 | A @<type-name> is an identifier which has been declared as being a type name, |
| 582 | using the @"typename" or @"class" definitions. |
| 583 | |
| 584 | Declaration specifiers may appear in any order. However, not all |
| 585 | combinations are permitted. A declaration specifier must consist of zero or |
| 586 | more @<qualifiers>, and one of the following, up to reordering. |
| 587 | \begin{itemize} |
| 588 | \item @<type-name> |
| 589 | \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier> |
| 590 | \item @"void" |
| 591 | \item @"char", @"unsigned char", @"signed char" |
| 592 | \item @"short", @"unsigned short", @"signed short" |
| 593 | \item @"short int", @"unsigned short int", @"signed short int" |
| 594 | \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed" |
| 595 | \item @"long", @"unsigned long", @"signed long" |
| 596 | \item @"long int", @"unsigned long int", @"signed long int" |
| 597 | \item @"long long", @"unsigned long long", @"signed long long" |
| 598 | \item @"long long int", @"unsigned long long int", @"signed long long int" |
| 599 | \item @"float", @"double", @"long double" |
| 600 | \end{itemize} |
| 601 | All of these have their usual C meanings. |
| 602 | |
| 603 | \subsubsection{Declarators} \label{sec:syntax.c-types.declarator} |
| 604 | |
| 605 | \begin{grammar} |
| 606 | <declarator>$[k]$ ::= @<pointer>^* <primary-declarator>$[k]$ |
| 607 | |
| 608 | <primary-declarator>$[k]$ ::= $k$ |
| 609 | \alt "(" <primary-declarator>$[k]$ ")" |
| 610 | \alt <primary-declarator>$[k]$ @<declarator-suffix>^* |
| 611 | |
| 612 | <pointer> ::= "*" @<qualifier>^* |
| 613 | |
| 614 | <declarator-suffix> ::= "[" <c-fragment> "]" |
| 615 | \alt "(" <arguments> ")" |
| 616 | |
| 617 | <arguments> ::= $\epsilon$ | "..." |
| 618 | \alt <argument-list> @["," "..."@] |
| 619 | |
| 620 | <argument> ::= @<declaration-specifier>^+ <argument-declarator> |
| 621 | |
| 622 | <argument-declarator> ::= <declarator>@[<identifier> @! $\epsilon$@] |
| 623 | |
| 624 | <simple-declarator> ::= <declarator>@[<identifier>@] |
| 625 | |
| 626 | <dotted-name> ::= <identifier> "." <identifier> |
| 627 | |
| 628 | <dotted-declarator> ::= <declarator>@[<dotted-name>@] |
| 629 | \end{grammar} |
| 630 | |
| 631 | The declarator syntax is taken from C, but with some differences. |
| 632 | \begin{itemize} |
| 633 | \item Array dimensions are uninterpreted @<c-fragments>, terminated by a |
| 634 | closing square bracket. This allows array dimensions to contain arbitrary |
| 635 | constant expressions. |
| 636 | \item A declarator may have either a single @<identifier> at its centre or a |
| 637 | pair of @<identifier>s separated by a @`.'; this is used to refer to |
| 638 | slots or messages defined in superclasses. |
| 639 | \end{itemize} |
| 640 | The remaining differences are (I hope) a matter of presentation rather than |
| 641 | substance. |
| 642 | |
| 643 | \subsection{Defining classes} \label{sec:syntax.class} |
| 644 | |
| 645 | \begin{grammar} |
| 646 | <class-definition> ::= <class-forward-declaration> |
| 647 | \alt <full-class-definition> |
| 648 | \end{grammar} |
| 649 | |
| 650 | \subsubsection{Forward declarations} \label{sec:class.class.forward} |
| 651 | |
| 652 | \begin{grammar} |
| 653 | <class-forward-declaration> ::= "class" <identifier> ";" |
| 654 | \end{grammar} |
| 655 | |
| 656 | A @<class-forward-declaration> informs Sod that an @<identifier> will be used |
| 657 | to name a class which is currently undefined. Forward declarations are |
| 658 | necessary in order to resolve certain kinds of circularity. For example, |
| 659 | \begin{listing} |
| 660 | class Sub; |
| 661 | |
| 662 | class Super : SodObject { |
| 663 | Sub *sub; |
| 664 | }; |
| 665 | |
| 666 | class Sub : Super { |
| 667 | /* ... */ |
| 668 | }; |
| 669 | \end{listing} |
| 670 | |
| 671 | \subsubsection{Full class definitions} \label{sec:class.class.full} |
| 672 | |
| 673 | \begin{grammar} |
| 674 | <full-class-definition> ::= |
| 675 | @[<properties>@] |
| 676 | "class" <identifier> ":" <identifier-list> |
| 677 | "{" @<class-item>^* "}" |
| 678 | |
| 679 | <class-item> ::= <slot-item> ";" |
| 680 | \alt <message-item> |
| 681 | \alt <method-item> |
| 682 | \alt <initializer-item> ";" |
| 683 | \end{grammar} |
| 684 | |
| 685 | A full class definition provides a complete description of a class. |
| 686 | |
| 687 | The first @<identifier> gives the name of the class. It is an error to |
| 688 | give the name of an existing class (other than a forward-referenced class), |
| 689 | or an existing type name. It is conventional to give classes `MixedCase' |
| 690 | names, to distinguish them from other kinds of identifiers. |
| 691 | |
| 692 | The @<identifier-list> names the direct superclasses for the new class. It |
| 693 | is an error if any of these @<identifier>s does not name a defined class. |
| 694 | |
| 695 | The @<properties> provide additional information. The standard class |
| 696 | properties are as follows. |
| 697 | \begin{description} |
| 698 | \item[@"lisp_class"] The name of the Lisp class to use within the translator |
| 699 | to represent this class. The property value must be an identifier; the |
| 700 | default is @"sod_class". Extensions may define classes with additional |
| 701 | behaviour, and may recognize additional class properties. |
| 702 | \item[@"metaclass"] The name of the Sod metaclass for this class. In the |
| 703 | generated code, a class is itself an instance of another class -- its |
| 704 | \emph{metaclass}. The metaclass defines which slots the class will have, |
| 705 | which messages it will respond to, and what its behaviour will be when it |
| 706 | receives them. The property value must be an identifier naming a defined |
| 707 | subclass of @"SodClass". The default metaclass is @"SodClass". |
| 708 | %%% FIXME xref to theory |
| 709 | \item[@"nick"] A nickname for the class, to be used to distinguish it from |
| 710 | other classes in various limited contexts. The property value must be an |
| 711 | identifier; the default is constructed by forcing the class name to |
| 712 | lower-case. |
| 713 | \end{description} |
| 714 | |
| 715 | The class body consists of a sequence of @<class-item>s enclosed in braces. |
| 716 | These items are discussed on the following sections. |
| 717 | |
| 718 | \subsubsection{Slot items} \label{sec:sntax.class.slot} |
| 719 | |
| 720 | \begin{grammar} |
| 721 | <slot-item> ::= |
| 722 | @[<properties>@] |
| 723 | @<declaration-specifier>^+ <init-declarator-list> |
| 724 | |
| 725 | <init-declarator> ::= <declarator> @["=" <initializer>@] |
| 726 | \end{grammar} |
| 727 | |
| 728 | A @<slot-item> defines one or more slots. All instances of the class and any |
| 729 | subclass will contain these slot, with the names and types given by the |
| 730 | @<declaration-specifiers> and the @<declarators>. Slot declarators may not |
| 731 | contain qualified identifiers. |
| 732 | |
| 733 | It is not possible to declare a slot with function type: such an item is |
| 734 | interpreted as being a @<message-item> or @<method-item>. Pointers to |
| 735 | functions are fine. |
| 736 | |
| 737 | An @<initializer>, if present, is treated as if a separate |
| 738 | @<initializer-item> containing the slot name and initializer were present. |
| 739 | For example, |
| 740 | \begin{listing} |
| 741 | [nick = eg] |
| 742 | class Example : Super { |
| 743 | int foo = 17; |
| 744 | }; |
| 745 | \end{listing} |
| 746 | means the same as |
| 747 | \begin{listing} |
| 748 | [nick = eg] |
| 749 | class Example : Super { |
| 750 | int foo; |
| 751 | eg.foo = 17; |
| 752 | }; |
| 753 | \end{listing} |
| 754 | |
| 755 | \subsubsection{Initializer items} \label{sec:syntax.class.init} |
| 756 | |
| 757 | \begin{grammar} |
| 758 | <initializer-item> ::= @["class"@] <slot-initializer-list> |
| 759 | |
| 760 | <slot-initializer> ::= <qualified-identifier> "=" <initializer> |
| 761 | |
| 762 | <initializer> :: "{" <c-fragment> "}" | <c-fragment> |
| 763 | \end{grammar} |
| 764 | |
| 765 | An @<initializer-item> provides an initial value for one or more slots. If |
| 766 | prefixed by @"class", then the initial values are for class slots (i.e., |
| 767 | slots of the class object itself); otherwise they are for instance slots. |
| 768 | |
| 769 | The first component of the @<qualified-identifier> must be the nickname of |
| 770 | one of the class's superclasses (including itself); the second must be the |
| 771 | name of a slot defined in that superclass. |
| 772 | |
| 773 | The initializer has one of two forms. |
| 774 | \begin{itemize} |
| 775 | \item A @<c-fragment> enclosed in braces denotes an aggregate initializer. |
| 776 | This is suitable for initializing structure, union or array slots. |
| 777 | \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare' |
| 778 | initializer, and continues until the next @`,' or @`;' which is not within |
| 779 | nested brackets. Bare initializers are suitable for initializing scalar |
| 780 | slots, such as pointers or integers, and strings. |
| 781 | \end{itemize} |
| 782 | |
| 783 | \subsubsection{Message items} \label{sec:syntax.class.message} |
| 784 | |
| 785 | \begin{grammar} |
| 786 | <message-item> ::= |
| 787 | @[<properties>@] |
| 788 | @<declaration-specifier>^+ <declarator> @[<method-body>@] |
| 789 | \end{grammar} |
| 790 | |
| 791 | \subsubsection{Method items} \label{sec:syntax.class.method} |
| 792 | |
| 793 | \begin{grammar} |
| 794 | <method-item> ::= |
| 795 | @[<properties>@] |
| 796 | @<declaration-specifier>^+ <declarator> <method-body> |
| 797 | |
| 798 | <method-body> ::= "{" <c-fragment> "}" | "extern" ";" |
| 799 | \end{grammar} |
| 800 | |
| 801 | %%%-------------------------------------------------------------------------- |
| 802 | \section{Class objects} |
| 803 | |
| 804 | \begin{listing} |
| 805 | typedef struct SodClass__ichain_obj SodClass; |
| 806 | |
| 807 | struct sod_chain { |
| 808 | size_t n_classes; /* Number of classes in chain */ |
| 809 | const SodClass *const *classes; /* Vector of classes, head first */ |
| 810 | size_t off_ichain; /* Offset of ichain from instance base */ |
| 811 | const struct sod_vtable *vt; /* Vtable pointer for chain */ |
| 812 | size_t ichainsz; /* Size of the ichain structure */ |
| 813 | }; |
| 814 | |
| 815 | struct sod_vtable { |
| 816 | SodClass *_class; /* Pointer to instance's class */ |
| 817 | size_t _base; /* Offset to instance base */ |
| 818 | }; |
| 819 | |
| 820 | struct SodClass__islots { |
| 821 | |
| 822 | /* Basic information */ |
| 823 | const char *name; /* The class's name as a string */ |
| 824 | const char *nick; /* The nickname as a string */ |
| 825 | |
| 826 | /* Instance allocation and initialization */ |
| 827 | size_t instsz; /* Instance layout size in bytes */ |
| 828 | void *(*imprint)(void *); /* Stamp instance with vtable ptrs */ |
| 829 | void *(*init)(void *); /* Initialize instance */ |
| 830 | |
| 831 | /* Superclass structure */ |
| 832 | size_t n_supers; /* Number of direct superclasses */ |
| 833 | const SodClass *const *supers; /* Vector of direct superclasses */ |
| 834 | size_t n_cpl; /* Length of class precedence list */ |
| 835 | const SodClass *const *cpl; /* Vector for class precedence list */ |
| 836 | |
| 837 | /* Chain structure */ |
| 838 | const SodClass *link; /* Link to next class in chain */ |
| 839 | const SodClass *head; /* Pointer to head of chain */ |
| 840 | size_t level; /* Index of class in its chain */ |
| 841 | size_t n_chains; /* Number of superclass chains */ |
| 842 | const sod_chain *chains; /* Vector of chain structures */ |
| 843 | |
| 844 | /* Layout */ |
| 845 | size_t off_islots; /* Offset of islots from ichain base */ |
| 846 | size_t islotsz; /* Size of instance slots */ |
| 847 | }; |
| 848 | |
| 849 | struct SodClass__ichain_obj { |
| 850 | const SodClass__vt_obj *_vt; |
| 851 | struct SodClass__islots cls; |
| 852 | }; |
| 853 | |
| 854 | struct sod_instance { |
| 855 | struct sod_vtable *_vt; |
| 856 | }; |
| 857 | \end{listing} |
| 858 | |
| 859 | \begin{listing} |
| 860 | void *sod_convert(const SodClass *cls, const void *obj) |
| 861 | { |
| 862 | const struct sod_instance *inst = obj; |
| 863 | const SodClass *real = inst->_vt->_cls; |
| 864 | const struct sod_chain *chain; |
| 865 | size_t i, index; |
| 866 | |
| 867 | for (i = 0; i < real->cls.n_chains; i++) { |
| 868 | chain = &real->cls.chains[i]; |
| 869 | if (chain->classes[0] == cls->cls.head) { |
| 870 | index = cls->cls.index; |
| 871 | if (index < chain->n_classes && chain->classes[index] == cls) |
| 872 | return ((char *)cls - inst->_vt._base + chain->off_ichain); |
| 873 | else |
| 874 | return (0); |
| 875 | } |
| 876 | } |
| 877 | return (0); |
| 878 | } |
| 879 | \end{listing} |
| 880 | |
| 881 | %%%-------------------------------------------------------------------------- |
| 882 | \section{Classes} |
| 883 | \label{sec:class} |
| 884 | |
| 885 | \subsection{Classes and superclasses} \label{sec:class.defs} |
| 886 | |
| 887 | A @<full-class-definition> must list one or more existing classes to be the |
| 888 | \emph{direct superclasses} for the new class being defined. We make the |
| 889 | following definitions. |
| 890 | \begin{itemize} |
| 891 | \item The \emph{superclasses} of a class consist of the class itself together |
| 892 | with the superclasses of its direct superclasses. |
| 893 | \item The \emph{proper superclasses} of a class are its superclasses other |
| 894 | than itself. |
| 895 | \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper}) |
| 896 | \emph{subclass} of $C$. |
| 897 | \end{itemize} |
| 898 | The predefined class @|SodObject| has no direct superclasses; it is unique in |
| 899 | this respect. All classes are subclasses of @|SodObject|. |
| 900 | |
| 901 | \subsection{The class precedence list} \label{sec:class.cpl} |
| 902 | |
| 903 | Let $C$ be a class. The superclasses of $C$ form a directed graph, with an |
| 904 | edge from each class to each of its direct superclasses. This is the |
| 905 | \emph{superclass graph of $C$}. |
| 906 | |
| 907 | In order to resolve inheritance of items, we define a \emph{class precedence |
| 908 | list} (or CPL) for each class, which imposes a total order on that class's |
| 909 | superclasses. The default algorithm for computing the CPL is the \emph{C3} |
| 910 | algorithm \cite{fixme-c3}, though extensions may implement other algorithms. |
| 911 | |
| 912 | The default algorithm works as follows. Let $C$ be the class whose CPL we |
| 913 | are to compute. Let $X$ and $Y$ be two of $C$'s superclasses. |
| 914 | \begin{itemize} |
| 915 | \item $C$ must appear first in the CPL. |
| 916 | \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct |
| 917 | superclasses, then $X$ appears before $Y$ in the $C$'s CPL. |
| 918 | \item If the above rules don't suffice to order $X$ and $Y$, then whichever |
| 919 | of $X$ and $Y$ has a subclass which appears further left in the list of |
| 920 | $C$'s direct superclasses will appear earlier in the CPL. |
| 921 | \end{itemize} |
| 922 | This last rule is sufficient to disambiguate because if both $X$ and $Y$ are |
| 923 | superclasses of the same direct superclass of $C$ then that direct |
| 924 | superclass's CPL will order $X$ and $Y$. |
| 925 | |
| 926 | We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if |
| 927 | $X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from |
| 928 | context then we omit it, saying simply that $X$ is more specific than $Y$. |
| 929 | |
| 930 | \subsection{Instances and metaclasses} \label{sec:class.meta} |
| 931 | |
| 932 | A class defines the structure and behaviour of its \emph{instances}: run-time |
| 933 | objects created (possibly) dynamically. An instance is an instance of only |
| 934 | one class, though structurally it may be used in place of an instance of any |
| 935 | of that class's superclasses. It is possible, with care, to change the class |
| 936 | of an instance at run-time. |
| 937 | |
| 938 | Classes are themselves represented as instances -- called \emph{class |
| 939 | objects} -- in the running program. Being instances, they have a class, |
| 940 | called the \emph{metaclass}. The metaclass defines the structure and |
| 941 | behaviour of the class object. |
| 942 | |
| 943 | The predefined class @|SodClass| is the default metaclass for new classes. |
| 944 | @|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is |
| 945 | its own metaclass. |
| 946 | |
| 947 | To make matters more complicated, Sod has \emph{two} distinct metalevels: as |
| 948 | well as the runtime metalevel, as discussed above, there's a compile-time |
| 949 | metalevel hosted in the Sod translator. Since Sod is written in Common Lisp, |
| 950 | a Sod class's compile-time metaclass is a CLOS class. The usual compile-time |
| 951 | metaclass is @|sod-class|. The compile-time metalevel is the subject of |
| 952 | \xref{ch:api}. |
| 953 | |
| 954 | \subsection{Items and inheritance} \label{sec:class.inherit} |
| 955 | |
| 956 | A class definition also declares \emph{slots}, \emph{messages}, |
| 957 | \emph{initializers} and \emph{methods} -- collectively referred to as |
| 958 | \emph{items}. In addition to the items declared in the class definition -- |
| 959 | the class's \emph{direct items} -- a class also \emph{inherits} items from |
| 960 | its superclasses. |
| 961 | |
| 962 | The precise rules for item inheritance vary according to the kinds of items |
| 963 | involved. |
| 964 | |
| 965 | Some object systems have a notion of `repeated inheritance': if there are |
| 966 | multiple paths in the superclass graph from a class to one of its |
| 967 | superclasses then items defined in that superclass may appear duplicated in |
| 968 | the subclass. Sod does not have this notion. |
| 969 | |
| 970 | \subsubsection{Slots} \label{sec:class.inherit.slots} |
| 971 | A \emph{slot} is a unit of state. In other object systems, slots may be |
| 972 | called `fields', `member variables', or `instance variables'. |
| 973 | |
| 974 | A slot has a \emph{name} and a \emph{type}. The name serves only to |
| 975 | distinguish the slot from other direct slots defined by the same class. A |
| 976 | class inherits all of its proper superclasses' slots. Slots inherited from |
| 977 | superclasses do not conflict with each other or with direct slots, even if |
| 978 | they have the same names. |
| 979 | |
| 980 | At run-time, each instance of the class holds a separate value for each slot, |
| 981 | whether direct or inherited. Changing the value of an instance's slot |
| 982 | doesn't affect other instances. |
| 983 | |
| 984 | \subsubsection{Initializers} \label{sec:class.inherit.init} |
| 985 | Mumble. |
| 986 | |
| 987 | \subsubsection{Messages} \label{sec:class.inherit.messages} |
| 988 | A \emph{message} is the stimulus for behaviour. In Sod, a class must define, |
| 989 | statically, the name and format of the messages it is able to receive and the |
| 990 | values it will return in reply. In this respect, a message is similar to |
| 991 | `abstract member functions' or `interface member functions' in other object |
| 992 | systems. |
| 993 | |
| 994 | Like slots, a message has a \emph{name} and a \emph{type}. Again, the name |
| 995 | serves only to distinguish the message from other direct messages defined by |
| 996 | the same class. Messages inherited from superclasses do not conflict with |
| 997 | each other or with direct messages, even if they have the same name. |
| 998 | |
| 999 | At run-time, one sends a message to an instance by invoking a function |
| 1000 | obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}. |
| 1001 | |
| 1002 | \subsubsection{Methods} \label{sec:class.inherit.methods} |
| 1003 | A \emph{method} is a unit of behaviour. In other object systems, methods may |
| 1004 | be called `member functions'. |
| 1005 | |
| 1006 | A method is associated with a message. When a message is received by an |
| 1007 | instance, all of the methods associated with that message on the instance's |
| 1008 | class or any of its superclasses are \emph{applicable}. The details of how |
| 1009 | the applicable methods are invoked are described fully in |
| 1010 | \xref{sec:fixme-method-combination}. |
| 1011 | |
| 1012 | \subsection{Chains and instance layout} \label{sec:class.layout} |
| 1013 | |
| 1014 | C is a rather low-level language, and in particular it exposes details of the |
| 1015 | way data is laid out in memory. Since an instance of a class~$C$ should be |
| 1016 | (at least in principle) usable anywhere an instance of some superclass $B |
| 1017 | \succeq C$ is expected, this implies that an instance of the subclass $C$ |
| 1018 | needs to contain within it a complete instance of each superclass $B$, laid |
| 1019 | out according to the rules of instances of $B$, so that if we have (the |
| 1020 | address of) an instance of $C$, we can easily construct a pointer to a thing |
| 1021 | which looks like an instance of $B$ contained within it. |
| 1022 | |
| 1023 | Specifically, the information we need to retain for an instance of a |
| 1024 | class~$C$ is: |
| 1025 | \begin{itemize} |
| 1026 | \item the values of each of the slots defined by $C$, including those defined |
| 1027 | by superclasses; |
| 1028 | \item information which will let us convert a pointer to $C$ into a pointer |
| 1029 | to any superclass $B \succeq C$; |
| 1030 | \item information which will let us call the appropriate effective method for |
| 1031 | each message defined by $C$, including those defined by superclasses; and |
| 1032 | \item some additional meta-level information, such as how to find the class |
| 1033 | object for $C$ given (the address of) one of its instances. |
| 1034 | \end{itemize} |
| 1035 | |
| 1036 | Observe that, while each distinct instance must clearly have its own storage |
| 1037 | for slots, all instances of $C$ can share a single copy of the remaining |
| 1038 | information. The individual instance only needs to keep a pointer to this |
| 1039 | shared table, which, inspired by the similar structure in many \Cplusplus\ |
| 1040 | ABIs, are called a \emph{vtable}. |
| 1041 | |
| 1042 | The easiest approach would be to decide that instances of $C$ are exactly |
| 1043 | like instances of $B$, only with extra space at the end for the extra slots |
| 1044 | which $C$ defines over and above those already existing in $B$. Conversion |
| 1045 | is then trivial: a pointer to an instance of $C$ can be converted to a |
| 1046 | pointer to an instance of some superclass $B$ simply by casting. Even though |
| 1047 | the root class @|SodObject| doesn't have any slots at all, its instances will |
| 1048 | still need a vtable so that you can find its class object: the address of the |
| 1049 | vtable therefore needs to be at the very start of the instance structure. |
| 1050 | Again, a vtable for a superclass would have a vtable for each of its |
| 1051 | superclasses as a prefix, with new items added afterwards. |
| 1052 | |
| 1053 | This appealing approach works well for an object system which only permits |
| 1054 | single inheritance of both state and behaviour. Alas, it breaks down when |
| 1055 | multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$, |
| 1056 | even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in |
| 1057 | general, $B$'s instance structure will not be a prefix of $B'$'s, nor will |
| 1058 | $B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$ |
| 1059 | as a prefix. |
| 1060 | |
| 1061 | A (non-root) class may -- though need not -- have a distinguished \emph{link} |
| 1062 | superclass, which need not be a direct superclass. Furthermore, each |
| 1063 | class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of |
| 1064 | $C$, there can be at most one other superclass of $C$ whose link superclass |
| 1065 | is $A$.\footnote{% |
| 1066 | That is, it's permitted for two classes $B$ and $B'$ to have the same link |
| 1067 | superclass $A$, but $B$ and $B'$ can't then both be superclasses of the |
| 1068 | same class $C$.} % |
| 1069 | Therefore, the links partition the superclasses of~$C$ into nice linear |
| 1070 | \emph{chains}, such that each superclass is a member of exactly one chain. |
| 1071 | If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more |
| 1072 | than that of $A$; otherwise $B$ is called a \emph{chain head} and its level |
| 1073 | is zero. If the classes in a chain are written in a list, chain head first, |
| 1074 | then the level of each class gives its index in the list. |
| 1075 | |
| 1076 | Chains therefore allow us to recover some of the linearity properties which |
| 1077 | made layout simple in the case of single inheritance. The instance structure |
| 1078 | for a class $C$ contains a substructure for each of $C$'s superclass chains; |
| 1079 | a pointer to an object of class $C$ actually points to the substructure for |
| 1080 | the chain containing $C$. The order of these substructures is unimportant |
| 1081 | for now.\footnote{% |
| 1082 | The chains appear in the order in which their most specific classes appear |
| 1083 | in $C$'s class precedence list. This guarantees that the chain containing |
| 1084 | $C$ itself appears first, so that a pointer to $C$'s instance structure is |
| 1085 | actually a pointer to $C$'s chain substructure. Apart from that, it's a |
| 1086 | simple, stable, but basically arbitrary choice which can't be changed |
| 1087 | without breaking the ABI.} % |
| 1088 | The substructure for each chain begins with a pointer to a vtable, followed |
| 1089 | by a structure for each superclass in the chain containing the slots defined |
| 1090 | by that superclass, with the chain head (least specific class) first. |
| 1091 | |
| 1092 | Suppose we have a pointer to (static) type $C$, and want to convert it into a |
| 1093 | pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{% |
| 1094 | In the more general case, we have a pointer to static type $C$, which |
| 1095 | actually points to an object of some subclass $D$ of $C$, and want to |
| 1096 | convert it into a pointer to type $B$. Such a conversion is called a |
| 1097 | \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast} |
| 1098 | otherwise. Downcasts and cross-casts require complicated run-time |
| 1099 | checking, and can will fail unless $B$ is a superclass of $D$.} % |
| 1100 | If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the |
| 1101 | pointer value is already correct and it's only necessary to cast it |
| 1102 | appropriately. Otherwise -- a \emph{cross-chain upcast} -- the pointer needs |
| 1103 | to be adjusted to point to a different chain substructure. Since the lengths |
| 1104 | and relative positions of the chain substructures vary between classes, the |
| 1105 | adjustments are stored in the vtable. Cross-chain upcasts are therefore a |
| 1106 | bit slower than in-chain upcasts. |
| 1107 | |
| 1108 | Each chain has its own separate vtable, because much of the metadata stored |
| 1109 | in the vtable is specific to a particular chain. For example: |
| 1110 | \begin{itemize} |
| 1111 | \item offsets to other chains' substructures will vary depending on which |
| 1112 | chain we start from; and |
| 1113 | \item entry points to methods { |
| 1114 | |
| 1115 | %%%-------------------------------------------------------------------------- |
| 1116 | \chapter{The Lisp programming interface} \label{ch:api} |
| 1117 | |
| 1118 | %% output for `h' files |
| 1119 | %% |
| 1120 | %% prologue |
| 1121 | %% guard start |
| 1122 | %% typedefs start |
| 1123 | %% typedefs |
| 1124 | %% typedefs end |
| 1125 | %% includes start |
| 1126 | %% includes |
| 1127 | %% includes end |
| 1128 | %% classes start |
| 1129 | %% CLASS banner |
| 1130 | %% CLASS islots start |
| 1131 | %% CLASS islots slots |
| 1132 | %% CLASS islots end |
| 1133 | %% CLASS vtmsgs start |
| 1134 | %% CLASS vtmsgs CLASS start |
| 1135 | %% CLASS vtmsgs CLASS slots |
| 1136 | %% CLASS vtmsgs CLASS end |
| 1137 | %% CLASS vtmsgs end |
| 1138 | %% CLASS vtables start |
| 1139 | %% CLASS vtables CHAIN-HEAD start |
| 1140 | %% CLASS vtables CHAIN-HEAD slots |
| 1141 | %% CLASS vtables CHAIN-HEAD end |
| 1142 | %% CLASS vtables end |
| 1143 | %% CLASS vtable-externs |
| 1144 | %% CLASS vtable-externs-after |
| 1145 | %% CLASS methods start |
| 1146 | %% CLASS methods |
| 1147 | %% CLASS methods end |
| 1148 | %% CLASS ichains start |
| 1149 | %% CLASS ichains CHAIN-HEAD start |
| 1150 | %% CLASS ichains CHAIN-HEAD slots |
| 1151 | %% CLASS ichains CHAIN-HEAD end |
| 1152 | %% CLASS ichains end |
| 1153 | %% CLASS ilayout start |
| 1154 | %% CLASS ilayout slots |
| 1155 | %% CLASS ilayout end |
| 1156 | %% CLASS conversions |
| 1157 | %% CLASS object |
| 1158 | %% classes end |
| 1159 | %% guard end |
| 1160 | %% epilogue |
| 1161 | |
| 1162 | %% output for `c' files |
| 1163 | %% |
| 1164 | %% prologue |
| 1165 | %% includes start |
| 1166 | %% includes |
| 1167 | %% includes end |
| 1168 | %% classes start |
| 1169 | %% CLASS banner |
| 1170 | %% CLASS direct-methods start |
| 1171 | %% CLASS direct-methods METHOD start |
| 1172 | %% CLASS direct-methods METHOD body |
| 1173 | %% CLASS direct-methods METHOD end |
| 1174 | %% CLASS direct-methods end |
| 1175 | %% CLASS effective-methods |
| 1176 | %% CLASS vtables start |
| 1177 | %% CLASS vtables CHAIN-HEAD start |
| 1178 | %% CLASS vtables CHAIN-HEAD class-pointer METACLASS |
| 1179 | %% CLASS vtables CHAIN-HEAD base-offset |
| 1180 | %% CLASS vtables CHAIN-HEAD chain-offset TARGET-HEAD |
| 1181 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS start |
| 1182 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS slots |
| 1183 | %% CLASS vtables CHAIN-HEAD vtmsgs CLASS end |
| 1184 | %% CLASS vtables CHAIN-HEAD end |
| 1185 | %% CLASS vtables end |
| 1186 | %% CLASS object prepare |
| 1187 | %% CLASS object start |
| 1188 | %% CLASS object CHAIN-HEAD ichain start |
| 1189 | %% CLASS object SUPER slots start |
| 1190 | %% CLASS object SUPER slots |
| 1191 | %% CLASS object SUPER vtable |
| 1192 | %% CLASS object SUPER slots end |
| 1193 | %% CLASS object CHAIN-HEAD ichain end |
| 1194 | %% CLASS object end |
| 1195 | %% classes end |
| 1196 | %% epilogue |
| 1197 | |
| 1198 | %%%-------------------------------------------------------------------------- |
| 1199 | |
| 1200 | \include{sod-backg} |
| 1201 | \include{sod-protocol} |
| 1202 | |
| 1203 | \end{document} |
| 1204 | \f |
| 1205 | %%% Local variables: |
| 1206 | %%% mode: LaTeX |
| 1207 | %%% TeX-PDF-mode: t |
| 1208 | %%% End: |