mdw@git.distorted.org.uk Git - sod/blob - sod.tex

   1 \documentclass[noarticle]{strayman}
   2
   3 \usepackage[T1]{fontenc}
   4 \usepackage[utf8]{inputenc}
   5 \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
   6 \usepackage{syntax}
   7 \usepackage{sverb}
   8 \usepackage{at}
   9 \usepackage{mdwref}
  10
  11 \title{A Sensible Object Design for C}
  12 \author{Mark Wooding}
  13
  14 \def\syntleft{\normalfont\itshape}
  15 \let\syntright\empty
  16
  17 \def\ulitleft{\normalfont\sffamily}
  18 \let\ulitright\empty
  19
  20 \let\listingsize\relax
  21
  22 \let\epsilon\varepsilon
  23
  24 \atdef <#1>{\synt{#1}}
  25 \atdef "#1"{\lit*{#1}}
  26 \atdef `#1'{\lit{#1}}
  27 \atdef |#1|{\textsf{#1}}
  28
  29 \def\Cplusplus{C\kern-1pt++}
  30 \def\Csharp{C\#}
  31 \def\man#1#2{\textbf{#1}(#2)}
  32
  33 \begingroup\lccode`\~=`\
  34 \lowercase{
  35 \endgroup
  36 \def\prog{%
  37   \sffamily%
  38   \quote%
  39   \let\oldnl\\%
  40   \obeylines%
  41   \tabbing%
  42   \global\let~\\%
  43   \global\let\\\textbackslash%
  44 }
  45 \def\endprog{%
  46   \endtabbing%
  47   \global\let\\\oldnl%
  48   \endquote%
  49 }}
  50
  51 \begin{document}
  52
  53 \maketitle
  54
  55 \include{sod-tut}
  56
  57 %%%--------------------------------------------------------------------------
  58 \chapter{Internals}
  59
  60 \section{Generated names}
  61
  62 The generated names for functions and objects related to a class are
  63 constructed systematically so as not to interfere with each other.  The rules
  64 on class, slot and message naming exist so as to ensure that the generated
  65 names don't collide with each other.
  66
  67 The following notation is used in this section.
  68 \begin{description}
  69 \item[@<class>] The full name of the `focus' class: the one for which we are
  70   generating names.
  71 \item[@<super-nick>] The nickname of a superclass.
  72 \item[@<head-nick>] The nickname of the chain-head class of the chain
  73   in question.
  74 \end{description}
  75
  76 \subsection{Instance layout}
  77
  78 %%%--------------------------------------------------------------------------
  79 \section{Syntax}
  80 \label{sec:syntax}
  81
  82 Fortunately, Sod is syntactically quite simple.  I've used some slightly
  83 unusual notation in order to make the presentation easier to read.
  84 \begin{itemize}
  85 \item $\epsilon$ denotes the empty nonterminal:
  86   \begin{quote}
  87     $\epsilon$ ::=
  88   \end{quote}
  89 \item $[$@<item>$]$ means an optional @<item>:
  90   \begin{quote}
  91     \syntax{$[$<item>$]$ ::= $\epsilon$ | <item>}
  92   \end{quote}
  93 \item @<item>$^*$ means a sequence of zero or more @<item>s:
  94   \begin{quote}
  95     \syntax{<item>$^*$ ::= $\epsilon$ | <item>$^*$ <item>}
  96   \end{quote}
  97 \item @<item>$^+$ means a sequence of one or more @<item>s:
  98   \begin{quote}
  99     \syntax{<item>$^+$ ::= <item> <item>$^*$}
 100   \end{quote}
 101 \item @<item-list> means a sequence of one or more @<item>s separated
 102   by commas:
 103   \begin{quote}
 104     \syntax{<item-list> ::= <item> | <item-list> "," <item>}
 105   \end{quote}
 106 \end{itemize}
 107
 108 \subsection{Lexical syntax}
 109 \label{sec:syntax.lex}
 110
 111 Whitespace and comments are discarded.  The remaining characters are
 112 collected into tokens according to the following syntax.
 113
 114 \begin{grammar}
 115 <token> ::= <identifier>
 116 \alt <reserved-word>
 117 \alt <string-literal>
 118 \alt <char-literal>
 119 \alt <integer-literal>
 120 \alt <punctuation>
 121 \end{grammar}
 122
 123 This syntax is slightly ambiguous.  The following two rules serve to
 124 disambiguate:
 125 \begin{enumerate}
 126 \item Reserved words take precedence.  All @<reserved-word>s are
 127   syntactically @<identifier>s; Sod resolves the ambiguity in favour of
 128   @<reserved-word>.
 129 \item `Maximal munch'.  In other cases, at each stage we take the longest
 130   sequence of characters which could be a token.
 131 \end{enumerate}
 132
 133 \subsubsection{Identifiers} \label{sec:syntax.lex.id}
 134
 135 \begin{grammar}
 136 <identifier> ::= <id-start-char> <id-body-char>$^*$
 137
 138 <id-start-char> ::= <alpha-char> $|$ "_"
 139
 140 <id-body-char> ::= <id-start-char> $|$ <digit-char>
 141
 142 <alpha-char> ::= "A" $|$ "B" $|$ \dots\ $|$ "Z"
 143 \alt "a" $|$ "b" $|$ \dots\ $|$ "z"
 144 \alt <extended-alpha-char>
 145
 146 <digit-char> ::= "0" $|$ <nonzero-digit-char>
 147
 148 <nonzero-digit-char> ::= "1" $|$ "2" $| \cdots |$ "9"
 149 \end{grammar}
 150
 151 The precise definition of @<alpha-char> is left to the function
 152 \textsf{alpha-char-p} in the hosting Lisp system.  For portability,
 153 programmers are encouraged to limit themselves to the standard ASCII letters.
 154
 155 \subsubsection{Reserved words} \label{sec:syntax.lex.reserved}
 156
 157 \begin{grammar}
 158 <reserved-word> ::=
 159 "char" $|$ "class" $|$ "code" $|$ "const" $|$ "double" $|$ "enum" $|$
 160 "extern" $|$ "float" $|$ "import" $|$ "int" $|$ "lisp" $|$ "load" $|$ "long"
 161 $|$ "restrict" $|$ "short" $|$ "signed" $|$ "struct" $|$ "typename" $|$
 162 "union" $|$ "unsigned" $|$ "void" $|$ "volatile"
 163 \end{grammar}
 164
 165 Many of these are borrowed from~C; however, some (e.g., @"import" and
 166 @"lisp") are not, and some C reserved words are not reserved (e.g.,
 167 @"static").
 168
 169 \subsubsection{String and character literals} \label{sec:syntax.lex.string}
 170
 171 \begin{grammar}
 172 <string-literal> ::= "\"" <string-literal-char>$^*$ "\""
 173
 174 <char-literal> ::= "'" <char-literal-char> "'"
 175
 176 <string-literal-char> ::= any character other than "\\" or "\""
 177 \alt "\\" <char>
 178
 179 <char-literal-char> ::= any character other than "\\" or "'"
 180 \alt "\\" <char>
 181
 182 <char> ::= any single character
 183 \end{grammar}
 184
 185 The syntax for string and character literals differs from~C.  In particular,
 186 escape sequences such as @`\textbackslash n' are not recognized.  The use
 187 of string and character literals in Sod, outside of C~fragments, is limited,
 188 and the simple syntax seems adequate.  For the sake of future compatibility,
 189 the use of character sequences which resemble C escape sequences is
 190 discouraged.
 191
 192 \subsubsection{Integer literals} \label{sec:syntax.lex.int}
 193
 194 \begin{grammar}
 195 <integer-literal> ::= <decimal-integer>
 196 \alt <binary-integer>
 197 \alt <octal-integer>
 198 \alt <hex-integer>
 199
 200 <decimal-integer> ::= <nonzero-digit-char> <digit-char>$^*$
 201
 202 <binary-integer> ::= "0" $($"b"$|$"B"$)$ <binary-digit-char>$^+$
 203
 204 <binary-digit-char> ::= "0" $|$ "1"
 205
 206 <octal-integer> ::= "0" $[$"o"$|$"O"$]$ <octal-digit-char>$^+$
 207
 208 <octal-digit-char> ::= "0" $|$ "1" $| \cdots |$ "7"
 209
 210 <hex-integer> ::= "0" $($"x"$|$"X"$)$ <hex-digit-char>$^+$
 211
 212 <hex-digit-char> ::= <digit-char>
 213 \alt "A" $|$ "B" $|$ "C" $|$ "D" $|$ "E" $|$ "F"
 214 \alt "a" $|$ "b" $|$ "c" $|$ "d" $|$ "e" $|$ "f"
 215 \end{grammar}
 216
 217 Sod understands only integers, not floating-point numbers; its integer syntax
 218 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 219 binary.  However, length and signedness indicators are not permitted.
 220
 221 \subsubsection{Punctuation} \label{sec:syntax.lex.punct}
 222
 223 \begin{grammar}
 224 <punctuation> ::= any character other than "\"" or "'"
 225 \end{grammar}
 226
 227 Due to the `maximal munch' rule, @<punctuation> tokens cannot be
 228 alphanumeric.
 229
 230 \subsubsection{Comments} \label{sec:lex-comment}
 231
 232 \begin{grammar}
 233 <comment> ::= <block-comment>
 234 \alt <line-comment>
 235
 236 <block-comment> ::=
 237   "/*"
 238   <not-star>$^*$ $($<star>$^+$ <not-star-or-slash> <not-star>$^*)^*$
 239   <star>$^*$
 240   "*/"
 241
 242 <star> ::= "*"
 243
 244 <not-star> ::= any character other than "*"
 245
 246 <not-star-or-slash> ::= any character other than "*" or  "/"
 247
 248 <line-comment> ::= "//" <not-newline>$^*$ <newline>
 249
 250 <newline> ::= a newline character
 251
 252 <not-newline> ::= any character other than newline
 253 \end{grammar}
 254
 255 Comments are exactly as in C99: both traditional block comments `\texttt{/*}
 256 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
 257 permitted and ignored.
 258
 259 \subsection{Special nonterminals}
 260 \label{sec:special-nonterminals}
 261
 262 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 263 two special nonterminals occur in the module syntax.
 264
 265 \subsubsection{S-expressions} \label{sec:syntax-sexp}
 266
 267 \begin{grammar}
 268 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 269 \end{grammar}
 270
 271 When an S-expression is expected, the Sod parser simply calls the host Lisp
 272 system's \textsf{read} function.  Sod modules are permitted to modify the
 273 read table to extend the S-expression syntax.
 274
 275 S-expressions are self-delimiting, so no end-marker is needed.
 276
 277 \subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
 278
 279 \begin{grammar}
 280 <c-fragment> ::= a sequence of C tokens, with matching brackets
 281 \end{grammar}
 282
 283 Sequences of C code are simply stored and written to the output unchanged
 284 during translation.  They are read using a simple scanner which nonetheless
 285 understands C comments and string and character literals.
 286
 287 A C fragment is terminated by one of a small number of delimiter characters
 288 determined by the immediately surrounding context -- usually a closing brace
 289 or bracket.  The first such delimiter character which is not enclosed in
 290 brackets, braces or parentheses ends the fragment.
 291
 292 \subsection{Module syntax} \label{sec:syntax-module}
 293
 294 \begin{grammar}
 295 <module> ::= <definition>$^*$
 296
 297 <definition> ::= <import-definition>
 298 \alt <load-definition>
 299 \alt <lisp-definition>
 300 \alt <code-definition>
 301 \alt <typename-definition>
 302 \alt <class-definition>
 303 \end{grammar}
 304
 305 A module is the top-level syntactic item.  A module consists of a sequence of
 306 definitions.
 307
 308 \subsection{Simple definitions} \label{sec:syntax.defs}
 309
 310 \subsubsection{Importing modules} \label{sec:syntax.defs.import}
 311
 312 \begin{grammar}
 313 <import-definition> ::= "import" <string> ";"
 314 \end{grammar}
 315
 316 The module named @<string> is processed and its definitions made available.
 317
 318 A search is made for a module source file as follows.
 319 \begin{itemize}
 320 \item The module name @<string> is converted into a filename by appending
 321   @`.sod', if it has no extension already.\footnote{%
 322     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 323     :type "SOD" :case :common))}, so exactly what this means varies
 324     according to the host system.} %
 325 \item The file is looked for relative to the directory containing the
 326   importing module.
 327 \item If that fails, then the file is looked for in each directory on the
 328   module search path in turn.
 329 \item If the file still isn't found, an error is reported and the import
 330   fails.
 331 \end{itemize}
 332 At this point, if the file has previously been imported, nothing further
 333 happens.\footnote{%
 334   This check is done using \textsf{truename}, so it should see through simple
 335   tricks like symbolic links.  However, it may be confused by fancy things
 336   like bind mounts and so on.} %
 337
 338 Recursive imports, either direct or indirect, are an error.
 339
 340 \subsubsection{Loading extensions} \label{sec:syntax.defs.load}
 341
 342 \begin{grammar}
 343 <load-definition> ::= "load" <string> ";"
 344 \end{grammar}
 345
 346 The Lisp file named @<string> is loaded and evaluated.
 347
 348 A search is made for a Lisp source file as follows.
 349 \begin{itemize}
 350 \item The name @<string> is converted into a filename by appending @`.lisp',
 351   if it has no extension already.\footnote{%
 352     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 353     :type "LISP" :case :common))}, so exactly what this means varies
 354     according to the host system.} %
 355 \item A search is then made in the same manner as for module imports
 356   (\xref{sec:syntax.defs.import}).
 357 \end{itemize}
 358 If the file is found, it is loaded using the host Lisp's \textsf{load}
 359 function.
 360
 361 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 362 existing compiled files.  The right way to package a substantial extension to
 363 the Sod translator is to provide the extension as a standard ASDF system (or
 364 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
 365 something like
 366 \begin{listing}
 367 (asdf:operate 'asdf:load-op :foo-extension)
 368 \end{listing}
 369 which will arrange for the extension to be compiled if necessary.
 370
 371 (This approach means that the language doesn't need to depend on any
 372 particular system definition facility.  It's bad enough already that it
 373 depends on Common Lisp.)
 374
 375 \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
 376
 377 \begin{grammar}
 378 <lisp-definition> ::= "lisp" <s-expression> ";"
 379 \end{grammar}
 380
 381 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 382
 383 \textbf{Warning!}  This means that hostile Sod modules are a security hazard.
 384 Lisp code can read and write files, start other programs, and make network
 385 connections.  Don't install Sod modules from sources that you don't
 386 trust.\footnote{%
 387   Presumably you were going to run the corresponding code at some point, so
 388   this isn't as unusually scary as it sounds.  But please be careful.} %
 389
 390 \subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
 391
 392 \begin{grammar}
 393 <typename-definition> ::=
 394   "typename" <identifier-list> ";"
 395 \end{grammar}
 396
 397 Each @<identifier> is declared as naming a C type.  This is important because
 398 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 399 done by distinguishing type names from other identifiers.
 400
 401 Don't declare class names using @"typename"; use @"class" forward
 402 declarations instead.
 403
 404 \subsection{Literal code} \label{sec:syntax-code}
 405
 406 \begin{grammar}
 407 <code-definition> ::=
 408   "code" <identifier> ":" <identifier> $[$<constraints>$]$
 409   "{" <c-fragment> "}"
 410
 411 <constraints> ::= "[" <constraint-list> "]"
 412
 413 <constraint> ::= <identifier>$^+$
 414 \end{grammar}
 415
 416 The @<c-fragment> will be output unchanged to one of the output files.
 417
 418 The first @<identifier> is the symbolic name of an output file.  Predefined
 419 output file names are @"c" and @"h", which are the implementation code and
 420 header file respectively; other output files can be defined by extensions.
 421
 422 The second @<identifier> provides a name for the output item.  Several C
 423 fragments can have the same name: they will be concatenated together in the
 424 order in which they were encountered.
 425
 426 The @<constraints> provide a means for specifying where in the output file
 427 the output item should appear.  (Note the two kinds of square brackets shown
 428 in the syntax: square brackets must appear around the constraints if they are
 429 present, but the whole construct may be omitted.)  Each comma-separated @<constraint>
 430 is a sequence of identifiers naming output items, and indicates that the
 431 output items must appear in the order given -- though the translator is free
 432 to insert additional items in between them.  (The particular output items
 433 needn't be defined already -- indeed, they needn't be defined ever.)
 434
 435 There is a predefined output item @"includes" in both the @"c" and @"h"
 436 output files which is a suitable place for inserting @"\#include"
 437 preprocessor directives in order to declare types and functions for use
 438 elsewhere in the generated output files.
 439
 440 \subsection{Property sets} \label{sec:syntax.propset}
 441
 442 \begin{grammar}
 443 <properties> ::= "[" <property-list> "]"
 444
 445 <property> ::= <identifier> "=" <expression>
 446 \end{grammar}
 447
 448 Property sets are a means for associating miscellaneous information with
 449 classes and related items.  By using property sets, additional information
 450 can be passed to extensions without the need to introduce idiosyncratic
 451 syntax.
 452
 453 A property has a name, given as an @<identifier>, and a value computed by
 454 evaluating an @<expression>.  The value can be one of a number of types,
 455 though the only operators currently defined act on integer values only.
 456
 457 \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
 458
 459 \begin{grammar}
 460 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
 461
 462 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
 463
 464 <factor> ::= <primary> | "+" <factor> | "-" <factor>
 465
 466 <primary> ::=
 467      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 468 \alt "?" <s-expression>
 469 \alt "(" <expression> ")"
 470 \end{grammar}
 471
 472 The arithmetic expression syntax is simple and standard; there are currently
 473 no bitwise, logical, or comparison operators.
 474
 475 A @<primary> expression may be a literal or an identifier.  Note that
 476 identifiers stand for themselves: they \emph{do not} denote values.  For more
 477 fancy expressions, the syntax
 478 \begin{quote}
 479   @"?" @<s-expression>
 480 \end{quote}
 481 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 482 function.
 483 %%% FIXME crossref to extension docs
 484
 485 \subsection{C types} \label{sec:syntax.c-types}
 486
 487 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 488 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 489 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 490 `implicit @"int"' is forbidden), and storage-class specifiers are not
 491 recognized.
 492
 493 \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
 494
 495 \begin{grammar}
 496 <declaration-specifier> ::= <type-name>
 497 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 498 \alt "void" | "char" | "int" | "float" | "double"
 499 \alt "short" | "long"
 500 \alt "signed" | "unsigned"
 501 \alt <qualifier>
 502
 503 <qualifier> ::= "const" | "volatile" | "restrict"
 504
 505 <type-name> ::= <identifier>
 506 \end{grammar}
 507
 508 A @<type-name> is an identifier which has been declared as being a type name,
 509 using the @"typename" or @"class" definitions.
 510
 511 Declaration specifiers may appear in any order.  However, not all
 512 combinations are permitted.  The declaration specifiers must consist of zero
 513 or more @<qualifier>s, and one of the following, up to reordering.
 514 \begin{itemize}
 515 \item @<type-name>
 516 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 517 \item @"void"
 518 \item @"char", @"unsigned char", @"signed char"
 519 \item @"short", @"unsigned short", @"signed short"
 520 \item @"short int", @"unsigned short int", @"signed short int"
 521 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
 522 \item @"long", @"unsigned long", @"signed long"
 523 \item @"long int", @"unsigned long int", @"signed long int"
 524 \item @"long long", @"unsigned long long", @"signed long long"
 525 \item @"long long int", @"unsigned long long int", @"signed long long int"
 526 \item @"float", @"double", @"long double"
 527 \end{itemize}
 528 All of these have their usual C meanings.
 529
 530 \subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
 531
 532 \begin{grammar}
 533 <declarator> ::=
 534   <pointer>$^*$ <inner-declarator> <declarator-suffix>$^*$
 535
 536 <inner-declarator> ::= <identifier> | <qualified-identifier>
 537 \alt "(" <declarator> ")"
 538
 539 <qualified-identifier> ::= <identifier> "." <identifier>
 540
 541 <pointer> ::= "*" <qualifier>$^*$
 542
 543 <declarator-suffix> ::= "[" <c-fragment> "]"
 544 \alt "(" <arguments> ")"
 545
 546 <arguments> ::= $\epsilon$ | "..."
 547 \alt <argument-list> $[$"," "..."$]$
 548
 549 <argument> ::= <declaration-specifier>$^+$ <argument-declarator>
 550
 551 <argument-declarator> ::= <declarator> | $[$<abstract-declarator>$]$
 552
 553 <abstract-declarator> ::=
 554   <pointer>$^+$ | <pointer>$^*$ <inner-abstract-declarator>
 555
 556 <inner-abstract-declarator> ::= "(" <abstract-declarator> ")"
 557 \alt $[$<inner-abstract-declarator>$]$ <declarator-suffix>$^+$
 558 \end{grammar}
 559
 560 The declarator syntax is taken from C, but with some differences.
 561 \begin{itemize}
 562 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 563   closing square bracket.  This allows array dimensions to contain arbitrary
 564   constant expressions.
 565 \item A declarator may have either a single @<identifier> at its centre or a
 566   pair of @<identifier>s separated by a @`.'; this is used to refer to
 567   slots or messages defined in superclasses.
 568 \end{itemize}
 569 The remaining differences are (I hope) a matter of presentation rather than
 570 substance.
 571
 572 \subsection{Defining classes} \label{sec:syntax.class}
 573
 574 \begin{grammar}
 575 <class-definition> ::= <class-forward-declaration>
 576 \alt <full-class-definition>
 577 \end{grammar}
 578
 579 \subsubsection{Forward declarations} \label{sec:class.class.forward}
 580
 581 \begin{grammar}
 582 <class-forward-declaration> ::= "class" <identifier> ";"
 583 \end{grammar}
 584
 585 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 586 to name a class which is currently undefined.  Forward declarations are
 587 necessary in order to resolve certain kinds of circularity.  For example,
 588 \begin{listing}
 589 class Sub;
 590
 591 class Super : SodObject {
 592   Sub *sub;
 593 };
 594
 595 class Sub : Super {
 596   /* ... */
 597 };
 598 \end{listing}
 599
 600 \subsubsection{Full class definitions} \label{sec:class.class.full}
 601
 602 \begin{grammar}
 603 <full-class-definition> ::=
 604   $[$<properties>$]$
 605   "class" <identifier> ":" <identifier-list>
 606   "{" <class-item>$^*$ "}"
 607
 608 <class-item> ::= <slot-item> ";"
 609 \alt <message-item>
 610 \alt <method-item>
 611 \alt  <initializer-item> ";"
 612 \end{grammar}
 613
 614 A full class definition provides a complete description of a class.
 615
 616 The first @<identifier> gives the name of the class.  It is an error to
 617 give the name of an existing class (other than a forward-referenced class),
 618 or an existing type name.  It is conventional to give classes `MixedCase'
 619 names, to distinguish them from other kinds of identifiers.
 620
 621 The @<identifier-list> names the direct superclasses for the new class.  It
 622 is an error if any of these @<identifier>s does not name a defined class.
 623
 624 The @<properties> provide additional information.  The standard class
 625 properties are as follows.
 626 \begin{description}
 627 \item[@"lisp_class"] The name of the Lisp class to use within the translator
 628   to represent this class.  The property value must be an identifier; the
 629   default is @"sod_class".  Extensions may define classes with additional
 630   behaviour, and may recognize additional class properties.
 631 \item[@"metaclass"] The name of the Sod metaclass for this class.  In the
 632   generated code, a class is itself an instance of another class -- its
 633   \emph{metaclass}.  The metaclass defines which slots the class will have,
 634   which messages it will respond to, and what its behaviour will be when it
 635   receives them.  The property value must be an identifier naming a defined
 636   subclass of @"SodClass".  The default metaclass is @"SodClass".
 637   %%% FIXME xref to theory
 638 \item[@"nick"] A nickname for the class, to be used to distinguish it from
 639   other classes in various limited contexts.  The property value must be an
 640   identifier; the default is constructed by forcing the class name to
 641   lower-case.
 642 \end{description}
 643
 644 The class body consists of a sequence of @<class-item>s enclosed in braces.
 645 These items are discussed in the following sections.
 646
 647 \subsubsection{Slot items} \label{sec:sntax.class.slot}
 648
 649 \begin{grammar}
 650 <slot-item> ::=
 651   $[$<properties>$]$
 652   <declaration-specifier>$^+$ <init-declarator-list>
 653
 654 <init-declarator> ::= <declarator> $[$"=" <initializer>$]$
 655 \end{grammar}
 656
 657 A @<slot-item> defines one or more slots.  All instances of the class and any
 658 subclass will contain these slots, with the names and types given by the
 659 @<declaration-specifier>s and the @<declarator>s.  Slot declarators may not
 660 contain qualified identifiers.
 661
 662 It is not possible to declare a slot with function type: such an item is
 663 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 664 functions are fine.
 665
 666 An @<initializer>, if present, is treated as if a separate
 667 @<initializer-item> containing the slot name and initializer were present.
 668 For example,
 669 \begin{listing}
 670 [nick = eg]
 671 class Example : Super {
 672   int foo = 17;
 673 };
 674 \end{listing}
 675 means the same as
 676 \begin{listing}
 677 [nick = eg]
 678 class Example : Super {
 679   int foo;
 680   eg.foo = 17;
 681 };
 682 \end{listing}
 683
 684 \subsubsection{Initializer items} \label{sec:syntax.class.init}
 685
 686 \begin{grammar}
 687 <initializer-item> ::= $[$"class"$]$ <slot-initializer-list>
 688
 689 <slot-initializer> ::= <qualified-identifier> "=" <initializer>
 690
 691 <initializer> ::= "{" <c-fragment> "}" | <c-fragment>
 692 \end{grammar}
 693
 694 An @<initializer-item> provides an initial value for one or more slots.  If
 695 prefixed by @"class", then the initial values are for class slots (i.e.,
 696 slots of the class object itself); otherwise they are for instance slots.
 697
 698 The first component of the @<qualified-identifier> must be the nickname of
 699 one of the class's superclasses (including itself); the second must be the
 700 name of a slot defined in that superclass.
 701
 702 The initializer has one of two forms.
 703 \begin{itemize}
 704 \item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
 705   This is suitable for initializing structure, union or array slots.
 706 \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
 707   initializer, and continues until the next @`,' or @`;' which is not within
 708   nested brackets.  Bare initializers are suitable for initializing scalar
 709   slots, such as pointers or integers, and strings.
 710 \end{itemize}
 711
 712 \subsubsection{Message items} \label{sec:syntax.class.message}
 713
 714 \begin{grammar}
 715 <message-item> ::=
 716   $[$<properties>$]$
 717   <declaration-specifier>$^+$ <declarator> $[$<method-body>$]$
 718 \end{grammar}
 719
 720 \subsubsection{Method items} \label{sec:syntax.class.method}
 721
 722 \begin{grammar}
 723 <method-item> ::=
 724   $[$<properties>$]$
 725   <declaration-specifier>$^+$ <declarator> <method-body>
 726
 727 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 728 \end{grammar}
 729
 730 %%%--------------------------------------------------------------------------
 731 \section{Class objects}
 732
 733 \begin{listing}
 734 typedef struct SodClass__ichain_obj SodClass;
 735
 736 struct sod_chain {
 737   size_t n_classes;                     /* Number of classes in chain */
 738   const SodClass *const *classes;       /* Vector of classes, head first */
 739   size_t off_ichain;                    /* Offset of ichain from instance base */
 740   const struct sod_vtable *vt;          /* Vtable pointer for chain */
 741   size_t ichainsz;                      /* Size of the ichain structure */
 742 };
 743
 744 struct sod_vtable {
 745   SodClass *_class;                     /* Pointer to instance's class */
 746   size_t _base;                         /* Offset to instance base */
 747 };
 748
 749 struct SodClass__islots {
 750
 751   /* Basic information */
 752   const char *name;                     /* The class's name as a string */
 753   const char *nick;                     /* The nickname as a string */
 754
 755   /* Instance allocation and initialization */
 756   size_t instsz;                        /* Instance layout size in bytes */
 757   void *(*imprint)(void *);             /* Stamp instance with vtable ptrs */
 758   void *(*init)(void *);                /* Initialize instance */
 759
 760   /* Superclass structure */
 761   size_t n_supers;                      /* Number of direct superclasses */
 762   const SodClass *const *supers;        /* Vector of direct superclasses */
 763   size_t n_cpl;                         /* Length of class precedence list */
 764   const SodClass *const *cpl;           /* Vector for class precedence list */
 765
 766   /* Chain structure */
 767   const SodClass *link;                 /* Link to next class in chain */
 768   const SodClass *head;                 /* Pointer to head of chain */
 769   size_t level;                         /* Index of class in its chain */
 770   size_t n_chains;                      /* Number of superclass chains */
 771   const struct sod_chain *chains;       /* Vector of chain structures */
 772
 773   /* Layout */
 774   size_t off_islots;                    /* Offset of islots from ichain base */
 775   size_t islotsz;                       /* Size of instance slots */
 776 };
 777
 778 struct SodClass__ichain_obj {
 779   const SodClass__vt_obj *_vt;
 780   struct SodClass__islots cls;
 781 };
 782
 783 struct sod_instance {
 784   struct sod_vtable *_vt;
 785 };
 786 \end{listing}
 787
 788 \begin{listing}
 789 void *sod_convert(const SodClass *cls, const void *obj)
 790 {
 791   const struct sod_instance *inst = obj;
 792   const SodClass *real = inst->_vt->_class;
 793   const struct sod_chain *chain;
 794   size_t i, index;
 795
 796   for (i = 0; i < real->cls.n_chains; i++) {
 797     chain = &real->cls.chains[i];
 798     if (chain->classes[0] == cls->cls.head) {
 799       index = cls->cls.level;
 800       if (index < chain->n_classes && chain->classes[index] == cls)
 801         return ((char *)obj - inst->_vt->_base + chain->off_ichain);
 802       else
 803         return (0);
 804     }
 805   }
 806   return (0);
 807 }
 808 \end{listing}
 809
 810 %%%--------------------------------------------------------------------------
 811 \section{Classes}
 812
 813 \subsection{Classes and superclasses}
 814
 815 A @<full-class-definition> must list one or more existing classes to be the
 816 \emph{direct superclasses} for the new class being defined.  We make the
 817 following definitions.
 818 \begin{itemize}
 819 \item The \emph{superclasses} of a class consist of the class itself together
 820   with the superclasses of its direct superclasses.
 821 \item The \emph{proper superclasses} of a class are its superclasses other
 822   than itself.
 823 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
 824   \emph{subclass} of $C$.
 825 \end{itemize}
 826 The predefined class @|SodObject| has no direct superclasses; it is unique in
 827 this respect.  All classes are subclasses of @|SodObject|.
 828
 829 \subsection{The class precedence list}
 830
 831 Let $C$ be a class.  The superclasses of $C$ form a directed graph, with an
 832 edge from each class to each of its direct superclasses.  This is the
 833 \emph{superclass graph of $C$}.
 834
 835 In order to resolve inheritance of items, we define a \emph{class precedence
 836   list} (or CPL) for each class, which imposes a total order on that class's
 837 superclasses.  The default algorithm for computing the CPL is the \emph{C3}
 838 algorithm~\cite{fixme-c3}, though extensions may implement other algorithms.
 839
 840 The default algorithm works as follows.  Let $C$ be the class whose CPL we
 841 are to compute.  Let $X$ and $Y$ be two of $C$'s superclasses.
 842 \begin{itemize}
 843 \item $C$ must appear first in the CPL.
 844 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
 845   superclasses, then $X$ appears before $Y$ in $C$'s CPL.
 846 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
 847   of $X$ and $Y$ has a subclass which appears further left in the list of
 848   $C$'s direct superclasses will appear earlier in the CPL.
 849 \end{itemize}
 850 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
 851 superclasses of the same direct superclass of $C$ then that direct
 852 superclass's CPL will order $X$ and $Y$.
 853
 854 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
 855 $X$ is earlier than $Y$ in $C$'s class precedence list.  If $C$ is clear from
 856 context then we omit it, saying simply that $X$ is more specific than $Y$.
 857
 858 \subsection{Instances and metaclasses}
 859
 860 A class defines the structure and behaviour of its \emph{instances}: run-time
 861 objects created (possibly) dynamically.  An instance is an instance of only
 862 one class, though structurally it may be used in place of an instance of any
 863 of that class's superclasses.  It is possible, with care, to change the class
 864 of an instance at run-time.
 865
 866 Classes are themselves represented as instances -- called \emph{class
 867   objects} -- in the running program.  Being instances, they have a class,
 868 called the \emph{metaclass}.  The metaclass defines the structure and
 869 behaviour of the class object.
 870
 871 The predefined class @|SodClass| is the default metaclass for new classes.
 872 @|SodClass| has @|SodObject| as its only direct superclass.  @|SodClass| is
 873 its own metaclass.
 874
 875 \subsection{Items and inheritance}
 876
 877 A class definition also declares \emph{slots}, \emph{messages},
 878 \emph{initializers} and \emph{methods} -- collectively referred to as
 879 \emph{items}.  In addition to the items declared in the class definition --
 880 the class's \emph{direct items} -- a class also \emph{inherits} items from
 881 its superclasses.
 882
 883 The precise rules for item inheritance vary according to the kinds of items
 884 involved.
 885
 886 Some object systems have a notion of `repeated inheritance': if there are
 887 multiple paths in the superclass graph from a class to one of its
 888 superclasses then items defined in that superclass may appear duplicated in
 889 the subclass.  Sod does not have this notion.
 890
 891 \subsubsection{Slots}
 892 A \emph{slot} is a unit of state.  In other object systems, slots may be
 893 called `fields', `member variables', or `instance variables'.
 894
 895 A slot has a \emph{name} and a \emph{type}.  The name serves only to
 896 distinguish the slot from other direct slots defined by the same class.  A
 897 class inherits all of its proper superclasses' slots.  Slots inherited from
 898 superclasses do not conflict with each other or with direct slots, even if
 899 they have the same names.
 900
 901 At run-time, each instance of the class holds a separate value for each slot,
 902 whether direct or inherited.  Changing the value of an instance's slot
 903 doesn't affect other instances.
 904
 905 \subsubsection{Initializers}
 906 Mumble.
 907
 908 \subsubsection{Messages}
 909 A \emph{message} is the stimulus for behaviour.  In Sod, a class must define,
 910 statically, the name and format of the messages it is able to receive and the
 911 values it will return in reply.  In this respect, messages are similar to
 912 `abstract member functions' or `interface member functions' in other object
 913 systems.
 914
 915 Like slots, a message has a \emph{name} and a \emph{type}.  Again, the name
 916 serves only to distinguish the message from other direct messages defined by
 917 the same class.  Messages inherited from superclasses do not conflict with
 918 each other or with direct messages, even if they have the same name.
 919
 920 At run-time, one sends a message to an instance by invoking a function
 921 obtained from the instance's \emph{vtable}: see \xref{sec:fixme-vtable}.
 922
 923 \subsubsection{Methods}
 924 A \emph{method} is a unit of behaviour.  In other object systems, methods may
 925 be called `member functions'.
 926
 927 A method is associated with a message.  When a message is received by an
 928 instance, all of the methods associated with that message on the instance's
 929 class or any of its superclasses are \emph{applicable}.  The details of how
 930 the applicable methods are invoked are described fully in
 931 \xref{sec:fixme-method-combination}.
 932
 933 \subsection{Chains and instance layout}
 934
 935 \include{sod-backg}
 936
 937 \end{document}
 938 \f
 939 %%% Local variables:
 940 %%% mode: LaTeX
 941 %%% TeX-PDF-mode: t
 942 %%% End: