Compatibility: the `init' function no longer calls `imprint' for you.
[sod] / doc / structures.tex
1 %%% -*-latex-*-
2 %%%
3 %%% In-depth exploration of the generated structures
4 %%%
5 %%% (c) 2015 Straylight/Edgeware
6 %%%
7
8 %%%----- Licensing notice ---------------------------------------------------
9 %%%
10 %%% This file is part of the Simple Object Definition system.
11 %%%
12 %%% SOD is free software; you can redistribute it and/or modify
13 %%% it under the terms of the GNU General Public License as published by
14 %%% the Free Software Foundation; either version 2 of the License, or
15 %%% (at your option) any later version.
16 %%%
17 %%% SOD is distributed in the hope that it will be useful,
18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 %%% GNU General Public License for more details.
21 %%%
22 %%% You should have received a copy of the GNU General Public License
23 %%% along with SOD; if not, write to the Free Software Foundation,
24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 \chapter{Object structures} \label{ch:structures}
27
28 This chapter describes the structure and layout of standard Sod objects,
29 classes and associated metadata. Note that Sod's object system is very
30 flexible and it's possible for an extension to define a new root class which
31 works very differently from the standard @|SodObject| described here.
32
33 The concrete types described in \xref{sec:structures.common} and
34 \ref{sec:structures.root} are declared by the header file @|<sod/sod.h>|.
35 The definitions described in \xref{sec:structures.layout} are defined in the
36 header file generated by the containing module.
37
38 %%%--------------------------------------------------------------------------
39 \section{Common instance structure} \label{sec:structures.common}
40
41 As described below, a pointer to an instance actually points to an
42 \emph{instance chain} structure within the instance's overall layout
43 structure.
44
45 Instance chains contain slots and vtable pointers, as described below. All
46 instances have the basic structure of a @|struct sod_instance|.
47
48 \begin{describe}[struct sod_instance]{type}
49 {struct sod_instance \{ \\ \ind
50 const struct sod_vtable *_vt; \- \\
51 \};}
52
53 The basic structure of all instances. Members are as follows.
54 \begin{description} \let\makelabel\code
55 \item[_vt] A pointer to a \emph{vtable}, which has the basic structure of a
56 @|struct sod_vtable|, described below.
57 \end{description}
58 \end{describe}
59
60 \begin{describe}[struct sod_vtable]{type}
61 {struct sod_vtable \{ \\ \ind
62 const SodClass *_class; \\
63 size_t _base; \- \\
64 \};}
65
66 A vtable contains static metadata needed for efficient conversions and
67 message dispatch, and pointers to the instance's class. Each chain points
68 to a different vtable. All vtables have the basic structure of a @|struct
69 sod_vtable|, which has the following members.
70 \begin{description} \let\makelabel\code
71 \item[_class] A pointer to the instance's class object.
72 \item[_base] The offset of this chain structure above the start of the
73 overall instance layout, in bytes. Subtracting @|_base| from the
74 instance chain pointer finds the layout base address.
75 \end{description}
76 \end{describe}
77
78 %%%--------------------------------------------------------------------------
79 \section{Built-in root objects} \label{sec:structures.root}
80
81 This section describes the built-in classes @|SodObject| and @|SodClass|,
82 which are the standard roots of the inheritance and metaclass graphs
83 respectively. Specifically, @|SodObject| has no direct superclasses, and
84 @|SodClass| is its own metaclass. It is not possible to define root classes
85 in module files because of circularities: @|SodObject| has @|SodClass| as its
86 metaclass, and @|SodClass| is a subclass of @|SodObject|. Extensions can
87 define additional root classes, but this is tricky, and not really to be
88 recommended.
89
90
91 \subsection{The SodObject class} \label{sec:structures.root.sodobject}
92
93 \begin{figure}[tbp]
94 \begin{tabular}{p{10pt}p{10pt}}
95 \begin{nprog}
96 struct SodObject__ilayout \{ \\ \ind
97 union \{ \\ \ind
98 struct SodObject__ichain_obj \{ \\ \ind
99 const struct SodObject__vt_obj *_vt; \- \\
100 \} obj; \- \\
101 \} obj; \- \\
102 \};
103 \end{nprog}
104 &
105 \begin{nprog}
106 struct SodObject__vt_obj \{ \\ \ind
107 const SodClass *_class; \\
108 size_t _base; \- \\
109 \};
110 \end{nprog} \\
111 \end{tabular}
112 \caption{Instance and vtable layout of @|SodObject|}
113 \label{fig:structures.root.sodobject}
114 \end{figure}
115
116 \begin{describe}[SodObject]{cls}
117 {[nick = obj, metaclass = SodClass, lisp_metaclass = sod_class] \\
118 class SodObject \{ \}}
119
120 The @|SodObject| class defines no slots or messages. Because @|SodObject|
121 has no direct superclasses, there is only one chain, and no inherited
122 slots or messages, so the single chain contains only a vtable pointer.
123
124 Since there are no messages, and @|SodClass| also has only one chain, the
125 vtable contains only the standard class pointer and offset-to-base members.
126 In a direct instance of @|SodObject| (why would you want one?) the class
127 pointer contains the address of @|SodObject__class| and the offset is zero.
128
129 The instance and vtable layout of @|SodObject| is shown in
130 \xref{fig:structures.root.sodobject}.
131 \end{describe}
132
133
134 \subsection{The SodClass class} \label{sec:structures.root.sodclass}
135
136 \begin{describe}[SodClass]{cls}
137 {[nick = cls, link = SodObject] \\
138 class SodClass : SodObject \{ \\ \ind
139 const char *name; \\
140 const char *nick; \\
141 size_t initsz; \\
142 void *(*imprint)(void *@<p>); \\
143 void *(*init)(void *@<p>); \\
144 size_t n_supers; \\
145 const SodClass *const *supers; \\
146 size_t n_cpl; \\
147 const SodClass *const *cpl; \\
148 const SodClass *link; \\
149 const SodClass *head; \\
150 size_t level; \\
151 size_t n_chains; \\
152 const struct sod_chain *chains; \\
153 size_t off_islots; \\
154 size_t islotsz; \- \\
155 \}}
156
157 The @|SodClass| class defines no messages, but there are a number of slots.
158 Its only direct superclass is @|SodObject| and so (like its superclass) its
159 vtable is trivial.
160
161 The slots defined are as follows.
162 \begin{description} \let\makelabel\code
163
164 \item[name] A pointer to the class's name.
165
166 \item[nick] A pointer to the class's nickname.
167
168 \item[initsz] The size in bytes required to store an instance of the class.
169
170 \item[imprint] A pointer to a function: given a pointer @<p> to at least
171 @<initsz> bytes of appropriately aligned memory, `imprint' this memory
172 so that it becomes a minimally functional instance of the class: all of
173 the vtable and class pointers are properly initialized, but the slots are
174 left untouched. The function returns its argument @<p>.
175
176 \item[init] A pointer to a function: given a pointer @<p> to an imprinted
177 instance, initialize all of its slots for which initializers are defined.
178 Other slots are left untouched. The function returns its argument @<p>.
179
180 \item[n_supers] The number of direct superclasses. (This is zero exactly
181 in the case of @|SodObject|.)
182
183 \item[supers] A pointer to an array of @<n_supers> pointers to class
184 objects listing the class's direct superclasses, in the order in which
185 they were listed in the class definition. If @<n_supers> is zero, then
186 this pointer is null.
187
188 \item[n_cpl] The number of superclasses in the class's class precedence
189 list.
190
191 \item[cpl] A pointer to an array of pointers to class objects listing all
192 of the class's superclasses, from most- to least-specific, starting with
193 the class itself, so $@|$c$@->cls.cpl[0]| = c$ for all class objects
194 $c$.
195
196 \item[link] If the class is a chain head, then this is a null pointer;
197 otherwise it points to the class's distinguished link superclass (which
198 might or might not be a direct superclass).
199
200 \item[head] A pointer to the least-specific class in this class's chain; so
201 @|$c$@->cls.head@->cls.link| is always null, and either @|$c$@->cls.link|
202 is null (in which case $@|$c$@->cls.head| = c$) or $@|$c$@->cls.head| =
203 @|$c$@->cls.link@->cls.head|$.
204
205 \item[level] The number of less specific superclasses in this class's
206 chain. If @|$c$@->cls.link| is null then @|$c$@->cls.level| is zero;
207 otherwise $@|$c$@->cls.level| = @|$c$@->cls.link@->cls.level| + 1$.
208
209 \item[n_chains] The number of chains formed by the class's superclasses.
210
211 \item[chains] A pointer to an array of @|struct sod_chain| structures (see
212 below) describing the class's superclass chains, in decreasing order of
213 specificity of their most specific classes. It is always the case that
214 $@|$c$@->cls.chains[0].classes[$c$@->cls.level]| = c$.
215
216 \item[off_islots] The offset of the class's @|islots| structure relative to
217 its containing @|ichain| structure. The class doesn't define any slots
218 if and only if this is zero. (If the class does define slots, the offset
219 can't be zero, because the vtable pointer is at offset zero.)
220
221 \item[islotsz] The size required to store the class's direct slots, i.e.,
222 the size of its @|islots| structure. The class doesn't define any slots
223 if and only if this is zero.
224
225 \end{description}
226 \end{describe}
227
228 \begin{describe}[struct sod_chain]{type}
229 {struct sod_chain \{ \\ \ind
230 size_t n_classes; \\
231 const SodClass *const *classes; \\
232 size_t off_ichain; \\
233 const struct sod_vtable *vt; \\
234 size_t ichainsz; \- \\
235 \};}
236
237 The @|struct sod_chain| structure describes an individual chain of
238 superclasses. It has the following members.
239 \begin{description} \let\makelabel\code
240
241 \item[n_classes] The number of classes in the chain. This is always at
242 least one.
243
244 \item[classes] A pointer to an array of class pointers listing the classes
245 in the chain from least- to most-specific. So
246 $@|@<classes>[$i$]@->cls.head| = @|@<classes>[0]|$ for all $0 \le i <
247 @<n_classes>$, @|@<classes>[0]@->cls.link| is always null, and
248 $@|@<classes>[$i$]@->cls.link| = @|@<classes>[$i - 1$]|$ if $1 \le i <
249 @<n_classes>$.
250
251 \item[off_ichain] The offset of the @|ichain| structure for this chain within the overall instance layout.
252
253 \item[vt] The vtable for this chain. (It is possible, therefore, to
254 partially duplicate the behaviour of the @<imprint> function by walking
255 the chain structure.\footnote{%
256 There isn't enough information readily available to fill in the class
257 pointers correctly.} %
258 The @<imprint> function is much faster, though.)
259
260 \item[ichainsz] The size of the @|ichain| structure for this chain.
261
262 \end{description}
263 \end{describe}
264
265 %%%--------------------------------------------------------------------------
266 \section{Class and vtable layout} \label{sec:structures.layout}
267
268 The layout algorithms for Sod instances and vtables are nontrivial. They are
269 defined here in full detail, since they're effectively fixed by Sod's ABI
270 compatibility guarantees, so they might as well be documented for the sake of
271 interoperating programs.
272
273 Unfortunately, the descriptions are rather complicated, and, for the most
274 part, not necessary to a working understanding of Sod. The skeleton structure
275 definitions shown should be more than enough for readers attempting to make
276 sense of the generated headers and tables.
277
278 In the description that follows, uppercase letters vary over class names,
279 while the corresponding lowercase letters indicate the class nicknames.
280 Throughout, we consider a class $C$ (therefore with nickname $c$).
281
282
283 \subsection{Generic instance structure}
284 \label{sec:structures.layout.instance}
285
286 The entire state of an instance of $C$ is contained in a single structure of
287 type @|struct $C$__ilayout|.
288
289 \begin{prog}
290 struct $C$__ilayout \{ \\ \ind
291 union $C$__ichainu_$h$ \{ \\ \ind
292 struct $C$__ichain_$h$ \{ \\ \ind
293 const struct $C$__vt_$h$ *_vt; \\
294 struct $H$__islots $h$; \\
295 \quad$\vdots$ \\
296 struct $C$__islots \{ \\ \ind
297 @<type>_1 @<slot>_1; \\
298 \quad$\vdots$ \\
299 @<type>_n @<slot>_n; \- \\
300 \} $c$; \- \\
301 \} $c$; \\
302 struct $H$__ichain_$h$ $h$; \\
303 \quad$\vdots$ \- \\
304 \} $h$; \\
305 union $B$__ichainu_$i$ $i$; \\
306 \quad$\vdots$ \- \\
307 \};
308 \\+
309 typedef struct $C$__ichain_$h$ $C$;
310 \end{prog}
311
312 The set of superclasses of $C$, including itself, can be partitioned into
313 chains by following their distinguished superclass links. (Formally, the
314 chains are the equivalence classes determined by the reflexive, symmetric,
315 transitive closure of the `links to' relation.) Chains are identified by
316 naming their least specific classes; the least specific class in a chain is
317 called the \emph{chain head}. Suppose that the chain head of the chain
318 containing $C$ itself is named $H$ (though keep in mind that it's possible
319 that $H$ is in fact $C$ itself).
320
321 \subsubsection{The ilayout structure}
322 The @|ilayout| structure contains one member for each of $C$'s superclass
323 chains. The first such member is
324 \begin{prog}
325 union $C$__ichainu_$h$ $h$;
326 \end{prog}
327 described below; this is followed by members
328 \begin{prog}
329 union $B$__ichainu_$i$ $i$;
330 \end{prog}
331 for each other chain, where $I$ is the head and $B$ the tail (most-specific)
332 class of the chain. The members are in decreasing order of the specificity
333 of the chains' most-specific classes. (Note that all but the first of these
334 unions have already been defined as part of the definition of the
335 corresponding $B$.)
336
337 \subsubsection{The ichainu union}
338 The @|ichainu| union contains a member for each class in the chain. The
339 first is
340 \begin{prog}
341 struct $C$__ichain_$h$ $c$;
342 \end{prog}
343 and this is followed by corresponding members
344 \begin{prog}
345 struct $A$__ichain_$h$ $a$;
346 \end{prog}
347 for each of $C$'s superclasses $A$ in the same chain in some (unimportant)
348 order.
349
350 \subsubsection{The ichain structure}
351 The
352 @|ichain|
353 structure contains (in order) a pointer
354 \begin{prog}
355 const struct $C$__vt_$h$ *_vt;
356 \end{prog}
357 followed by a structure
358 \begin{prog}
359 struct $A$__islots $a$;
360 \end{prog}
361 for each superclass $A$ of $C$ in the same chain which defines slots, from
362 least- to most-specific; if $C$ defines any slots, then the last member is
363 \begin{prog}
364 struct $C$__islots $c$;
365 \end{prog}
366 A `pointer to $C$' is always assumed (and, indeed, defined in C's
367 type system) to be a pointer to the @|struct $C$__ichain_$h$|.
368
369 \subsubsection{The islots structure}
370 Finally, the @|islots| structure simply contains one member for each slot
371 defined by $C$ in the order they appear in the class definition.
372
373
374 \subsection{Generic vtable structure} \label{sec:structures.layout.vtable}
375
376 As described above, each @|ichain| structure of an instance's storage has a
377 vtable pointer
378 \begin{prog}
379 const struct $C$__vt_$h$ *_vt;
380 \end{prog}
381 In general, the vtables for the different chains will have \emph{different}
382 structures.
383
384 The instance layout splits neatly into disjoint chains. This is necessary
385 because each @|ichain| must have as a prefix the @|ichain| for each
386 superclass in the same chain, and each slot must be stored in exactly one
387 place. The layout of vtables doesn't have this second requirement: it
388 doesn't matter that there are multiple method entry pointers for the same
389 effective method as long as they all work correctly. Indeed, it's essential
390 that they do, because each chain's method entry function will need to apply a
391 different offset to the receiver pointer before invoking the effective
392 method.
393
394 A vtable for a class $C$ with chain head $H$ has the following general
395 structure.
396 \begin{prog}
397 union $C$__vtu_$h$ \{ \\ \ind
398 struct $C$__vt_$h$ \{ \\ \ind
399 const $P$ *_class; \\
400 size_t _base; \\
401 \quad$\vdots$ \\
402 const $Q$ *_cls_$j$; \\
403 \quad$\vdots$ \\
404 ptrdiff_t _off_$i$; \\
405 \quad$\vdots$ \\
406 struct $C$__vtmsgs_$a$ \{ \\ \ind
407 @<type> (*@<msg>)($C$ *, $\dots$); \\
408 \quad$\vdots$ \- \\
409 \} $a$; \\
410 \quad$\vdots$ \- \\
411 \} $c$; \- \\
412 \};
413 \\+
414 extern const union $C$__vtu_$h$ $C$__vtable_$h$;
415 \end{prog}
416
417 \subsubsection{The vtu union}
418 The outer layer is a @|union $C$__vtu_$h$| containing a member
419 \begin{prog}
420 struct $A$__vt_$h$ $a$;
421 \end{prog}
422 for each of $C$'s superclasses $A$ in the same chain, with $C$ itself listed
423 first.
424
425 This is mostly an irrelevant detail,
426 whose purpose is to defend against malicious compilers:
427 pointers are always to one of the inner
428 @|vt|
429 structures.
430 It's important only because it's the outer
431 @|vtu|
432 union which is exported by name.
433 Specifically, for each chain of
434 $C$'s
435 superclasses
436 there is an external object
437 \begin{prog}
438 const union $A$__vtu_$i$ $C$__vtable_$i$;
439 \end{prog}
440 where $A$ and $I$ are respectively the most and least specific classes in the
441 chain.
442
443 \subsubsection{The vt structure}
444 The first member in the @|vt| structure is the \emph{root class pointer}
445 \begin{prog}
446 const $P$ *_class;
447 \end{prog}
448 Among the superclasses of $C$ there must be exactly one class $O$ which
449 itself has no direct superclasses; this is the \emph{root superclass} of $C$.
450 (This is a rule enforced by the Sod translator.) The metaclass $R$ of $O$ is
451 then the \emph{root metaclass} of $C$. The @|_class| member points to the
452 @|ichain| structure of the most specific superclass $P$ of $M$ in the same
453 chain as $R$, where $M$ is the metaclass of $C$.
454
455 This is followed by the \emph{base offset}
456 \begin{prog}
457 size_t _base;
458 \end{prog}
459 which is simply the offset of the @|ichain| structure from the instance base.
460
461 The rest of the vtable structure is populated by walking the superclass chain
462 containing $C$ as follows. For each such superclass $B$, in increasing order
463 of specificity, walk the class precedence list of $B$, again starting with
464 its least-specific superclass. (This complex procedure guarantees that the
465 vtable structure for a class is a prefix of the vtable structure for any of
466 its subclasses in the same chain.)
467
468 So, let $A$ be some superclass of $C$ which has been encountered during this
469 traversal.
470
471 \begin{itemize}
472
473 \item Let $N$ be the metaclass of $A$. Examine the superclass chains of $N$
474 in order of decreasing specificity of their most-specific classes. Let $J$
475 be the chain head of such a chain, and let $Q$ be the most specific
476 superclass of $M$ in the same chain as $J$. If there is currently no
477 class pointer of type $Q$, then add a member
478 \begin{prog}
479 const $Q$ *_cls_$j$;
480 \end{prog}
481 to the vtable pointing to the appropriate @|islots| structure within $M$'s
482 class object.
483
484 \item Examine the superclass chains of $A$ in order of decreasing specificity
485 of their most-specific classes. Let $I$ be the chain head of such a chain.
486 If there is currently no member @|_off_$i$| then add a member
487 \begin{prog}
488 ptrdiff_t _off_$i$;
489 \end{prog}
490 to the vtable, containing the (signed) offset from the @|ichain| structure
491 of the chain headed by $h$ to that of the chain headed by $i$ within the
492 instance's layout.
493
494 \item If class $A$ defines any messages, and there is currently no member
495 $a$, then add a member
496 \begin{prog}
497 struct $C$__vtmsgs_$a$ $a$;
498 \end{prog}
499 to the vtable. See below.
500
501 \end{itemize}
502
503 \subsubsection{The vtmsgs structure}
504 Finally, the @|vtmsgs| structures contain pointers to the effective method
505 entry functions for the messages defined by a superclass. There may be more
506 than one method entry for a message, but all of the entry pointers for a
507 message appear together, and entry pointers for separate messages appear in
508 the order in which the messages are defined. If the receiver class has no
509 applicable primary method for a message then it's usual for the method entry
510 pointer to be null (though, as with a lot of things in Sod, extensions may do
511 something different).
512
513 For a standard message which takes a fixed number of arguments, defined as
514 \begin{prog}
515 @<type>_0 $m$(@<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n);
516 \end{prog}
517 there is always a `main' entry point,
518 \begin{prog}
519 @<type>_0 $m$($C$ *me, @<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n);
520 \end{prog}
521
522 For a standard message which takes a variable number of arguments,
523 defined as
524 \begin{prog}
525 @<type>_0 $m$(@<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n, \dots);
526 \end{prog}
527 or a standard message which takes keyword arguments, defined as
528 \begin{prog}
529 @<type>_0 $m$(\=@<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n? \+ \\
530 @<type>_{n+1} @<kw>_{n+1} @[= @<dflt>_{n+1}@], $\ldots$,
531 @<type>_m @<kw>_m @[= @<dflt>_m@]);
532 \end{prog}
533 two entry points are defined: the usual `main' entry point which accepts a
534 variable number of arguments, and a `valist' entry point which accepts an
535 argument of type @|va_list| in place of the variable portion of the argument
536 list or keywords.
537 \begin{prog}
538 @<type>_0 $m$($C$ *me, @<type>_1 @<arg>_1, $\ldots$,
539 @<type>_n @<arg>_n, \dots); \\
540 @<type>_0 $m$__v($C$ *me, @<type>_1 @<arg>_1, $\ldots$,
541 @<type>_n @<arg>_n, va_list sod__ap);
542 \end{prog}
543
544
545 \subsection{Additional definitions} \label{sec:structures.layout.additional}
546
547 In addition to the instance and vtable structures described above, the
548 following definitions are made for each class $C$.
549
550 For each message $m$ directly defined by $C$ there is a macro definition
551 \begin{prog}
552 \#define $C$_$m$(@<me>, $\ldots$) @<me>@->_vt@->$c$.$m$(@<me>, $\ldots$)
553 \end{prog}
554 which makes sending the message $m$ to an instance of (any subclass of) $C$
555 somewhat less ugly.
556
557 If $m$ takes a variable number of arguments, or keyword arguments, the macro
558 is more complicated and is only available in compilers advertising C99
559 support, but the effect is the same. For each variable-argument message,
560 there is also an additional macro for calling the `valist' entry point.
561 \begin{prog}
562 \#define $C$_$m$__v(@<me>, $\ldots$, @<sod__ap>)
563 @<me>@->_vt@->$c$.$m$__v(@<me>, $\ldots$, @<sod__ap>)
564 \end{prog}
565
566 For each proper superclass $A$ of $C$, there is a macro defined
567 \begin{prog}
568 $A$ *$C$__CONV_$a$($C$ *_obj);
569 \end{prog}
570 (named in \emph{upper case}) which converts a (static-type) pointer to $C$ to
571 a pointer to the same actual instance, but statically typed as a pointer to
572 $A$. This is most useful when $A$ is not in the same chain as $C$ since
573 in-chain upcasts are both trivial and rarely needed, but the full set is
574 defined for the sake of completeness.
575
576 Finally, the class object is defined as
577 \begin{prog}
578 extern const struct $R$__ilayout $C$__classobj; \\
579 \#define $C$__class (\&$C$__classobj.$j$.$r$)
580 \end{prog}
581 The exported symbol @|$C$__classobj| contains the entire class instance.
582 This is usually rather unwieldy. The macro @|$C$__class| is usable as a
583 pointer of type @|const $R$~*|, where $R$ is the root metaclass of $C$, i.e.,
584 the metaclass of the least specific superclass of $C$; usually this is
585 @|const SodClass~*|.
586
587 %%%----- That's all, folks --------------------------------------------------
588
589 %%% Local variables:
590 %%% mode: LaTeX
591 %%% TeX-master: "sod.tex"
592 %%% TeX-PDF-mode: t
593 %%% End: