doc/: Just some whitespace fiddling.
[sod] / doc / structures.tex
1 %%% -*-latex-*-
2 %%%
3 %%% In-depth exploration of the generated structures
4 %%%
5 %%% (c) 2015 Straylight/Edgeware
6 %%%
7
8 %%%----- Licensing notice ---------------------------------------------------
9 %%%
10 %%% This file is part of the Simple Object Definition system.
11 %%%
12 %%% SOD is free software; you can redistribute it and/or modify
13 %%% it under the terms of the GNU General Public License as published by
14 %%% the Free Software Foundation; either version 2 of the License, or
15 %%% (at your option) any later version.
16 %%%
17 %%% SOD is distributed in the hope that it will be useful,
18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 %%% GNU General Public License for more details.
21 %%%
22 %%% You should have received a copy of the GNU General Public License
23 %%% along with SOD; if not, write to the Free Software Foundation,
24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 \chapter{Object structures} \label{ch:structures}
27
28 This chapter describes the structure and layout of standard Sod objects,
29 classes and associated metadata. Note that Sod's object system is very
30 flexible and it's possible for an extension to define a new root class which
31 works very differently from the standard @|SodObject| described here.
32
33 The concrete types described in \xref{sec:structures.common} and
34 \ref{sec:structures.root} are declared by the header file @|<sod/sod.h>|.
35 The definitions described in section~\ref{sec:structures.layout} are defined
36 in the header file generated by the containing module.
37
38 %%%--------------------------------------------------------------------------
39 \section{Common instance structure} \label{sec:structures.common}
40
41 As described below, a pointer to an instance actually points to an
42 \emph{instance chain} structure within the instance's overall layout
43 structure.
44
45 Instance chains contain slots and vtable pointers, as described below. All
46 instances have the basic structure of a @|struct sod_instance|.
47
48 \begin{describe}[struct sod_instance]{type}
49 {struct sod_instance \{ \\ \ind
50 const struct sod_vtable *_vt; \- \\
51 \};}
52
53 The basic structure of all instances. Members are as follows.
54 \begin{description} \let\makelabel\code
55 \item[_vt] A pointer to a \emph{vtable}, which has the basic structure of a
56 @|struct sod_vtable|, described below.
57 \end{description}
58 \end{describe}
59
60 \begin{describe}[struct sod_vtable]{type}
61 {struct sod_vtable \{ \\ \ind
62 const SodClass *_class; \\
63 size_t _base; \- \\
64 \};}
65
66 A vtable contains static metadata needed for efficient conversions and
67 message dispatch, and pointers to the instance's class. Each chain points
68 to a different vtable. All vtables have the basic structure of a @|struct
69 sod_vtable|, which has the following members.
70 \begin{description} \let\makelabel\code
71 \item[_class] A pointer to the instance's class object.
72 \item[_base] The offset of this chain structure above the start of the
73 overall instance layout, in bytes. Subtracting @|_base| from the
74 instance chain pointer finds the layout base address.
75 \end{description}
76 \end{describe}
77
78 %%%--------------------------------------------------------------------------
79 \section{Built-in root objects} \label{sec:structures.root}
80
81 This section describes the built-in classes @|SodObject| and @|SodClass|,
82 which are the standard roots of the inheritance and metaclass graphs
83 respectively. Specifically, @|SodObject| has no direct superclasses, and
84 @|SodClass| is its own metaclass. It is not possible to define root classes
85 in module files because of circularities: @|SodObject| has @|SodClass| as its
86 metaclass, and @|SodClass| is a subclass of @|SodObject|. Extensions can
87 define additional root classes, but this is tricky, and not really to be
88 recommended.
89
90
91 \subsection{The SodObject class} \label{sec:structures.root.sodobject}
92
93 \begin{figure}[tbp]
94 \begin{tabular}{p{10pt}p{10pt}}
95 \begin{prog}
96 struct SodObject__ilayout \{ \\ \ind
97 union \{ \\ \ind
98 struct SodObject__ichain_obj \{ \\ \ind
99 const struct SodObject__vt_obj *_vt; \- \\
100 \}; \- \\
101 \} obj; \- \\
102 \};
103 \end{prog}
104 &
105 \begin{prog}
106 struct SodObject__vt_obj \{ \\ \ind
107 const SodClass *_class; \\
108 size_t _base; \- \\
109 \};
110 \end{prog} \\
111 \end{tabular}
112 \caption{Instance and vtable layout of @|SodObject|}
113 \label{fig:structures.root.sodobject}
114 \end{figure}
115
116 \begin{describe}[SodObject]{cls}
117 {[metaclass = SodClass, lisp_metaclass = sod_class] \\
118 class SodObject \{ \}}
119
120 The @|SodObject| class defines no slots or messages. Because @|SodObject|
121 has no direct superclasses, there is only one chain, and no inherited
122 slots or messages, so the single chain contains only a vtable pointer.
123
124 Since there are no messages, and @|SodClass| also has only one chain, the
125 vtable contains only the standard class pointer and offset-to-base members.
126 In a direct instance of @|SodObject| (why would you want one?) the class
127 pointer contains the address of @|SodObject__class| and the offset is zero.
128
129 The instance and vtable layout of @|SodObject| is shown in
130 \xref{fig:structures.root.sodobject}.
131 \end{describe}
132
133
134 \subsection{The SodClass class} \label{sec:structures.root.sodclass}
135
136 \begin{describe}[SodClass]{cls}
137 {class SodClass : SodObject \{ \\ \ind
138 const char *name; \\
139 const char *nick; \\
140 size_t initsz; \\
141 void *(*imprint)(void *@<p>); \\
142 void *(*init)(void *@<p>); \\
143 size_t n_supers; \\
144 const SodClass *const *supers; \\
145 size_t n_cpl; \\
146 const SodClass *const *cpl; \\
147 const SodClass *link; \\
148 const SodClass *head; \\
149 size_t level; \\
150 size_t n_chains; \\
151 const struct sod_chain *chains; \\
152 size_t off_islots; \\
153 size_t islotsz; \- \\
154 \}}
155
156 The @|SodClass| class defines no messages, but there are a number of slots.
157 Its only direct superclass is @|SodObject| and so (like its superclass) its
158 vtable is trivial.
159
160 The slots defined are as follows.
161 \begin{description} \let\makelabel\code
162
163 \item[name] A pointer to the class's name.
164
165 \item[nick] A pointer to the class's nickname.
166
167 \item[initsz] The size in bytes required to store an instance of the class.
168
169 \item[imprint] A pointer to a function: given a pointer @<p> to at least
170 @<initsz> bytes of appropriately aligned memory, `imprint' this memory
171 so that it becomes a minimally functional instance of the class: all of
172 the vtable and class pointers are properly initialized, but the slots are
173 left untouched. The function returns its argument @<p>.
174
175 \item[init] A pointer to a function: given a pointer @<p> to at least
176 @<initsz> bytes of appropriately aligned memory, initialize an instance
177 of the class in it: all of the vtable and class pointers are initialized,
178 as are slots for which initializers are defined. Other slots are left
179 untouched. The function returns its argument @<p>.
180
181 \item[n_supers] The number of direct superclasses. (This is zero exactly
182 in the case of @|SodObject|.)
183
184 \item[supers] A pointer to an array of @<n_supers> pointers to class
185 objects listing the class's direct superclasses, in the order in which
186 they were listed in the class definition. If @<n_supers> is zero, then
187 this pointer is null.
188
189 \item[n_cpl] The number of superclasses in the class's class precedence
190 list.
191
192 \item[cpl] A pointer to an array of pointers to class objects listing all
193 of the class's superclasses, from most- to least-specific, starting with
194 the class itself, so $c@->@|cls|.@|cpl|[0] = c$ for all class objects
195 $c$.
196
197 \item[link] If the class is a chain head, then this is a null pointer;
198 otherwise it points to the class's distinguished link superclass (which
199 might or might not be a direct superclass).
200
201 \item[head] A pointer to the least-specific class in this class's chain; so
202 $c@->@|cls|.@|head|@->@|cls|.@|link|$ is always null, and either
203 $c@->@|cls|.@|link|$ is null (in which case $c@->@|cls|.@|head| = c$) or
204 $c@->@|cls|.@|head| = c@->@|cls|.@|link|@->@|cls|.@|head|$.
205
206 \item[level] The number of less specific superclasses in this class's
207 chain. If $c@->@|cls|.@|link|$ is null then $c@->@|cls|.@|level|$ is
208 zero; otherwise $c@->@|cls|.@|level| =
209 c@->@|cls|.@|link|@->@|cls|.@|level| + 1$.
210
211 \item[n_chains] The number of chains formed by the class's superclasses.
212
213 \item[chains] A pointer to an array of @|struct sod_chain| structures (see
214 below) describing the class's superclass chains, in decreasing order of
215 specificity of their most specific classes. It is always the case that
216 $c@->@|cls|.@|chains|[0].@|classes|[c@->@|cls|.@|level|] = c$.
217
218 \item[off_islots] The offset of the class's @|islots| structure relative to
219 its containing @|ichain| structure. The class doesn't define any slots
220 if and only if this is zero. (The offset of an actual @|islots| structure
221 can't be zero because the vtable pointer is at offset zero.)
222
223 \item[islotsz] The size required to store the class's direct slots, i.e.,
224 the size of its @|islots| structure. The class doesn't define any slots
225 if and only if this is zero.
226
227 \end{description}
228 \end{describe}
229
230 \begin{describe}[struct sod_chain]{type}
231 {struct sod_chain \{ \\ \ind
232 size_t n_classes; \\
233 const SodClass *const *classes; \\
234 size_t off_ichain; \\
235 const struct sod_vtable *vt; \\
236 size_t ichainsz; \- \\
237 \};}
238
239 The @|struct sod_chain| structure describes an individual chain of
240 superclasses. It has the following members.
241 \begin{description} \let\makelabel\code
242
243 \item[n_classes] The number of classes in the chain. This is always at
244 least one.
245
246 \item[classes] A pointer to an array of class pointers listing the classes
247 in the chain from least- to most-specific. So
248 $@<classes>[i]@->@|cls|.@|head| = @<classes>[0]$ for all $0 \le i <
249 @<n_classes>$, $@<classes>[0]@->@|cls|.@|link|$ is always null, and
250 $@<classes>[i]@->@|cls|.@|link| = @<classes>[i - 1]$ if $1 \le i <
251 @<n_classes>$.
252
253 \item[off_ichain] The offset of the @|ichain| structure for this chain from the start of the overall instance layout.
254
255 \item[vt] The vtable for this chain. (It is possible, therefore, to
256 partially duplicate the behaviour of the @<imprint> function by walking
257 the chain structure.\footnote{%
258 There isn't enough information readily available to fill in the class
259 pointers correctly.} %
260 The @<imprint> function is much faster, though.)
261
262 \item[ichainsz] The size of the @|ichain| structure for this chain.
263
264 \end{description}
265 \end{describe}
266
267 %%%--------------------------------------------------------------------------
268 \section{Class and vtable layout} \label{sec:structures.layout}
269
270 The layout algorithms for Sod instances and vtables are nontrivial. They are
271 defined here in full detail, since they're effectively fixed by Sod's ABI
272 compatibility guarantees, so they might as well be documented for the sake of
273 interoperating programs.
274
275 Unfortunately, the descriptions are rather complicated, and, for the most
276 part, not necessary to a working understanding of Sod. The skeleton structure
277 definitions shown should be more than enough for readers attempting to make
278 sense of the generated headers and tables.
279
280 In the description that follows, uppercase letters vary over class names,
281 while the corresponding lowercase letters indicate the class nicknames.
282 Throughout, we consider a class $C$ (therefore with nickname $c$).
283
284
285 \subsection{Generic instance structure}
286 \label{sec:structures.layout.instance}
287
288 The entire state of an instance of $C$ is contained in a single structure of
289 type @|struct $C$__ilayout|.
290
291 \begin{prog}
292 struct $C$__ilayout \{ \\ \ind
293 union $C$__ichainu_$h$ \{ \\ \ind
294 struct $C$__ichain_$h$ \{ \\ \ind
295 const struct $C$__vt_$h$ *_vt; \\
296 struct $H$__islots $h$; \\
297 \quad$\vdots$ \\
298 struct $C$__islots \{ \\ \ind
299 @<type>_1 @<slot>_1; \\
300 \quad$\vdots$ \\
301 @<type>_n @<slot>_n; \- \\
302 \} $c$; \- \\
303 \} $c$; \\
304 struct $H$__ichain_$h$ $h$; \\
305 \quad$\vdots$ \- \\
306 \} $h$; \\
307 union $B$__ichainu_$i$ $i$; \\
308 \quad$\vdots$ \- \\
309 \};
310 \\[\bigskipamount]
311 typedef struct $C$__ichain_$h$ $C$;
312 \end{prog}
313
314 The set of superclasses of $C$, including itself, can be partitioned into
315 chains by following their distinguished superclass links. (Formally, the
316 chains are the equivalence classes determined by the reflexive, symmetric,
317 transitive closure of the `links to' relation.) Chains are identified by
318 naming their least specific classes; the least specific class in a chain is
319 called the \emph{chain head}. Suppose that the chain head of the chain
320 containing $C$ itself is named $H$ (though keep in mind that it's possible
321 that $H$ is in fact $C$ itself).
322
323 \subsubsection{The ilayout structure}
324 The @|ilayout| structure contains one member for each of $C$'s superclass
325 chains. The first such member is
326 \begin{prog}
327 union $C$__ichainu_$h$ $h$;
328 \end{prog}
329 described below; this is followed by members
330 \begin{prog}
331 union $B$__ichainu_$i$ $i$;
332 \end{prog}
333 for each other chain, where $I$ is the head and $B$ the tail (most-specific)
334 class of the chain. The members are in decreasing order of the specificity
335 of the chains' most-specific classes. (Note that all but the first of these
336 unions have already been defined as part of the definition of the
337 corresponding $B$.)
338
339 \subsubsection{The ichainu union}
340 The @|ichainu| union contains a member for each class in the chain. The
341 first is
342 \begin{prog}
343 struct $C$__ichain_$h$ $c$;
344 \end{prog}
345 and this is followed by corresponding members
346 \begin{prog}
347 struct $A$__ichain_$h$ $a$;
348 \end{prog}
349 for each of $C$'s superclasses $A$ in the same chain in some (unimportant)
350 order.
351
352 \subsubsection{The ichain structure}
353 The
354 @|ichain|
355 structure contains (in order), a pointer
356 \begin{prog}
357 const struct $C$__vt_$h$ *_vt;
358 \end{prog}
359 followed by a structure
360 \begin{prog}
361 struct $A$__islots $a$;
362 \end{prog}
363 for each superclass $A$ of $C$ in the same chain which defines slots, from
364 least- to most-specific; if $C$ defines any slots, then the last member is
365 \begin{prog}
366 struct $C$__islots $c$;
367 \end{prog}
368 A `pointer to $C$' is always assumed (and, indeed, defined in C's
369 type system) to be a pointer to the @|struct $C$__ichain_$h$|.
370
371 \subsubsection{The islots structure}
372 Finally, the @|islots| structure simply contains one member for each slot
373 defined by $C$ in the order they appear in the class definition.
374
375
376 \subsection{Generic vtable structure} \label{sec:structures.layout.vtable}
377
378 As described above, each @|ichain| structure of an instance's storage has a
379 vtable pointer
380 \begin{prog}
381 const struct $C$__vt_$h$ *_vt;
382 \end{prog}
383 In general, the vtables for the different chains will have \emph{different}
384 structures.
385
386 The instance layout splits neatly into disjoint chains. This is necessary
387 because each @|ichain| must have as a prefix the @|ichain| for each
388 superclass in the same chain, and each slot must be stored in exactly one
389 place. The layout of vtables doesn't have this second requirement: it
390 doesn't matter that there are multiple method entry pointers for the same
391 effective method as long as they all work correctly. Indeed, it's essential
392 that they do, because each chain's method entry function will need to apply a
393 different offset to the receiver pointer before invoking the effective
394 method.
395
396 A vtable for a class $C$ with chain head $H$ has the following general
397 structure.
398 \begin{prog}
399 union $C$__vtu_$h$ \{ \\ \ind
400 struct $C$__vt_$h$ \{ \\ \ind
401 const $P$ *_class; \\
402 size_t _base; \\
403 \quad$\vdots$ \\
404 const $Q$ *_cls_$j$; \\
405 \quad$\vdots$ \\
406 ptrdiff_t _off_$i$; \\
407 \quad$\vdots$ \\
408 struct $C$__vtmsgs_$a$ \{ \\ \ind
409 @<type> (*@<msg>)($C$ *, $\dots$); \\
410 \quad$\vdots$ \- \\
411 \} $a$; \\
412 \quad$\vdots$ \- \\
413 \} $c$; \- \\
414 \};
415 \\[\bigskipamount]
416 extern const union $C$__vtu_$h$ $C$__vtable_$h$;
417 \end{prog}
418
419 \subsubsection{The vtu union}
420 The outer layer is a @|union $C$__vtu_$h$| containing a member
421 \begin{prog}
422 struct $A$__vt_$h$ $a$;
423 \end{prog}
424 for each of $C$'s superclasses $A$ in the same chain, with $C$ itself listed
425 first.
426
427 This is mostly an irrelevant detail,
428 whose purpose is to defend against malicious compilers:
429 pointers are always to one of the inner
430 @|vt|
431 structures.
432 It's important only because it's the outer
433 @|vtu|
434 union which is exported by name.
435 Specifically, for each chain of
436 $C$'s
437 superclasses
438 there is an external object
439 \begin{prog}
440 const union $A$__vtu_$i$ $C$__vtable_$i$;
441 \end{prog}
442 where $A$ and $I$ are respectively the most and least specific classes in the
443 chain.
444
445 \subsubsection{The vt structure}
446 The first member in the @|vt| structure is the \emph{root class pointer}
447 \begin{prog}
448 const $P$ *_class;
449 \end{prog}
450 Among the superclasses of $C$ there must be exactly one class $O$ which
451 itself has no direct superclasses; this is the \emph{root superclass} of $C$.
452 (This is a rule enforced by the Sod translator.) The metaclass $R$ of $O$ is
453 then the \emph{root metaclass} of $C$. The @|_class| member points to the
454 @|ichain| structure of the most specific superclass $P$ of $M$, the metaclass
455 of $C$, in the same chain as $R$.
456
457 This is followed by the \emph{base offset}
458 \begin{prog}
459 size_t _base;
460 \end{prog}
461 which is simply the offset of the @|ichain| structure from the instance base.
462
463 The rest of the vtable structure is populated by walking the superclass chain
464 containing $C$ as follows. For each such superclass $B$, in increasing order
465 of specificity, walk the class precedence list of $B$, again starting with
466 its least-specific superclass. (This complex procedure guarantees that the
467 vtable structure for a class is a prefix of the vtable structure for any of
468 its subclasses in the same chain.)
469
470 So, let $A$ be some superclass of $C$ which has been encountered during this
471 traversal.
472
473 \begin{itemize}
474
475 \item Let $N$ be the metaclass of $A$. Examine the superclass chains of $N$
476 in order of decreasing specificity of their most-specific classes. Let $J$
477 be the chain head of such a chain, and let $Q$ be the most specific
478 superclass of $M$ in the same chain as $J$. Then, if there is currently no
479 class pointer of type $Q$, add a member
480 \begin{prog}
481 const $Q$ *_cls_$j$;
482 \end{prog}
483 to the vtable pointing to the appropriate @|islots| structure within $M$'s
484 class object.
485
486 \item Examine the superclass chains of $A$ in order of decreasing specificity
487 of their most-specific classes. Let $I$ be the chain head of such a chain.
488 If there is currently no member @|_off_$i$| then add a member
489 \begin{prog}
490 ptrdiff_t _off_$i$;
491 \end{prog}
492 to the vtable, containing the (signed) offset from the @|ichain| structure
493 of the chain headed by $h$ to that of the chain headed by $i$ within the
494 instance's layout.
495
496 \item If class $A$ defines any messages, and there is currently no member
497 $a$, then add a member
498 \begin{prog}
499 struct $C$__vtmsgs_$a$ $a$;
500 \end{prog}
501 to the vtable. See below.
502
503 \end{itemize}
504
505 \subsubsection{The vtmsgs structure}
506 Finally, the @|vtmsgs| structures contain pointers to the effective method
507 entry functions for the messages defined by a superclass. There may be more
508 than one method entry for a message, but all of the entry pointers for a
509 message appear together, and entry pointers for separate messages appear in
510 the order in which the messages are defined. If the receiver class has no
511 applicable primary method for a message then it's usual for the method entry
512 pointer to be null (though, as with a lot of things in Sod, extensions may do
513 something different).
514
515 For a standard message which takes a fixed number of arguments, defined as
516 \begin{prog}
517 @<type>_0 $m$(@<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n);
518 \end{prog}
519 there is always a `main' entry point,
520 \begin{prog}
521 @<type>_0 $m$($C$ *me, @<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n);
522 \end{prog}
523
524 For a standard message which takes a variable number of arguments,
525 defined as
526 \begin{prog}
527 @<type>_0 $m$(@<type>_1 @<arg>_1, $\ldots$, @<type>_n @<arg>_n, \dots);
528 \end{prog}
529 two entry points are defined: the usual `main' entry point which accepts a
530 variable number of arguments, and a `valist' entry point which accepts an
531 argument of type @|va_list| in place of the variable portion of the argument
532 list.
533 \begin{prog}
534 @<type>_0 $m$($C$ *me, @<type>_1 @<arg>_1, $\ldots$,
535 @<type>_n @<arg>_n, \dots); \\
536 @<type>_0 $m$__v($C$ *me, @<type>_1 @<arg>_1, $\ldots$,
537 @<type>_n @<arg>_n, va_list sod__ap);
538 \end{prog}
539
540
541 \subsection{Additional definitions} \label{sec:structures.layout.additional}
542
543 In addition to the instance and vtable structures described above, the
544 following definitions are made for each class $C$.
545
546 For each message $m$ directly defined by $C$ there is a macro definition
547 \begin{prog}
548 \#define $C$_$m$(@<me>, $\ldots$) @<me>@->_vt@->$c$.$m$(@<me>, $\ldots$)
549 \end{prog}
550 which makes sending the message $m$ to an instance of (any subclass of) $C$
551 somewhat less ugly.
552
553 If $m$ takes a variable number of arguments, the macro is more complicated
554 and is only available in compilers advertising C99 support, but the effect is
555 the same. For each variable-argument message, there is also an additional
556 macro for calling the `valist' entry point.
557 \begin{prog}
558 \#define $C$_$m$__v(@<me>, $\ldots$, @<sod__ap>)
559 @<me>@->_vt@->$c$.$m$__v(@<me>, $\ldots$, @<sod__ap>)
560 \end{prog}
561
562 For each proper superclass $A$ of $C$, there is a macro defined
563 \begin{prog}
564 $A$ *$C$__CONV_$a$($C$ *_obj);
565 \end{prog}
566 (named in \emph{upper case}) which converts a (static-type) pointer to $C$ to
567 a pointer to the same actual instance, but statically typed as a pointer to
568 $A$. This is most useful when $A$ is not in the same chain as $C$ since
569 in-chain upcasts are both trivial and rarely needed, but the full set is
570 defined for the sake of completeness.
571
572 Finally, the class object is defined as
573 \begin{prog}
574 extern const struct $R$__ilayout $C$__classobj; \\
575 \#define $C$__class (\&$C$__classobj.$j$.$r$)
576 \end{prog}
577 The exported symbol @|$C$__classobj| contains the entire class instance.
578 This is usually rather unwieldy. The macro @|$C$__class| is usable as a
579 pointer of type @|const $R$~*|, where $R$ is the root metaclass of $C$, i.e.,
580 the metaclass of the least specific superclass of $C$; usually this is
581 @|const SodClass~*|.
582
583 %%%----- That's all, folks --------------------------------------------------
584
585 %%% Local variables:
586 %%% mode: LaTeX
587 %%% TeX-master: "sod.tex"
588 %%% TeX-PDF-mode: t
589 %%% End: