+%%% -*-latex-*-
+%%%
+%%% Conceptual background
+%%%
+%%% (c) 2015 Straylight/Edgeware
+%%%
+
+%%%----- Licensing notice ---------------------------------------------------
+%%%
+%%% This file is part of the Sensble Object Design, an object system for C.
+%%%
+%%% SOD is free software; you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation; either version 2 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% SOD is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with SOD; if not, write to the Free Software Foundation,
+%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+\chapter{Cutting-room floor}
+
+%%%--------------------------------------------------------------------------
+\section{Generated names}
+
+The generated names for functions and objects related to a class are
+constructed systematically so as not to interfere with each other. The rules
+on class, slot and message naming exist so as to ensure that the generated
+names don't collide with each other.
+
+The following notation is used in this section.
+\begin{description}
+\item[@<class>] The full name of the `focus' class: the one for which we are
+ generating name.
+\item[@<super-nick>] The nickname of a superclass.
+\item[@<head-nick>] The nickname of the chain-head class of the chain
+ in question.
+\end{description}
+
+\subsection{Instance layout}
+
+%%%--------------------------------------------------------------------------
+\section{Class objects}
+
+\begin{listing}
+typedef struct SodClass__ichain_obj SodClass;
+
+struct sod_chain {
+ size_t n_classes; /* Number of classes in chain */
+ const SodClass *const *classes; /* Vector of classes, head first */
+ size_t off_ichain; /* Offset of ichain from instance base */
+ const struct sod_vtable *vt; /* Vtable pointer for chain */
+ size_t ichainsz; /* Size of the ichain structure */
+};
+
+struct sod_vtable {
+ SodClass *_class; /* Pointer to instance's class */
+ size_t _base; /* Offset to instance base */
+};
+
+struct SodClass__islots {
+
+ /* Basic information */
+ const char *name; /* The class's name as a string */
+ const char *nick; /* The nickname as a string */
+
+ /* Instance allocation and initialization */
+ size_t instsz; /* Instance layout size in bytes */
+ void *(*imprint)(void *); /* Stamp instance with vtable ptrs */
+ void *(*init)(void *); /* Initialize instance */
+
+ /* Superclass structure */
+ size_t n_supers; /* Number of direct superclasses */
+ const SodClass *const *supers; /* Vector of direct superclasses */
+ size_t n_cpl; /* Length of class precedence list */
+ const SodClass *const *cpl; /* Vector for class precedence list */
+
+ /* Chain structure */
+ const SodClass *link; /* Link to next class in chain */
+ const SodClass *head; /* Pointer to head of chain */
+ size_t level; /* Index of class in its chain */
+ size_t n_chains; /* Number of superclass chains */
+ const sod_chain *chains; /* Vector of chain structures */
+
+ /* Layout */
+ size_t off_islots; /* Offset of islots from ichain base */
+ size_t islotsz; /* Size of instance slots */
+};
+
+struct SodClass__ichain_obj {
+ const SodClass__vt_obj *_vt;
+ struct SodClass__islots cls;
+};
+
+struct sod_instance {
+ struct sod_vtable *_vt;
+};
+\end{listing}
+
+\begin{listing}
+void *sod_convert(const SodClass *cls, const void *obj)
+{
+ const struct sod_instance *inst = obj;
+ const SodClass *real = inst->_vt->_cls;
+ const struct sod_chain *chain;
+ size_t i, index;
+
+ for (i = 0; i < real->cls.n_chains; i++) {
+ chain = &real->cls.chains[i];
+ if (chain->classes[0] == cls->cls.head) {
+ index = cls->cls.index;
+ if (index < chain->n_classes && chain->classes[index] == cls)
+ return ((char *)cls - inst->_vt._base + chain->off_ichain);
+ else
+ return (0);
+ }
+ }
+ return (0);
+}
+\end{listing}
+
+%%%--------------------------------------------------------------------------
+\section{Classes}
+\label{sec:class}
+
+\subsection{Classes and superclasses} \label{sec:class.defs}
+
+A @<full-class-definition> must list one or more existing classes to be the
+\emph{direct superclasses} for the new class being defined. We make the
+following definitions.
+\begin{itemize}
+\item The \emph{superclasses} of a class consist of the class itself together
+ with the superclasses of its direct superclasses.
+\item The \emph{proper superclasses} of a class are its superclasses other
+ than itself.
+\item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
+ \emph{subclass} of $C$.
+\end{itemize}
+The predefined class @|SodObject| has no direct superclasses; it is unique in
+this respect. All classes are subclasses of @|SodObject|.
+
+\subsection{The class precedence list} \label{sec:class.cpl}
+
+Let $C$ be a class. The superclasses of $C$ form a directed graph, with an
+edge from each class to each of its direct superclasses. This is the
+\emph{superclass graph of $C$}.
+
+In order to resolve inheritance of items, we define a \emph{class precedence
+ list} (or CPL) for each class, which imposes a total order on that class's
+superclasses. The default algorithm for computing the CPL is the \emph{C3}
+algorithm \cite{fixme-c3}, though extensions may implement other algorithms.
+
+The default algorithm works as follows. Let $C$ be the class whose CPL we
+are to compute. Let $X$ and $Y$ be two of $C$'s superclasses.
+\begin{itemize}
+\item $C$ must appear first in the CPL.
+\item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
+ superclasses, then $X$ appears before $Y$ in the $C$'s CPL.
+\item If the above rules don't suffice to order $X$ and $Y$, then whichever
+ of $X$ and $Y$ has a subclass which appears further left in the list of
+ $C$'s direct superclasses will appear earlier in the CPL.
+\end{itemize}
+This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
+superclasses of the same direct superclass of $C$ then that direct
+superclass's CPL will order $X$ and $Y$.
+
+We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
+$X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from
+context then we omit it, saying simply that $X$ is more specific than $Y$.
+
+\subsection{Instances and metaclasses} \label{sec:class.meta}
+
+A class defines the structure and behaviour of its \emph{instances}: run-time
+objects created (possibly) dynamically. An instance is an instance of only
+one class, though structurally it may be used in place of an instance of any
+of that class's superclasses. It is possible, with care, to change the class
+of an instance at run-time.
+
+Classes are themselves represented as instances -- called \emph{class
+ objects} -- in the running program. Being instances, they have a class,
+called the \emph{metaclass}. The metaclass defines the structure and
+behaviour of the class object.
+
+The predefined class @|SodClass| is the default metaclass for new classes.
+@|SodClass| has @|SodObject| as its only direct superclass. @|SodClass| is
+its own metaclass.
+
+To make matters more complicated, Sod has \emph{two} distinct metalevels: as
+well as the runtime metalevel, as discussed above, there's a compile-time
+metalevel hosted in the Sod translator. Since Sod is written in Common Lisp,
+a Sod class's compile-time metaclass is a CLOS class. The usual compile-time
+metaclass is @|sod-class|. The compile-time metalevel is the subject of
+\xref{ch:api}.
+
+\subsection{Items and inheritance} \label{sec:class.inherit}
+
+A class definition also declares \emph{slots}, \emph{messages},
+\emph{initializers} and \emph{methods} -- collectively referred to as
+\emph{items}. In addition to the items declared in the class definition --
+the class's \emph{direct items} -- a class also \emph{inherits} items from
+its superclasses.
+
+The precise rules for item inheritance vary according to the kinds of items
+involved.
+
+Some object systems have a notion of `repeated inheritance': if there are
+multiple paths in the superclass graph from a class to one of its
+superclasses then items defined in that superclass may appear duplicated in
+the subclass. Sod does not have this notion.
+
+\subsubsection{Slots} \label{sec:class.inherit.slots}
+A \emph{slot} is a unit of state. In other object systems, slots may be
+called `fields', `member variables', or `instance variables'.
+
+A slot has a \emph{name} and a \emph{type}. The name serves only to
+distinguish the slot from other direct slots defined by the same class. A
+class inherits all of its proper superclasses' slots. Slots inherited from
+superclasses do not conflict with each other or with direct slots, even if
+they have the same names.
+
+At run-time, each instance of the class holds a separate value for each slot,
+whether direct or inherited. Changing the value of an instance's slot
+doesn't affect other instances.
+
+\subsubsection{Initializers} \label{sec:class.inherit.init}
+Mumble.
+
+\subsubsection{Messages} \label{sec:class.inherit.messages}
+A \emph{message} is the stimulus for behaviour. In Sod, a class must define,
+statically, the name and format of the messages it is able to receive and the
+values it will return in reply. In this respect, a message is similar to
+`abstract member functions' or `interface member functions' in other object
+systems.
+
+Like slots, a message has a \emph{name} and a \emph{type}. Again, the name
+serves only to distinguish the message from other direct messages defined by
+the same class. Messages inherited from superclasses do not conflict with
+each other or with direct messages, even if they have the same name.
+
+At run-time, one sends a message to an instance by invoking a function
+obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
+
+\subsubsection{Methods} \label{sec:class.inherit.methods}
+A \emph{method} is a unit of behaviour. In other object systems, methods may
+be called `member functions'.
+
+A method is associated with a message. When a message is received by an
+instance, all of the methods associated with that message on the instance's
+class or any of its superclasses are \emph{applicable}. The details of how
+the applicable methods are invoked are described fully in
+\xref{sec:fixme-method-combination}.
+
+\subsection{Chains and instance layout} \label{sec:class.layout}
+
+C is a rather low-level language, and in particular it exposes details of the
+way data is laid out in memory. Since an instance of a class~$C$ should be
+(at least in principle) usable anywhere an instance of some superclass $B
+\succeq C$ is expected, this implies that an instance of the subclass $C$
+needs to contain within it a complete instance of each superclass $B$, laid
+out according to the rules of instances of $B$, so that if we have (the
+address of) an instance of $C$, we can easily construct a pointer to a thing
+which looks like an instance of $B$ contained within it.
+
+Specifically, the information we need to retain for an instance of a
+class~$C$ is:
+\begin{itemize}
+\item the values of each of the slots defined by $C$, including those defined
+ by superclasses;
+\item information which will let us convert a pointer to $C$ into a pointer
+ to any superclass $B \succeq C$;
+\item information which will let us call the appropriate effective method for
+ each message defined by $C$, including those defined by superclasses; and
+\item some additional meta-level information, such as how to find the class
+ object for $C$ given (the address of) one of its instances.
+\end{itemize}
+
+Observe that, while each distinct instance must clearly have its own storage
+for slots, all instances of $C$ can share a single copy of the remaining
+information. The individual instance only needs to keep a pointer to this
+shared table, which, inspired by the similar structure in many \Cplusplus\
+ABIs, are called a \emph{vtable}.
+
+The easiest approach would be to decide that instances of $C$ are exactly
+like instances of $B$, only with extra space at the end for the extra slots
+which $C$ defines over and above those already existing in $B$. Conversion
+is then trivial: a pointer to an instance of $C$ can be converted to a
+pointer to an instance of some superclass $B$ simply by casting. Even though
+the root class @|SodObject| doesn't have any slots at all, its instances will
+still need a vtable so that you can find its class object: the address of the
+vtable therefore needs to be at the very start of the instance structure.
+Again, a vtable for a superclass would have a vtable for each of its
+superclasses as a prefix, with new items added afterwards.
+
+This appealing approach works well for an object system which only permits
+single inheritance of both state and behaviour. Alas, it breaks down when
+multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$,
+even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in
+general, $B$'s instance structure will not be a prefix of $B'$'s, nor will
+$B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$
+as a prefix.
+
+A (non-root) class may -- though need not -- have a distinguished \emph{link}
+superclass, which need not be a direct superclass. Furthermore, each
+class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of
+$C$, there can be at most one other superclass of $C$ whose link superclass
+is $A$.\footnote{%
+ That is, it's permitted for two classes $B$ and $B'$ to have the same link
+ superclass $A$, but $B$ and $B'$ can't then both be superclasses of the
+ same class $C$.} %
+Therefore, the links partition the superclasses of~$C$ into nice linear
+\emph{chains}, such that each superclass is a member of exactly one chain.
+If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more
+than that of $A$; otherwise $B$ is called a \emph{chain head} and its level
+is zero. If the classes in a chain are written in a list, chain head first,
+then the level of each class gives its index in the list.
+
+Chains therefore allow us to recover some of the linearity properties which
+made layout simple in the case of single inheritance. The instance structure
+for a class $C$ contains a substructure for each of $C$'s superclass chains;
+a pointer to an object of class $C$ actually points to the substructure for
+the chain containing $C$. The order of these substructures is unimportant
+for now.\footnote{%
+ The chains appear in the order in which their most specific classes appear
+ in $C$'s class precedence list. This guarantees that the chain containing
+ $C$ itself appears first, so that a pointer to $C$'s instance structure is
+ actually a pointer to $C$'s chain substructure. Apart from that, it's a
+ simple, stable, but basically arbitrary choice which can't be changed
+ without breaking the ABI.} %
+The substructure for each chain begins with a pointer to a vtable, followed
+by a structure for each superclass in the chain containing the slots defined
+by that superclass, with the chain head (least specific class) first.
+
+Suppose we have a pointer to (static) type $C$, and want to convert it into a
+pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{%
+ In the more general case, we have a pointer to static type $C$, which
+ actually points to an object of some subclass $D$ of $C$, and want to
+ convert it into a pointer to type $B$. Such a conversion is called a
+ \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast}
+ otherwise. Downcasts and cross-casts require complicated run-time
+ checking, and can will fail unless $B$ is a superclass of $D$.} %
+If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the
+pointer value is already correct and it's only necessary to cast it
+appropriately. Otherwise -- a \emph{cross-chain upcast} -- the pointer needs
+to be adjusted to point to a different chain substructure. Since the lengths
+and relative positions of the chain substructures vary between classes, the
+adjustments are stored in the vtable. Cross-chain upcasts are therefore a
+bit slower than in-chain upcasts.
+
+Each chain has its own separate vtable, because much of the metadata stored
+in the vtable is specific to a particular chain. For example:
+\begin{itemize}
+\item offsets to other chains' substructures will vary depending on which
+ chain we start from; and
+\item entry points to methods
+\end{itemize}
+%%%--------------------------------------------------------------------------
+\section{Superclass linearization}
+
+Before making any decisions about relationships between superclasses, Sod
+\emph{linearizes} them, i.e., imposes a total order consistent with the
+direct-subclass/superclass partial order.
+
+In the vague hope that we don't be completely bogged down in formalism by the
+end of this, let's introduce some notation. We'll fix some class $z$ and
+consider its set of superclasses $S(z) = \{ a, b, \dots \}$. We can define a
+relation $c \prec_1 d$ if $c$ is a direct subclass of $d$, and extend it by
+taking the reflexive, transitive closure: $c \preceq d$ if and only if
+\begin{itemize}
+\item $c = d$, or
+\item there exists some class $x$ such that $c \prec_1 x$ and $x \preceq d$.
+\end{itemize}
+This is the `is-subclass-of' relation we've been using so far.\footnote{%
+ In some object systems, notably Flavors, this relation is allowed to fail
+ to be a partial order because of cycles in the class graph. I haven't
+ given a great deal of thought to how well Sod would cope with a cyclic
+ class graph.} %
+We write $d \succeq c$ and say that $d$ is a superclass of $c$ if and only if
+$c \preceq d$.
+
+The problem comes when we try to resolve inheritance questions. A class
+should inherit behaviour from its superclasses; but, in a world of multiple
+inheritance, which one do we choose? We get a simple version of this problem
+when we try to resolve inheritance of slot initializers: only one initializer
+can be inherited.
+
+We start by collecting into a set~$I$ the classes which define an initializer
+for the slot. If $I$ contains both a class $x$ and one of $x$'s superclasses
+then we should prefer $x$ and consider the superclass to be overridden. So
+we should confine our attention to \emph{least} classes: a member $x$ of a
+set $I$ is least, with respect to a particular partial order, if $y \preceq
+x$ only when $x = y$. If there is a single least class in our set the we
+have a winner. Otherwise we want some way to choose among them.
+
+This is not uncontroversial. Languages such as \Cplusplus\ refuse to choose
+among least classes; instead, any program in which such a choice must be made
+is simply declared erroneous.
+
+Simply throwing up our hands in horror at this situation is satisfactory when
+we only wanted to pick one `winner', as we do for slot initializers.
+However, method combination is a much more complicated business. We don't
+want to pick just one winner: we want to order all of the applicable methods
+in some way. Insisting that there is a clear winner at every step along the
+chain is too much of an imposition. Instead, we \emph{linearize} the
+classes.
+
+%%%--------------------------------------------------------------------------
+\section{Invariance, covariance, contravariance}
+
+In Sod, at least with regard to the existing method combinations, method
+types are \emph{invariant}. This is not an accident, and it's not due to
+ignorance.
+
+The \emph{signature} of a function, method or message describes its argument
+and return-value types. If a method's arguments are an integer and a string,
+and it returns a character, we might write its signature as
+\[ (@|int|, @|string|) \to @|char| \]
+In Sod, a method's arguments have to match its message's arguments precisely,
+and the return type must either be @|void| -- for a dæmon method -- or again
+match the message's return type. This is argument and return-type
+\emph{invariance}.
+
+Some object systems allow methods with subtly different signatures to be
+defined on a single message. In particular, since the idea is that instances
+of a subclass ought to be broadly compatible~(see \xref{sec:phil.lsp}) with
+existing code which expects instances of a superclass, we might be able to
+get away with bending method signatures one way or another to permit this.
+
+\Cplusplus\ permits \emph{return-type covariance}, where a method's return
+type can be a subclass of the return type specified by a less-specific
+method. Eiffel allows \emph{argument covariance}, where a method's arguments
+can be subclasses of the arguments specified by a less-specific
+method.\footnote{%
+ Attentive readers will note that I ought to be talking about pointers to
+ instances throughout. I'm trying to limit the weight of the notation.
+ Besides, I prefer data models as found in Lisp and Python where all values
+ are held by reference.} %
+
+Eiffel's argument covariance is unsafe.\footnote{%
+ Argument covariance is correct if you're doing runtime dispatch based on
+ argument types. Eiffel isn't: it's single dispatch, like Sod is.} %
+Suppose that we have two pairs of classes, $a \prec_1 b$ and $c \prec_1 d$.
+Class $b$ defines a message $m$ with signature $d \to @|int|$; class $a$
+defines a method with signature $c \to @|int|$. This means that it's wrong
+to send $m$ to an instance $a$ carrying an argument of type $d$. But of
+course, we can treat an instance of $a$ as if it's an instance of $b$,
+whereupon it appears that we are permitted to pass a~$c$ in our message. The
+result is a well-known hole in the type system. Oops.
+
+\Cplusplus's return-type covariance is fine. Also fine is argument
+\emph{contravariance}. If $b$ defined its message to have signature $c \to
+@|int|$, and $a$ were to broaden its method to $d \to @|int|$, there'd be no
+problem. All $c$s are $d$s, so viewing an $a$ as a $b$ does no harm.
+
+All of this fiddling with types is fine as long as method inheritance or
+overriding is an all-or-nothing thing. But Sod has method combinations,
+where applicable methods are taken from the instance's class and all its
+superclasses and combined. And this makes everything very messy.
+
+It's possible to sort all of the mess out in the generated effective method
+-- we'd just have to convert the arguments to the types that were expected by
+the direct methods. This would require expensive run-time conversions of all
+of the non-invariant arguments and return values. And we'd need some
+complicated rule so that we could choose sensible types for the method
+entries in our vtables. Something like this:
+\begin{quote} \itshape
+ For each named argument of a message, there must be a unique greatest type
+ among the types given for that argument by the applicable methods; and
+ there must be a unique least type among all of the return types of the
+ applicable methods.
+\end{quote}
+I have visions of people wanting to write special no-effect methods whose
+only purpose is to guide the translator around the class graph properly.
+Let's not.
+
+%% things to talk about:
+%% Liskov substitution principle and why it's mad
+
+%%%----- That's all, folks --------------------------------------------------
+
+%%% Local variables:
+%%% mode: LaTeX
+%%% TeX-master: "sod.tex"
+%%% TeX-PDF-mode: t
+%%% End: