a3f3e513f96da07ac6d58039a1e441354aaebed8
[sod] / src / codegen-proto.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Code generation protocol
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensible Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (cl:in-package #:sod)
27
28 ;;;--------------------------------------------------------------------------
29 ;;; Temporary names.
30
31 ;; Protocol.
32
(export 'format-temporary-name)
(defgeneric format-temporary-name (var stream)
  (:documentation
   "Print the name of the temporary variable VAR on STREAM."))
37
(export 'var-in-use-p)
(defgeneric var-in-use-p (var)
  (:documentation
   "Return true if VAR is in use at the moment.  See `with-temporary-var'.")

  ;; Fallback for every object which has no more specific method.
  (:method (var)
    "A variable which isn't a temporary is considered permanently in use."
    (declare (ignore var))
    t))
(defgeneric (setf var-in-use-p) (value var)
  (:documentation
   "Set the in-use flag of VAR to VALUE.  See `with-temporary-var'."))
49
50 ;; Root class.
51
(export '(temporary-name temp-tag))
(defclass temporary-name ()
  ((tag :reader temp-tag
        :initarg :tag))
  (:documentation
   "Common ancestor of the names of temporary variables and arguments.

   The TAG is supplied with the `:tag' initarg and read back with
   `temp-tag'."))
57
58 ;; Important temporary names.
59
;; Well-known temporary names.  Each is a plain `temporary-name' whose tag is
;; the literal string shown.  Only the first two are exported from here.
(export '(*sod-ap* *sod-master-ap*))

(defparameter *sod-ap*
  (make-instance 'temporary-name
                 :tag "sod__ap"))

(defparameter *sod-master-ap*
  (make-instance 'temporary-name
                 :tag "sod__master_ap"))

(defparameter *sod-tmp-ap*
  (make-instance 'temporary-name
                 :tag "sod__tmp_ap"))

(defparameter *sod-tmp-val*
  (make-instance 'temporary-name
                 :tag "sod__t"))

(defparameter *sod-keywords*
  (make-instance 'temporary-name
                 :tag "sod__kw"))

(defparameter *sod-key-pointer*
  (make-instance 'temporary-name
                 :tag "sod__keys"))
73
(export '*null-pointer*)
(defparameter *null-pointer*
  "NULL"
  "The text used to denote a null pointer: the string \"NULL\".")
76
77 ;;;--------------------------------------------------------------------------
78 ;;; Instructions.
79
80 ;; Classes.
81
(export 'inst)
(defclass inst ()
  ()
  (:documentation
   "The root class of all instructions.

   An `instruction' is any fragment which it might be useful to hand to a
   code generator; statements and expressions alike are represented as trees
   of instructions.  New instruction types are most conveniently defined
   using the `definst' macro.

   Output is the only protocol which matters for instructions: print one by
   calling `print-object' with `*print-escape*' nil.

   The class has no slots and no behaviour of its own: it serves as a handy
   marker for instruction subclasses."))
97
(export 'inst-metric)
(defgeneric inst-metric (inst)
  (:documentation
   "Return a `metric' indicating how complicated INST is.

   By default, an inst node has metric 1.  The `inst' subclasses made by
   `definst' (q.v.) get an automatically generated method which returns one
   plus the sum of the metrics of the node's children.

   This is not meant to be a rigorous measure.  It exists so that code
   generators can decide fairly simply whether to inline code or call it.")

  ;; Anything without a more specific method counts as a single unit.
  (:method (inst)
    (declare (ignore inst))
    1)

  ;; The empty list is more specifically `null' than `list', so it also
  ;; counts as a single unit rather than as an empty sum.
  (:method ((inst null))
    1)

  ;; A nonempty list is worth the total of its elements.
  (:method ((inst list))
    (reduce #'+ (mapcar #'inst-metric inst))))
118
119 ;; Instruction definition.
120
(export 'definst)
(defmacro definst (code (streamvar &key export) args &body body)
  "Define an instruction type and describe how to output it.

   An `inst' can represent any structured piece of output syntax: a
   statement, expression or declaration, for example.  This macro defines the
   following things:

     * A class `CODE-inst' to represent the instruction.

     * Instance slots named after the ARGS, with matching keyword initargs,
       and `inst-ARG' readers.

     * A constructor `make-CODE-inst' which accepts the ARGS (as an ordinary
       BVL) as arguments and returns a fresh instance.

     * A method on `inst-metric' which returns one plus the sum of the
       metrics of the slot values.

     * A print method, which prints a diagnostic dump if `*print-escape*' is
       set, or invokes the BODY (with STREAMVAR bound to the output stream)
       otherwise.  The BODY is expected to produce target code at this
       point.  The BODY is wrapped in a `block' named CODE.

   The ARGS are an ordinary lambda-list, with the following quirks:

     * Where an argument-name symbol is expected (as opposed to a list), a
       list (ARG SLOT) may be written instead.  This allows the slots to be
       named independently of the argument names, which is handy if they'd
       otherwise conflict with exported symbol names.

     * If an argument name begins with a `%' character, then the `%' is
       stripped off, except when naming the actual slot.  Hence, `%FOO' is
       equivalent to a list `(FOO %FOO)', except that a `%'-symbol can be
       used even where the lambda-list syntax permits a list.

   If EXPORT is non-nil, then export the `CODE-inst' and `make-CODE-inst'
   symbols, together with the names of the `inst-ARG' readers."

  (multiple-value-bind (bvl public private)
      ;; The hard part of this is digging through the BVL to find the slot
      ;; names.  Collect them into an actual BVL which will be acceptable to
      ;; `defun', and (matching) lists of the PUBLIC and PRIVATE names of the
      ;; slots.

      (let ((state :mandatory)
            (bvl (make-list-builder))
            (public (make-list-builder))
            (private (make-list-builder)))

        (labels ((recurse-arg (arg path)
                   ;; Figure out the argument name in ARG, which might be a
                   ;; symbol or a list with the actual argument name buried
                   ;; in it somewhere.  Once we've found it, return the
                   ;; appropriate entries to add to the BVL, PUBLIC, and
                   ;; PRIVATE lists.
                   ;;
                   ;; The PATH indicates a route to take through the tree to
                   ;; find the actual argument name: it's a list of
                   ;; nonnegative integers, one for each level of structure:
                   ;; the integer indicates which element of the list at that
                   ;; level to descend into to find the argument name
                   ;; according to the usual BVL syntax.  It's always
                   ;; acceptable for a level to actually be a symbol, which
                   ;; is then the argument name we were after.  If we reach
                   ;; the bottom and we still have a list, then it must be a
                   ;; (PUBLIC PRIVATE) pair.

                   (cond ((symbolp arg)
                          ;; We've bottomed out at a symbol.  If it starts
                          ;; with a `%' then that's the private name: strip
                          ;; the `%' to find the public name.  Otherwise, the
                          ;; symbol is all we have.

                          (let ((name (symbol-name arg)))
                            (if (and (plusp (length name))
                                     (char= (char name 0) #\%))
                                (let ((public (intern (subseq name 1))))
                                  (values public public arg))
                                (values arg arg arg))))

                         ((atom arg)
                          ;; Any other kind of atom is obviously bogus.
                          (error "Unexpected item ~S in lambda-list." arg))

                         ((null path)
                          ;; We've bottomed out of the path and still have a
                          ;; list.  It must be (PUBLIC PRIVATE).

                          (multiple-value-bind (public private)
                              (if (cdr arg) (values (car arg) (cadr arg))
                                  (values (car arg) (car arg)))
                            (values public public private)))

                         (t
                          ;; We have a list.  Take the first step in the
                          ;; PATH, and recursively process corresponding list
                          ;; element with the remainder of the PATH.  The
                          ;; PUBLIC and PRIVATE slot names are fine, but we
                          ;; must splice the given BVL entry into our list
                          ;; structure.
                          ;;
                          ;; The entry which comes back is a single ELEMENT
                          ;; of our list (a symbol, or a sublist such as
                          ;; `(:key var)'), not a list of elements, so it
                          ;; must be wrapped in a list before being handed to
                          ;; `append': `append' requires every argument but
                          ;; the last to be a proper list.

                          (let* ((step (car path))
                                 (mine (nthcdr step arg)))
                            (multiple-value-bind (full public private)
                                (recurse-arg (car mine) (cdr path))
                              (values (append (subseq arg 0 step)
                                              (list full)
                                              (cdr mine))
                                      public
                                      private))))))

                 (hack-arg (arg maxdp)
                   ;; Find the actual argument name in a BVL entry, and add
                   ;; the appropriate entries to the `bvl', `public', and
                   ;; `private' lists.

                   (multiple-value-bind (full public-name private-name)
                       (recurse-arg arg maxdp)
                     (lbuild-add bvl full)
                     (lbuild-add public public-name)
                     (lbuild-add private private-name))))

          ;; Process the augmented BVL, extracting a standard BVL suitable
          ;; for `defun', and the public and private slot names into our
          ;; list.
          (dolist (arg args)
            (cond ((or (eq arg '&optional)
                       (eq arg '&rest)
                       (eq arg '&key)
                       (eq arg '&aux))
                   (setf state arg)
                   (lbuild-add bvl arg))

                  ((eq arg '&allow-other-keys)
                   (lbuild-add bvl arg))

                  ((or (eq state :mandatory)
                       (eq state '&rest))
                   (hack-arg arg '()))

                  ((or (eq state '&optional)
                       (eq state '&aux))
                   (hack-arg arg '(0)))

                  ((eq state '&key)
                   (hack-arg arg '(0 1)))

                  (t
                   (error "Confusion in ~S!" 'definst)))))

        ;; Done!  That was something of a performance.
        (values (lbuild-list bvl)
                (lbuild-list public)
                (lbuild-list private)))

    ;; Now we can actually build the pieces of the code-generation machinery.
    (let* ((inst-var (gensym "INST"))
           (class-name (symbolicate code '-inst))
           (constructor-name (symbolicate 'make- code '-inst))
           (keys (mapcar (lambda (arg) (intern (symbol-name arg) :keyword))
                         public)))

      ;; We have many jobs to do in the expansion.
      `(progn

         ;; A class to hold the data.
         (defclass ,class-name (inst)
           ,(mapcar (lambda (public-slot private-slot key)
                      `(,private-slot :initarg ,key
                                      :reader ,(symbolicate 'inst-
                                                            public-slot)))
                    public private keys))

         ;; A constructor to make an instance of the class.
         (defun ,constructor-name (,@bvl)
           (make-instance ',class-name ,@(mappend #'list keys public)))

         ;; A method on `inst-metric', to feed into inlining heuristics.
         (defmethod inst-metric ((,inst-var ,class-name))
           (with-slots (,@private) ,inst-var
             (+ 1 ,@(mapcar (lambda (slot) `(inst-metric ,slot)) private))))

         ;; A method to actually produce the necessary output.  Within the
         ;; BODY the public names are `with-slots' places, so assigning to
         ;; one of them modifies the instruction being printed.
         (defmethod print-object ((,inst-var ,class-name) ,streamvar)
           (with-slots ,(mapcar #'list public private) ,inst-var
             (if *print-escape*
                 (print-unreadable-object (,inst-var ,streamvar :type t)
                   (format ,streamvar "~@<~@{~S ~@_~S~^ ~_~}~:>"
                           ,@(mappend #'list keys public)))
                 (block ,code ,@body))))

         ;; Maybe export all of this stuff.
         ,@(and export `((export '(,class-name ,constructor-name
                                   ,@(mapcar (lambda (slot)
                                               (symbolicate 'inst- slot))
                                             public)))))

         ;; And try not to spam a REPL.
         ',code))))
317
318 ;; Formatting utilities.
319
(defun format-compound-statement* (stream child morep thunk)
  "Do the actual work for the `format-compound-statement' macro.

   THUNK is called with STREAM to print the introductory material; CHILD is
   then printed after it.  MOREP says what follows the child: nil for
   nothing, `:space' for a space, or `t' for a line break."
  (if (typep child 'block-inst)

      ;; A braced block simply follows its introduction on the same line.
      (progn
        (funcall thunk stream)
        (write-char #\space stream)
        (princ child stream)
        (when morep (write-char #\space stream)))

      ;; Any other child may be moved onto a line of its own, indented by
      ;; two columns.
      (progn
        (pprint-logical-block (stream nil)
          (funcall thunk stream)
          (write-char #\space stream)
          (pprint-indent :block 2 stream)
          (pprint-newline :linear stream)
          (princ child stream)
          (pprint-indent :block 0 stream))
        (cond ((eq morep :space)
               (write-char #\space stream)
               (pprint-newline :linear stream))
              ((eq morep t)
               (pprint-newline :mandatory stream))))))
341
(export 'format-compound-statement)
(defmacro format-compound-statement
    ((stream child &optional morep) &body body)
  "Print a compound statement on STREAM.

   BODY prints the introductory material, with STREAM bound to the stream to
   write on.  The CHILD is laid out appropriately according to whether it is
   a `block-inst'.  If MOREP is true, then leave room for more material after
   the child."
  (let ((intro-thunk `(lambda (,stream) ,@body)))
    `(format-compound-statement* ,stream ,child ,morep ,intro-thunk)))
352
(export 'format-banner-comment)
(defun format-banner-comment (stream control &rest args)
  "Write a banner comment to STREAM.

   The text of the comment is produced by processing the format control
   CONTROL with the arguments ARGS.  The comment opens with `/*' and closes
   with ` */'; `* ' is the per-line prefix for the text."
  (format stream
          "~@</~@<* ~@;~?~:>~_ */~:>"
          control args))
356
357 ;; Important instruction classes.
358
;; A variable declaration: the type and NAME as laid out by `pprint-c-type',
;; then an optional ` = INIT', then a semicolon.
(definst var (stream :export t) (name %type &optional init)
  (pprint-logical-block (stream nil)
    (pprint-c-type type stream name)
    (unless (null init)
      (format stream " = ~2I~_~A" init))
    (write-char #\; stream)))
365
;; A `static' function definition.  If a BANNER control string is given, it
;; is formatted with BANNER-ARGS as a banner comment on the lines before the
;; function.
(definst function (stream :export t)
    (name %type body &optional %banner &rest banner-args)
  (pprint-logical-block (stream nil)
    (when banner
      (apply #'format-banner-comment stream banner banner-args)
      (pprint-newline :mandatory stream))
    (write-string "static " stream)
    (pprint-c-type type stream name)
    (format stream "~:@_~A~:@_~:@_" body)))
375
376 ;; Expression statements.
;; `EXPR;'
(definst expr (stream :export t) (%expr)
  (format stream "~A;" expr))

;; `VAR = EXPR;'
(definst set (stream :export t) (var %expr)
  (format stream "~@<~A = ~2I~_~A;~:>"
          var expr))

;; `VAR OP= EXPR;'
(definst update (stream :export t) (var op %expr)
  (format stream "~@<~A ~A= ~2I~_~A;~:>"
          var op expr))
383
384 ;; Special kinds of expressions.
;; `FUNC(ARG, ARG, ...)'
(definst call (stream :export t) (%func &rest args)
  (format stream "~@<~A~4I~_(~@<~{~A~^, ~_~}~:>)~:>"
          func args))

;; `COND ? CONSEQ : ALT'
(definst cond (stream :export t) (%cond conseq alt)
  (format stream "~@<~A ~2I~@_~@<? ~A ~_: ~A~:>~:>"
          cond conseq alt))
389
390 ;; Simple statements.
;; `return (EXPR);', or plain `return;' if EXPR is nil.
(definst return (stream :export t) (%expr)
  (format stream "return~@[ (~A)~];" expr))

;; `break;'
(definst break (stream :export t) ()
  (write-string "break;" stream))

;; `continue;'
(definst continue (stream :export t) ()
  (write-string "continue;" stream))
397
398 ;; Compound statements.
399
(defvar *first-statement-p* t
  "True while the first statement of a block is being printed.

   `block-inst' and `banner-inst' communicate through this variable so that,
   between them, they get the formatting right.")
405
;; A banner comment in the middle of a block.  A newline is forced before
;; it unless it is the first statement of its block.
(definst banner (stream :export t) (control &rest args)
  (pprint-logical-block (stream nil)
    (when (not *first-statement-p*)
      (pprint-newline :mandatory stream))
    (apply #'format-banner-comment stream control args)))
410
(export 'emit-banner)
(defun emit-banner (codegen control &rest args)
  "Emit a banner comment to CODEGEN.

   A `banner-inst' is made from the format CONTROL string and ARGS, and
   passed to `emit-inst'."
  (let ((banner (apply #'make-banner-inst control args)))
    (emit-inst codegen banner)))
414
;; A braced block: the declarations DECLS, then the statements BODY, each on
;; a line of its own, with a blank line between the two groups if both are
;; present.
(definst block (stream :export t) (decls body)
  (write-char #\{ stream)
  (pprint-newline :mandatory stream)
  (pprint-logical-block (stream nil)
    (let ((newlinep nil))
      (flet ((newline ()
               ;; Start a new line, except before the very first item.
               (if newlinep
                   (pprint-newline :mandatory stream)
                   (setf newlinep t))))

        ;; The `pprint-indent' only takes effect from the next line break,
        ;; so the first line has to be indented by hand.  Write exactly as
        ;; many spaces as the block indentation, so that the first item
        ;; lines up with those which follow it.
        (pprint-indent :block 2 stream)
        (write-string "  " stream)

        (when decls
          (dolist (decl decls)
            (newline)
            (write decl :stream stream))
          ;; An extra line break here leaves a blank line before the body.
          (when body (newline)))

        ;; Let a leading `banner-inst' know that it comes first.
        (let ((*first-statement-p* t))
          (dolist (inst body)
            (newline)
            (write inst :stream stream)
            (setf *first-statement-p* nil))))))
  (pprint-newline :mandatory stream)
  (write-char #\} stream))
438
;; An `if' statement.  If the alternative is itself an `if-inst', then the
;; two are printed as an `else if' chain rather than being nested.
(definst if (stream :export t) (%cond conseq &optional alt)
  ;; Inside this body, `cond', `conseq' and `alt' are `with-slots' places on
  ;; the instruction being printed, so assigning to them would overwrite its
  ;; slots, and the instruction would print differently the next time.  Walk
  ;; the chain using local variables instead.
  (let ((stmt "if")
        (this-cond cond)
        (this-conseq conseq)
        (this-alt alt))
    (loop (format-compound-statement
              (stream this-conseq (if this-alt t nil))
            (format stream "~A (~A)" stmt this-cond))
          (typecase this-alt
            (null (return))
            (if-inst (setf stmt "else if"
                           this-cond (inst-cond this-alt)
                           this-conseq (inst-conseq this-alt)
                           this-alt (inst-alt this-alt)))
            (t (format-compound-statement (stream this-alt)
                 (format stream "else"))
               (return))))))
452
;; `while (COND) BODY'
(definst while (stream :export t) (%cond body)
  (format-compound-statement (stream body nil)
    (format stream "while (~A)" cond)))
456
;; `do BODY while (COND);'
(definst do-while (stream :export t) (body %cond)
  (format-compound-statement (stream body :space)
    (format stream "do"))
  (format stream "while (~A);" cond))
461
;; `for (INIT; COND; UPDATE) BODY'.  Any of the three header parts which is
;; nil is left out; both semicolons are always printed.
(definst for (stream :export t) (init %cond update body)
  (format-compound-statement (stream body nil)
    (format stream
            "for (~@<~@[~A~];~@[ ~_~A~];~@[ ~_~A~]~:>)"
            init cond update)))
466
467 ;;;--------------------------------------------------------------------------
468 ;;; Code generation.
469
470 ;; Accessors.
471
(export 'codegen-functions)
(defgeneric codegen-functions (codegen)
  (:documentation
   "Return CODEGEN's list of completed functions, as `function-inst's."))
476
(export 'ensure-var)
(defgeneric ensure-var (codegen name type &optional init)
  (:documentation
   "Add a variable called NAME, with the given TYPE, to CODEGEN's list.

   The NAME should be comparable using `equal', and should print as an
   identifier.  INIT, if present and non-nil, is an expression `inst' which
   provides the variable's initial value."))
486
(export '(emit-inst emit-insts))

(defgeneric emit-inst (codegen inst)
  (:documentation
   "Append INST to CODEGEN's list of instructions."))

(defgeneric emit-insts (codegen insts)
  (:documentation
   "Append each of the list INSTS, in order, to CODEGEN's list of
   instructions.")

  ;; By default, just emit the instructions one at a time.
  (:method (codegen insts)
    (dolist (each insts nil)
      (emit-inst codegen each))))
496
(export '(emit-decl emit-decls))

(defgeneric emit-decl (codegen inst)
  (:documentation
   "Add INST to the end of CODEGEN's list of declarations."))

(defgeneric emit-decls (codegen insts)
  (:documentation
   "Add a list of INSTS to the end of CODEGEN's list of declarations.")

  ;; Default behaviour, matching `emit-insts': add the declarations one at a
  ;; time using `emit-decl'.  A method specialized on a particular code
  ;; generator class takes precedence over this one.
  (:method (codegen insts)
    (dolist (inst insts) (emit-decl codegen inst))))
504
(export 'codegen-push)
(defgeneric codegen-push (codegen)
  (:documentation
   "Save CODEGEN's current code generation state on a stack.

   The state is made up of the variables and instructions accumulated so
   far."))
511
(export 'codegen-pop)
(defgeneric codegen-pop (codegen)
  (:documentation
   "Restore a saved state from CODEGEN's stack.

   The variables and the instructions which were newly accumulated are
   returned as two separate values, each of them a list."))
519
(export 'codegen-add-function)
(defgeneric codegen-add-function (codegen function)
  (:documentation
   "Add FUNCTION to CODEGEN's list of functions.

   No check is made on FUNCTION: it may be any kind of object which you are
   happy to see turn up in the list returned by `codegen-functions'."))
527
(export 'temporary-var)
(defgeneric temporary-var (codegen type)
  (:documentation
   "Return the name of a temporary variable of the given TYPE.

   The variable is marked as being in use.  Clear the in-use flag yourself
   once you have finished with the variable; or, better, use
   `with-temporary-var', which does the cleanup automatically."))
537
(export 'codegen-build-function)
(defun codegen-build-function
    (codegen name type vars insts &optional banner &rest banner-args)
  "Make a function and add it to CODEGEN's list; return the function's NAME.

   The function body is a `block-inst' made from VARS and INSTS.  The BANNER
   and BANNER-ARGS, if any, are passed on to `make-function-inst'."
  (let* ((body (make-block-inst vars insts))
         (func (apply #'make-function-inst
                      name type body banner banner-args)))
    (codegen-add-function codegen func)
    name))
549
(export 'codegen-pop-block)
(defgeneric codegen-pop-block (codegen)
  (:documentation
   "Return a `block-inst' made from the completed code in CODEGEN.")

  ;; Pop the saved state, and wrap up what comes back as a block.
  (:method (codegen)
    (multiple-value-bind (decls stmts) (codegen-pop codegen)
      (make-block-inst decls stmts))))
557
(export 'codegen-pop-function)
(defgeneric codegen-pop-function
    (codegen name type &optional banner &rest banner-args)
  (:documentation
   "Make a function from the completed code in CODEGEN, and return NAME.

   NAME may be any object you like.  TYPE should be a function type object
   which includes argument names.")

  ;; Pop the saved state, and hand what comes back over to
  ;; `codegen-build-function'.
  (:method (codegen name type &optional banner &rest banner-args)
    (multiple-value-bind (decls stmts) (codegen-pop codegen)
      (apply #'codegen-build-function
             codegen name type decls stmts banner banner-args))))
570
(export 'with-temporary-var)
(defmacro with-temporary-var ((codegen var type) &body body)
  "Evaluate BODY with VAR bound to the name of a temporary variable.

   The temporary is obtained from `temporary-var', given CODEGEN and TYPE.
   It is marked in-use for the duration of BODY, and marked available for
   re-use again when BODY ends."
  (multiple-value-bind (doc decls forms) (parse-body body :docp nil)
    (declare (ignore doc))
    `(let ((,var (temporary-var ,codegen ,type)))
       ,@decls
       ;; Release the variable however control leaves the body.
       (unwind-protect (progn ,@forms)
         (setf (var-in-use-p ,var) nil)))))
584
585 ;;;--------------------------------------------------------------------------
586 ;;; Code generation idioms.
587
(export 'deliver-expr)
(defun deliver-expr (codegen target expr)
  "Emit code which delivers the value of EXPR to the TARGET.

   The TARGET is one of the following.

     * `:void': discard the value.  The expression is evaluated all the
       same.

     * `:void-return': discard the value, as for `:void', and then force a
       `return' from the current function.

     * `:return': return the value from the current function.

     * A variable name: store the value in that variable.

   For `:return', `:void' and `:void-return' targets, EXPR may be nil,
   meaning that there is nothing to compute.  An expression is required if
   the target is a variable name."

  (flet ((emit (inst) (emit-inst codegen inst)))
    (cond ((eq target :return)
           (emit (make-return-inst expr)))
          ((eq target :void)
           (when expr (emit (make-expr-inst expr))))
          ((eq target :void-return)
           (when expr (emit (make-expr-inst expr)))
           (emit (make-return-inst nil)))
          (t
           (emit (make-set-inst target expr))))))
617
(export 'convert-stmts)
(defun convert-stmts (codegen target type func)
  "Call FUNC to deliver a value to a target which need not be non-`:return'.

   FUNC is a function of one argument, a non-`:return' target: it generates
   statements which deliver a value (see `deliver-expr') of the given TYPE to
   that target.  In general, the code it generates has the form

     setup instructions...
     (deliver-expr CODEGEN TARGET (compute value...))
     cleanup instructions...

   where the generated program cannot work properly without the cleanup
   instructions.

   `convert-stmts' calls FUNC to generate the code, and arranges that the
   value is delivered correctly whatever the TARGET is: that is, it lifts
   the restriction to non-`:return' targets.  It does this by inventing a new
   temporary variable."

  (cond ((eq target :return)
         ;; Collect the value in a temporary, and return it only once FUNC
         ;; has emitted everything it wants to.
         (with-temporary-var (codegen var type)
           (funcall func var)
           (deliver-expr codegen target var)))
        ((eq target :void-return)
         ;; Discard the value, and return once FUNC's code has been emitted.
         (funcall func :void)
         (emit-inst codegen (make-return-inst nil)))
        (t
         (funcall func target))))
646
(export 'deliver-call)
(defun deliver-call (codegen target func &rest args)
  "Emit a statement which calls FUNC on ARGS, delivering the result to
   TARGET."
  (let ((call (apply #'make-call-inst func args)))
    (deliver-expr codegen target call)))
651
652 ;;;----- That's all, folks --------------------------------------------------