@@@ progfmt wip
[sod] / doc / progfmt
1 #! /bin/sh
2 ":"; ### -*-lisp-*-
3 ":"; CL_SOURCE_REGISTRY=$(pwd)/build/src/:
4 ":"; ASDF_OUTPUT_TRANSLATIONS=$(pwd)/src:$(pwd)/build/src
5 ":"; export CL_SOURCE_REGISTRY ASDF_OUTPUT_TRANSLATIONS
6 ":"; exec cl-launch -X -l "sbcl cmucl" -s asdf -i "(main)" -- "$0" "$@" || exit 1
7
8 ;;;--------------------------------------------------------------------------
9 ;;; Random utilities.
10
11 (defun whitespace-char-p (char)
12 (case char
13 (#.(loop for i below char-code-limit
14 for ch = (code-char i)
15 unless (with-input-from-string (in (string ch))
16 (peek-char t in nil))
17 collect ch) t)
18 (t nil)))
19
20 (defun slurp-file (pathname)
21 (with-open-file (stream pathname)
22 (let* ((length (file-length stream))
23 (string (make-string length))
24 (n (read-sequence string stream))
25 (ch (read-char stream nil)))
26 (unless (and (= n length) (null ch))
27 (error "File `~A' unexpectedly changed size when reading" pathname))
28 string)))
29
30 (defun update-hash-table (map alist)
31 (dolist (item alist map)
32 (let ((key (car item))
33 (value (cdr item)))
34 (if value (setf (gethash key map) value)
35 (remhash key map)))))
36
37 (defun copy-hash-table (map)
38 (let ((new (make-hash-table :test (hash-table-test map))))
39 (maphash (lambda (key value)
40 (setf (gethash key new) value))
41 map)
42 new))
43
44 (defun build-hash-table (alist &key (test #'eql))
45 (update-hash-table (make-hash-table :test test) alist))
46
47 (defun modified-hash-table (map alist)
48 (update-hash-table (copy-hash-table map) alist))
49
50 (defun binary-search (item-key vector lessp &key (key #'identity))
51 (let ((len (length vector)))
52 (if (or (zerop len)
53 (funcall lessp item-key (funcall key (aref vector 0))))
54 (values 0 nil)
55 (let ((lo 0) (hi len))
56 (loop (let ((gap (- hi lo)))
57 (when (< gap 2) (return))
58 (let ((mid (+ lo (floor gap 2))))
59 (if (funcall lessp item-key
60 (funcall key (aref vector mid)))
61 (setf hi mid)
62 (setf lo mid)))))
63 (cond (( = lo hi) (values 0 nil))
64 ((funcall lessp (funcall key (aref vector lo))
65 item-key)
66 (values (1+ lo) nil))
67 (t (values lo t)))))))
68
69 (defun add-to-sorted-vector (item vector lessp &key (key #'identity))
70 (multiple-value-bind (index foundp)
71 (binary-search (funcall key item) vector lessp)
72 (unless foundp
73 (let ((len (fill-pointer vector))
74 (size (array-dimension vector 0)))
75 (when (= len size)
76 (setf vector (adjust-array vector (* 2 size))))
77 (setf (fill-pointer vector) (1+ len))
78 (replace vector vector
79 :start1 (1+ index) :end1 (1+ len)
80 :start2 index :end2 len)
81 (setf (aref vector index) item)))
82 vector))
83
84 (defmacro sorted-vector-push (item vector-place lessp &key key
85 &environment env)
86 (let ((item-var (gensym "ITEM-")))
87 (multiple-value-bind (vars values temps store-form load-form)
88 (get-setf-expansion vector-place env)
89 `(let* ((,item-var ,item)
90 ,@(mapcar #'list vars values)
91 (,(car temps) (add-to-sorted-vector ,item-var ,load-form ,lessp
92 ,@(and key `(:key ,key)))))
93 ,store-form))))
94
95 (defparameter *char-latex-map*
96 (build-hash-table '((#\\ . "\\textbackslash{}")
97 (#\{ . "\\{")
98 (#\} . "\\}")
99 (#\% . "\\%")
100 (#\$ . "\\$")
101 (#\# . "\\#")
102 (#\& . "\\&")
103 (#\^ . "\\textasciicircum{}")
104 (#\~ . "\\textasciitilde{}")
105 (#\` . "\\textasciigrave{}")
106 (#\' . "\\textquotesingle{}")
107 (#\- . "\\fakeminus{}")
108 (#\< . "{<}")
109 (#\> . "{>}"))))
110
111 (defun string-latex (string &optional (map *char-latex-map*))
112 (with-output-to-string (out)
113 (dotimes (i (length string))
114 (let ((char (char string i)))
115 (multiple-value-bind (latex foundp) (gethash char map)
116 (if foundp (write-string latex out)
117 (write-char char out)))))))
118
119 ;;;--------------------------------------------------------------------------
120 ;;; Tokens.
121
122 (defclass token ()
123 ((string :type string :initarg :string :reader token-string)
124 (line :type fixnum :initarg :line :reader token-line)
125 (start-column :type fixnum :initarg :start-column :reader token-start-column)
126 (end-column :type fixnum :initarg :end-column :reader token-end-column)
127 (active-alignment-points :type list
128 :initform (make-array 4
129 :element-type
130 '(unsigned-byte 16)
131 :fill-pointer 0)
132 :accessor token-active-alignment-points)))
133
134 (defmethod print-object ((token token) stream)
135 (print-unreadable-object (token stream :type t)
136 (prin1 (token-string token) stream)))
137
138 (defclass whitespace (token)
139 ())
140
141 (defclass tabulation (whitespace)
142 ())
143
144 (defclass indentation (whitespace)
145 ())
146
147 (defclass operator (token)
148 ((latex :type string :initarg :latex :reader token-latex)))
149
150 (defmethod shared-initialize :after ((token operator) slot-names &key)
151 (when (and (not (slot-boundp token 'latex))
152 (or (eq slot-names t) (member 'latex slot-names)))
153 (setf (slot-value token 'latex)
154 (slot-value token 'string))))
155
156 (defclass identifier (token)
157 ())
158
159 (defclass reserved (token)
160 ())
161
162 (defclass numeric (token)
163 ())
164
165 (defclass literal (token)
166 ())
167
168 (defclass comment-delimiter (operator)
169 ())
170
171 (defclass comment-body (token)
172 ())
173
174 (defgeneric token-width (token)
175 (:method ((token token)) (length (token-string token))))
176 (defgeneric token-alignment-points (token)
177 (:method ((token token)) nil))
178 (defgeneric activate-token-alignment-point (token offset)
179 (:method ((token token) offset)
180 (let* ((vec (active-alignment-points token))
181 (len (fill-pointer vec)))
182 (defgeneric format-token (token stream)
183 (:method ((token token) stream)
184 (princ (string-latex (token-string token)) stream)))
185
186 (defclass alignment-point ()
187 ())
188
189 (defclass relative-alignment-point (alignment-point)
190 ())
191
192 (defclass absolute-alignment-point (alignment-point)
193 ())
194
195 (defun tokenize-whitespace (line pos end emit &key force-indent)
196 (when (whitespace-char-p (char line pos))
197 (let ((next (or (position-if-not #'whitespace-char-p line
198 :start pos :end end)
199 end)))
200 (funcall emit
201 (cond ((or (zerop pos) force-indent) 'indentation)
202 ((find #\tab line :start pos :end next) 'tabulation)
203 (t 'whitespace))
204 pos next))))
205
206 (defun move-over (line pos end string)
207 (let ((next (+ pos (length string))))
208 (if (and (<= next end)
209 (string= string line :start2 pos :end2 next))
210 next
211 nil)))
212
213 (defun try-tokenize (line pos end emit class alist)
214 (dolist (item alist)
215 (multiple-value-bind (string initargs)
216 (if (consp item) (values (car item) (cdr item))
217 (values item nil))
218 (let ((next (move-over line pos end string)))
219 (when next
220 (return (apply emit class pos next initargs)))))))
221
222 ;;;--------------------------------------------------------------------------
223 ;;; Language definitions.
224
225 (defparameter *language-matchers* nil)
226
227 (defmacro deflanguage (name (pathname) &body body)
228 `(progn
229 (pushnew (cons ',name (lambda (,pathname) ,@body))
230 *language-matchers*
231 :key #'car)
232 ',name))
233
234 (let ((templates (mapcar (lambda (type)
235 (make-pathname :type type :case :common))
236 '("C" "CC" "CPP" "C++" "CXX"
237 "H" "HH" "HPP" "H++" "HXX"
238 "SOD"))))
239 (deflanguage c-language (pathname)
240 (find pathname templates :test #'pathname-match-p)))
241
242 (let ((templates (mapcar (lambda (type)
243 (make-pathname :type type :case :common))
244 '("LISP" "EL" "SCM"))))
245 (deflanguage lisp-language (pathname)
246 (find pathname templates :test #'pathname-match-p)))
247
248 (deflanguage make-language (pathname)
249 (pathname-match-p pathname
250 (make-pathname :name "MAKEFILE" :case :common)))
251
252 (defun guess-language (pathname)
253 (car (or (find-if (lambda (item)
254 (funcall (cdr item) pathname))
255 *language-matchers*)
256 (error "No language found for `~A'." pathname))))
257
258 (defclass base-language ()
259 ())
260
261 ;;;--------------------------------------------------------------------------
262 ;;; C(-ish) language scanner.
263
264 (defclass c-language (base-language)
265 ((state :type (member :toplevel :comment)
266 :initform :toplevel :accessor lang-state)))
267
268 (defparameter *c-keywords*
269 (build-hash-table '(("alignas" . t)
270 ("alignof" . t)
271 ("and" . t)
272 ("and_eq" . t)
273 ("asm" . t)
274 ("atomic" . t)
275 ("auto" . t)
276 ("bitand" . t)
277 ("bitor" . t)
278 ("bool" . t)
279 ("break" . t)
280 ("case" . t)
281 ("catch" . t)
282 ("char" . t)
283 ("char16_t" . t)
284 ("char32_t" . t)
285 ("class" . t)
286 ("complex" . t)
287 ("compl" . t)
288 ("const" . t)
289 ("constexpr" . t)
290 ("const_cast" . t)
291 ("continue" . t)
292 ("decltype" . t)
293 ("defined" . t)
294 ("default" . t)
295 ("delete" . t)
296 ("do" . t)
297 ("double" . t)
298 ("dynamic_cast" . t)
299 ("else" . t)
300 ("enum" . t)
301 ("explicit" . t)
302 ("export" . t)
303 ("extern" . t)
304 ("float" . t)
305 ("for" . t)
306 ("friend" . t)
307 ("goto" . t)
308 ("if" . t)
309 ("imaginary" . t)
310 ("inline" . t)
311 ("int" . t)
312 ("long" . t)
313 ("mutable" . t)
314 ("namespace" . t)
315 ("new" . t)
316 ("noexcept" . t)
317 ("noreturn" . t)
318 ("not" . t)
319 ("not_eq" . t)
320 ("nullptr" . t)
321 ("operator" . t)
322 ("or" . t)
323 ("or_eq" . t)
324 ("private" . t)
325 ("protected" . t)
326 ("public" . t)
327 ("register" . t)
328 ("reinterpret_cast" . t)
329 ("restrict" . t)
330 ("return" . t)
331 ("short" . t)
332 ("signed" . t)
333 ("sizeof" . t)
334 ("static" . t)
335 ("static_assert" . t)
336 ("static_cast" . t)
337 ("struct" . t)
338 ("switch" . t)
339 ("template" . t)
340 ("throw" . t)
341 ("try" . t)
342 ("thread_local" . t)
343 ("typedef" . t)
344 ("typeid" . t)
345 ("typeof" . t)
346 ("typename" . t)
347 ("union" . t)
348 ("unsigned" . t)
349 ("using" . t)
350 ("virtual" . t)
351 ("void" . t)
352 ("volatile" . t)
353 ("wchar_t" . t)
354 ("while" . t)
355 ("xor" . t)
356 ("xor_eq" . t)
357 ("_Alignas" . t)
358 ("_Alignof" . t)
359 ("_Atomic" . t)
360 ("_Bool" . t)
361 ("_Complex" . t)
362 ("_Generic" . t)
363 ("_Imaginary" . t)
364 ("_Noreturn" . t)
365 ("_Pragma" . t)
366 ("_Static_assert" . t)
367 ("_Thread_local" . t)
368 ("__alignof__" . t)
369 ("__asm__" . t)
370 ("__attribute__" . t)
371 ("__complex__" . t)
372 ("__const__" . t)
373 ("__extension__" . t)
374 ("__imag__" . t)
375 ("__inline__" . t)
376 ("__label__" . t)
377 ("__real__" . t)
378 ("__signed__" . t)
379 ("__typeof__" . t)
380 ("__volatile__" . t))
381 :test #'equal))
382
383 (defparameter *c-preprocessor-keywords*
384 (build-hash-table '(("define" . :toplevel)
385 ("elif" . :toplevel)
386 ("else" . :toplevel)
387 ("endif" . :toplevel)
388 ("error" . :toplevel)
389 ("if" . :toplevel)
390 ("ifdef" . :toplevel)
391 ("ifndef" . :toplevel)
392 ("include" . :include)
393 ("line" . :toplevel)
394 ("pragma" . :toplevel)
395 ("undef" . :toplevel))))
396
397 (defun scan-c-identifier (line pos end)
398 (if (and (< pos end)
399 (let ((char (char line pos)))
400 (or (char= char #\_) (alpha-char-p char))))
401 (let ((next (or (position-if-not (lambda (char)
402 (or (char= char #\_)
403 (alphanumericp char)))
404 line
405 :start (1+ pos) :end end)
406 end)))
407 (values (subseq line pos next) next))
408 (values nil nil)))
409
410 (defun tokenize-c-comment (lang line pos end emit)
411 (and (>= (- end pos) 2)
412 (char= (char line pos) #\/)
413 (let ((body-start (+ pos 2))
414 (ch (char line (1+ pos))))
415 (or (and (char= ch #\/)
416 (setf (lang-state lang) :toplevel)
417 (funcall emit 'comment-delimiter pos body-start)
418 (when (< body-start end)
419 (funcall emit 'comment-body body-start end))
420 end)
421 (and (char= ch #\*)
422 (funcall emit 'comment-delimiter pos body-start)
423 (let* ((end-delim (search "*/" line
424 :start2 body-start :end2 end))
425 (body-end (or end-delim end))
426 (next (if end-delim (+ end-delim 2) end)))
427 (when (< body-start body-end)
428 (funcall emit 'comment-body body-start body-end))
429 (cond (end-delim
430 (setf (lang-state lang) :toplevel)
431 (funcall emit 'comment-delimiter end-delim next))
432 (t
433 (setf (lang-state lang) :comment)
434 next))))))))
435
436 (defmethod next-token ((lang c-language) line pos end emit)
437 (let ((state (lang-state lang))
438 (char (char line pos)))
439 (ecase state
440 (:toplevel
441 (or (and (zerop pos)
442 (let ((next (or (tokenize-whitespace line pos end emit)
443 pos)))
444 (cond ((and (< next end)
445 (char= (char line next) #\#))
446 (setf (lang-state lang) :preproc-keyword)
447 (funcall emit 'operator next (1+ next)))
448 ((> next pos)
449 next))))
450
451 (tokenize-whitespace line pos end emit)
452
453 (multiple-value-bind (ident next) (scan-c-identifier line pos end)
454 (and ident
455 (funcall emit
456 (if (gethash ident *c-keywords*)
457 'reserved
458 'identifier)
459 pos next)))
460
461 (let ((i (cond ((digit-char-p char)
462 (1+ pos))
463 ((and (char= char #\.)
464 (>= (- end pos) 2)
465 (digit-char-p (char line (1+ pos))))
466 (+ pos 2))
467 (t
468 nil))))
469 (when i
470 (loop (when (>= i end) (return))
471 (let ((char (char line i)))
472 (cond ((or (char= char #\e)
473 (char= char #\E)
474 (char= char #\p)
475 (char= char #\P))
476 (if (and (>= (- end i) 2)
477 (let ((ch (char line (1+ i))))
478 (or (char= ch #\+)
479 (char= ch #\-))))
480 (incf i 2)
481 (incf i 1)))
482 ((or (char= char #\_)
483 (char= char #\.)
484 (alphanumericp char))
485 (incf i))
486 (t
487 (return)))))
488 (funcall emit 'numeric pos i)))
489
490 (and (or (char= char #\")
491 (char= char #\'))
492 (let ((i (1+ pos)))
493 (loop (when (>= i end)
494 (error "Missing `~A'." char))
495 (let ((ch (char line i)))
496 (cond ((char= ch char) (return))
497 ((char= ch #\\) (incf i 2))
498 (t (incf i)))))
499 (funcall emit 'literal pos (1+ i))))
500
501 (tokenize-c-comment lang line pos end emit)
502
503 (try-tokenize line pos end emit 'operator
504 '("->" "++" ("--" :latex "{--}\\,{--}")
505 "<<" ">>" "<=" ">=" "==" "!=" "&&" "||"
506 "*=" "/=" "%=" "+=" "-=" "<<=" ">>="
507 "&=" "^=" "|=" "##"
508 "<:" ":>" "<%" "%>" "%:" "%:%:"))
509
510 (funcall emit 'operator pos (1+ pos))))
511
512 (:comment
513 (assert (zerop pos))
514 (let ((next (tokenize-whitespace line pos end emit)))
515 (when next (setf pos next)))
516 (when (and (< pos end)
517 (char= (char line pos) #\*))
518 (setf pos (funcall emit 'comment-delimiter pos (1+ pos))))
519 (let* ((end-delim (search "*/" line :start2 pos :end2 end))
520 (body-end (or end-delim end))
521 (next (if end-delim (+ end-delim 2) end)))
522 (when (< pos body-end)
523 (funcall emit 'comment-body pos body-end))
524 (cond (end-delim
525 (setf (lang-state lang) :toplevel)
526 (funcall emit 'comment-delimiter body-end next))
527 (t
528 next))))
529
530 (:preproc-keyword
531 (or (tokenize-whitespace line pos end emit)
532 (tokenize-c-comment lang line pos end emit)
533
534 (multiple-value-bind (ident next) (scan-c-identifier line pos end)
535 (let ((next-state (gethash ident *c-preprocessor-keywords*)))
536 (funcall emit (if next-state 'reserved 'identifier) pos next)
537 (setf (lang-state lang) (or next-state :toplevel))))
538
539 (progn (setf (lang-state lang) :toplevel) pos))))))
540
541 (defun tokenize-line (lang line)
542 (let ((tokens nil) (pos 0) (end (length line)))
543 (loop (if (>= pos end) (return (nreverse tokens))
544 (next-token lang line pos end
545 (lambda (class start end &rest initargs)
546 (assert (= start pos))
547 (push (apply #'make-instance class
548 :string (subseq line start end)
549 initargs)
550 tokens)
551 (setf pos end)))))))
552
553
554 (defgeneric scan-line ((lang c-language) line)
555 (let* ((i 0)
556 (spaces 0)
557 (last 0)
558 (end (length line))
559 (state (lang-state lang))
560 (markers nil)
561 (comment (if (eq state :comment) :trad nil))
562 (comment-begin (if (eq state :comment) :star nil)
563 (escape nil)
564 (frags nil))
565 (loop
566 (when (>= i end) (return))
567 (let ((ch (char line i))
568 (align-next nil))
569 (cond ((char= ch #\tab) (setf space 8))
570 ((whitespace-char-p ch) (incf space))
571 (t
572 (when (and (plusp start) (>= space 2))
573 (push (cons i :align) markers))
574 (case ch
575 ((#\, #\;) (setf space 1))
576 ((#\( #\[ #\{) (push (cons (1+ i) :align) markers)))))
577 (ecase state
578 (:toplevel
579 (case ch
580 (#\" (push (cons (1+ i) :align) merkers)
581 (setf state :string))
582 (#\' (setf state :char))
583 (#\/ (when (< (1+ i) end)
584 (let ((nch (char line (1+ i))))
585 (case nch
586 (#\/ (push (cons i :comment-start) markers)
587 (push (cons (+ i 2) :comment-body) markers)
588 (setf state :comment
589 comment :line
590 comment-begin t)
591 (incf i))
592 (#\* (push (cons i :comment-start) markers)
593 (push (cons (1+ i) :align) markers)
594 (push (cons (+ i 2) :comment-body) markers)
595 (setf state :comment
596 comment :trad
597 comment-begin t)
598 (incf i))))))))
599 (:string
600 (cond (escape (setf escape nil))
601 ((char= ch #\") (setf state :toplevel))
602 ((char= ch #\\) (setf escape t))))
603 (:char
604 (cond (escape (setf escape nil))
605 ((char= ch #\') (setf state :toplevel))
606 ((char= ch #\\) (setf escape t))))
607 (:comment
608 (when (and comment-begin (not (whitespace-char-p ch)))
609 (cond ((char= ch #\*)-
610 (push (cons i :comment-mid) markers)
611 (push (cons (1+ i) :comment-body) markers))
612 (setf comment-
613
614 (case ch
615 (#\space (incf space))
616 (#\tab)
617 (
618 (ecase state
619 (:top
620
621 ;;;--------------------------------------------------------------------------
622 ;;; The indentation algorithm.
623
624 (defvar *indent-high-water-mark* 0)
625
626 (defclass offset ()
627 ((position :type fixnum :initarg :position :reader offset-position)
628 (livep :type boolean :initarg :livep :initform nil
629 :accessor offset-live-p)))
630
631 (defclass line ()
632 ((text :type string :initarg :text :reader line-text)
633 (offsets :type list :initarg :offsets :initform nil
634 :reader line-offsets)))
635
636
637
638 ;;;--------------------------------------------------------------------------
639 ;;; Main program.
640
641 (defun main ()
642 (format t "Hello, world!~%"))
643
644 #+interactive (main)
645
646 ;;;----- That's all, folks --------------------------------------------------