+#! /bin/sh
+":"; ### -*-lisp-*-
+":"; CL_SOURCE_REGISTRY=$(pwd)/build/src/:
+":"; ASDF_OUTPUT_TRANSLATIONS=$(pwd)/src:$(pwd)/build/src
+":"; export CL_SOURCE_REGISTRY ASDF_OUTPUT_TRANSLATIONS
+":"; exec cl-launch -X -l "sbcl cmucl" -s asdf -i "(main)" -- "$0" "$@" || exit 1
+
+;;;--------------------------------------------------------------------------
+;;; Random utilities.
+
+(defun whitespace-char-p (char)
+ (case char
+ (#.(loop for i below char-code-limit
+ for ch = (code-char i)
+ unless (with-input-from-string (in (string ch))
+ (peek-char t in nil))
+ collect ch) t)
+ (t nil)))
+
+(defun slurp-file (pathname)
+ (with-open-file (stream pathname)
+ (let* ((length (file-length stream))
+ (string (make-string length))
+ (n (read-sequence string stream))
+ (ch (read-char stream nil)))
+ (unless (and (= n length) (null ch))
+ (error "File `~A' unexpectedly changed size when reading" pathname))
+ string)))
+
+(defun update-hash-table (map alist)
+ (dolist (item alist map)
+ (let ((key (car item))
+ (value (cdr item)))
+ (if value (setf (gethash key map) value)
+ (remhash key map)))))
+
+(defun copy-hash-table (map)
+ (let ((new (make-hash-table :test (hash-table-test map))))
+ (maphash (lambda (key value)
+ (setf (gethash key new) value))
+ map)
+ new))
+
+(defun build-hash-table (alist &key (test #'eql))
+ (update-hash-table (make-hash-table :test test) alist))
+
+(defun modified-hash-table (map alist)
+ (update-hash-table (copy-hash-table map) alist))
+
+(defun binary-search (item-key vector lessp &key (key #'identity))
+ (let ((len (length vector)))
+ (if (or (zerop len)
+ (funcall lessp item-key (funcall key (aref vector 0))))
+ (values 0 nil)
+ (let ((lo 0) (hi len))
+ (loop (let ((gap (- hi lo)))
+ (when (< gap 2) (return))
+ (let ((mid (+ lo (floor gap 2))))
+ (if (funcall lessp item-key
+ (funcall key (aref vector mid)))
+ (setf hi mid)
+ (setf lo mid)))))
+ (cond (( = lo hi) (values 0 nil))
+ ((funcall lessp (funcall key (aref vector lo))
+ item-key)
+ (values (1+ lo) nil))
+ (t (values lo t)))))))
+
+(defun add-to-sorted-vector (item vector lessp &key (key #'identity))
+ (multiple-value-bind (index foundp)
+ (binary-search (funcall key item) vector lessp)
+ (unless foundp
+ (let ((len (fill-pointer vector))
+ (size (array-dimension vector 0)))
+ (when (= len size)
+ (setf vector (adjust-array vector (* 2 size))))
+ (setf (fill-pointer vector) (1+ len))
+ (replace vector vector
+ :start1 (1+ index) :end1 (1+ len)
+ :start2 index :end2 len)
+ (setf (aref vector index) item)))
+ vector))
+
+(defmacro sorted-vector-push (item vector-place lessp &key key
+ &environment env)
+ (let ((item-var (gensym "ITEM-")))
+ (multiple-value-bind (vars values temps store-form load-form)
+ (get-setf-expansion vector-place env)
+ `(let* ((,item-var ,item)
+ ,@(mapcar #'list vars values)
+ (,(car temps) (add-to-sorted-vector ,item-var ,load-form ,lessp
+ ,@(and key `(:key ,key)))))
+ ,store-form))))
+
+(defparameter *char-latex-map*
+ (build-hash-table '((#\\ . "\\textbackslash{}")
+ (#\{ . "\\{")
+ (#\} . "\\}")
+ (#\% . "\\%")
+ (#\$ . "\\$")
+ (#\# . "\\#")
+ (#\& . "\\&")
+ (#\^ . "\\textasciicircum{}")
+ (#\~ . "\\textasciitilde{}")
+ (#\` . "\\textasciigrave{}")
+ (#\' . "\\textquotesingle{}")
+ (#\- . "\\fakeminus{}")
+ (#\< . "{<}")
+ (#\> . "{>}"))))
+
+(defun string-latex (string &optional (map *char-latex-map*))
+ (with-output-to-string (out)
+ (dotimes (i (length string))
+ (let ((char (char string i)))
+ (multiple-value-bind (latex foundp) (gethash char map)
+ (if foundp (write-string latex out)
+ (write-char char out)))))))
+
+;;;--------------------------------------------------------------------------
+;;; Tokens.
+
+(defclass token ()
+ ((string :type string :initarg :string :reader token-string)
+ (line :type fixnum :initarg :line :reader token-line)
+ (start-column :type fixnum :initarg :start-column :reader token-start-column)
+ (end-column :type fixnum :initarg :end-column :reader token-end-column)
+ (active-alignment-points :type list
+ :initform (make-array 4
+ :element-type
+ '(unsigned-byte 16)
+ :fill-pointer 0)
+ :accessor token-active-alignment-points)))
+
+(defmethod print-object ((token token) stream)
+ (print-unreadable-object (token stream :type t)
+ (prin1 (token-string token) stream)))
+
+(defclass whitespace (token)
+ ())
+
+(defclass tabulation (whitespace)
+ ())
+
+(defclass indentation (whitespace)
+ ())
+
+(defclass operator (token)
+ ((latex :type string :initarg :latex :reader token-latex)))
+
+(defmethod shared-initialize :after ((token operator) slot-names &key)
+ (when (and (not (slot-boundp token 'latex))
+ (or (eq slot-names t) (member 'latex slot-names)))
+ (setf (slot-value token 'latex)
+ (slot-value token 'string))))
+
+(defclass identifier (token)
+ ())
+
+(defclass reserved (token)
+ ())
+
+(defclass numeric (token)
+ ())
+
+(defclass literal (token)
+ ())
+
+(defclass comment-delimiter (operator)
+ ())
+
+(defclass comment-body (token)
+ ())
+
+(defgeneric token-width (token)
+ (:method ((token token)) (length (token-string token))))
+(defgeneric token-alignment-points (token)
+ (:method ((token token)) nil))
+(defgeneric activate-token-alignment-point (token offset)
+ (:method ((token token) offset)
+ (let* ((vec (active-alignment-points token))
+ (len (fill-pointer vec)))
+(defgeneric format-token (token stream)
+ (:method ((token token) stream)
+ (princ (string-latex (token-string token)) stream)))
+
+(defclass alignment-point ()
+ ())
+
+(defclass relative-alignment-point (alignment-point)
+ ())
+
+(defclass absolute-alignment-point (alignment-point)
+ ())
+
+(defun tokenize-whitespace (line pos end emit &key force-indent)
+ (when (whitespace-char-p (char line pos))
+ (let ((next (or (position-if-not #'whitespace-char-p line
+ :start pos :end end)
+ end)))
+ (funcall emit
+ (cond ((or (zerop pos) force-indent) 'indentation)
+ ((find #\tab line :start pos :end next) 'tabulation)
+ (t 'whitespace))
+ pos next))))
+
+(defun move-over (line pos end string)
+ (let ((next (+ pos (length string))))
+ (if (and (<= next end)
+ (string= string line :start2 pos :end2 next))
+ next
+ nil)))
+
+(defun try-tokenize (line pos end emit class alist)
+ (dolist (item alist)
+ (multiple-value-bind (string initargs)
+ (if (consp item) (values (car item) (cdr item))
+ (values item nil))
+ (let ((next (move-over line pos end string)))
+ (when next
+ (return (apply emit class pos next initargs)))))))
+
+;;;--------------------------------------------------------------------------
+;;; Language definitions.
+
+(defparameter *language-matchers* nil)
+
+(defmacro deflanguage (name (pathname) &body body)
+ `(progn
+ (pushnew (cons ',name (lambda (,pathname) ,@body))
+ *language-matchers*
+ :key #'car)
+ ',name))
+
+(let ((templates (mapcar (lambda (type)
+ (make-pathname :type type :case :common))
+ '("C" "CC" "CPP" "C++" "CXX"
+ "H" "HH" "HPP" "H++" "HXX"
+ "SOD"))))
+ (deflanguage c-language (pathname)
+ (find pathname templates :test #'pathname-match-p)))
+
+(let ((templates (mapcar (lambda (type)
+ (make-pathname :type type :case :common))
+ '("LISP" "EL" "SCM"))))
+ (deflanguage lisp-language (pathname)
+ (find pathname templates :test #'pathname-match-p)))
+
+(deflanguage make-language (pathname)
+ (pathname-match-p pathname
+ (make-pathname :name "MAKEFILE" :case :common)))
+
+(defun guess-language (pathname)
+ (car (or (find-if (lambda (item)
+ (funcall (cdr item) pathname))
+ *language-matchers*)
+ (error "No language found for `~A'." pathname))))
+
+(defclass base-language ()
+ ())
+
+;;;--------------------------------------------------------------------------
+;;; C(-ish) language scanner.
+
+(defclass c-language (base-language)
+ ((state :type (member :toplevel :comment)
+ :initform :toplevel :accessor lang-state)))
+
+(defparameter *c-keywords*
+ (build-hash-table '(("alignas" . t)
+ ("alignof" . t)
+ ("and" . t)
+ ("and_eq" . t)
+ ("asm" . t)
+ ("atomic" . t)
+ ("auto" . t)
+ ("bitand" . t)
+ ("bitor" . t)
+ ("bool" . t)
+ ("break" . t)
+ ("case" . t)
+ ("catch" . t)
+ ("char" . t)
+ ("char16_t" . t)
+ ("char32_t" . t)
+ ("class" . t)
+ ("complex" . t)
+ ("compl" . t)
+ ("const" . t)
+ ("constexpr" . t)
+ ("const_cast" . t)
+ ("continue" . t)
+ ("decltype" . t)
+ ("defined" . t)
+ ("default" . t)
+ ("delete" . t)
+ ("do" . t)
+ ("double" . t)
+ ("dynamic_cast" . t)
+ ("else" . t)
+ ("enum" . t)
+ ("explicit" . t)
+ ("export" . t)
+ ("extern" . t)
+ ("float" . t)
+ ("for" . t)
+ ("friend" . t)
+ ("goto" . t)
+ ("if" . t)
+ ("imaginary" . t)
+ ("inline" . t)
+ ("int" . t)
+ ("long" . t)
+ ("mutable" . t)
+ ("namespace" . t)
+ ("new" . t)
+ ("noexcept" . t)
+ ("noreturn" . t)
+ ("not" . t)
+ ("not_eq" . t)
+ ("nullptr" . t)
+ ("operator" . t)
+ ("or" . t)
+ ("or_eq" . t)
+ ("private" . t)
+ ("protected" . t)
+ ("public" . t)
+ ("register" . t)
+ ("reinterpret_cast" . t)
+ ("restrict" . t)
+ ("return" . t)
+ ("short" . t)
+ ("signed" . t)
+ ("sizeof" . t)
+ ("static" . t)
+ ("static_assert" . t)
+ ("static_cast" . t)
+ ("struct" . t)
+ ("switch" . t)
+ ("template" . t)
+ ("throw" . t)
+ ("try" . t)
+ ("thread_local" . t)
+ ("typedef" . t)
+ ("typeid" . t)
+ ("typeof" . t)
+ ("typename" . t)
+ ("union" . t)
+ ("unsigned" . t)
+ ("using" . t)
+ ("virtual" . t)
+ ("void" . t)
+ ("volatile" . t)
+ ("wchar_t" . t)
+ ("while" . t)
+ ("xor" . t)
+ ("xor_eq" . t)
+ ("_Alignas" . t)
+ ("_Alignof" . t)
+ ("_Atomic" . t)
+ ("_Bool" . t)
+ ("_Complex" . t)
+ ("_Generic" . t)
+ ("_Imaginary" . t)
+ ("_Noreturn" . t)
+ ("_Pragma" . t)
+ ("_Static_assert" . t)
+ ("_Thread_local" . t)
+ ("__alignof__" . t)
+ ("__asm__" . t)
+ ("__attribute__" . t)
+ ("__complex__" . t)
+ ("__const__" . t)
+ ("__extension__" . t)
+ ("__imag__" . t)
+ ("__inline__" . t)
+ ("__label__" . t)
+ ("__real__" . t)
+ ("__signed__" . t)
+ ("__typeof__" . t)
+ ("__volatile__" . t))
+ :test #'equal))
+
+(defparameter *c-preprocessor-keywords*
+ (build-hash-table '(("define" . :toplevel)
+ ("elif" . :toplevel)
+ ("else" . :toplevel)
+ ("endif" . :toplevel)
+ ("error" . :toplevel)
+ ("if" . :toplevel)
+ ("ifdef" . :toplevel)
+ ("ifndef" . :toplevel)
+ ("include" . :include)
+ ("line" . :toplevel)
+ ("pragma" . :toplevel)
+ ("undef" . :toplevel))))
+
+(defun scan-c-identifier (line pos end)
+ (if (and (< pos end)
+ (let ((char (char line pos)))
+ (or (char= char #\_) (alpha-char-p char))))
+ (let ((next (or (position-if-not (lambda (char)
+ (or (char= char #\_)
+ (alphanumericp char)))
+ line
+ :start (1+ pos) :end end)
+ end)))
+ (values (subseq line pos next) next))
+ (values nil nil)))
+
+(defun tokenize-c-comment (lang line pos end emit)
+ (and (>= (- end pos) 2)
+ (char= (char line pos) #\/)
+ (let ((body-start (+ pos 2))
+ (ch (char line (1+ pos))))
+ (or (and (char= ch #\/)
+ (setf (lang-state lang) :toplevel)
+ (funcall emit 'comment-delimiter pos body-start)
+ (when (< body-start end)
+ (funcall emit 'comment-body body-start end))
+ end)
+ (and (char= ch #\*)
+ (funcall emit 'comment-delimiter pos body-start)
+ (let* ((end-delim (search "*/" line
+ :start2 body-start :end2 end))
+ (body-end (or end-delim end))
+ (next (if end-delim (+ end-delim 2) end)))
+ (when (< body-start body-end)
+ (funcall emit 'comment-body body-start body-end))
+ (cond (end-delim
+ (setf (lang-state lang) :toplevel)
+ (funcall emit 'comment-delimiter end-delim next))
+ (t
+ (setf (lang-state lang) :comment)
+ next))))))))
+
+(defmethod next-token ((lang c-language) line pos end emit)
+ (let ((state (lang-state lang))
+ (char (char line pos)))
+ (ecase state
+ (:toplevel
+ (or (and (zerop pos)
+ (let ((next (or (tokenize-whitespace line pos end emit)
+ pos)))
+ (cond ((and (< next end)
+ (char= (char line next) #\#))
+ (setf (lang-state lang) :preproc-keyword)
+ (funcall emit 'operator next (1+ next)))
+ ((> next pos)
+ next))))
+
+ (tokenize-whitespace line pos end emit)
+
+ (multiple-value-bind (ident next) (scan-c-identifier line pos end)
+ (and ident
+ (funcall emit
+ (if (gethash ident *c-keywords*)
+ 'reserved
+ 'identifier)
+ pos next)))
+
+ (let ((i (cond ((digit-char-p char)
+ (1+ pos))
+ ((and (char= char #\.)
+ (>= (- end pos) 2)
+ (digit-char-p (char line (1+ pos))))
+ (+ pos 2))
+ (t
+ nil))))
+ (when i
+ (loop (when (>= i end) (return))
+ (let ((char (char line i)))
+ (cond ((or (char= char #\e)
+ (char= char #\E)
+ (char= char #\p)
+ (char= char #\P))
+ (if (and (>= (- end i) 2)
+ (let ((ch (char line (1+ i))))
+ (or (char= ch #\+)
+ (char= ch #\-))))
+ (incf i 2)
+ (incf i 1)))
+ ((or (char= char #\_)
+ (char= char #\.)
+ (alphanumericp char))
+ (incf i))
+ (t
+ (return)))))
+ (funcall emit 'numeric pos i)))
+
+ (and (or (char= char #\")
+ (char= char #\'))
+ (let ((i (1+ pos)))
+ (loop (when (>= i end)
+ (error "Missing `~A'." char))
+ (let ((ch (char line i)))
+ (cond ((char= ch char) (return))
+ ((char= ch #\\) (incf i 2))
+ (t (incf i)))))
+ (funcall emit 'literal pos (1+ i))))
+
+ (tokenize-c-comment lang line pos end emit)
+
+ (try-tokenize line pos end emit 'operator
+ '("->" "++" ("--" :latex "{--}\\,{--}")
+ "<<" ">>" "<=" ">=" "==" "!=" "&&" "||"
+ "*=" "/=" "%=" "+=" "-=" "<<=" ">>="
+ "&=" "^=" "|=" "##"
+ "<:" ":>" "<%" "%>" "%:" "%:%:"))
+
+ (funcall emit 'operator pos (1+ pos))))
+
+ (:comment
+ (assert (zerop pos))
+ (let ((next (tokenize-whitespace line pos end emit)))
+ (when next (setf pos next)))
+ (when (and (< pos end)
+ (char= (char line pos) #\*))
+ (setf pos (funcall emit 'comment-delimiter pos (1+ pos))))
+ (let* ((end-delim (search "*/" line :start2 pos :end2 end))
+ (body-end (or end-delim end))
+ (next (if end-delim (+ end-delim 2) end)))
+ (when (< pos body-end)
+ (funcall emit 'comment-body pos body-end))
+ (cond (end-delim
+ (setf (lang-state lang) :toplevel)
+ (funcall emit 'comment-delimiter body-end next))
+ (t
+ next))))
+
+ (:preproc-keyword
+ (or (tokenize-whitespace line pos end emit)
+ (tokenize-c-comment lang line pos end emit)
+
+ (multiple-value-bind (ident next) (scan-c-identifier line pos end)
+ (let ((next-state (gethash ident *c-preprocessor-keywords*)))
+ (funcall emit (if next-state 'reserved 'identifier) pos next)
+ (setf (lang-state lang) (or next-state :toplevel))))
+
+ (progn (setf (lang-state lang) :toplevel) pos))))))
+
+(defun tokenize-line (lang line)
+ (let ((tokens nil) (pos 0) (end (length line)))
+ (loop (if (>= pos end) (return (nreverse tokens))
+ (next-token lang line pos end
+ (lambda (class start end &rest initargs)
+ (assert (= start pos))
+ (push (apply #'make-instance class
+ :string (subseq line start end)
+ initargs)
+ tokens)
+ (setf pos end)))))))
+
+
+(defgeneric scan-line ((lang c-language) line)
+ (let* ((i 0)
+ (spaces 0)
+ (last 0)
+ (end (length line))
+ (state (lang-state lang))
+ (markers nil)
+ (comment (if (eq state :comment) :trad nil))
+ (comment-begin (if (eq state :comment) :star nil)
+ (escape nil)
+ (frags nil))
+ (loop
+ (when (>= i end) (return))
+ (let ((ch (char line i))
+ (align-next nil))
+ (cond ((char= ch #\tab) (setf space 8))
+ ((whitespace-char-p ch) (incf space))
+ (t
+ (when (and (plusp start) (>= space 2))
+ (push (cons i :align) markers))
+ (case ch
+ ((#\, #\;) (setf space 1))
+ ((#\( #\[ #\{) (push (cons (1+ i) :align) markers)))))
+ (ecase state
+ (:toplevel
+ (case ch
+ (#\" (push (cons (1+ i) :align) merkers)
+ (setf state :string))
+ (#\' (setf state :char))
+ (#\/ (when (< (1+ i) end)
+ (let ((nch (char line (1+ i))))
+ (case nch
+ (#\/ (push (cons i :comment-start) markers)
+ (push (cons (+ i 2) :comment-body) markers)
+ (setf state :comment
+ comment :line
+ comment-begin t)
+ (incf i))
+ (#\* (push (cons i :comment-start) markers)
+ (push (cons (1+ i) :align) markers)
+ (push (cons (+ i 2) :comment-body) markers)
+ (setf state :comment
+ comment :trad
+ comment-begin t)
+ (incf i))))))))
+ (:string
+ (cond (escape (setf escape nil))
+ ((char= ch #\") (setf state :toplevel))
+ ((char= ch #\\) (setf escape t))))
+ (:char
+ (cond (escape (setf escape nil))
+ ((char= ch #\') (setf state :toplevel))
+ ((char= ch #\\) (setf escape t))))
+ (:comment
+ (when (and comment-begin (not (whitespace-char-p ch)))
+ (cond ((char= ch #\*)-
+ (push (cons i :comment-mid) markers)
+ (push (cons (1+ i) :comment-body) markers))
+ (setf comment-
+
+ (case ch
+ (#\space (incf space))
+ (#\tab)
+ (
+ (ecase state
+ (:top
+
+;;;--------------------------------------------------------------------------
+;;; The indentation algorithm.
+
+(defvar *indent-high-water-mark* 0)
+
+(defclass offset ()
+ ((position :type fixnum :initarg :position :reader offset-position)
+ (livep :type boolean :initarg :livep :initform nil
+ :accessor offset-live-p)))
+
+(defclass line ()
+ ((text :type string :initarg :text :reader line-text)
+ (offsets :type list :initarg :offsets :initform nil
+ :reader line-offsets)))
+
+
+
+;;;--------------------------------------------------------------------------
+;;; Main program.
+
+(defun main ()
+ (format t "Hello, world!~%"))
+
+#+interactive (main)
+
+;;;----- That's all, folks --------------------------------------------------