From: Mark Wooding Date: Tue, 23 Jun 2020 09:45:35 +0000 (+0100) Subject: @@@ progfmt wip X-Git-Url: https://git.distorted.org.uk/~mdw/sod/commitdiff_plain/refs/heads/mdw/progfmt @@@ progfmt wip --- diff --git a/doc/progfmt b/doc/progfmt new file mode 100755 index 0000000..0e6e6bc --- /dev/null +++ b/doc/progfmt @@ -0,0 +1,646 @@ +#! /bin/sh +":"; ### -*-lisp-*- +":"; CL_SOURCE_REGISTRY=$(pwd)/build/src/: +":"; ASDF_OUTPUT_TRANSLATIONS=$(pwd)/src:$(pwd)/build/src +":"; export CL_SOURCE_REGISTRY ASDF_OUTPUT_TRANSLATIONS +":"; exec cl-launch -X -l "sbcl cmucl" -s asdf -i "(main)" -- "$0" "$@" || exit 1 + +;;;-------------------------------------------------------------------------- +;;; Random utilities. + +(defun whitespace-char-p (char) + (case char + (#.(loop for i below char-code-limit + for ch = (code-char i) + unless (with-input-from-string (in (string ch)) + (peek-char t in nil)) + collect ch) t) + (t nil))) + +(defun slurp-file (pathname) + (with-open-file (stream pathname) + (let* ((length (file-length stream)) + (string (make-string length)) + (n (read-sequence string stream)) + (ch (read-char stream nil))) + (unless (and (= n length) (null ch)) + (error "File `~A' unexpectedly changed size when reading" pathname)) + string))) + +(defun update-hash-table (map alist) + (dolist (item alist map) + (let ((key (car item)) + (value (cdr item))) + (if value (setf (gethash key map) value) + (remhash key map))))) + +(defun copy-hash-table (map) + (let ((new (make-hash-table :test (hash-table-test map)))) + (maphash (lambda (key value) + (setf (gethash key new) value)) + map) + new)) + +(defun build-hash-table (alist &key (test #'eql)) + (update-hash-table (make-hash-table :test test) alist)) + +(defun modified-hash-table (map alist) + (update-hash-table (copy-hash-table map) alist)) + +(defun binary-search (item-key vector lessp &key (key #'identity)) + (let ((len (length vector))) + (if (or (zerop len) + (funcall lessp item-key (funcall key (aref vector 0)))) + (values 0 nil) + (let ((lo 0) (hi len)) + (loop (let ((gap (- hi lo))) + (when (< gap 2) (return)) + (let ((mid (+ lo (floor gap 2)))) + (if (funcall lessp item-key + (funcall key (aref vector mid))) + (setf hi mid) + (setf lo mid))))) + (cond (( = lo hi) (values 0 nil)) + ((funcall lessp (funcall key (aref vector lo)) + item-key) + (values (1+ lo) nil)) + (t (values lo t))))))) + +(defun add-to-sorted-vector (item vector lessp &key (key #'identity)) + (multiple-value-bind (index foundp) + (binary-search (funcall key item) vector lessp) + (unless foundp + (let ((len (fill-pointer vector)) + (size (array-dimension vector 0))) + (when (= len size) + (setf vector (adjust-array vector (* 2 size)))) + (setf (fill-pointer vector) (1+ len)) + (replace vector vector + :start1 (1+ index) :end1 (1+ len) + :start2 index :end2 len) + (setf (aref vector index) item))) + vector)) + +(defmacro sorted-vector-push (item vector-place lessp &key key + &environment env) + (let ((item-var (gensym "ITEM-"))) + (multiple-value-bind (vars values temps store-form load-form) + (get-setf-expansion vector-place env) + `(let* ((,item-var ,item) + ,@(mapcar #'list vars values) + (,(car temps) (add-to-sorted-vector ,item-var ,load-form ,lessp + ,@(and key `(:key ,key))))) + ,store-form)))) + +(defparameter *char-latex-map* + (build-hash-table '((#\\ . "\\textbackslash{}") + (#\{ . "\\{") + (#\} . "\\}") + (#\% . "\\%") + (#\$ . "\\$") + (#\# . "\\#") + (#\& . "\\&") + (#\^ . "\\textasciicircum{}") + (#\~ . "\\textasciitilde{}") + (#\` . "\\textasciigrave{}") + (#\' . "\\textquotesingle{}") + (#\- . "\\fakeminus{}") + (#\< . "{<}") + (#\> . "{>}")))) + +(defun string-latex (string &optional (map *char-latex-map*)) + (with-output-to-string (out) + (dotimes (i (length string)) + (let ((char (char string i))) + (multiple-value-bind (latex foundp) (gethash char map) + (if foundp (write-string latex out) + (write-char char out))))))) + +;;;-------------------------------------------------------------------------- +;;; Tokens. + +(defclass token () + ((string :type string :initarg :string :reader token-string) + (line :type fixnum :initarg :line :reader token-line) + (start-column :type fixnum :initarg :start-column :reader token-start-column) + (end-column :type fixnum :initarg :end-column :reader token-end-column) + (active-alignment-points :type list + :initform (make-array 4 + :element-type + '(unsigned-byte 16) + :fill-pointer 0) + :accessor token-active-alignment-points))) + +(defmethod print-object ((token token) stream) + (print-unreadable-object (token stream :type t) + (prin1 (token-string token) stream))) + +(defclass whitespace (token) + ()) + +(defclass tabulation (whitespace) + ()) + +(defclass indentation (whitespace) + ()) + +(defclass operator (token) + ((latex :type string :initarg :latex :reader token-latex))) + +(defmethod shared-initialize :after ((token operator) slot-names &key) + (when (and (not (slot-boundp token 'latex)) + (or (eq slot-names t) (member 'latex slot-names))) + (setf (slot-value token 'latex) + (slot-value token 'string)))) + +(defclass identifier (token) + ()) + +(defclass reserved (token) + ()) + +(defclass numeric (token) + ()) + +(defclass literal (token) + ()) + +(defclass comment-delimiter (operator) + ()) + +(defclass comment-body (token) + ()) + +(defgeneric token-width (token) + (:method ((token token)) (length (token-string token)))) +(defgeneric token-alignment-points (token) + (:method ((token token)) nil)) +(defgeneric activate-token-alignment-point (token offset) + (:method ((token token) offset) + (let* ((vec (active-alignment-points token)) + (len (fill-pointer vec))) +(defgeneric format-token (token stream) + (:method ((token token) stream) + (princ (string-latex (token-string token)) stream))) + +(defclass alignment-point () + ()) + +(defclass relative-alignment-point (alignment-point) + ()) + +(defclass absolute-alignment-point (alignment-point) + ()) + +(defun tokenize-whitespace (line pos end emit &key force-indent) + (when (whitespace-char-p (char line pos)) + (let ((next (or (position-if-not #'whitespace-char-p line + :start pos :end end) + end))) + (funcall emit + (cond ((or (zerop pos) force-indent) 'indentation) + ((find #\tab line :start pos :end next) 'tabulation) + (t 'whitespace)) + pos next)))) + +(defun move-over (line pos end string) + (let ((next (+ pos (length string)))) + (if (and (<= next end) + (string= string line :start2 pos :end2 next)) + next + nil))) + +(defun try-tokenize (line pos end emit class alist) + (dolist (item alist) + (multiple-value-bind (string initargs) + (if (consp item) (values (car item) (cdr item)) + (values item nil)) + (let ((next (move-over line pos end string))) + (when next + (return (apply emit class pos next initargs))))))) + +;;;-------------------------------------------------------------------------- +;;; Language definitions. + +(defparameter *language-matchers* nil) + +(defmacro deflanguage (name (pathname) &body body) + `(progn + (pushnew (cons ',name (lambda (,pathname) ,@body)) + *language-matchers* + :key #'car) + ',name)) + +(let ((templates (mapcar (lambda (type) + (make-pathname :type type :case :common)) + '("C" "CC" "CPP" "C++" "CXX" + "H" "HH" "HPP" "H++" "HXX" + "SOD")))) + (deflanguage c-language (pathname) + (find pathname templates :test #'pathname-match-p))) + +(let ((templates (mapcar (lambda (type) + (make-pathname :type type :case :common)) + '("LISP" "EL" "SCM")))) + (deflanguage lisp-language (pathname) + (find pathname templates :test #'pathname-match-p))) + +(deflanguage make-language (pathname) + (pathname-match-p pathname + (make-pathname :name "MAKEFILE" :case :common))) + +(defun guess-language (pathname) + (car (or (find-if (lambda (item) + (funcall (cdr item) pathname)) + *language-matchers*) + (error "No language found for `~A'." pathname)))) + +(defclass base-language () + ()) + +;;;-------------------------------------------------------------------------- +;;; C(-ish) language scanner. + +(defclass c-language (base-language) + ((state :type (member :toplevel :comment) + :initform :toplevel :accessor lang-state))) + +(defparameter *c-keywords* + (build-hash-table '(("alignas" . t) + ("alignof" . t) + ("and" . t) + ("and_eq" . t) + ("asm" . t) + ("atomic" . t) + ("auto" . t) + ("bitand" . t) + ("bitor" . t) + ("bool" . t) + ("break" . t) + ("case" . t) + ("catch" . t) + ("char" . t) + ("char16_t" . t) + ("char32_t" . t) + ("class" . t) + ("complex" . t) + ("compl" . t) + ("const" . t) + ("constexpr" . t) + ("const_cast" . t) + ("continue" . t) + ("decltype" . t) + ("defined" . t) + ("default" . t) + ("delete" . t) + ("do" . t) + ("double" . t) + ("dynamic_cast" . t) + ("else" . t) + ("enum" . t) + ("explicit" . t) + ("export" . t) + ("extern" . t) + ("float" . t) + ("for" . t) + ("friend" . t) + ("goto" . t) + ("if" . t) + ("imaginary" . t) + ("inline" . t) + ("int" . t) + ("long" . t) + ("mutable" . t) + ("namespace" . t) + ("new" . t) + ("noexcept" . t) + ("noreturn" . t) + ("not" . t) + ("not_eq" . t) + ("nullptr" . t) + ("operator" . t) + ("or" . t) + ("or_eq" . t) + ("private" . t) + ("protected" . t) + ("public" . t) + ("register" . t) + ("reinterpret_cast" . t) + ("restrict" . t) + ("return" . t) + ("short" . t) + ("signed" . t) + ("sizeof" . t) + ("static" . t) + ("static_assert" . t) + ("static_cast" . t) + ("struct" . t) + ("switch" . t) + ("template" . t) + ("throw" . t) + ("try" . t) + ("thread_local" . t) + ("typedef" . t) + ("typeid" . t) + ("typeof" . t) + ("typename" . t) + ("union" . t) + ("unsigned" . t) + ("using" . t) + ("virtual" . t) + ("void" . t) + ("volatile" . t) + ("wchar_t" . t) + ("while" . t) + ("xor" . t) + ("xor_eq" . t) + ("_Alignas" . t) + ("_Alignof" . t) + ("_Atomic" . t) + ("_Bool" . t) + ("_Complex" . t) + ("_Generic" . t) + ("_Imaginary" . t) + ("_Noreturn" . t) + ("_Pragma" . t) + ("_Static_assert" . t) + ("_Thread_local" . t) + ("__alignof__" . t) + ("__asm__" . t) + ("__attribute__" . t) + ("__complex__" . t) + ("__const__" . t) + ("__extension__" . t) + ("__imag__" . t) + ("__inline__" . t) + ("__label__" . t) + ("__real__" . t) + ("__signed__" . t) + ("__typeof__" . t) + ("__volatile__" . t)) + :test #'equal)) + +(defparameter *c-preprocessor-keywords* + (build-hash-table '(("define" . :toplevel) + ("elif" . :toplevel) + ("else" . :toplevel) + ("endif" . :toplevel) + ("error" . :toplevel) + ("if" . :toplevel) + ("ifdef" . :toplevel) + ("ifndef" . :toplevel) + ("include" . :include) + ("line" . :toplevel) + ("pragma" . :toplevel) + ("undef" . :toplevel)))) + +(defun scan-c-identifier (line pos end) + (if (and (< pos end) + (let ((char (char line pos))) + (or (char= char #\_) (alpha-char-p char)))) + (let ((next (or (position-if-not (lambda (char) + (or (char= char #\_) + (alphanumericp char))) + line + :start (1+ pos) :end end) + end))) + (values (subseq line pos next) next)) + (values nil nil))) + +(defun tokenize-c-comment (lang line pos end emit) + (and (>= (- end pos) 2) + (char= (char line pos) #\/) + (let ((body-start (+ pos 2)) + (ch (char line (1+ pos)))) + (or (and (char= ch #\/) + (setf (lang-state lang) :toplevel) + (funcall emit 'comment-delimiter pos body-start) + (when (< body-start end) + (funcall emit 'comment-body body-start end)) + end) + (and (char= ch #\*) + (funcall emit 'comment-delimiter pos body-start) + (let* ((end-delim (search "*/" line + :start2 body-start :end2 end)) + (body-end (or end-delim end)) + (next (if end-delim (+ end-delim 2) end))) + (when (< body-start body-end) + (funcall emit 'comment-body body-start body-end)) + (cond (end-delim + (setf (lang-state lang) :toplevel) + (funcall emit 'comment-delimiter end-delim next)) + (t + (setf (lang-state lang) :comment) + next)))))))) + +(defmethod next-token ((lang c-language) line pos end emit) + (let ((state (lang-state lang)) + (char (char line pos))) + (ecase state + (:toplevel + (or (and (zerop pos) + (let ((next (or (tokenize-whitespace line pos end emit) + pos))) + (cond ((and (< next end) + (char= (char line next) #\#)) + (setf (lang-state lang) :preproc-keyword) + (funcall emit 'operator next (1+ next))) + ((> next pos) + next)))) + + (tokenize-whitespace line pos end emit) + + (multiple-value-bind (ident next) (scan-c-identifier line pos end) + (and ident + (funcall emit + (if (gethash ident *c-keywords*) + 'reserved + 'identifier) + pos next))) + + (let ((i (cond ((digit-char-p char) + (1+ pos)) + ((and (char= char #\.) + (>= (- end pos) 2) + (digit-char-p (char line (1+ pos)))) + (+ pos 2)) + (t + nil)))) + (when i + (loop (when (>= i end) (return)) + (let ((char (char line i))) + (cond ((or (char= char #\e) + (char= char #\E) + (char= char #\p) + (char= char #\P)) + (if (and (>= (- end i) 2) + (let ((ch (char line (1+ i)))) + (or (char= ch #\+) + (char= ch #\-)))) + (incf i 2) + (incf i 1))) + ((or (char= char #\_) + (char= char #\.) + (alphanumericp char)) + (incf i)) + (t + (return))))) + (funcall emit 'numeric pos i))) + + (and (or (char= char #\") + (char= char #\')) + (let ((i (1+ pos))) + (loop (when (>= i end) + (error "Missing `~A'." char)) + (let ((ch (char line i))) + (cond ((char= ch char) (return)) + ((char= ch #\\) (incf i 2)) + (t (incf i))))) + (funcall emit 'literal pos (1+ i)))) + + (tokenize-c-comment lang line pos end emit) + + (try-tokenize line pos end emit 'operator + '("->" "++" ("--" :latex "{--}\\,{--}") + "<<" ">>" "<=" ">=" "==" "!=" "&&" "||" + "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" + "&=" "^=" "|=" "##" + "<:" ":>" "<%" "%>" "%:" "%:%:")) + + (funcall emit 'operator pos (1+ pos)))) + + (:comment + (assert (zerop pos)) + (let ((next (tokenize-whitespace line pos end emit))) + (when next (setf pos next))) + (when (and (< pos end) + (char= (char line pos) #\*)) + (setf pos (funcall emit 'comment-delimiter pos (1+ pos)))) + (let* ((end-delim (search "*/" line :start2 pos :end2 end)) + (body-end (or end-delim end)) + (next (if end-delim (+ end-delim 2) end))) + (when (< pos body-end) + (funcall emit 'comment-body pos body-end)) + (cond (end-delim + (setf (lang-state lang) :toplevel) + (funcall emit 'comment-delimiter body-end next)) + (t + next)))) + + (:preproc-keyword + (or (tokenize-whitespace line pos end emit) + (tokenize-c-comment lang line pos end emit) + + (multiple-value-bind (ident next) (scan-c-identifier line pos end) + (let ((next-state (gethash ident *c-preprocessor-keywords*))) + (funcall emit (if next-state 'reserved 'identifier) pos next) + (setf (lang-state lang) (or next-state :toplevel)))) + + (progn (setf (lang-state lang) :toplevel) pos)))))) + +(defun tokenize-line (lang line) + (let ((tokens nil) (pos 0) (end (length line))) + (loop (if (>= pos end) (return (nreverse tokens)) + (next-token lang line pos end + (lambda (class start end &rest initargs) + (assert (= start pos)) + (push (apply #'make-instance class + :string (subseq line start end) + initargs) + tokens) + (setf pos end))))))) + + +(defgeneric scan-line ((lang c-language) line) + (let* ((i 0) + (spaces 0) + (last 0) + (end (length line)) + (state (lang-state lang)) + (markers nil) + (comment (if (eq state :comment) :trad nil)) + (comment-begin (if (eq state :comment) :star nil) + (escape nil) + (frags nil)) + (loop + (when (>= i end) (return)) + (let ((ch (char line i)) + (align-next nil)) + (cond ((char= ch #\tab) (setf space 8)) + ((whitespace-char-p ch) (incf space)) + (t + (when (and (plusp start) (>= space 2)) + (push (cons i :align) markers)) + (case ch + ((#\, #\;) (setf space 1)) + ((#\( #\[ #\{) (push (cons (1+ i) :align) markers))))) + (ecase state + (:toplevel + (case ch + (#\" (push (cons (1+ i) :align) merkers) + (setf state :string)) + (#\' (setf state :char)) + (#\/ (when (< (1+ i) end) + (let ((nch (char line (1+ i)))) + (case nch + (#\/ (push (cons i :comment-start) markers) + (push (cons (+ i 2) :comment-body) markers) + (setf state :comment + comment :line + comment-begin t) + (incf i)) + (#\* (push (cons i :comment-start) markers) + (push (cons (1+ i) :align) markers) + (push (cons (+ i 2) :comment-body) markers) + (setf state :comment + comment :trad + comment-begin t) + (incf i)))))))) + (:string + (cond (escape (setf escape nil)) + ((char= ch #\") (setf state :toplevel)) + ((char= ch #\\) (setf escape t)))) + (:char + (cond (escape (setf escape nil)) + ((char= ch #\') (setf state :toplevel)) + ((char= ch #\\) (setf escape t)))) + (:comment + (when (and comment-begin (not (whitespace-char-p ch))) + (cond ((char= ch #\*)- + (push (cons i :comment-mid) markers) + (push (cons (1+ i) :comment-body) markers)) + (setf comment- + + (case ch + (#\space (incf space)) + (#\tab) + ( + (ecase state + (:top + +;;;-------------------------------------------------------------------------- +;;; The indentation algorithm. + +(defvar *indent-high-water-mark* 0) + +(defclass offset () + ((position :type fixnum :initarg :position :reader offset-position) + (livep :type boolean :initarg :livep :initform nil + :accessor offset-live-p))) + +(defclass line () + ((text :type string :initarg :text :reader line-text) + (offsets :type list :initarg :offsets :initform nil + :reader line-offsets))) + + + +;;;-------------------------------------------------------------------------- +;;; Main program. + +(defun main () + (format t "Hello, world!~%")) + +#+interactive (main) + +;;;----- That's all, folks --------------------------------------------------