Various: Try porting the code to CLisp.
[lisp] / str.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; $Id$
4 ;;;
5 ;;; String utilities of various kinds
6 ;;;
7 ;;; (c) 2005 Straylight/Edgeware
8 ;;;
9
10 ;;;----- Licensing notice ---------------------------------------------------
11 ;;;
12 ;;; This program is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; This program is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with this program; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (defpackage #:mdw.str
27 (:use #:common-lisp #:mdw.base)
28 (:export #:join-strings #:str-next-word #:str-split-words))
29 (in-package #:mdw.str)
30
31 (defun join-strings (del strs)
32 "Join together the strings STRS with DEL between them. All the arguments
33 are first converted to strings, as if by `stringify'. Otherwise, this is
34 like Perl's join operator."
35 (setf del (stringify del))
36 (with-output-to-string (s)
37 (when strs
38 (loop
39 (princ (stringify (pop strs)) s)
40 (unless strs
41 (return))
42 (princ del s)))))
43
44 (defun str-next-word (string &key quotedp start end)
45 "Extract a whitespace-delimited word from STRING, returning it and the
46 index to continue parsing from. If no word is found, return nil twice.
47 If QUOTEDP, then allow quoting and backslashifying; otherwise don't. The
48 START and END arguments limit the portion of the string to be processed;
49 the default to 0 and nil (end of string), as usual."
50 (unless start (setf start 0))
51 (unless end (setf end (length string)))
52 (let ((i start)
53 (q nil)
54 (e nil)
55 (w (make-array 0
56 :element-type 'character
57 :adjustable t
58 :fill-pointer t)))
59 ;;
60 ;; Find the start of the next word.
61 (loop
62 (unless (< i end)
63 (return-from str-next-word (values nil nil)))
64 (let ((ch (char string i)))
65 (unless (whitespace-char-p ch)
66 (return)))
67 (incf i))
68 ;;
69 ;; Now pull off a word.
70 (loop
71 (unless (< i end)
72 (return))
73 (let ((ch (char string i)))
74 (cond ((and quotedp (eql ch #\\))
75 (setf e t))
76 (e
77 (vector-push-extend ch w)
78 (setf e nil))
79 ((eql ch q)
80 (setf q nil))
81 (q
82 (vector-push-extend ch w))
83 ((whitespace-char-p ch)
84 (return))
85 ((not quotedp)
86 (vector-push-extend ch w))
87 ((or (eql ch #\')
88 (eql ch #\"))
89 (setf q ch))
90 ((eql ch #\`)
91 (setf q #\'))
92 (t
93 (vector-push-extend ch w))))
94 (incf i))
95 ;;
96 ;; Skip to next word.
97 (loop
98 (unless (< i end)
99 (return))
100 (let ((ch (char string i)))
101 (unless (whitespace-char-p ch)
102 (return)))
103 (incf i))
104 ;;
105 ;; Done.
106 (values (make-array (length w)
107 :element-type 'character
108 :initial-contents w)
109 i)))
110
111 (defun str-split-words (string &key quotedp start end max)
112 "Break STRING into words, like str-next-word does, returning the list of
113 the individual words. If QUOTEDP, then allow quoting and backslashifying;
114 otherwise don't. No more than MAX `words' are returned: if the maximum is
115 hit, then the last `word' is unbroken, and may still contain quotes and
116 escape characters. The START and END arguments limit the portion of the
117 string to be processed in the usual way."
118 (when (equal max 0)
119 (return-from str-split-words nil))
120 (let ((l nil) (n 0))
121 (loop
122 (multiple-value-bind
123 (word nextstart)
124 (str-next-word string
125 :quotedp quotedp
126 :start start
127 :end end)
128 (unless word
129 (return))
130 (when (and max (= (1+ n) max))
131 (push (subseq string start end) l)
132 (return))
133 (setf start nextstart)
134 (push word l)
135 (incf n)))
136 (nreverse l)))
137
138 ;;;----- That's all, folks --------------------------------------------------