(defun whitespace-char-p (char)
(check-type char character)
;; Could include non-breaking space, maybe, since
;; we're already in semi-standard territory with
;; these character names. It's a tricky predicate.
(case char ((#\Return
#\Linefeed
#\Page
#\Space
#\Tab
#\VT)
t)))
(defun make-scratch (string to-type buffer-size)
(let ((string-type (array-element-type string)))
(make-array buffer-size :fill-pointer 0
;; The new element-type must accommodate both existing characters
;; and the given substitution character (if any was specified).
:element-type (cond ((subtypep to-type 'null) string-type)
((subtypep to-type string-type) string-type)
((subtypep string-type to-type) to-type)
(t 'character)))))
(defun collapse-string (string &key (start 0) end to (trim :both))
(check-type trim (member nil :left :right :both) "a trim option")
(let ((buffer-size (- (or end (setq end (length string))) start)))
(loop with buffer = (make-scratch string (type-of to) buffer-size)
with state = :initial
for i from start below end
for char = (char string i)
do (cond ((not (whitespace-char-p char))
(when (and to (eq state :whitespace))
(vector-push-extend to buffer))
(vector-push-extend char buffer)
(setq state :non-whitespace-sequence))
((not (and (zerop (fill-pointer buffer))
(case trim ((:left :both) t))))
;; Not trimming on left.
(setq state :whitespace)))
;; Deal with any trailing whitespace.
finally (unless (case trim ((:right :both) t))
(when (and to (eq state :whitespace))
(vector-push-extend to buffer)))
;; Always a new string.
(return (copy-seq buffer)))))
Quick examples
Call | Output |
---|---|
(collapse-string " Hello World ") |
HelloWorld |
(collapse-string " Hello World " :start 10) |
World |
(collapse-string " Hello World " :to #\-) |
Hello-World |
(collapse-string " Hello World " :to #\_ :trim :right) |
_Hello_World |
(collapse-string " Hello World " :to #\/ :trim nil) |
/Hello/World/ |
(collapse-string " " :to #\/ :trim nil) |
/ |
See also
- Forum post: Collapse all white spaces in a string.
- cl-strings, with e.g. clean and split functions.
- cl-slug handles punctuation and accent characters.
Apache 2