#|
Hi.
I think there is a bug in cl-ppcre (do-register-groups). I wrote two almost identical versions of function 'dtd-attributes-to-sexp'. One with 'do-register-groups' hangs, another which uses 'scan' works.
example:
(dtd-attributes-to-sexp "attr-foo (foo | bar) "foo" attr-bar CDATA #FIXED 'zonk' attr-baz NMTOKEN #REQUIRED")
result without 'read-dtd-parens':
(("attr-foo" "(foo | bar)" "foo") ("attr-bar" :CDATA (:FIXED "zonk")) ("attr-baz" :NMTOKEN :REQUIRED))
result with 'read-dtd-parens':
(("attr-foo" (:ALTERNATION 1 "foo" "bar") "foo") ("attr-bar" :CDATA (:FIXED "zonk")) ("attr-baz" :NMTOKEN :REQUIRED))
|#
(use-package :cl-ppcre)
(defparameter *dtd-attribute-name-regex* "([a-zA-z0-9._\-:]+)")
(defparameter *dtd-attribute-type-regex* "(?:(\(.*?\))|(CDATA)|(ID)|(IDREF)|(IDREFS)|(NMTOKEN)|(NMTOKENS)|(ENTITY)|(ENTITIES)|(NOTATION)|(xml:\w+))")
(defparameter *dtd-attribute-default-value-regex* "(?:('[^<>"']*')|("[^<>"]*")|(#REQUIRED)|(#IMPLIED)|(?:#FIXED\s+(?:('[^<>"']*')|("[^<>"]*"))))")
;;; ---------- buggy 'dtd-attributes-to-sexp' ----------
;; Version with 'do-register-groups' macro. It works only without call ;; to 'read-dtd-parens' (which 'read-dtd...' uses 'cl-ppcre:scan'). ;; 'read-dtd-parens' itself seems to be ok. ;; I think 'do-register-groups' has a bug. ;; (defun dtd-attributes-to-sexp (target &aux result) (do-register-groups (a b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 c1 c2 c3 c4 c5.1 c5.2) ((format nil "\s*~A\s+~A\s+~A\s*" ; no load-time-value beause it's an example. *dtd-attribute-name-regex* *dtd-attribute-type-regex* *dtd-attribute-default-value-regex*) target (nreverse result)) (push (list a (cond (b1 (read-dtd-parens b1)) #| *** !!!!!! *** |# (b2 :cdata) (b3 :id) (b4 :idref) (b5 :idrefs) (b6 :nmtoken) (b7 :nmtokens) (b8 :entity) (b9 :entities) (b10 :notation) (b11 b11)) (cond (c1 (subseq c1 1 (1- (length c1)))) (c2 (subseq c2 1 (1- (length c2)))) (c3 :required) (c4 :implied) (c5.1 (list :fixed (subseq c5.1 1 (1- (length c5.1))))) (c5.2 (list :fixed (subseq c5.2 1 (1- (length c5.2))))))) result)))
;;; ---------- 'dtd-attributes-to-sexp' that works ----------
;; Version without 'do-register-groups', i used 'scan' instead of it. ;; It works as intended. ;; (defun dtd-attributes-to-sexp (string &aux result (start 0)) (loop (multiple-value-bind (m-start m-end r-start r-end) (scan (format nil "\s*~A\s+~A\s+~A\s*" ; no load-time-value beause it's an example. *dtd-attribute-name-regex* *dtd-attribute-type-regex* *dtd-attribute-default-value-regex*) string :start start) (declare (ignore m-start)) (push (list (subseq string (svref r-start 0) (svref r-end 0)) (case (position-if #'identity r-start :start 1) (1 (read-dtd-parens (subseq string (svref r-start 1) (svref r-end 1)))) (2 :cdata) (3 :id) (4 :idref) (5 :idrefs) (6 :nmtoken) (7 :nmtokens) (8 :entity) (9 :entities) (10 :notation) (11 (subseq string (svref r-start 11) (svref r-end 11)))) (case (position-if #'identity r-start :start 12) (12 (subseq string (1+ (svref r-start 12)) (1- (svref r-end 12)))) (13 (subseq string (1+ (svref r-start 13)) (1- (svref r-end 13)))) (14 :required) (15 :implied) (16 (list :fixed (subseq string (1+ (svref r-start 16)) (1- (svref r-end 16))))) (17 (list :fixed (subseq string (1+ (svref r-start 17)) (1- (svref r-end 17))))))) result) (when (eql m-end (length string)) (return (nreverse result))) (setq start m-end))))
;; This function is meant to be a pareser to fragments of dtd's ;; 'element' and "attribute' tags. ;; for example: ;; ;; (read-dtd-parens "(e | f)") ;; ;; ===> (:ALTERNATION 1 "e" "f") ;; ;; or ;; ;; (read-dtd-parens "((script|style|meta|link|object)*, ((title, (script|style|meta|link|object)*, (base, (script|style|meta|link|object)*)?) | (base, (script|style|meta|link|object)*, (title, (script|style|meta|link|object)*))))") ;; ;; ===> big sexp :) ;; (defun read-dtd-parens (string &optional (start 0) &aux match-begin match-end result reg type stuff (index (1+ start))) (loop (cond ((char= #( (char string index)) (multiple-value-setq (stuff index) (read-dtd-parens string index)) (push stuff result)) ((multiple-value-setq (match-begin match-end) (scan (load-time-value (create-scanner "^\w+") t) string :start index)) (setq index match-end) (if (find (char string match-end) "+*?") (progn (push (list (intern (string (char string match-end))) (subseq string match-begin match-end)) result) (incf index)) (push (subseq string match-begin match-end) result))) ((multiple-value-setq (match-begin match-end reg) (scan (case type (:alternation (load-time-value (create-scanner "^\s*\|\s*") t)) (:sequence (load-time-value (create-scanner "^\s*,\s*") t)) ('nil (load-time-value (create-scanner "^\s*(?:(\|)|(,))\s*") t))) string :start index)) (unless type (setq type (if (svref reg 0) :alternation :sequence))) (setq index match-end)) ((char= #) (char string index)) (setq result (nreverse result)) (return (case (if (array-in-bounds-p string (1+ index)) (char string (1+ index))) (#* (values (cons type (cons '* result)) (+ 2 index))) (#? (values (cons type (cons '? result)) (+ 2 index))) (#+ (values (cons type (cons '+ result)) (+ 2 index))) (t (values (cons type (cons '1 result)) (+ 1 index)))))))))
;;; ---------- END.