>From e3d8b2cd9ad98660c68bed9d41532892bb394d6b Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Fri, 30 Nov 2012 00:09:19 +0100 Subject: [PATCH 2/2] Finetune types.db entries for irregex and update its module export list to match latest version by removing obsolete procedures --- irregex.scm | 3 -- types.db | 111 ++++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/irregex.scm b/irregex.scm index 253b745..ccd2ad9 100644 --- a/irregex.scm +++ b/irregex.scm @@ -45,15 +45,12 @@ irregex-match irregex-match? irregex-match-data? - irregex-match-end irregex-match-end-chunk irregex-match-end-index irregex-match-names irregex-match-num-submatches - irregex-match-start irregex-match-start-chunk irregex-match-start-index - irregex-match-string irregex-match-subchunk irregex-match-substring irregex-match-valid-index? diff --git a/types.db b/types.db index ede5fa3..cbb2749 100644 --- a/types.db +++ b/types.db @@ -1322,73 +1322,102 @@ ;; irregex -;;XXX these need to be reviewed by Alex and/or sjamaan - -;;XXX which do not invoke stored procedures that may modify local state? (clean) - -(irregex (#(procedure #:clean) irregex (#!rest) *)) -;irregex-apply-match - -(irregex-dfa (#(procedure #:clean #:enforce) irregex-dfa ((struct regexp)) *) +(irregex (#(procedure #:clean) irregex (#!rest) (struct regexp))) + +;; Both of these DFA accessors return either #f or a DFA vector. +;; TODO: Should we spec out the entire DFA type layout? It's plenty complex, so we don't +;; want to be specifying this for all procedures accepting a DFA! +;; A DFA looks like a vector of lists; +;; the car of each list is a number (for init-state), false or an alist; +;; the cdr is a list of alists, which contains a char (or vector) and two alists +;; These alists have types themselves, of course... +(irregex-dfa (#(procedure #:clean #:enforce) irregex-dfa ((struct regexp)) (or boolean vector)) (((struct regexp)) (##sys#slot #(1) '1))) -(irregex-dfa/search (#(procedure #:clean #:enforce) irregex-dfa/search ((struct regexp)) *) +(irregex-dfa/search (#(procedure #:clean #:enforce) irregex-dfa/search ((struct regexp)) (or boolean vector)) (((struct regexp)) (##sys#slot #(1) '2))) -(irregex-nfa (#(procedure #:clean #:enforce) irregex-nfa ((struct regexp)) *) +;; Procedure type returned by irregex-nfa is a matcher type (it is misnamed) +;; which is another complex procedure type. +(irregex-nfa (#(procedure #:clean #:enforce) irregex-nfa ((struct regexp)) (or boolean procedure)) (((struct regexp)) (##sys#slot #(1) '3))) -(irregex-flags (#(procedure #:clean #:enforce) irregex-flags ((struct regexp)) *) +(irregex-flags (#(procedure #:clean #:enforce) irregex-flags ((struct regexp)) fixnum) (((struct regexp)) (##sys#slot #(1) '4))) (irregex-num-submatches (#(procedure #:clean #:enforce) irregex-num-submatches ((struct regexp)) fixnum) (((struct regexp)) (##sys#slot #(1) '5))) -(irregex-lengths (#(procedure #:clean #:enforce) irregex-lengths ((struct regexp)) *) +(irregex-lengths (#(procedure #:clean #:enforce) irregex-lengths ((struct regexp)) + (vector-of (or boolean pair))) (((struct regexp)) (##sys#slot #(1) '6))) -(irregex-names (#(procedure #:clean #:enforce) irregex-names ((struct regexp)) *) +;; XXX: Submatch names ought to be symbols according to the docs, but this is +;; not enforced anywhere, so we can't assume it in the return type here. +(irregex-names (#(procedure #:clean #:enforce) irregex-names ((struct regexp)) + (list-of (pair * fixnum))) (((struct regexp)) (##sys#slot #(1) '7))) -(irregex-extract (#(procedure #:enforce) irregex-extract (* string #!optional fixnum fixnum) list)) ;XXX specialize? - -(irregex-fold (#(procedure #:enforce) irregex-fold (* (procedure (fixnum (struct regexp-match) *) *) * string #!optional (procedure (fixnum *) *) fixnum fixnum) *)) - -(irregex-fold/chunked (#(procedure #:enforce) irregex-fold/chunked (* (procedure (* fixnum (struct regexp-match) *) *) * procedure * #!optional (procedure (* fixnum *) *) fixnum fixnum) *)) - -(irregex-match (#(procedure #:enforce) irregex-match (* string #!optional fixnum fixnum) *)) -;irregex-match? +;; XXX: specialize these? (how?) +(irregex-extract (#(procedure #:clean #:enforce) irregex-extract (* string #!optional fixnum fixnum) + (list-of string))) +(irregex-split (#(procedure #:clean #:enforce) irregex-split (* string #!optional fixnum fixnum) + (list-of string))) + +(irregex-fold (forall (a) (#(procedure #:enforce) irregex-fold (* (procedure (fixnum (struct regexp-match) a) a) a string #!optional (procedure (fixnum *) *) fixnum fixnum) a))) +;; XXX TODO: chunker is a plain vector +(irregex-fold/chunked (forall (a c) (#(procedure #:enforce) irregex-fold/chunked (* (procedure (c fixnum (struct regexp-match) a) a) a vector c #!optional (procedure (c fixnum a) a) fixnum fixnum) a))) +(irregex-reset-matches! (procedure irregex-reset-matches! ((struct regexp-match)) + (struct regexp-match))) + +;; A silly procedure, but at least we can "inline" it like this +(irregex-match? (#(procedure #:clean #:enforce) irregex-match? (* string #!optional fixnum fixnum) + boolean) + ((* string) (and (irregex-match #(1) #(2)) '#t)) + ((* string fixnum) (and (irregex-match #(1) #(2) #(3)) '#t)) + ((* string fixnum fixnum) (and (irregex-match #(1) #(2) #(3) #(4)) '#t))) +;; These two return #f or a match object +(irregex-match (#(procedure #:clean #:enforce) irregex-match (* string #!optional fixnum fixnum) + (or boolean (struct regexp-match)))) +;; XXX chunker is a plain vector +;; Not marked clean because we don't know what chunker procedures will do +(irregex-match/chunked (#(procedure #:enforce) irregex-match/chunked (* vector * #!optional fixnum) + (or boolean (struct regexp-match)))) (irregex-match-data? (#(procedure #:pure #:predicate (struct regexp-match)) irregex-match-data? (*) boolean)) -(irregex-match-end (#(procedure) irregex-match-end (* #!optional *) *)) -;irregex-match-end-chunk -(irregex-match-end-index (#(procedure #:enforce) irregex-match-end-index ((struct regexp-match) #!optional *) fixnum)) +(irregex-match-end-index (#(procedure #:clean #:enforce) irregex-match-end-index ((struct regexp-match) #!optional *) fixnum)) +(irregex-match-end-chunk (#(procedure #:clean #:enforce) irregex-match-end-chunk ((struct regexp-match) #!optional *) *)) +(irregex-match-start-index (#(procedure #:clean #:enforce) irregex-match-start-index ((struct regexp-match) #!optional *) fixnum)) +(irregex-match-start-chunk (#(procedure #:clean #:enforce) irregex-match-start-chunk ((struct regexp-match) #!optional *) *)) +(irregex-match-substring (#(procedure #:clean #:enforce) irregex-match-substring ((struct regexp-match) #!optional *) *)) +(irregex-match-subchunk (#(procedure #:clean #:enforce) irregex-match-subchunk ((struct regexp-match) #!optional *) *)) -(irregex-match-names (#(procedure #:enforce) irregex-match-names ((struct regexp-match)) list) +(irregex-match-names (#(procedure #:clean #:enforce) irregex-match-names ((struct regexp-match)) list) (((struct regexp-match)) (##sys#slot #(1) '2))) -(irregex-match-num-submatches (#(procedure #:enforce) irregex-match-num-submatches ((struct regexp-match)) fixnum)) -(irregex-match-start (#(procedure) irregex-match-start (* #!optional *) *)) -;irregex-match-start-chunk -(irregex-match-start-index (#(procedure #:enforce) irregex-match-start-index ((struct regexp-match) #!optional *) fixnum)) -(irregex-match-string (#(procedure) irregex-match-string (*) *)) -(irregex-match-subchunk (#(procedure #:enforce) irregex-match-subchunk ((struct regexp-match) #!optional *) *)) -(irregex-match-substring (#(procedure) irregex-match-substring (* #!optional *) *)) -(irregex-match/chunked (#(procedure #:enforce) irregex-match/chunked (* * * #!optional fixnum) *)) +(irregex-match-num-submatches (#(procedure #:enforce) irregex-match-num-submatches ((struct regexp-match)) fixnum) + (((struct regexp-match)) + (fx- (fx/ (##sys#size ##sys#slot #(1) 1) 4) 2))) -(irregex-new-matches (procedure irregex-new-matches (*) *)) +(irregex-new-matches (procedure irregex-new-matches (*) *)) ; really only for internal use.. +(irregex-opt (#(procedure #:clean #:enforce) irregex-opt (list) *)) +(irregex-quote (#(procedure #:clean #:enforce) irregex-quote (string) string)) -(irregex-opt (#(procedure #:enforce) irregex-opt (list) *)) -(irregex-quote (#(procedure #:enforce) irregex-quote (string) string)) (irregex-replace (#(procedure #:enforce) irregex-replace (* string #!rest) string)) (irregex-replace/all (#(procedure #:enforce) irregex-replace/all (* string #!rest) string)) -(irregex-reset-matches! (procedure irregex-reset-matches! (*) *)) -(irregex-search (#(procedure #:enforce) irregex-search (* string #!optional fixnum fixnum) *)) -(irregex-search/matches (#(procedure #:enforce) irregex-search/matches (* string * fixnum fixnum *) *)) -(irregex-split (#(procedure #:enforce) irregex-split (* string #!optional fixnum fixnum) list)) -(irregex-search/chunked (#(procedure #:enforce) irregex-search/chunked (* procedure * #!optional fixnum fixnum *) *)) +;; Returns a list of strings, but *only* when all user-procedures do +(irregex-apply-match (procedure ((struct regexp-match) list) list)) ; internal use + +;; These return #f or a match object +(irregex-search (#(procedure #:clean #:enforce) irregex-search (* string #!optional fixnum fixnum) + (or boolean (struct regexp-match)))) +;; XXX chunker is a plain vector +(irregex-search/chunked (#(procedure #:enforce) irregex-search/chunked (* vector * #!optional fixnum *) + (or boolean (struct regexp-match)))) +(irregex-search/matches (#(procedure #:enforce) irregex-search/matches (* vector * * fixnum (struct regexp-match)) + (or boolean (struct regexp-match)))) (irregex-match-valid-index? (#(procedure #:clean #:enforce) irregex-match-valid-index? ((struct regexp-match) *) boolean)) -- 1.7.12.2