The Computer Language
Benchmarks Game

regex-redux Lisp SBCL #3 program

source code

;;; The Computer Language Benchmarks Game
;;; http://benchmarksgame.alioth.debian.org/
;;;
;;; regex-dna program contributed by: Witali Kusnezow 2009-03-02
;;; converted from regex-dna program

(eval-when (:compile-toplevel :load-toplevel :execute)
  (require :asdf)
  (require :cl-ppcre)

#+sb-thread
(progn
  (define-alien-routine sysconf long (name int))
  (use-package :sb-thread)))

(eval-when (:compile-toplevel)
(setf cl-ppcre:*regex-char-code-limit* 128))

(defconstant  +regex-list+
  '("agggtaaa|tttaccct"
    "[cgt]gggtaaa|tttaccc[acg]"
    "a[act]ggtaaa|tttacc[agt]t"
    "ag[act]gtaaa|tttac[agt]ct"
    "agg[act]taaa|ttta[agt]cct"
    "aggg[acg]aaa|ttt[cgt]ccct"
    "agggt[cgt]aa|tt[acg]accct"
    "agggta[cgt]a|t[acg]taccct"
    "agggtaa[cgt]|[acg]ttaccct"))

(defconstant  +alternatives+
  '(("tHa[Nt]" "<4>")  ("aND|caN|Ha[DS]|WaS" "<3>")
    ("a[NSt]|BY" "<2>")  ("<[^>]*>" "|")
    ("\\|[^|][^|]*\\|" "-")))

#+sb-thread
(progn
  (defconstant  +cpu-count+ (sysconf 84))
  (defvar *mutex* (make-mutex))
  (defvar *aux-mutex* (make-mutex))

  (defmacro bg  (&body body) `(make-thread (lambda () ,@body)))
  (defmacro join-all (&body body)
	`(mapcar
	  #'join-thread
	  (loop for item in (list ,@body)
		 append (if (consp item) item (list item))))))

(defun read-all
    (stream &aux (buf-size (* 1024 1024))
     (size 0)
     (buf-list
      (loop
         for buf = (make-string buf-size :element-type 'base-char)
         for len = (read-sequence buf stream)
         do (incf size len)
         collect (if (< len buf-size) (subseq buf 0 len) buf)
         while (= len buf-size))))
  (declare (type fixnum size))
  (loop with res-string = (make-string size :element-type 'base-char)
     with i of-type fixnum = 0
     for str in buf-list
     do (setf (subseq res-string i) (the simple-base-string str))
     (incf i (length (the simple-base-string str)))
     finally (return res-string)))

(defun length-to-replace (match)
  (loop for x in match
     sum (- (the fixnum (cdr x))
            (the fixnum (car x))) of-type fixnum))

(defun replace-aux
    (match replacement target-string result-string
     &key (match-begin 0) (match-end -1)
     (match-length (length match))
     &aux
     (len (length replacement))
     (first-match (if (zerop match-begin) '(0 . 0) (nth (1- match-begin) match)))
     (target-start (cdr first-match))
     (result-start (+ (the fixnum (* len match-begin))
                    (- target-start
                       (the fixnum (length-to-replace (subseq match 0 match-begin)))))))
  (declare (type fixnum match-begin match-end match-length target-start result-start len)
           (type list match)
           (type simple-base-string result-string target-string)
           (type vector replacement))
  (loop with (i j) of-type fixnum = (list result-start target-start)
     with mmatch = (if (> match-begin match-end)
                       match (subseq match match-begin match-end))
     for pair in mmatch
     do (setf (subseq result-string i) (subseq target-string j (car pair))
              i (+ i (- (the fixnum (car pair)) j))
              (subseq result-string i) replacement
              j (cdr pair)
              i (+ i len))
     finally (if (or (minusp match-end) (<= match-length match-end))
                 (setf (subseq result-string i ) (subseq target-string j))))
  nil)

#+sb-thread
(defun parts
    (parts-num len
     &aux
     (ranges (loop with (step rest) of-type fixnum =  (multiple-value-list (floor len parts-num))
                with i of-type fixnum = 0 while (< i len)
                collect i into res of-type fixnum
                do (incf i step)(if (plusp rest) (progn (incf i) (decf rest)) )
                finally (return (append res (list len))))
             ))
  (declare (type fixnum len parts-num)
           (type list ranges))
  (mapcar #'cons ranges (subseq ranges 1)))

(defun replace-all
    (regexp replacement target-string
     &aux (rmatch '()) (match '())
     (result-string (make-string 0 :element-type 'base-char)))
  (declare (type simple-base-string result-string target-string)
           (type vector replacement))
  (cl-ppcre:do-scans
      (match-start match-end reg-starts reg-ends regexp target-string nil)
    (push (cons match-start match-end) rmatch))
  (if rmatch
      (progn
        (setf match (reverse rmatch)
              result-string (make-string
                             (+ (- (length target-string)
                                   (length-to-replace match))
                                (the fixnum (* (length replacement)
                                               (length match)))) :element-type 'base-char))
        #-sb-thread
        (replace-aux match replacement target-string result-string)
        #+sb-thread
        (mapcar #'join-thread
                (loop with len of-type fixnum = (length match)
				   with parts-list  = (parts +cpu-count+ len)
                   with current of-type fixnum = 0
                   repeat +cpu-count+
                   collect
					 (bg (let (range)
                           (with-mutex (*mutex*)
                             (setf range (nth current parts-list))
                             (incf current))
                           (replace-aux match replacement target-string result-string
                                        :match-begin (car range) :match-end (cdr range)
                                        :match-length len)))))
        result-string)
      target-string))

(defun main (&optional (stream *standard-input*)
             &aux (sequence (read-all stream))
             (size (length sequence)))
  (declare (type simple-base-string sequence))
  (setf sequence (replace-all ">[^\\n]*\\n|\\n" "" sequence))

  #-sb-thread
  (progn
    (loop for regex in +regex-list+ do
         (format t "~a ~a~%" regex
                 (/ (length
                     (the list
                       (cl-ppcre:all-matches regex sequence))) 2)))
    (format t "~%~a~%~a~%" size (length sequence))
    (loop for pair in +alternatives+ do
         (setf sequence (replace-all  (car pair) (cadr pair) sequence )))
    (format t "~a~%" (length sequence)))
  #+sb-thread
  (let* ((len (length +regex-list+))
         (result (make-list (1+ len))))
    (join-all
	 (loop with idx of-type fixnum = 0
		repeat len
		collect
          (bg (let (reg cur)
                (with-mutex (*aux-mutex*)
                  (setf cur idx reg (nth cur +regex-list+))
                  (incf idx))
              (setf (nth cur result)
                    (format nil "~a ~a" reg
                            (/ (length
                                (the list
                                  (cl-ppcre:all-matches reg sequence))) 2))))))
	 (bg (loop with seq = (copy-seq sequence)
            for pair in +alternatives+ do
              (setf seq (replace-all  (car pair) (cadr pair) seq ))
            finally (setf (nth len result)
                          (format nil "~%~a~%~a~%~a" size (length sequence) (length seq))))))
    (format t "~{~a~%~}" result))
  )
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
SBCL 1.3.15


Mon, 20 Mar 2017 18:56:32 GMT

MAKE:
cp: 'regexredux.sbcl-3.sbcl' and './regexredux.sbcl-3.sbcl' are the same file
SBCL built with: /usr/local/bin/sbcl --userinit /dev/null --batch --eval '(load "regexredux.sbcl-3.sbcl_compile")'
### START regexredux.sbcl-3.sbcl_compile
(handler-bind ((sb-ext:defconstant-uneql      (lambda (c) (abort c))))      (load (compile-file "regexredux.sbcl-3.sbcl" ))) (save-lisp-and-die "sbcl.core" :purify t)
### END regexredux.sbcl-3.sbcl_compile

; compiling file "/home/dunham/benchmarksgame/bench/regexredux/regexredux.sbcl-3.sbcl" (written 20 MAR 2017 10:27:36 AM):
; compiling (REQUIRE :ASDF)
; compiling (REQUIRE :CL-PPCRE)
; compiling (DEFINE-ALIEN-ROUTINE SYSCONF ...)
; compiling (USE-PACKAGE :SB-THREAD)
; compiling (DEFCONSTANT +REGEX-LIST+ ...)
; compiling (DEFCONSTANT +ALTERNATIVES+ ...)
; compiling (DEFCONSTANT +CPU-COUNT+ ...)
; compiling (DEFVAR *MUTEX* ...)
; compiling (DEFVAR *AUX-MUTEX* ...)
; compiling (DEFMACRO BG ...)
; compiling (DEFMACRO JOIN-ALL ...)
; compiling (DEFUN READ-ALL ...)
; compiling (DEFUN LENGTH-TO-REPLACE ...)
; compiling (DEFUN REPLACE-AUX ...)
; file: /home/dunham/benchmarksgame/bench/regexredux/regexredux.sbcl-3.sbcl
; in: DEFUN REPLACE-AUX
;     (LOOP WITH (I J) OF-TYPE FIXNUM = (LIST RESULT-START TARGET-START)
;           WITH MMATCH = (IF (> MATCH-BEGIN MATCH-END)
;                             MATCH
;                             (SUBSEQ MATCH MATCH-BEGIN MATCH-END))
;           FOR ...)
; --> BLOCK LET SB-LOOP::LOOP-DESTRUCTURING-BIND DESTRUCTURING-BIND 
; --> SB-INT:BINDING* LET* IF 
; ==>
;   NIL
; 
; caught STYLE-WARNING:
;   The binding of I is not a FIXNUM:
;    NIL
;   See also:
;     The SBCL Manual, Node "Handling of Types"
; 
; caught STYLE-WARNING:
;   The binding of J is not a FIXNUM:
;    NIL
;   See also:
;     The SBCL Manual, Node "Handling of Types"

; compiling (DEFUN PARTS ...)
; file: /home/dunham/benchmarksgame/bench/regexredux/regexredux.sbcl-3.sbcl
; in: DEFUN PARTS
;     (LOOP WITH (STEP
;                 REST) OF-TYPE FIXNUM = (MULTIPLE-VALUE-LIST
;                                         (FLOOR LEN PARTS-NUM))
;           WITH I OF-TYPE FIXNUM = ...)
; --> BLOCK LET SB-LOOP::LOOP-DESTRUCTURING-BIND DESTRUCTURING-BIND 
; --> SB-INT:BINDING* LET* IF 
; ==>
;   NIL
; 
; caught STYLE-WARNING:
;   This is not a FIXNUM:
;    NIL
;   See also:
;     The SBCL Manual, Node "Handling of Types"
; 
; caught STYLE-WARNING:
;   The binding of REST is not a FIXNUM:
;    NIL
;   See also:
;     The SBCL Manual, Node "Handling of Types"

; compiling (DEFUN REPLACE-ALL ...)
; compiling (DEFUN MAIN ...); 
; compilation unit finished
;   caught 4 STYLE-WARNING conditions


; /home/dunham/benchmarksgame_quadcore/regexredux/tmp/regexredux.sbcl-3.fasl written
; compilation finished in 0:00:01.574
803+17083+29201+20153 objects... ### START regexredux.sbcl-3.sbcl_run
(main) (quit)
### END regexredux.sbcl-3.sbcl_run

7.08s to complete and log all make actions

COMMAND LINE:
/usr/local/bin/sbcl  --noinform --core sbcl.core --userinit /dev/null --load regexredux.sbcl-3.sbcl_run 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361