/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this Clojure program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
50,0003.013.0367,668710  1% 0% 1% 100%
500,0007.247.27138,008710  1% 0% 1% 100%
5,000,00044.2044.25623,768710  0% 0% 1% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

java version "1.8.0"
Java(TM) SE Runtime Environment (build 1.8.0-b132)
Java HotSpot(TM) 64-Bit Server VM (build 25.0-b70, mixed mode)

Clojure 1.6

 regex-dna Clojure #3 program source code

;;   The Computer Language Benchmarks Game
;;   http://benchmarksgame.alioth.debian.org/

;; contributed by Andy Fingerhut

(ns regexdna
  (:gen-class)
  (:require [clojure.string :as str])
  (:import (java.util.regex Pattern)))


;; Slightly modified from standard library slurp so that it can read
;; from standard input.

(defn slurp-std-input
  ;; Reads the standard input using the encoding enc into a string and
  ;; returns it.
  ([] (slurp-std-input (.name (java.nio.charset.Charset/defaultCharset))))
  ([#^String enc]
     (with-open [r (new java.io.BufferedReader *in*)]
       (let [sb (new StringBuilder)]
	 (loop [c (.read r)]
	   (if (neg? c)
	     (str sb)
	     (do
	       (.append sb (char c))
	       (recur (.read r)))))))))


(def dna-seq-regexes '(    "agggtaaa|tttaccct"
		       "[cgt]gggtaaa|tttaccc[acg]"
		       "a[act]ggtaaa|tttacc[agt]t"
		       "ag[act]gtaaa|tttac[agt]ct"
		       "agg[act]taaa|ttta[agt]cct"
		       "aggg[acg]aaa|ttt[cgt]ccct"
		       "agggt[cgt]aa|tt[acg]accct"
		       "agggta[cgt]a|t[acg]taccct"
		       "agggtaa[cgt]|[acg]ttaccct" ))


(def iub-codes '( [ "B"  "(c|g|t)"   ]
		  [ "D"  "(a|g|t)"   ]
		  [ "H"  "(a|c|t)"   ]
		  [ "K"  "(g|t)"     ]
		  [ "M"  "(a|c)"     ]
		  [ "N"  "(a|c|g|t)" ]
		  [ "R"  "(a|g)"     ]
		  [ "S"  "(c|g)"     ]
		  [ "V"  "(a|c|g)"   ]
		  [ "W"  "(a|t)"     ]
		  [ "Y"  "(c|t)"     ] ))


(defn one-replacement [str [iub-str iub-replacement]]
  (str/replace str (. Pattern (compile iub-str)) iub-replacement))


(defn count-regex-occurrences [re s]
  ;; Prepending (?i) to the regexp in Java makes it
  ;; case-insensitive.
  [re (count (re-seq (. Pattern (compile (str "(?i)" re)))
                     s))])


(defn -main
  [& args]
  (let [content (slurp-std-input)
        original-len (count content)
        ;; I'd prefer if I could use the regexp #"(^>.*)?\n" like the
        ;; Perl benchmark does, but that only matches ^ at the beginning
        ;; of the string, not at the beginning of a line in the middle
        ;; of the string.
        content (str/replace content #"(^>.*|\n>.*)?\n" "")
        dna-seq-only-len (count content)]
    
    (doseq [[re num-matches] (pmap #(count-regex-occurrences % content)
                                   dna-seq-regexes)]
      (printf "%s %d\n" re num-matches))
    
    (let [content (reduce one-replacement content iub-codes)]
      (printf "\n%d\n%d\n%d\n" original-len dna-seq-only-len (count content))))
  (flush)
  (shutdown-agents))

 make, command-line, and program output logs

Fri, 18 Apr 2014 18:38:52 GMT

MAKE:
mv regexdna.clojure-3.clojure regexdna.clj
/usr/local/src/jdk1.8.0/bin/java -Dclojure.compile.path=. -cp .:/usr/local/src/clojure/clojure-1.6.0.jar clojure.lang.Compile regexdna
Compiling regexdna to .
1.55s to complete and log all make actions

COMMAND LINE:
/usr/local/src/jdk1.8.0/bin/java -server -XX:+TieredCompilation -XX:+AggressiveOpts -Xmx768m -cp .:/usr/local/src/clojure/clojure-1.6.0.jar regexdna 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play