performance measurements

Each table row shows performance measurements for this Clojure program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,0003.111.4976,1722723  44% 40% 89% 39%
2,500,0004.712.19151,2442723  43% 45% 87% 42%
25,000,00021.878.56403,4682723  68% 36% 84% 69%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

java version "1.8.0_25"
Java(TM) SE Runtime Environment (build 1.8.0_25-b17)
Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode)

Clojure 1.6

 k-nucleotide Clojure #5 program source code

;   The Computer Language Benchmarks Game
;;   http://benchmarksgame.alioth.debian.org/
;;
;; ported from Scala #2
;; contributed by Alex Miller

(ns knucleotide
  (:gen-class)
  (:require [clojure.string :as s])
  (:import [java.util.concurrent Executors Future]
           [java.io InputStream]
           [clojure.lang Numbers]
           [java.nio.channels Channels ReadableByteChannel]
           [java.nio ByteBuffer]))

(set! *warn-on-reflection* true)
(set! *unchecked-math* true)

(definterface IDnaHash
  (add [^long key ^long count])
  (^long get [^long key])
  (^void printSorted []))

(definterface IBits
  (add2 [^long b])
  (addLots [bs])
  (addToHash [h ^long n ^long offset])
  (^long getSize []))

(deftype Bits [^ints data ;; array of ints, each of which has 16 2-bit slots
               ^:unsynchronized-mutable ^long size ;; number of bits in Bits
               ^:unsynchronized-mutable ^long index ;; write index in data
               ^:unsynchronized-mutable ^long n] ;; bit offset to write at current index
  IBits
  (add2 [_ b]
    (let [data ^ints data]
      (set! size (inc size))
      (when (> n 30)
        (set! index (inc index))
        (set! n 0))
      (aset data index (bit-or (aget ^ints data index)
                               (Numbers/shiftLeftInt (bit-and b 0x3) n)))
      (set! n (+ n 2))
      nil))
  (addLots [this bs]
    (let [data ^ints data
          bs ^Bits bs]
      (if (or (= n 0) (> n 30))
        (do
          (when (> n 30)
            (set! index (inc index))
            (set! n 0))
          (System/arraycopy ^ints (.-data bs) 0 data index (.-index bs))
          (set! index (+ index (.-index bs)))
          (when (and (> index 0) (= n 0))
            (set! index (dec index))
            (set! n 32)))
        (do
          (loop [i 0]
            (when (< i (.-index bs))
              (let [j (aget ^ints (.-data bs) i)]
                (aset data index (bit-or (aget data index) (Numbers/shiftLeftInt j n)))
                (set! index (inc index))
                (aset data index (bit-or (aget data index) (Numbers/unsignedShiftRightInt j (- 32 n))))
                (recur (inc i)))))))
      (set! size (+ size (* 16 ^long (.-index bs))))
      (let [bsn (.-n bs)]
        (when (> bsn 0)
          (loop [n bsn
                 i (aget ^ints (.-data bs) (.-index bs))]
            (when (> n 0)
              (.add2 this i)
              (recur (long (- n 2))
                     (long (Numbers/unsignedShiftRightInt i 2)))))))
      nil))
  (addToHash [_ h n offset]
    (let [data ^ints data
          mask (dec (bit-shift-left 1 (* n 2)))]
      (loop [i (rem offset 16)
             j (quot offset 16)
             h ^IDnaHash h]
        (if (<= (+ (* j 16) i n) size)
          (if (<= (+ i n) 16)
            (let [l (bit-and (unsigned-bit-shift-right (aget data j) (* 2 i)) mask)
                  newi (+ i n)]
              (if (>= newi 16)
                (recur (- newi 16) (inc j) (.add h l 1))
                (recur newi j (.add h l 1))))
            (let [l (bit-and (bit-or (Numbers/unsignedShiftRightInt (aget data j) (int (* 2 i)))
                                     (bit-shift-left (aget data (inc j)) (* 2 (- 16 i))))
                             mask)
                  newj (inc j)
                  newi (+ i (- n 16))]
              (if (>= newi 16)
                (recur (- newi 16) (inc newj) (.add h l 1))
                (recur newi newj (.add h l 1)))))
          h))))
  (getSize [_] size))

(def table (let [arr (long-array 256)]
             (loop [i 0]
               (if (< i 256)
                 (do
                   (aset arr i (case i
                                 (97 65) 0
                                 (116 84) 1
                                 (103 71) 2
                                 (99 67) 3
                                 -1))
                   (recur (inc i)))
                 arr))))

(defn r ^long [^ReadableByteChannel ch ^ByteBuffer d]
  (.clear d)
  (.read ch d))

(defn add-all [bitsv]
  (let [total-size (inc ^long (/ ^long (reduce (fn ^long [a b] (+ ^long a ^long (.getSize ^IBits b))) 0 bitsv) 16))
        all ^IBits (->Bits (int-array total-size) 0 0 0)]
    (doseq [^IBits bits bitsv :when (pos? (.getSize bits))]
      (.addLots all bits))
    all))

(defn readb [^ReadableByteChannel ch ^ByteBuffer data ^long n ^long i]
  (let [table ^longs table]
    (loop [i i
           n n
           bits (->Bits (int-array (inc (quot (- n i) 4))) 0 0 0)
           bitsv []]
      (if (< i n)
        (let [b (long (.get data i))]
          (when (false? (= b (long 10))) ;; if not newline
            (.add2 ^IBits bits (aget table (bit-and b 0xFF))))
          (recur (inc i) n bits bitsv))
        (if (<= n 0)
          (add-all bitsv)
          (let [n (r ch data)]
            (recur 0 (long n) (->Bits (int-array (inc (quot n 4))) 0 0 0) (conj bitsv bits))))))))

(defn skip [^ReadableByteChannel ch ^ByteBuffer data ^long n ^long i]
  (loop [i i
         n n]
    (if (< i n)
      (if (= (long (.get data i)) 10) ;; \newline
        (if (= (inc i) n)
          (readb ch data (r ch data) 0)
          (readb ch data n i))
        (recur (inc i) n))
      (recur 0 (r ch data)))))

(defn loadf [^String target]
  (let [ch ^ReadableByteChannel (Channels/newChannel System/in)
        tb ^bytes (.getBytes target)
        tlen (long (alength tb))
        tl ^longs (long-array (inc tlen)) ;; pad by 1 space for match termination     
        data ^ByteBuffer (ByteBuffer/allocateDirect 1048576) ;; 1024 * 1024
        n (long (r ch data))]
    (doseq [i (range tlen)]
      (aset tl i (long (aget tb i))))
    (loop [i (long 0)
           need (long 0)
           match (long (aget tl 0))
           n (long n)]

      (if (< i n)
        (if (< need tlen)
          (if (= (long (.get data i)) match)            
            (recur (inc i) (inc need) (aget tl (inc need)) n)
            (recur (inc i) 0 (long (aget tl 0)) n))
          (if (= (inc i) n)
            (skip ch data (r ch data) 0)
            (skip ch data n i)))
        (recur 0 need match (r ch data))))))

(defmacro hc [l size]
  `(bit-and (+ ~l (bit-shift-right ~l 17)) (dec ~size)))

(defmacro nx [i size]
  `(bit-and (inc ~i) (dec ~size)))

(def ^:constant decode ["A" "T" "G" "C"])
(defn l2s [^long l ^long n]
  (if (<= n 0)
    ""
    (str (decode (bit-and l 0x3)) (l2s (unsigned-bit-shift-right l 2) (dec n)))))

(deftype DnaHash [^long z
                  ^long size
                  ^:unsynchronized-mutable ^long n
                  ^longs keys
                  ^longs counters]
  IDnaHash
  (add [this key count]
    (let [size size
          keys ^longs keys
          counters ^longs counters
          index (int (hc key size))]
      (cond
       ;; new key
       (= (aget counters index) 0)
       (do
         (aset keys index key)
         (aset counters index count)
         (set! n (inc n))
         this)

       ;; existing key
       (= (aget keys index) key)
       (do
         (aset counters index (+ (aget counters index) count))
         this)

       ;; rehash
       (> (* 6 n) size)
       (let [newsize (* size 64)
             newhash (DnaHash. z newsize 0 (long-array newsize) (long-array newsize))]
         (loop [i 0]
           (if (< i size)
             (let [ci (aget counters i)]
               (when (> ci 0)
                 (.add newhash (aget keys i) ci))
               (recur (inc i)))
             (do
               (.add newhash key 1)
               newhash))))

       true
       (loop [i (nx index size)]
         (let [ii (int i)]
           (if (or (= 0 (aget counters ii))
                   (= key (aget keys ii)))
             (if (= (aget counters i) 0)
               (do
                 (aset keys ii key)
                 (aset counters ii count)
                 (set! n (inc n))
                 this)
               (do
                 (aset counters ii (+ (aget counters ii) count))
                 this))
             (recur (nx i size))))))))
  (get [_ key]
    (loop [i (hc key size)]
      (if (and (pos? (aget counters i)) (not= key (aget keys i)))
        (recur i)
        (aget counters i))))
  (printSorted [this]
    (let [tcounts (long (loop [idx 0 acc 0]
                          (if (< idx (alength counters))
                            (recur (inc idx) (+ acc (aget counters idx)))
                            acc)))
          factor (/ 100.0 tcounts)
          freqs (loop [i 0
                       acc (transient [])]
                  (if (< i (alength counters))
                    (let [c (* factor (aget counters i))
                          k (l2s (aget keys i) z)]
                      (recur (inc i) (if (> c 0) (conj! acc [c k]) acc)))
                    (persistent! acc)))
          s (reverse (sort freqs))]
      (doseq [[freq label] s]
        (printf "%s %.3f\n" label freq))
      (println))))

(defn prints [^IDnaHash d ^String s]
  (let [bs (.getBytes s)
        mapped (map (fn [^long b] (aget ^longs table (bit-and b 0xFF))) bs)
        k (reduce (fn [^long acc ^long b] (+ (* 4 acc) b)) 0 (reverse mapped))]
    (format "%d\t%s" (.get d k) s)))

(defn -main [& args]
  (let [sizes [1 2 3 4 6 12 18]
        sequence "GGTATTTTAATTTATAGT"
        data ^IBits (loadf ">THREE")
        tasks (doall
               (map (fn [^long n]
                      (fn []
                        (loop [h (DnaHash. n 512 0 (long-array 512) (long-array 512))
                               i 0]
                          (if (< i n)
                            (recur (.addToHash data h n i) (inc i))
                            h))))
                    sizes))        
        processors (.. Runtime getRuntime availableProcessors)
        pool (Executors/newFixedThreadPool processors)
        [f1 f2 :as futures] (.invokeAll pool tasks)]
    (.printSorted ^DnaHash @f1)
    (.printSorted ^DnaHash @f2)

    (loop [[f & fs] (drop 2 futures)
           [s & ss] (drop 2 sizes)]
      (when f
        (println (prints @f (subs sequence 0 s)))
        (recur fs ss)))
    (.shutdown pool)
    (shutdown-agents)))

 make, command-line, and program output logs

Wed, 19 Nov 2014 18:20:07 GMT

MAKE:
mv knucleotide.clojure-5.clojure knucleotide.clj
/usr/local/src/jdk1.8.0_25/bin/java -Dclojure.compile.path=. -cp .:/usr/local/src/clojure/clojure-1.6.0.jar clojure.lang.Compile knucleotide
Compiling knucleotide to .
2.86s to complete and log all make actions

COMMAND LINE:
/usr/local/src/jdk1.8.0_25/bin/java -server -XX:+TieredCompilation -XX:+AggressiveOpts -Xmx1024m -cp .:/usr/local/src/clojure/clojure-1.6.0.jar knucleotide 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT

Revised BSD license

  Home   Conclusions   License   Play