performance measurements

Each table row shows performance measurements for this OCaml program with a particular command-line input value N.

| N | CPU secs | Elapsed secs | Memory KB | Code B | ≈ CPU Load |
|---|---|---|---|---|---|
| 50,000 | 0.15 | 0.15 | ? | 615 | 0% 0% 0% 100% |
| 500,000 | 1.67 | 1.68 | 48,324 | 615 | 0% 0% 100% 1% |
| 5,000,000 | 19.04 | 19.06 | 511,656 | 615 | 43% 0% 0% 57% |

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

The OCaml native-code compiler, version 4.01.0

 regex-dna OCaml #2 program source code

(* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * Contributed by Christophe TROESTLER
 *)

open Printf

(* Patterns whose occurrences are counted and reported, in output order.
   Each entry is written in Str syntax: two 8-character alternatives
   joined by the backslash-bar alternation operator. *)
let variants = [
  "agggtaaa\\|tttaccct";
  "[cgt]gggtaaa\\|tttaccc[acg]";
  "a[act]ggtaaa\\|tttacc[agt]t";
  "ag[act]gtaaa\\|tttac[agt]ct";
  "agg[act]taaa\\|ttta[agt]cct";
  "aggg[acg]aaa\\|ttt[cgt]ccct";
  "agggt[cgt]aa\\|tt[acg]accct";
  "agggta[cgt]a\\|t[acg]taccct";
  "agggtaa[cgt]\\|[acg]ttaccct";
]

(* Str needs a backslash in front of the alternation bar; [to_string]
   strips every backslash so a pattern prints in the plain a|b form. *)
let re_bs = Str.regexp_string "\\"
let to_string pattern = Str.global_replace re_bs "" pattern

(* Rewrite rules as (pattern, replacement) pairs.  The main routine
   applies them to the sequence one after another, in list order,
   compiling each pattern case-insensitively. *)
let subst = [
  "B", "(c|g|t)";
  "D", "(a|g|t)";
  "H", "(a|c|t)";
  "K", "(g|t)";
  "M", "(a|c)";
  "N", "(a|c|g|t)";
  "R", "(a|g)";
  "S", "(c|g)";
  "V", "(a|c|g)";
  "W", "(a|t)";
  "Y", "(c|t)";
]

(* Read everything available on stdin (expected to be a redirected FASTA
   file) and return it together with its length in bytes.

   The scratch area is a [Bytes.t] because [input] fills it in place;
   each chunk is appended to a growing [Buffer.t].  [input] returning 0
   signals end of file, which ends the loop without appending anything. *)
let file_data, file_length =
  let chunk_size = 0xFFF in
  let acc = Buffer.create 0xFFFF and chunk = Bytes.create chunk_size in
  let rec slurp () =
    let got = input stdin chunk 0 chunk_size in
    if got > 0 then begin
      Buffer.add_subbytes acc chunk 0 got;
      slurp ()
    end
  in
  slurp ();
  Buffer.contents acc, Buffer.length acc

(* [dna] is the input with every line starting with '>' emptied and every
   linefeed removed; [code_length] is the size of what remains. *)
let dna =
  let header_or_newline = Str.regexp "^>.*$\\|\n" in
  Str.global_replace header_or_newline "" file_data
let code_length = String.length dna

(* [count re s] returns how many positions of [s] start a match of the
   Str pattern [re], compared case-insensitively.

   After a match at position [pos] the search resumes at [pos + 1], so
   overlapping matches are all counted.  The scan stops once the start
   offset passes the end of [s]: a pattern that can match the empty
   string matches at [String.length s], and asking Str to search from
   one past that would raise [Invalid_argument] rather than [Not_found]. *)
let count re s =
  let re = Str.regexp_case_fold re and len = String.length s in
  let rec scan start found =
    if start > len then found
    else
      let next =
        try Some (Str.search_forward re s start) with Not_found -> None in
      match next with
      | None -> found
      | Some pos -> scan (pos + 1) (found + 1)
  in
  scan 0 0


(* Program entry point.
   1. For each entry of [variants], print the pattern (backslashes
      stripped) followed by its match count in [dna].
   2. Apply every rule of [subst] to [dna] in order, feeding the result
      of one replacement into the next.
   3. Print a blank line, then the raw input length, the cleaned
      sequence length, and the length after all replacements. *)
let () =
  let report pattern =
    printf "%s %i\n" (to_string pattern) (count pattern dna) in
  List.iter report variants;
  let expand seq (code, alternatives) =
    Str.global_replace (Str.regexp_case_fold code) alternatives seq in
  let expanded = List.fold_left expand dna subst in
  printf "\n%i\n%i\n%i\n" file_length code_length (String.length expanded)

 make, command-line, and program output logs

Fri, 13 Sep 2013 03:06:57 GMT

MAKE:
mv regexdna.ocaml-2.ocaml regexdna.ocaml-2.ml
/usr/local/bin/ocamlopt -noassert -unsafe -fno-PIC -nodynlink -inline 100 -fno-PIC unix.cmxa str.cmxa regexdna.ocaml-2.ml -o regexdna.ocaml-2.ocaml_run
File "regexdna.ocaml-2.ml", line 1:
Warning 24: bad source file name: "Regexdna.ocaml-2" is not a valid module name.
rm regexdna.ocaml-2.ml
0.23s to complete and log all make actions

COMMAND LINE:
./regexdna.ocaml-2.ocaml_run 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play