performance measurements

Each table row shows performance measurements for this OCaml program with a particular command-line input value N.

 N          CPU secs  Elapsed secs  Memory KB  Code B  ≈ CPU Load
 50,000     0.01      0.16          ?          1050    0% 0% 0% 100%
 500,000    0.10      1.77          570,172    1050    1% 0% 2% 100%
 5,000,000  1.44      23.16         264,744    1050    0% 0% 0% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

The OCaml native-code compiler, version 4.01.0

 regex-dna OCaml #4 program source code

(*
 * The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * Contributed by Paolo Ribeca, August 2011
 *
 * The regexp machinery comes from a previous OCaml version by
 *  Christophe TROESTLER and Mauricio Fernandez
 *)

(* Worker count used by the main routine, which splits the sequence into
   chunks and forks one child process per chunk. *)
let workers = 16

(* The nine 8-mer patterns to count, as a pair of parallel arrays:
   [variants] holds the compiled, case-insensitive regexps and
   [variants_to_string] the corresponding labels used when printing. *)
let variants, variants_to_string =
  let patterns = [|
    "agggtaaa\\|tttaccct";
    "[cgt]gggtaaa\\|tttaccc[acg]";
    "a[act]ggtaaa\\|tttacc[agt]t";
    "ag[act]gtaaa\\|tttac[agt]ct";
    "agg[act]taaa\\|ttta[agt]cct";
    "aggg[acg]aaa\\|ttt[cgt]ccct";
    "agggt[cgt]aa\\|tt[acg]accct";
    "agggta[cgt]a\\|t[acg]taccct";
    "agggtaa[cgt]\\|[acg]ttaccct"
  |] in
  (* Str spells alternation as "\|"; the printed label uses a bare "|",
     so every backslash is dropped from the pattern text. *)
  let strip_backslashes = Str.global_replace (Str.regexp_string "\\") "" in
  let compiled = Array.map Str.regexp_case_fold patterns in
  let labels = Array.map strip_backslashes patterns in
  compiled, labels

(* IUPAC substitution table: each entry pairs a case-insensitive regexp for
   a one-letter code with the replacement text substituted for it. *)
let iupacs =
  let compile (code, expansion) = Str.regexp_case_fold code, expansion in
  Array.map compile [|
    "B", "(c|g|t)";
    "D", "(a|g|t)";
    "H", "(a|c|t)";
    "K", "(g|t)";
    "M", "(a|c)";
    "N", "(a|c|g|t)";
    "R", "(a|g)";
    "S", "(c|g)";
    "V", "(a|c|g)";
    "W", "(a|t)";
    "Y", "(c|t)"
  |]

(* Slurp everything available on stdin (a redirected FASTA file) and return
   the text together with its length in characters. *)
let file_data, file_length =
  let block = 0xfff in
  let acc = Buffer.create 0xffffff and scratch = String.create block in
  (* Keep pulling blocks until [input] reports 0 characters, i.e. end of file *)
  let rec slurp () =
    let got = input stdin scratch 0 block in
    Buffer.add_substring acc scratch 0 got;
    if got > 0 then slurp ()
  in
  slurp ();
  Buffer.contents acc, Buffer.length acc

(* The bare sequence: every FASTA description line (starting with ">") and
   every linefeed is deleted from the raw input. *)
let dna =
  let header_or_newline = Str.regexp "^>.*$\\|\n" in
  Str.global_replace header_or_newline "" file_data

(* Length of the sequence once descriptions and linefeeds are gone *)
let code_length = String.length dna

(* [count re s] is the number of matches of [re] found in [s] by repeated
   forward searches.  Each search resumes one character after the start of
   the previous match, so overlapping matches are all counted. *)
let count re s =
  let rec scan from found =
    let hit =
      try Some (Str.search_forward re s from) with Not_found -> None
    in
    match hit with
    | Some pos -> scan (pos + 1) (found + 1)
    | None -> found
  in
  scan 0 0

(* Main routine.  The sequence is cut into consecutive chunks and one child
   process is forked per chunk.  Each child counts the matches of every
   variant in its chunk and measures the chunk's length after the IUPAC
   substitutions, then sends the numbers back through a pipe.  The parent
   adds up the results of all children and prints the report. *)
let () =
  (* Every variant matches exactly 8 characters.  Each chunk but the first is
     therefore extended by the 7 characters preceding it, so that a match
     straddling a chunk boundary is seen, and seen by one child only. *)
  let overlap = 7 in
  (* A chunk must be at least [overlap] characters long, otherwise the
     offsets computed below become negative and String.sub raises
     Invalid_argument.  Checking that with [assert] is not enough, since
     assertions are removed when compiling with -noassert; instead fewer
     workers are used on short inputs.  Inputs of at least
     [overlap * workers] characters are split exactly as before. *)
  let n_workers = max 1 (min workers (code_length / overlap)) in
  let chunk_size = code_length / n_workers
  and rem = code_length mod n_workers in
  (* [spawn i] forks the child in charge of chunk [i] and returns, in the
     parent, the channel from which that child's results can be read. *)
  let spawn i =
    let delta = if i > 0 then overlap else 0 in
    (* The first [rem] chunks are one character longer than the others *)
    let lo = i * chunk_size + min i rem - delta in
    let len = chunk_size + (if i < rem then 1 else 0) + delta in
    let input, output = Unix.pipe () in
    match Unix.fork () with
    | 0 ->
      Unix.close input;
      let output = Unix.out_channel_of_descr output in
      let chunk = String.sub dna lo len in
      (* First all the regexps, counted on the chunk including its overlap... *)
      Array.iter
        (fun re -> output_binary_int output (count re chunk))
        variants;
      (* ...and then all the IUPAC replacements.  Every IUPAC pattern is a
         single character, so the substitutions can be run on the chunk
         without its overlap prefix; no in-place modification of the string
         is needed to keep the prefix out of the result. *)
      let expanded =
        Array.fold_left
          (fun acc (re, s) -> Str.global_replace re s acc)
          (String.sub chunk delta (len - delta))
          iupacs
      in
      output_binary_int output (String.length expanded);
      (* [exit] flushes the channel before the child terminates *)
      exit 0
    | _ ->
      Unix.close output;
      Unix.in_channel_of_descr input
  in
  let w = Array.init n_workers spawn in
  let counts_variants = Array.make (Array.length variants) 0
  and count_iupac = ref 0 in
  (* Results are read in the order the children wrote them: one integer per
     variant, followed by the length after the IUPAC substitutions. *)
  Array.iter
    (fun input ->
      for k = 0 to Array.length counts_variants - 1 do
        counts_variants.(k) <- counts_variants.(k) + input_binary_int input
      done;
      count_iupac := !count_iupac + input_binary_int input)
    w;
  Array.iteri
    (fun i re -> Printf.printf "%s %i\n" re counts_variants.(i))
    variants_to_string;
  Printf.printf "\n%i\n%i\n%i\n%!" file_length code_length !count_iupac

 make, command-line, and program output logs

Fri, 13 Sep 2013 16:56:27 GMT

MAKE:
mv regexdna.ocaml-4.ocaml regexdna.ocaml-4.ml
/usr/local/bin/ocamlopt -noassert -unsafe -fno-PIC -nodynlink -inline 100 -fno-PIC unix.cmxa str.cmxa regexdna.ocaml-4.ml -o regexdna.ocaml-4.ocaml_run
File "regexdna.ocaml-4.ml", line 1:
Warning 24: bad source file name: "Regexdna.ocaml-4" is not a valid module name.
rm regexdna.ocaml-4.ml
0.24s to complete and log all make actions

COMMAND LINE:
./regexdna.ocaml-4.ocaml_run 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play