performance measurements

Each table row shows performance measurements for this OCaml program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,0000.620.3021,0401789  65% 37% 80% 43%
2,500,0006.002.40228,0721789  36% 37% 88% 93%
25,000,000Failed  1789   

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

The OCaml native-code compiler, version 4.01.0

 k-nucleotide OCaml #3 program source code

(* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * contributed by Troestler Christophe
 * modified by Mauricio Fernandez
 * optimized by Fabrice Le Fessant
 *)

let tab = Array.create 256 0
let _ =
  tab.(Char.code 'A') <- 0;
  tab.(Char.code 'a') <- 0;
  tab.(Char.code 'T') <- 1;
  tab.(Char.code 't') <- 1;
  tab.(Char.code 'C') <- 2;
  tab.(Char.code 'c') <- 2;
  tab.(Char.code 'g') <- 3;
  tab.(Char.code 'G') <- 3

let uppercase line =
  let len = String.length line in
    for i = 0 to len- 1 do
      let c =  line.[i] in
	line.[i] <- Char.unsafe_chr tab.(Char.code c)
    done

    (* Extract DNA sequence "THREE" from stdin *)
    let dna =
      let is_not_three s = String.length s < 6 || String.sub s 0 6 <> ">THREE" in
	while is_not_three(input_line stdin) do () done;
	let buf = Buffer.create 130_000_000 in
	  (* Skip possible comment *)
	  (try
	     while true do
	       let line = input_line stdin in
		 if line.[0] <> ';' then begin
		   uppercase line;
		   Buffer.add_string buf line;
		   raise Exit
		 end
	     done with _ -> ());
	  (* Read the DNA sequence *)
	  (try while true do
	     let line = input_line stdin in
	       if line.[0] = '>' then raise End_of_file;
	       uppercase line;
	       Buffer.add_string buf line
	   done with End_of_file -> ());
	  Buffer.contents buf


    module K15 = struct
      type t = int
      let equal k1 k2 = k1 = k2
      let hash n = n
    end

    module K16 = struct
      type t = int * int
      let equal (a1,a2) (b1,b2) = a1 = b1 && a2 = b2
      let hash (a1, _) = a1
    end

    type entry = {
      mutable count : int;
    }

    let threshold15 =
      match Sys.word_size with
	  32 -> 15
	| 64 -> 31
	| _ -> assert false
    let threshold16 = threshold15 + 1

    let c = 0x40000-1
    module H15 = Hashtbl.Make(K15)
    module H16 = Hashtbl.Make(K16)
    let h15 = H15.create c
    let h16 = H16.create c

    let rec pack_word n k h =
      let b = Char.code dna.[n] in
      let h = h * 4 + b in
	if k > 1 then
	  pack_word (n+1) (k-1) h
	else h

    let pack15 k n =
      pack_word n k 0

    let pack16 k n =
      let h1 = pack_word n threshold15 0 in
      let h2 = pack_word (n+ threshold15) (k- threshold15) 0 in
	(h1, h2)

    let rec pack_word_in dna n k h =
      let b = dna.[n] in
      let b = tab.(Char.code b) in
      let h = h * 4 + b in
	if k > 1 then
	  pack_word_in dna (n+1) (k-1) h
	else h

    let pack_key15 seq =
      let k = String.length seq in
	pack_word_in seq 0 k 0

    let pack_key16 seq =
      let k = String.length seq in
      let h1 = pack_word_in seq 0 threshold15 0 in
      let h2 = pack_word_in seq threshold15 (k- threshold15) 0 in
	(h1, h2)

    let char = [| 'A'; 'T'; 'C'; 'G' |]

    let rec unpack h s pos k =
      let pos = pos - 1 in
	s.[pos] <- char.(h land 3);
	if k > 1 then
	  unpack (h lsr 2) s pos (k-1)

    let unpack15 k h1 =
      let s = String.create k in
	unpack h1 s k k;
	s

    let unpack16 k (h1, h2) =
      let s = String.create k in
	unpack h1 s threshold15 threshold15;
	unpack h2 s k (k- threshold15);
	s

    let count15 k =
      for i = 0 to String.length dna - k - 1 do
	let packed = pack15 k i in
	  try
	    let key = H15.find h15 packed in
	      key.count <- key.count + 1
	  with Not_found ->
	    H15.add h15 packed { count = 1 }
      done

    let count16 k =
      for i = 0 to String.length dna - k - 1 do
	let packed = pack16 k i in
	  try
	    let key = H16.find h16 packed in
	      key.count <- key.count + 1
	  with Not_found ->
	    H16.add h16 packed { count = 1 }
      done

    let count k =
      if k < threshold16 then count15 k else count16 k

    let compare_freq ((k1:string),(f1:float)) (k2, f2) =
      if f1 > f2 then -1 else if f1 < f2 then 1 else String.compare k1 k2

    let write_frequencies15 k =
      count15 k;
      let tot = float(H15.fold (fun _ n t -> n.count + t) h15 0) in
      let frq =
	H15.fold (fun h n l ->
		  (unpack15 k h, 100. *. float n.count /. tot) :: l) h15 [] in
      let frq = List.sort compare_freq frq in
	String.concat ""
	  (List.map (fun (k,f) -> Printf.sprintf "%s %.3f\n" k f) frq)

    let write_frequencies16 k =
      count16 k;
      let tot = float(H16.fold (fun _ n t -> n.count + t) h16 0) in
      let frq =
	H16.fold (fun h n l ->
		  (unpack16 k h, 100. *. float n.count /. tot) :: l) h16 [] in
      let frq = List.sort compare_freq frq in
	String.concat ""
	  (List.map (fun (k,f) -> Printf.sprintf "%s %.3f\n" k f) frq)

    let write_count15 k seq =
	count15 k;
	Printf.sprintf "%d\t%s" (try (H15.find h15 (pack_key15 seq)).count with Not_found -> 0) seq

    let write_count16 k seq =
	count16 k;
	Printf.sprintf "%d\t%s" (try (H16.find h16 (pack_key16 seq)).count with Not_found -> 0) seq

    let write_frequencies k =
	if k < threshold16 then write_frequencies15 k
	else write_frequencies16 k

    let write_count seq =
      let k = String.length seq in
	if k < threshold16 then write_count15 k seq
	else write_count16 k seq

    type t = Size of int | Dna of string

let invoke (f : t -> string) x : unit -> string =
  let input, output = Unix.pipe() in
  match Unix.fork() with
  | -1 -> Unix.close input; Unix.close output; (let v = f x in fun () -> v)
  | 0 ->
      Unix.close input;
      let output = Unix.out_channel_of_descr output in
        Marshal.to_channel output (f x) [];
        close_out output;
        exit 0
  | pid ->
      Unix.close output;
      let input = Unix.in_channel_of_descr input in fun () ->
        let v = Marshal.from_channel input in
        ignore (Unix.waitpid [] pid);
        close_in input;
	v

let parallelize f l =
  let list = List.map (invoke f) (List.rev l) in
  List.iter (fun g -> print_endline (g ())) (List.rev list)

let () =
  parallelize
    (fun i ->
       match i with
	   Size i ->
             write_frequencies i
	 | Dna k ->
             write_count k
    ) [Size 1;
       Size 2;
       Dna "GGT";
       Dna "GGTA";
       Dna "GGTATT";
       Dna "GGTATTTTAATT";
       Dna "GGTATTTTAATTTATAGT"]

 make, command-line, and program output logs

Thu, 12 Sep 2013 18:56:44 GMT

MAKE:
mv knucleotide.ocaml-3.ocaml knucleotide.ocaml-3.ml
/usr/local/bin/ocamlopt -noassert -unsafe -nodynlink -inline 100 unix.cmxa knucleotide.ocaml-3.ml -o knucleotide.ocaml-3.ocaml_run
File "knucleotide.ocaml-3.ml", line 1:
Warning 24: bad source file name: "Knucleotide.ocaml-3" is not a valid module name.
rm knucleotide.ocaml-3.ml
0.41s to complete and log all make actions

COMMAND LINE:
./knucleotide.ocaml-3.ocaml_run 0 < knucleotide-input25000000.txt

PROGRAM FAILED 


PROGRAM OUTPUT:

Fatal error: exception Failure("Buffer.add: cannot grow buffer")

Revised BSD license

  Home   Conclusions   License   Play