The Computer Language
Benchmarks Game

regex-redux OCaml #2 program

source code

(* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * regex-dna program contributed by Christophe TROESTLER
 * converted from regex-dna program
 *)

open Printf

let variants = ["agggtaaa\\|tttaccct";          "[cgt]gggtaaa\\|tttaccc[acg]";
                "a[act]ggtaaa\\|tttacc[agt]t";  "ag[act]gtaaa\\|tttac[agt]ct";
                "agg[act]taaa\\|ttta[agt]cct";  "aggg[acg]aaa\\|ttt[cgt]ccct";
                "agggt[cgt]aa\\|tt[acg]accct";  "agggta[cgt]a\\|t[acg]taccct";
                "agggtaa[cgt]\\|[acg]ttaccct"]

(* Remove the "\\" which is mandatory in OCaml regex. *)
let re_bs = Str.regexp_string "\\"
let to_string = Str.global_replace re_bs ""

let subst = ["tHa[Nt]", "<4>"; "aND\\|caN\\|Ha[DS]\\|WaS", "<3>";
             "a[NSt]\\|BY", "<2>"; "<[^>]*>", "|"; "|[^|][^|]*|", "-"] 

(* Read all of a redirected FASTA format file from stdin. *)
let file_data, file_length =
  let b = Buffer.create 0xFFFF and s = String.create 0xFFF and r = ref 1 in
  while !r > 0 do
    r := input stdin s 0 0xFFF;   Buffer.add_substring b s 0 !r
  done;
  Buffer.contents b, Buffer.length b

(* Remove FASTA sequence descriptions and all linefeed characters.  *)
let dna = Str.global_replace (Str.regexp "^>.*$\\|\n") "" file_data
let code_length = String.length dna

(* Count matches of [re]. *)
let count re s =
  let re = Str.regexp_case_fold re and i = ref 0 and n = ref 0 in
  try
    while true do i := 1 + Str.search_forward re s !i; incr n done; assert false
  with Not_found -> !n


let () =
  List.iter (fun re -> printf "%s %i\n" (to_string re) (count re dna)) variants;
  let b = ref dna in
  List.iter (fun (re, s) ->
               b := Str.global_replace (Str.regexp re) s !b) subst;
  printf "\n%i\n%i\n%i\n" file_length code_length (String.length !b)
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
The OCaml native-code compiler, version 4.06.0


Sun, 05 Nov 2017 22:27:15 GMT

MAKE:
mv regexredux.ocaml-2.ocaml regexredux.ocaml-2.ml
/opt/src/ocaml-4.06.0/bin/ocamlopt -noassert -unsafe -fPIC -nodynlink -inline 100 -O3 unix.cmxa str.cmxa regexredux.ocaml-2.ml -o regexredux.ocaml-2.ocaml_run
File "regexredux.ocaml-2.ml", line 25, characters 39-52:
Warning 3: deprecated: String.create
Use Bytes.create instead.
File "regexredux.ocaml-2.ml", line 27, characters 57-58:
Error: This expression has type bytes but an expression was expected of type
         string
/home/dunham/benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:417: recipe for target 'regexredux.ocaml-2.ocaml_run' failed
make: [regexredux.ocaml-2.ocaml_run] Error 2 (ignored)
rm regexredux.ocaml-2.ml

0.04s to complete and log all make actions

COMMAND LINE:
./regexredux.ocaml-2.ocaml_run 0 < regexredux-input50000.txt

MAKE ERROR