performance measurements

Each table row shows performance measurements for this Erlang HiPE program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
50,0001.311.338,6441058  1% 0% 1% 100%
500,00011.4711.5024,9801058  0% 0% 0% 100%
5,000,000114.18114.22145,4761058  0% 0% 0% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

Erlang R16B (erts-5.10.1) [source] [async-threads:10] [hipe] [kernel-poll:false]

 regex-dna Erlang HiPE #6 program source code

% The Computer Language Benchmarks Game
% http://benchmarksgame.alioth.debian.org/
% Contributed by: Hynek Vychodil 2009
% Inspired by regex-dna Erlang HiPE #5 program
%    by Sergei Matusevich 2007 and Thanassis Avgerinos 2009

% Main changes:
%   1/ Very fast Port line input instead stdio (~5x)
%   2/ Faster IUB code alternatives explicit expansion
%      using binary instead lists (~5x)
%   3/ Precompile regexps in data loading phase
%   4/ Simpler dispatch and result join code

% Note: re module is damn slow. Boyer-Moore like binary matcher
% written in Erlang should be magnitude faster (HiPE of course).

-module(regexdna).

-compile([native, {hipe, [o3]}]).

-export([main/1]).

main(_) -> main().

main() -> do(), halt().

do() ->
    S = self(),
    Worker = spawn_link(fun () -> work(S) end),
    Worker ! {data, read()},
    receive finish -> ok end.

work(Master) ->
    S = self(),
    Patterns = [{Pat, re:compile(Pat, [caseless])}
		|| Pat <- patterns()],
    {RawSize, [B3, B2, B1 | _]} = receive
				    {data, Data} -> Data
				  end,
    [L1, L2, L3] = L = [size(X) || X <- [B1, B2, B3]],
    Size = lists:sum(L),
    PIDS = [{spawn_link(matcher(S, B2, B3, MR)),
	     printer(Pat)}
	    || {Pat, {ok, MR}} <- Patterns],
    ExpandedSize = L1 + L3 + size(expand(B2, L2, 0, <<>>)),
    results(PIDS),
    io:format("~n~b~n~b~n~b~n",
	      [RawSize, Size, ExpandedSize]),
    Master ! finish.

expand(B, S, I, R) when I < S ->
    case B of
      <<_:I/binary, $B, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(c|g|t)">>);
      <<_:I/binary, $D, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|g|t)">>);
      <<_:I/binary, $H, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|c|t)">>);
      <<_:I/binary, $K, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(g|t)">>);
      <<_:I/binary, $M, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|c)">>);
      <<_:I/binary, $N, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|c|g|t)">>);
      <<_:I/binary, $R, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|g)">>);
      <<_:I/binary, $S, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(c|g)">>);
      <<_:I/binary, $V, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|c|g)">>);
      <<_:I/binary, $W, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(a|t)">>);
      <<_:I/binary, $Y, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, "(c|t)">>);
      <<_:I/binary, X, _/binary>> ->
	  expand(B, S, I + 1, <<R/binary, X>>)
    end;
expand(_, _, _, R) -> R.

matcher(S, B2, B3, MR) ->
    fun () ->
	    S !
	      {self(), countMatches(B2, MR) + countMatches(B3, MR)}
    end.

printer(Pat) ->
    fun (Num) -> io:format("~s ~b~n", [Pat, Num]) end.

countMatches(Data, RE) ->
    case re:run(Data, RE, [global]) of
      {match, M} -> length(M);
      nomatch -> 0
    end.

results([{PID, Fin} | R]) ->
    receive {PID, Ret} -> Fin(Ret), results(R) end;
results([]) -> ok.

patterns() ->
    ["agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]",
     "a[act]ggtaaa|tttacc[agt]t",
     "ag[act]gtaaa|tttac[agt]ct",
     "agg[act]taaa|ttta[agt]cct",
     "aggg[acg]aaa|ttt[cgt]ccct",
     "agggt[cgt]aa|tt[acg]accct",
     "agggta[cgt]a|t[acg]taccct",
     "agggtaa[cgt]|[acg]ttaccct"].

read() ->
    Port = open_port({fd, 0, 1}, [in, binary, {line, 256}]),
    read(Port, 0, [], []).

read(Port, Size, Seg, R) ->
    receive
      {Port, {data, {eol, <<$>:8, _/binary>> = Line}}} ->
	  read(Port, Size + size(Line) + 1, [],
	       [iolist_to_binary(lists:reverse(Seg, [])) | R]);
      {Port, {data, {eol, Line}}} ->
	  read(Port, Size + size(Line) + 1, [Line | Seg], R);
      {'EXIT', Port, normal} ->
	  {Size, [iolist_to_binary(lists:reverse(Seg, [])) | R]};
      Other ->
	  io:format(">>>>>>> Wrong! ~p~n", [Other]),
	  exit(bad_data)
    end.

 make, command-line, and program output logs

Mon, 04 Mar 2013 22:32:56 GMT

MAKE:
mv regexdna.hipe-6.hipe regexdna.erl
/usr/local/src/otp_src_R16B_nosmp/bin/erlc +native +"{hipe, [o3]}"  regexdna.erl
3.27s to complete and log all make actions

COMMAND LINE:
/usr/local/src/otp_src_R16B_nosmp/bin/erl -smp disable -noshell -run -noinput -run regexdna main 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play