/mobile Handheld Friendly website
x64 Ubuntu : Intel® Q6600® one core |
Each table row shows performance measurements for this Erlang HiPE program with a particular command-line input value N.
| N | CPU secs | Elapsed secs | Memory KB | Code B | ≈ CPU Load |
|---|---|---|---|---|---|
| 50,000 | 0.19 | 0.20 | ? | 1198 | 0% 0% 0% 100% |
| 500,000 | 0.77 | 0.78 | 36,208 | 1198 | 0% 3% 0% 100% |
| 5,000,000 | 6.99 | 7.00 | 237,632 | 1198 | 0% 0% 0% 100% |
Read the ↓ make, command line, and program output logs to see how this program was run.
Read regex-dna benchmark to see what this program should do.
Erlang R16B (erts-5.10.1) [source] [64-bit] [async-threads:10] [hipe] [kernel-poll:false]
The program rewrites the regular expressions as string variants and then uses sub-string search and replace (ala Boyer-More and Aho-Corassick).
% The Computer Language Benchmarks Game % http://benchmarksgame.alioth.debian.org/ % Contributed by: Hynek Vychodil 2010 % Inspired by regex-dna Erlang HiPE #5 program % by Sergei Matusevich 2007 and Thanassis Avgerinos 2009 % Main changes: % 1/ Very fast Port line input instead stdio (~5x) % 2/ Faster IUB code alternatives explicit expansion % using binary instead lists (~5x) % 3/ Precompile regexps in data loading phase % 4/ Simpler dispatch and result join code % 5/ Use binary:matches for pattern match -module(regexdna). -compile([native, {hipe, [o3]}]). -export([main/0, main/1]). main(_) -> main(). main() -> do(), halt(). do() -> S = self(), Worker = spawn_link(fun () -> work(S) end), Worker ! {data, read()}, receive finish -> ok end. work(Master) -> S = self(), Patterns = [{Pat, compile_pattern(Pat)} || Pat <- patterns()], {RawSize, [B3, B2, B1 | _]} = receive {data, Data} -> Data end, [L1, L2, L3] = L = [size(X) || X <- [B1, B2, B3]], Size = lists:sum(L), PIDS = [{spawn_link(matcher(S, B2, B3, MR)), printer(Pat)} || {Pat, MR} <- Patterns], ExpandedSize = L1 + L3 + size(expand(B2, L2, 0, <<>>)), results(PIDS), io:format("~n~b~n~b~n~b~n", [RawSize, Size, ExpandedSize]), Master ! finish. expand(B, S, I, R) when I < S -> case B of <<_:I/binary, $B, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(c|g|t)">>); <<_:I/binary, $D, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|g|t)">>); <<_:I/binary, $H, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|c|t)">>); <<_:I/binary, $K, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(g|t)">>); <<_:I/binary, $M, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|c)">>); <<_:I/binary, $N, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|c|g|t)">>); <<_:I/binary, $R, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|g)">>); <<_:I/binary, $S, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(c|g)">>); <<_:I/binary, $V, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|c|g)">>); <<_:I/binary, $W, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(a|t)">>); <<_:I/binary, $Y, _/binary>> -> expand(B, S, I + 1, <<R/binary, "(c|t)">>); <<_:I/binary, X, _/binary>> -> expand(B, S, I + 1, <<R/binary, X>>) end; expand(_, _, _, R) -> R. matcher(S, B2, B3, MR) -> fun () -> S ! {self(), countMatches(B2, MR) + countMatches(B3, MR)} end. printer(Pat) -> fun (Num) -> io:format("~s ~b~n", [Pat, Num]) end. countMatches(Data, RE) -> length(binary:matches(Data,RE)). results([{PID, Fin} | R]) -> receive {PID, Ret} -> Fin(Ret), results(R) end; results([]) -> ok. patterns() -> ["agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]", "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct", "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct", "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct", "agggtaa[cgt]|[acg]ttaccct"]. read() -> Port = open_port({fd, 0, 1}, [in, binary, {line, 256}]), read(Port, 0, [], []). read(Port, Size, Seg, R) -> receive {Port, {data, {eol, <<$>:8, _/binary>> = Line}}} -> read(Port, Size + size(Line) + 1, [], [iolist_to_binary(lists:reverse(Seg, [])) | R]); {Port, {data, {eol, Line}}} -> read(Port, Size + size(Line) + 1, [Line | Seg], R); {'EXIT', Port, normal} -> {Size, [list_to_binary(lists:reverse(Seg, [])) | R]}; Other -> io:format(">>>>>>> Wrong! ~p~n", [Other]), exit(bad_data) end. compile_pattern(RE) -> binary:compile_pattern( lists:append([multbin(X) || X<-string:tokens(RE, "|")]) ). multbin(X) -> [list_to_binary(Y) || Y <- mult(X, [])]. mult([$[|R], P) -> {C, Suffix} = var(R, []), [ lists:reverse(P, [X|Y]) || X<-C, Y<-mult(Suffix, [])]; mult([X|R], P) -> mult(R, [X|P]); mult([], P) -> [lists:reverse(P)]. var([$]|R], C) -> {C, R}; var([X|R], C) -> var(R, [X|C]).
Tue, 05 Mar 2013 04:32:16 GMT
MAKE:
mv regexdna.hipe-7.hipe regexdna.erl
/usr/local/src/otp_src_R16B_nosmp/bin/erlc +native +"{hipe, [o3]}" regexdna.erl
2.74s to complete and log all make actions
COMMAND LINE:
/usr/local/src/otp_src_R16B_nosmp/bin/erl -smp disable -noshell -run -noinput -run regexdna main 0 < regexdna-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
66800214