performance measurements

Each table row shows performance measurements for this F# Mono program with a particular command-line input value N.

 N          CPU secs  Elapsed secs  Memory KB  Code B  ≈ CPU Load
 50,000         3.75          3.75     46,320     870  1% 3% 1% 100%
 500,000       10.07         10.08    226,196     870  0% 0% 1% 100%
 5,000,000     71.06         71.09  2,445,508     870  0% 1% 1% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

F# Compiler for F# 3.1 (Open Source Edition)

Mono JIT compiler version 3.8.1 (master/0322d96 Wed Aug 13 11:49:57 PDT 2014)
LLVM: yes(3.4svn-mono-mono/e656cac)
GC: sgen

 regex-dna F# Mono #3 program source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// Modified version of Valentin Kraevskiy
// Contributed by Vassil Keremidchiev

open System.Text.RegularExpressions
open System.Threading

// Builds a Regex for the pattern `s` with RegexOptions.Compiled set.
let regex (s: string) = new Regex(s, RegexOptions.Compiled)
// Read all of standard input up front.
let input = System.Console.In.ReadToEnd ()

// Drop every line that starts at a '>' (through its terminating newline) ...
let withoutComments =
    let headerLine = regex ">.*\n"
    headerLine.Replace (input, "")

// ... then remove the remaining newlines so the sequence is one contiguous string.
let text =
    let lineBreak = regex "\n"
    lineBreak.Replace (withoutComments, "")

let textSize = String.length text
// Chunk length used when the text is split for the pattern-counting phase.
let blockSize = textSize / 2

// Splits a string into consecutive blocks of at most `blockSize` characters.
// Each block after the first starts `overlapSize` characters before the end of
// the previous one, so neighbouring blocks share `overlapSize` characters.
//
//   overlapSize - number of characters shared by adjacent blocks
//   blockSize   - maximum length of a block
//   returns     - a function from the input string to its list of blocks, in
//                 order; the empty string yields the empty list
//
// If `blockSize <= overlapSize` the window cannot advance (the original code
// recursed forever or threw from Substring in that case), so the remaining
// string is returned as a single block instead.
let onblocks overlapSize blockSize =
    // Distance between the starts of two consecutive blocks.
    let stride = blockSize - overlapSize
    // Blocks are consed onto `acc` and reversed once at the end rather than
    // appended with `@`, which would copy the accumulator on every step.
    let rec loop acc (s: string) =
        if s.Length = 0 then List.rev acc
        elif s.Length <= blockSize || stride <= 0 then List.rev (s :: acc)
        else loop (s.Substring(0, blockSize) :: acc) (s.Substring stride)
    loop []

// Splitter that cuts a string into non-overlapping blocks sized so that the
// full text yields at most one block per reported processor.
let onProcBlocks =
    let perProcessor = (textSize / System.Environment.ProcessorCount) + 1
    onblocks 0 perProcessor

// The nine patterns to count. Each is an 8-character motif alternated with its
// reverse complement; all but the first widen one position to a class of three bases.
let DNAcodes =
    [ "agggtaaa|tttaccct";
      "[cgt]gggtaaa|tttaccc[acg]";
      "a[act]ggtaaa|tttacc[agt]t";
      "ag[act]gtaaa|tttac[agt]ct";
      "agg[act]taaa|ttta[agt]cct";
      "aggg[acg]aaa|ttt[cgt]ccct";
      "agggt[cgt]aa|tt[acg]accct";
      "agggta[cgt]a|t[acg]taccct";
      "agggtaa[cgt]|[acg]ttaccct" ]

// Count the matches of every pattern in every chunk of the text, in parallel.
// The result holds one (pattern, count) pair per (pattern, chunk) combination;
// the per-pattern totals are obtained by summing the pairs with the same pattern.
let chunksCounts =
    // Every pattern in DNAcodes matches exactly 8 characters, so adjacent chunks
    // must share exactly 7: enough for a match straddling a cut to fall entirely
    // inside the later chunk, but too little for any match to fit inside the
    // shared region and be counted twice. (Using the length of the pattern text,
    // 16-24 characters, as the overlap made such double counting possible.)
    let matchLength = 8
    let overlap = matchLength - 1

    // The overlap no longer depends on the pattern, so the text is split once.
    // Texts too short to be split with this overlap are scanned as one chunk.
    let blocks =
        if blockSize > overlap then onblocks overlap blockSize text
        else [text]

    let chunkedMatch (matchStr:string) =
        // Compile each pattern once and share it across its chunks; Regex
        // instances are documented as immutable and thread safe.
        let re = regex matchStr
        blocks |> List.map (fun t -> async { return matchStr, re.Matches(t).Count })

    DNAcodes |> List.collect chunkedMatch |> Async.Parallel |> Async.RunSynchronously

// For each pattern, in DNAcodes order, add up the counts of all its chunks
// and print "<pattern> <total>".
for key in DNAcodes do
    let total =
        chunksCounts
        |> Array.filter (fun (pattern, _) -> pattern = key)
        |> Array.fold (fun acc (_, cnt) -> acc + cnt) 0
    printfn "%s %i" key total
  
// Replaces each single-letter code below with its parenthesised alternation,
// one code after another in the listed order, and returns the length of the
// resulting string.
let lengthAfterReplace text =
    let substitutions =
        [ "B", "(c|g|t)"
          "D", "(a|g|t)"
          "H", "(a|c|t)"
          "K", "(g|t)"
          "M", "(a|c)"
          "N", "(a|c|g|t)"
          "R", "(a|g)"
          "S", "(c|g)"
          "V", "(a|c|g)"
          "W", "(a|t)"
          "Y", "(c|t)" ]
    // One full regex pass per code; each pass works on the previous pass's output.
    let expand (current: string) (code: string, alt: string) =
        (regex code).Replace (current, alt)
    substitutions
    |> List.fold expand text
    |> String.length

// Total length of the text after code expansion: the text is cut into
// per-processor blocks, each block is expanded in parallel, and the
// resulting lengths are added up.
let replacedSize =
    let jobs =
        [ for chunk in onProcBlocks text ->
            async { return lengthAfterReplace chunk } ]
    jobs
    |> Async.Parallel
    |> Async.RunSynchronously
    |> Array.sum

// Final report: a blank line, then the raw input length, the length of the
// cleaned sequence, and the length after code expansion, one per line.
let summary = sprintf "\n%i\n%i\n%i\n" input.Length textSize replacedSize
stdout.Write(summary)

 make, command-line, and program output logs

Thu, 14 Aug 2014 14:14:28 GMT

MAKE:
mv regexdna.fsharp-3.fsharp regexdna.fsharp-3.fs
/usr/local/bin/fsharpc --target:exe --platform:x86 -O  -o regexdna.fsharp-3.fsharp_run.exe regexdna.fsharp-3.fs
F# Compiler for F# 3.1 (Open Source Edition)
Freely distributed under the Apache 2.0 Open Source License

/home/dunham/benchmarksgame_onecore/regexdna/tmp/regexdna.fsharp-3.fs(7,1): warning FS0221: The declarations in this file will be placed in an implicit module 'Regexdna.fsharp-3' based on the file name 'regexdna.fsharp-3.fs'. However this is not a valid F# identifier, so the contents will not be accessible from other files. Consider renaming the file or adding a 'module' or 'namespace' declaration at the top of the file.
rm regexdna.fsharp-3.fs
4.11s to complete and log all make actions

COMMAND LINE:
/usr/local/bin/mono --llvm --gc=sgen regexdna.fsharp-3.fsharp_run.exe 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play