The Computer Language
Benchmarks Game

k-nucleotide F# .NET Core #3 program

source code

(* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
 *
 * Contributed by Don Syme
 * Based on C# contribution by Isaac Gouy, Antti Lankila, A.Nahr, The Anh Tran
 * Uses one byte array rather than strings.
 *)

open System
open System.IO
open System.Collections.Generic
open System.Text
open System.Threading

let input = Console.In
let toBytes (s: string) = s.ToCharArray() |> Array.map byte
let toString (s: byte []) = String(s |> Array.map char)
let prefixes = [| "ggt"; "ggta"; "ggtatt"; "ggtattttaatt"; "ggtattttaatttatagt" |]

let prefixBytes = 
    [| for p in prefixes -> toBytes p |]

let prefixLengths = 
    [| for p in prefixBytes -> p.Length |]

let prefixOffsets = Array.scan (+) 0 prefixLengths
let dnaStart = prefixOffsets.[prefixLengths.Length]

let dna = 
    seq { while true do yield input.ReadLine() }
    |> Seq.takeWhile (fun x -> x <> null)
    |> Seq.skipWhile (fun x -> not (x.StartsWith(">THREE")))
    |> Seq.skip 1
    |> String.concat ""
    |> toBytes
    |> Array.append (Array.concat prefixBytes)

let keyHash (k, n) = 
    let mutable h = 0
    for i in 0..n - 1 do
        h <- h * 31 + int dna.[k + i]
    h

let keyText (k, n) = toString(dna.[k..k + n - 1]).ToUpper()

type KeyData = 
    { mutable count: int
      key: int * int }

let generateFrequencies(frameSize) = 
    let freq = new Dictionary<int, KeyData>()
    let mutable v = Unchecked.defaultof<KeyData>
    for i in dnaStart..dna.Length - frameSize do
        let h = keyHash (i, frameSize)
        if freq.TryGetValue(h, &v) then v.count <- v.count + 1
        else freq.[h] <- { count = 1; key = (i, frameSize) }
    freq

let writeCount(n) = 
    let freq = generateFrequencies(prefixLengths.[n])
    let hash = keyHash(prefixOffsets.[n], prefixLengths.[n])
    
    let count = 
        if freq.ContainsKey(hash) then freq.[hash].count
        else 0
    String.Format("{0}\t{1}", count, prefixes.[n].ToUpper())

type Pair = KeyValuePair<int, KeyData>

let writeFrequencies(nucleotideLength) = 
    let freq = generateFrequencies(nucleotideLength)
    let items = new ResizeArray<Pair>(freq)
    items.Sort(fun (kv1: Pair) (kv2: Pair) -> 
        let c = kv2.Value.count - kv1.Value.count
        if c = 0 then keyText(kv1.Value.key).CompareTo(keyText(kv2.Value.key))
        else c)
    let buf = new StringBuilder()
    let sum = dna.Length - dnaStart - nucleotideLength + 1
    for element in items do
        let percent = double element.Value.count * 100.0 / double sum
        buf.AppendFormat("{0} {1:f3}\n", keyText(element.Value.key), percent) |> ignore
    buf.ToString()

Async.Parallel [ yield async { return writeFrequencies(1) }
                 yield async { return writeFrequencies(2) }
                 for i in 0..prefixes.Length - 1 do
                     yield async { return writeCount(i) } ]
|> Async.RunSynchronously
|> Array.iter Console.Out.WriteLine

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
2.0.0-preview1-005977
"System.GC.Server": true


Fri, 09 Jun 2017 16:49:56 GMT

MAKE:
cp knucleotide.fsharpcore-3.fsharpcore Program.fs
cp Include/fsharpcore/tmp.fsproj .
cp Include/fsharpcore/runtimeconfig.template.json .
mkdir obj
cp Include/fsharpcore/project.assets.json ./obj
cp Include/fsharpcore/tmp.fsproj.nuget.g.props ./obj
cp Include/fsharpcore/tmp.fsproj.nuget.g.targets ./obj
/usr/bin/dotnet build -c Release
Microsoft (R) Build Engine version 15.3.117.23532
Copyright (C) Microsoft Corporation. All rights reserved.

/home/dunham/.nuget/packages/fsharp.net.sdk/1.0.5/build/FSharp.NET.Core.Sdk.targets(170,9): error MSB6006: "dotnet" exited with code 131. [/home/dunham/benchmarksgame_quadcore/knucleotide/tmp/tmp.fsproj]

Build FAILED.

/home/dunham/.nuget/packages/fsharp.net.sdk/1.0.5/build/FSharp.NET.Core.Sdk.targets(170,9): error MSB6006: "dotnet" exited with code 131. [/home/dunham/benchmarksgame_quadcore/knucleotide/tmp/tmp.fsproj]
    0 Warning(s)
    1 Error(s)

Time Elapsed 00:00:02.82
/home/dunham/benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:189: recipe for target 'knucleotide.fsharpcore-3.fsharpcore_run' failed
make: [knucleotide.fsharpcore-3.fsharpcore_run] Error 1 (ignored)

3.31s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp2.0/tmp.dll 0 < knucleotide-input250000.txt

PROGRAM FAILED 


PROGRAM OUTPUT:

No executable found matching command "dotnet-./bin/Release/netcoreapp2.0/tmp.dll"