/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this Scala program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

java version "1.8.0"
Java(TM) SE Runtime Environment (build 1.8.0-b132)
Java HotSpot(TM) 64-Bit Server VM (build 25.0-b70, mixed mode)

Scala compiler version 2.10.3 -- Copyright 2002-2013, LAMP/EPFL

 regex-dna Scala #5 program source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/

   Contributed by Marceli Narcyz
*/

import scala.concurrent._
import scala.concurrent.duration._
import java.io.BufferedReader
import java.util.concurrent.Executors
import java.io.InputStreamReader
import java.nio.charset.Charset
import java.util.regex.Pattern

/** regex-dna benchmark program.
  *
  * Reads FASTA-formatted text from standard input, counts the matches of a
  * fixed set of regex variants in the sequence data, and computes how long
  * the sequence would become if every IUB ambiguity code were replaced by
  * its alternation pattern. The variant counts and the length computation
  * run concurrently on a fixed thread pool.
  */
object regexdna {
  /** IUB ambiguity codes. Must stay sorted: `calcNewSeqLength` binary-searches this array. */
  val newSeqChars = "BDHKMNRSVWY".toCharArray()

  /** Replacement pattern for each code in `newSeqChars`, at the same index. */
  val newSeqStrings = Array[String](
    "(c|g|t)", // B
    "(a|g|t)", // D
    "(a|c|t)", // H
    "(g|t)", // K
    "(a|c)", // M
    "(a|c|g|t)", // N
    "(a|g)", // R
    "(c|g)", // S
    "(a|c|g)", // V
    "(a|t)", // W
    "(c|t)" // Y
  )

  /** Regex variants whose occurrences are counted and reported, in output order. */
  val variants = Seq(
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]",
    "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct",
    "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct",
    "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct",
    "agggtaa[cgt]|[acg]ttaccct")

  /** Reads all of standard input and strips headers and line breaks.
    *
    * Lines starting with `>` are dropped; all other lines are concatenated
    * without their line terminators.
    *
    * @return a pair of (input length, cleaned sequence). The input length is
    *         the cleaned sequence length plus the length of every dropped
    *         header line plus one per line read, i.e. the original size
    *         assuming single-character line terminators.
    */
  private def readInput(): (Int, String) = {
    val sb = new StringBuilder(10000000)
    val r = new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset()))

    // Characters removed from the input: header lines plus one terminator per line.
    var commentLength = 0
    try {
      var line = r.readLine()
      while (line != null) {
        // startsWith (rather than charAt(0)) keeps an empty line from throwing.
        if (line.startsWith(">")) {
          commentLength += line.length + 1
        } else {
          sb.append(line)
          commentLength += 1
        }
        line = r.readLine()
      }
    } finally {
      r.close()
    }

    val result = sb.toString
    (result.length + commentLength, result)
  }

  /** Computes the length `sequence` would have after replacing every IUB
    * code with its pattern from `newSeqStrings`, without building the string.
    *
    * @param sequence the cleaned sequence
    * @return the sum over all characters of the replacement length, or 1 for
    *         characters that are not IUB codes
    */
  def calcNewSeqLength(sequence: String): Int = {
    // Deliberate while loop over primitives: this runs once per input
    // character, and indexing the string avoids copying it to a char array.
    val n = sequence.length
    var acc = 0
    var i = 0
    while (i < n) {
      val idx = java.util.Arrays.binarySearch(newSeqChars, sequence.charAt(i))
      acc += (if (idx >= 0) newSeqStrings(idx).length else 1)
      i += 1
    }
    acc
  }

  /** Counts the non-overlapping matches of `variant` in `sequence` using
    * `java.util.regex` directly.
    *
    * @param sequence the text to search
    * @param variant  the regular expression to look for
    * @return the number of successive `find()` hits
    */
  def countVariantOccurences(sequence: String, variant: String): Int = {
    val m = Pattern.compile(variant).matcher(sequence)
    var count = 0
    while (m.find()) {
      count += 1
    }
    count
  }

  /** Same contract as `countVariantOccurences`, implemented with
    * `scala.util.matching.Regex`.
    *
    * @param sequence the text to search
    * @param variant  the regular expression to look for
    * @return the number of matches of `variant` in `sequence`
    */
  def slowCountVariantOccurences(sequence: String, variant: String): Int =
    // The regex must be built from `variant`, not from the sequence itself.
    variant.r.findAllIn(sequence).size

  /** Entry point: reads stdin, runs all counts in parallel and prints the
    * per-variant counts followed by the three length figures.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // One worker per variant plus one for the replacement-length computation.
    val es = Executors.newFixedThreadPool(variants.length + 1)
    implicit val ec: ExecutionContextExecutorService = ExecutionContext.fromExecutorService(es)

    try {
      val (inputLength, sequence) = readInput()

      // All futures are created before any is awaited so they run concurrently.
      val newSeqLengthFuture = Future { calcNewSeqLength(sequence) }
      val futures = variants.map(v => Future { countVariantOccurences(sequence, v) })

      val newSeqLength = Await.result(newSeqLengthFuture, Duration.Inf)
      val counts = futures.map(f => Await.result(f, Duration.Inf))

      for ((variant, count) <- variants zip counts) {
        System.out.println(variant + " " + count)
      }
      System.out.println()
      System.out.println(inputLength)
      System.out.println(sequence.length)
      System.out.println(newSeqLength)
    } finally {
      // The pool threads are non-daemon; shut down even on failure so the JVM can exit.
      es.shutdown()
    }
  }
}

 make, command-line, and program output logs

Wed, 04 Dec 2013 18:48:40 GMT

MAKE:
mv regexdna.scala-5.scala regexdna.scala
/usr/local/src/scala-2.10.2/bin/scalac -optimise -target:jvm-1.7 regexdna.scala
7.56s to complete and log all make actions

COMMAND LINE:
env JAVA_OPTS=-Xmx1024m /usr/local/src/jdk1.7.0_45/bin/java -server -XX:+TieredCompilation -XX:+AggressiveOpts  -Xbootclasspath/a:/usr/local/src/scala-2.10.2/lib/scala-library.jar:/usr/local/src/scala-2.10.2/lib/akka-actors.jar:/usr/local/src/scala-2.10.2/lib/typesafe-config.jar regexdna 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play