performance measurements

Each table row shows performance measurements for this Java program with a particular command-line input value N.

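| N         | CPU secs | Elapsed secs | Memory KB | Code B | ≈ CPU Load     |
|-----------|----------|--------------|-----------|--------|----------------|
| 50,000    | 0.68     | 0.70         | 27,964    | 1377   | 3% 0% 1% 100%  |
| 500,000   | 2.72     | 2.81         | 36,916    | 1377   | 0% 1% 1% 100%  |
| 5,000,000 | 20.94    | 20.95        | 567,276   | 1377   | 2% 9% 1% 100%  |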

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

java version "1.8.0_25"
Java(TM) SE Runtime Environment (build 1.8.0_25-b17)
Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode)

 regex-dna Java #8 program source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/

   contributed by lucki
*/

import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A {@link CharSequence} view over the first {@code length} bytes of a byte
 * array, exposing each byte as one {@code char} without copying.
 *
 * <p>Both fields are public and mutable: callers may edit {@code backing} in
 * place and then adjust {@code length} to shrink the visible window.
 */
final class ByteWrapper implements CharSequence {

   /** The wrapped bytes; only indices {@code [0, length)} are part of the sequence. */
   public byte[] backing;

   /** Number of leading bytes of {@code backing} reported by {@link #length()}. */
   public int length;

   /**
    * Wraps the whole array.
    *
    * @param backing the bytes to expose; not copied
    */
   public ByteWrapper( byte[] backing ) {
      this( backing, backing.length );
   }

   /**
    * Wraps the first {@code length} bytes of the array.
    *
    * @param backing the bytes to expose; not copied
    * @param length  how many leading bytes belong to the sequence
    */
   public ByteWrapper( byte[] backing, int length ) {
      this.backing = backing;
      this.length = length;
   }

   /** @return the current value of the {@code length} field */
   @Override
   public int length() {
      return length;
   }

   /**
    * Returns the byte at {@code index} as a char in the range 0..255.
    *
    * @param index position in {@code backing}; not checked against {@code length}
    * @return the unsigned value of the byte at that position
    */
   @Override
   public char charAt(int index) {
      // Mask before widening: a plain (char) cast sign-extends bytes >= 0x80
      // into 0xFF80..0xFFFF instead of 0x80..0xFF.
      return (char) (backing[index] & 0xFF);
   }

   /**
    * Not supported by this view.
    *
    * @throws UnsupportedOperationException always
    */
   @Override
   public CharSequence subSequence(int start, int end) {
      throw new UnsupportedOperationException();
   }
}


/**
 * Reads all of standard input into a byte array, strips the spans matched by
 * {@code comments}, prints how many times each pattern in {@code strs}
 * occurs, rewrites every code letter listed in {@code codes} to its
 * alternation, and finally prints three lengths: bytes read, bytes left
 * after stripping, and bytes after the rewrite.
 */
public final class regexdna {

   /** Matches a '>' through the end of its line, or a lone newline. */
   private static final Pattern comments = Pattern.compile(">.*\n|\n");

   /** Platform line separator used when building the program's output. */
   public static String newline = System.getProperty("line.separator");

   /** Each row pairs a single-letter code with the alternation that replaces it. */
   private static final String[][] codes =
      {{"B", "(c|g|t)"},
      {"D", "(a|g|t)"},
      {"H", "(a|c|t)"},
      {"K", "(g|t)"},
      {"M", "(a|c)"},
      {"N", "(a|c|g|t)"},
      {"R", "(a|g)"},
      {"S", "(c|g)"},
      {"V", "(a|c|g)"},
      {"W", "(a|t)"},
      {"Y", "(c|t)"} };

   /** Matches any one of the code letters listed in {@code codes}. */
   private static final Pattern codesPat = Pattern.compile("[BDHKMNRSVWY]");

   /** Length of the longest replacement string in {@code codes}. */
   private static final int longest;

   /** Width of one slot in {@code repl}: {@code longest} bytes plus a zero terminator. */
   private static final int slotWidth;

   /**
    * Replacement lookup table with one slot per letter 'A'..'Z'. The slot for
    * a letter starts at {@code slotWidth * (letter - 'A')} and holds the
    * replacement bytes followed by at least one zero byte.
    */
   private static final byte[] repl;

   /** The patterns whose occurrences are counted, in output order. */
   private static final String[] strs = {
      "agggtaaa|tttaccct",
      "[cgt]gggtaaa|tttaccc[acg]",
      "a[act]ggtaaa|tttacc[agt]t",
      "ag[act]gtaaa|tttac[agt]ct",
      "agg[act]taaa|ttta[agt]cct",
      "aggg[acg]aaa|ttt[cgt]ccct",
      "agggt[cgt]aa|tt[acg]accct",
      "agggta[cgt]a|t[acg]taccct",
      "agggtaa[cgt]|[acg]ttaccct"
   };

   /** Compiled form of {@code strs}, index for index. */
   private static final Pattern[] pats = new Pattern[strs.length];

   static {
      for( int i = 0; i < pats.length; ++i ) {
         pats[i] = Pattern.compile( strs[i] );
      }

      int widest = 0;
      for( String[] code : codes ) {
         widest = Math.max( widest, code[1].length() );
      }
      longest = widest;

      // One extra byte per slot keeps every entry zero-terminated on its own,
      // even when a replacement is exactly `longest` bytes long.
      slotWidth = longest + 1;

      repl = new byte[26 * slotWidth];
      for( String[] code : codes ) {
         int off = slotWidth * (code[0].charAt( 0 ) - 'A');
         String replacement = code[1];
         for( int j = 0; j < replacement.length(); ++j ) {
            repl[off + j] = (byte) replacement.charAt( j );
         }
      }
   }

   /**
    * Removes every span matched by {@code comments} from {@code t}, compacting
    * the surviving bytes toward the front of {@code t.backing} in place and
    * shrinking {@code t.length} to the compacted size.
    *
    * @param t the sequence to strip; modified in place
    */
   private static void rmComments( ByteWrapper t ) {

      final byte[] backing = t.backing;
      // Logical end of the data; may be smaller than backing.length.
      final int end = t.length;
      Matcher m = comments.matcher( t );

      if( !m.find() ) {
         return;
      }

      int tail = m.start();      // next write position
      int restart = m.end();     // start of the next run of bytes to keep

      while( m.find() ) {
         // Writes land strictly before m.start(), i.e. only in text the
         // matcher has already scanned past.
         int keep = m.start() - restart;
         System.arraycopy( backing, restart, backing, tail, keep );
         tail += keep;
         restart = m.end();
      }

      int rest = end - restart;
      System.arraycopy( backing, restart, backing, tail, rest );
      tail += rest;

      t.length = tail;
   }

   /**
    * Counts the matches of each pattern in {@code pats} within {@code t} and
    * prints one "pattern count" line per pattern to standard output.
    *
    * @param t the sequence to search
    */
   private static void countPatterns( ByteWrapper t ) {
      StringBuilder sb = new StringBuilder(1000);

      for( int i = 0; i < pats.length; ++i ) {
         int count = 0;
         Matcher m = pats[i].matcher( t );
         while( m.find() ) {
            ++count;
         }
         sb.append(strs[i]);
         sb.append(" ");
         sb.append(count);
         sb.append(newline);
      }
      System.out.print( sb.toString());
   }

   /**
    * Builds a new sequence in which every letter matched by {@code codesPat}
    * is replaced by its entry in {@code repl}; all other bytes are copied
    * unchanged. The input is not modified.
    *
    * @param t the sequence to rewrite
    * @return a new wrapper over a freshly allocated array holding the result
    * @throws ArithmeticException if the worst-case output size overflows an int
    */
   private static ByteWrapper replace( ByteWrapper t ) {

      final byte[] backing = t.backing;
      // Worst case: every byte expands to the longest replacement.
      // multiplyExact throws instead of silently wrapping to a wrong size.
      final byte[] buf = new byte[Math.multiplyExact( t.length, longest )];
      int pos = 0;

      Matcher m = codesPat.matcher( t );
      int last = 0;

      while( m.find() ) {
         int at = m.start();

         // Copy the untouched bytes between the previous code letter and this one.
         int plain = at - last;
         System.arraycopy( backing, last, buf, pos, plain );
         pos += plain;

         // Expand the code letter; the slot is zero-terminated.
         for( int i = slotWidth * (backing[at] - 'A'); repl[i] != 0; ++i ) {
            buf[pos++] = repl[i];
         }
         last = at + 1;
      }

      int rest = t.length - last;
      System.arraycopy( backing, last, buf, pos, rest );
      pos += rest;

      return new ByteWrapper( buf, pos );
   }

   /**
    * Program entry point: reads standard input, strips, counts, rewrites and
    * prints the results.
    *
    * @param args ignored
    * @throws IllegalArgumentException if the input does not fit in one byte array
    * @throws Exception if reading standard input fails
    */
   public static void main( String[] args ) throws Exception {
      // Standard input belongs to the process, so this stream is deliberately
      // left open rather than closed here.
      FileInputStream fis = new FileInputStream( FileDescriptor.in );
      FileChannel cin = fis.getChannel();

      long size = cin.size();
      if( size > Integer.MAX_VALUE ) {
         throw new IllegalArgumentException(
            "input too large for a single byte array: " + size + " bytes" );
      }

      ByteBuffer bb = ByteBuffer.allocate( (int) size );
      // A single read may return fewer bytes than requested; keep reading
      // until the buffer is full or the channel reports end-of-stream.
      while( bb.hasRemaining() ) {
         if( cin.read( bb ) == -1 ) {
            break;
         }
      }

      final int inputLength = bb.position();
      ByteWrapper t = new ByteWrapper( bb.array(), inputLength );
      rmComments( t );

      countPatterns( t );

      ByteWrapper w = replace( t );

      System.out.println(new StringBuilder().append(newline).append(inputLength)
      .append(newline).append(t.length)
      .append(newline).append(w.length).toString());
   }
}

 make, command-line, and program output logs

Wed, 19 Nov 2014 00:29:14 GMT

MAKE:
mv regexdna.java-8.java regexdna.java
/usr/local/src/jdk1.8.0_25/bin/javac regexdna.java
0.87s to complete and log all make actions

COMMAND LINE:
/usr/local/src/jdk1.8.0_25/bin/java  -server -XX:+TieredCompilation -XX:+AggressiveOpts regexdna 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play