performance measurements

Each table row shows performance measurements for this Java program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,0001.241.2640,1841602  2% 3% 1% 99%
2,500,0006.406.56137,4361602  0% 2% 0% 100%
25,000,00053.8653.89914,8961602  1% 1% 0% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

java version "1.8.0"
Java(TM) SE Runtime Environment (build 1.8.0-b132)
Java HotSpot(TM) Server VM (build 25.0-b70, mixed mode)

 k-nucleotide Java #2 program source code

/* The Computer Language Benchmarks Game
 http://benchmarksgame.alioth.debian.org/
 
 contributed by James McIlree
 ByteString code thanks to Matthieu Bentot and The Anh Tran
 */

import java.util.*;
import java.io.*;
import java.util.concurrent.*;

public class knucleotide {
    static ArrayList<Callable< Map<ByteString, ByteString> > > createFragmentTasks(final byte[] sequence, int[] fragmentLengths) {
	ArrayList<Callable<Map<ByteString, ByteString>>> tasks = new ArrayList<Callable<Map<ByteString, ByteString>>>();
	for (int fragmentLength : fragmentLengths) {
	    for (int index=0; index<fragmentLength; index++) {
		final int offset = index;
		final int finalFragmentLength = fragmentLength;
		tasks.add(new Callable<Map<ByteString, ByteString>>() {
		    public Map<ByteString, ByteString> call() {
			return createFragmentMap(sequence, offset, finalFragmentLength);
		    }
		});
	    }
	}
	return tasks;
    }
    	
    static Map<ByteString, ByteString> createFragmentMap(byte[] sequence, int offset, int fragmentLength) {
	HashMap<ByteString, ByteString> map = new HashMap<ByteString, ByteString>();
	int lastIndex = sequence.length - fragmentLength + 1;
	ByteString key = new ByteString(fragmentLength);	
	for (int index=offset; index<lastIndex; index+=fragmentLength) {
	    key.calculateHash(sequence, index);
	    ByteString fragment = map.get(key);
	    if (fragment != null) {
		fragment.count++;
	    } else {
		map.put(key, key);
		key = new ByteString(fragmentLength);
	    }
	}

	return map;
    }
        
    // Destructive!
    static Map<ByteString, ByteString> sumTwoMaps(Map<ByteString, ByteString> map1, Map<ByteString, ByteString> map2) {
	for (Map.Entry<ByteString, ByteString> entry : map2.entrySet()) {
	    ByteString sum = map1.get(entry.getKey());
	    if (sum != null)
		sum.count += entry.getValue().count;
	    else
		map1.put(entry.getKey(), entry.getValue());
	}
	return map1;
    }
    
    static String writeFrequencies(float totalCount, Map<ByteString, ByteString> frequencies) {
	SortedSet<ByteString> list = new TreeSet<ByteString>(frequencies.values());
	StringBuilder sb = new StringBuilder();
	for (ByteString k : list)
	    sb.append(String.format("%s %.3f\n", k.toString().toUpperCase(), (float)(k.count) * 100.0f / totalCount));
	
	return sb.append('\n').toString();
    }
    
    static String writeCount(List<Future<Map<ByteString, ByteString>>> futures, String nucleotideFragment) throws Exception {
	ByteString key = new ByteString(nucleotideFragment.length());
	key.calculateHash(nucleotideFragment.getBytes(), 0);
	
	int count = 0;
	for (Future<Map<ByteString, ByteString>> future : futures) {
	    ByteString temp = future.get().get(key);
	    if (temp != null) count += temp.count;
	}
	
	return count + "\t" + nucleotideFragment.toUpperCase() + '\n';
    }
    
    public static void main (String[] args) throws Exception {
	String line;
	BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
	while ((line = in.readLine()) != null) {
	    if (line.startsWith(">THREE")) break;
	}
	    
	ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte bytes[] = new byte[100];
        while((line = in.readLine()) != null) {
	    if (line.length() > bytes.length)
		bytes = new byte[line.length()];
	    
	    int i;
	    for(i=0; i<line.length(); i++)
		bytes[i] = (byte)line.charAt(i);
	    baos.write(bytes, 0, i);
        }
	
	byte[] sequence = baos.toByteArray();
		
	ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
	int[] fragmentLengths = { 1, 2, 3, 4, 6, 12, 18 };
	List<Future<Map<ByteString, ByteString>>> futures = pool.invokeAll(createFragmentTasks(sequence, fragmentLengths));
	pool.shutdown();
	
	StringBuilder sb = new StringBuilder();

	sb.append(writeFrequencies(sequence.length, futures.get(0).get()));
	sb.append(writeFrequencies(sequence.length - 1, sumTwoMaps(futures.get(1).get(), futures.get(2).get())));
	
	String[] nucleotideFragments = { "ggt", "ggta", "ggtatt", "ggtattttaatt", "ggtattttaatttatagt" };
	for (String nucleotideFragment : nucleotideFragments) {
	    sb.append(writeCount(futures, nucleotideFragment));
	}
	
	System.out.print(sb.toString());	
    }
    
    static final class ByteString implements Comparable<ByteString> {
        public int hash, count=1;
        public final byte bytes[];
	
        public ByteString(int size) {
            bytes = new byte[size];
        }
	
        public void calculateHash(byte k[], int offset) {
	    int temp = 0;
            for (int i=0; i<bytes.length; i++) {
		byte b = k[offset+i];
                bytes[i] = b;
                temp = temp * 31 + b;
            }
	    hash = temp;
        }
	
        public int hashCode() {
            return hash;
        }
	
        public boolean equals(Object obj) {
	    return Arrays.equals(bytes, ((ByteString)obj).bytes);
        }
	
        public int compareTo(ByteString other) {
            return other.count - count;
        }
	
	public String toString() {
	    return new String(bytes);
	}
    }
}

 make, command-line, and program output logs

Wed, 19 Mar 2014 00:29:47 GMT

MAKE:
mv knucleotide.java-2.java knucleotide.java
/usr/local/src/jdk1.8.0/bin/javac knucleotide.java
0.85s to complete and log all make actions

COMMAND LINE:
/usr/local/src/jdk1.8.0/bin/java -Xmx2048m -server -XX:+TieredCompilation -XX:+AggressiveOpts knucleotide 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT

Revised BSD license

  Home   Conclusions   License   Play