The Computer Language
Benchmarks Game

k-nucleotide Ruby #4 program

source code

# The Computer Language Benchmarks Game
# http://benchmarksgame.alioth.debian.org
#
# contributed by Aaron Tavistock

# Counts how many times each key occurs in seq, overlapping occurrences
# included.
#
# seq  - String to search.
# keys - Array of Strings to look for.
#
# Returns a Hash (default value 0) mapping each key that occurs at least once
# to its number of occurrences. Keys that never occur are not stored.
def frequency(seq, keys)
  counts = Hash.new(0)
  keys.each do |key|
    # Begin at offset 0 so that a match at the very start of seq is counted.
    position = seq.index(key)
    while position
      counts[key] += 1
      # Advance by a single character so overlapping matches are found too.
      position = seq.index(key, position + 1)
    end
  end
  counts
end

# Formats the relative frequency of each key found in seq.
#
# seq  - String to search.
# keys - Array of Strings to look for.
#
# Returns an Array of "KEY percent" Strings (key upcased, three decimals),
# ordered by descending count with ties broken by ascending key so the output
# is deterministic. The percentage is taken over the number of positions at
# which a fragment of the key's length can start (seq.size - key.size + 1).
def percentage(seq, keys)
  ranked = frequency(seq, keys).sort_by { |key, value| [-value, key] }
  ranked.map do |key, value|
    windows = seq.size - key.size + 1
    "%s %.3f" % [key.upcase, value * 100.0 / windows]
  end
end

# Formats the occurrence count of every requested key.
#
# seq  - String to search.
# keys - Array of Strings to look for.
#
# Returns an Array of "count<TAB>KEY" Strings (key upcased), one per entry of
# keys and in the same order. A key that never occurs is reported with a
# count of 0 rather than being left out.
def count(seq, keys)
  counts = frequency(seq, keys)
  # fetch with a fallback: frequency does not store keys it never found.
  keys.map { |key| "#{counts.fetch(key, 0)}\t#{key.upcase}" }
end

# Extract the body of the ">THREE" record from standard input: everything
# after its header line up to the last newline before the next ">" (or before
# the end of input).
_, seq = STDIN.read.scan(/(\n>THREE[^\n]*\n)([^>]*)\n/).flatten
abort 'knucleotide: no ">THREE" record found on standard input' unless seq

# Handle the sequence as raw bytes and strip line breaks / other whitespace.
seq.force_encoding('ASCII-8BIT')
seq.gsub!(/\s/, '')

singles = %w(a t c g)
# Every ordered pair of nucleotides: aa, at, ac, ag, ta, ...
doubles = singles.map { |a| singles.map { |b| "#{a}#{b}" }}.flatten

# The specific 3-, 4-, 6-, 12- and 18-nucleotide sequences to count.
chains  = %w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt)

print "#{percentage(seq, singles).join("\n")}\n\n"
print "#{percentage(seq, doubles).join("\n")}\n\n"
print "#{count(seq, chains).join("\n")}\n"
    

notes, command-line, and program output

NOTES:
32-bit Ubuntu one core
ruby 2.3.0p0 (2015-12-25 revision 53290) [i686-linux]


Thu, 22 Sep 2016 17:40:51 GMT

COMMAND LINE:
/usr/local/src/ruby/bin/ruby -W0 knucleotide.yarv-4.yarv 0 < knucleotide-input250000.txt

PROGRAM FAILED 


PROGRAM OUTPUT:
A 30.298
T 30.157
C 19.793
G 19.752

AA 9.177
TA 9.137
AT 9.136
TT 9.094
AC 6.000
CA 5.999
GA 5.986
AG 5.985
TC 5.970
CT 5.970
GT 5.957
TG 5.956
CC 3.915
CG 3.910
GC 3.908
GG 3.902


knucleotide.yarv-4.yarv:41:in `<main>': undefined local variable or method `chains' for main:Object (NameError)