The Computer Language
Benchmarks Game

k-nucleotide Smalltalk VW #5 program

source code

"* The Computer Language Benchmarks Game
    contributed by Andres Valloud *"!

Smalltalk.Core defineClass: #BenchmarksGame
	superclass: #{Core.Object}
	indexedType: #none
	private: false
	instanceVariableNames: ''
	classInstanceVariableNames: ''
	imports: ''
	category: ''!

!Core.SequenceableCollection methodsFor: 'benchmarks game'!

substringFrequencies: aLength using: aDictionary
   | buffer |
   buffer := String new: aLength.
   1 to: self size - aLength + 1 do:
      [:i |
         | answer |
         buffer replaceFrom: 1 to: aLength with: self startingAt: i.
         answer := aDictionary
            at: buffer
            putValueOf: [:sum | sum + 1]
            ifAbsentPutValueOf: 1.
         answer = 1 ifTrue: [buffer := String new: aLength].
   ^aDictionary! !

!Core.Dictionary methodsFor: 'benchmarks game'!

at: key putValueOf: putBlock ifAbsentPutValueOf: absentBlock
   "* Set the value at key to be the value of evaluating putBlock
    with the existing value. If key is not found, create a new
    entry for key and set is value to the evaluation of
    absentBlock. Answer the result of evaluating either block. *"

   | index element anObject |
   key == nil ifTrue:
         subscriptBoundsErrorFor: #at:putValueOf:ifAbsentPutValueOf:
         index: key
         value: absentBlock value].
   index := self findKeyOrNil: key.
   element := self basicAt: index.
   element == nil
      ifTrue: [self atNewIndex: index put:
         (self createKey: key value: (anObject := absentBlock value))]
      ifFalse: [element value: (anObject := putBlock value: element value)].
   ^anObject! !

!Core.BenchmarksGame class methodsFor: 'private'!

readFasta: sequenceName from: input
   | prefix newline buffer description line char |
   prefix := '>',sequenceName.
   newline := Character cr.

   "* find start of particular fasta sequence *"
   [(input atEnd) or: [
         (input peek = $>)
            ifTrue: [((line := input upTo: newline)
               indexOfSubCollection: prefix startingAt: 1) = 1]
            ifFalse: [input skipThrough: newline. false]]
      ] whileFalse.

   "* line-by-line read - it would be a lot faster to block read *"
   description := line.
   buffer := ReadWriteStream on: (String new: 1028).
   [(input atEnd) or: [(char := input peek) = $>]] whileFalse: [
      (char = $;)
         ifTrue: [input upTo: newline]
         ifFalse: [buffer nextPutAll: (input upTo: newline)]
   ^Association key: description value: buffer contents !

knucleotideFrom: input to: output
   "Same as av3, but create less strings while updating the frequencies"

   | sequence writeFrequencies writeCount |

   sequence := (self readFasta: 'THREE' from: input) value asUppercase.

   writeFrequencies :=
      [:k | | frequencies count |
      frequencies := SortedCollection sortBlock: [:a :b|
      (a value = b value) ifTrue: [b key < a key] ifFalse: [b value < a value]].

   count := 0.0.
   (sequence substringFrequencies: k using: (Dictionary new: 1024))
      associationsDo: [:each|
         frequencies add: each. count := count + each value].

   frequencies do: [:each | | percentage |
      percentage := (each value / count) * 100.0.
         nextPutAll: each key; space;
         print: percentage digits: 3; nl]].

   writeCount := [:nucleotideFragment | | frequencies count |
      frequencies := sequence substringFrequencies: nucleotideFragment size
         using: (Dictionary new: 1024).
      count := frequencies at: nucleotideFragment ifAbsent: [0].
      output print: count; tab; nextPutAll: nucleotideFragment; nl].

   writeFrequencies value: 1. output nl.
   writeFrequencies value: 2. output nl.

   writeCount value: 'GGT'.
   writeCount value: 'GGTA'.
   writeCount value: 'GGTATT'.
   writeCount value: 'GGTATTTTAATT'.
   writeCount value: 'GGTATTTTAATTTATAGT'.! !

!Core.BenchmarksGame class methodsFor: 'initialize-release'!

   self knucleotideFrom: Stdin to: Stdout.
   ^''! !

!Core.Stream methodsFor: 'benchmarks game'!

print: number digits: decimalPlaces
   self nextPutAll: 
      ((number asFixedPoint: decimalPlaces) printString copyWithout: $s)!

   self nextPut: Character lf! !


notes, command-line, and program output

64-bit Ubuntu quad core
VisualWorks® Personal Use Edition Release 8.2 of July 15, 2016

Thu, 04 May 2017 16:32:11 GMT

cp /usr/local/src/vw8.2pul/image/
/usr/local/src/vw8.2pul/bin/linuxx86_64/vwlinuxx86_64 -nogui -pcl MatriX -filein knucleotide.vw-5.vw -doit 'ObjectMemory snapshotThenQuit'

Autoloading MatriX from $(VISUALWORKS)/preview/matrix/MatriX.pcl
Autoloading Xtreams-Support from $(VISUALWORKS)/xtreams/Xtreams-Support.pcl
Autoloading Xtreams-Core from $(VISUALWORKS)/xtreams/Xtreams-Core.pcl
Autoloading Xtreams-Terminals from $(VISUALWORKS)/xtreams/Xtreams-Terminals.pcl
Autoloading Xtreams-Transforms from $(VISUALWORKS)/xtreams/Xtreams-Transforms.pcl
Autoloading Xtreams-Substreams from $(VISUALWORKS)/xtreams/Xtreams-Substreams.pcl
Autoloading Xtreams-Multiplexing from $(VISUALWORKS)/xtreams/Xtreams-Multiplexing.pcl
Filing in from:
SequenceableCollection<benchmarks game
Do you want to add Root.Smalltalk.Core.SequenceableCollection>>substringFrequencies:using: to the previously unchanged package, Collections-Abstract
						OK to continue?
Dictionary<benchmarks game
Do you want to add Root.Smalltalk.Core.Dictionary>>at:putValueOf:ifAbsentPutValueOf: to the previously unchanged package, Collections-Unordered
						OK to continue?
BenchmarksGame class<private
BenchmarksGame class<initialize-release
Stream<benchmarks game
Do you want to add Root.Smalltalk.Core.Stream>>print:digits: to the previously unchanged package, Collections-Streams
						OK to continue?
/home/dunham/benchmarksgame_quadcore/knucleotide/tmp/ created at May 4, 2017 9:30:59 AM
4.29s to complete and log all make actions

/usr/local/src/vw8.2pul/bin/linuxx86_64/vwlinuxx86_64 -nogui -evaluate "BenchmarksGame program" -a 0 < knucleotide-input25000000.txt

A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT