/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this Fortran Intel program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,0000.760.7833,2802079  1% 0% 3% 100%
2,500,0006.306.3641,8802079  2% 0% 0% 100%
25,000,00060.7860.82166,8682079  1% 0% 0% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

Intel(R) Fortran Compiler XE for applications running on IA-32, Version 13.0.1.117 Build 20121010

 k-nucleotide Fortran Intel #2 program source code

! The Computer Language Benchmarks Game
! http://benchmarksgame.alioth.debian.org/
! 
! Compilation:
!  Single-core: ifort -fast knucleotide2.f90
!  Multi-core : ifort -fast -openmp knucleotide2.f90
!
! contributed by Steve Decker
! modified by Andrei Jirnyi:
! - added OpenMP parallelization (very naively)
! - changed the hash function to FNV hash:
!   see http://www.isthe.com/chongo/tech/comp/fnv/
!   (ifort is better at multiplication on intel cpu's)
! - changed internal string comparison to a library call :)
! - I/O is unoptimized and very slow (>10 sec.) --
!   there is much to gain there.
! - Sequences are in ASCII format; it seems that the C/C++
!   programs are compressing these. much to gain there too.
! - IMPORTANT NOTE: this is a DIRTY HACK!!
!   this code probably contains various NASTY BUGS.

module knuc_mod
  use, intrinsic :: iso_c_binding
  implicit none
  private
  public :: init_table, read_frame, keys_of_given_len, cnt

  integer, parameter :: MaxWordLen = 18
  type, public :: key
     integer                   :: count = 0
     character(len=MaxWordLen) :: word = ""
  end type key

  type, public :: table
     integer :: hashBits, maxWords, nWords
     type(key), allocatable, dimension(:) :: words
  end type table

  interface
     ! what's wrong with using a standard library?! :)
     integer(c_int) function strncmp(s1,s2,n) bind(C)
       import
       character(kind=c_char),  intent(in) :: s1(*)
       character(kind=c_char),  intent(in)  :: s2(*)
       integer(c_size_t), value, intent(in) :: n
     end function strncmp

  end interface

contains

  pure subroutine init_table(kNuc, nBits)
    type(table), intent(out) :: kNuc
    integer,     intent(in)  :: nBits

    kNuc = table(nBits, 2**nBits, 0, null())
    allocate(kNuc%words(kNuc%maxWords))
  end subroutine init_table

  subroutine read_frame(buf, n, length, kNuc)
    character, dimension(:), intent(in)    :: buf
    integer,                 intent(in)    :: n, length
    type(table),             intent(inout) :: kNuc

    integer               :: i, j
    character(len=length) :: word

    do i = 1, n
       do j = 1, length
          word(j:j) = buf(i+j-1)
       end do
       call add(kNuc, word)
    end do
  end subroutine read_frame

  subroutine add(kNuc, word)
    type(table),      intent(inout) :: kNuc
    character(len=*), intent(in)    :: word

    integer :: m

    m = hash_value(word, kNuc%maxWords)
    do
       if (kNuc%words(m)%count == 0) then 
          kNuc%words(m) = key(1, word)
          kNuc%nWords = kNuc%nWords + 1
          if (kNuc%nWords > kNuc%maxWords/2) call resize_table(kNuc)
          exit
       else if (strncmp(kNuc%words(m)%word,word,int(len(word),c_size_t)) == 0) then 
          kNuc%words(m)%count = kNuc%words(m)%count + 1
          exit
       end if
       m = merge(1, m+1, m == kNuc%maxWords)
    end do
  end subroutine add

  subroutine resize_table(kNuc)
    type(table), intent(inout) :: kNuc

    integer     :: i, m
    type(table) :: temp

    temp = table(kNuc%hashBits + 1, 2 * kNuc%maxWords, kNuc%nWords, null())
    allocate(temp%words(temp%maxWords))

    do i = 1, kNuc%maxWords
       if (kNuc%words(i)%count > 0) then
          m = hash_value(trim(kNuc%words(i)%word), temp%maxWords)
          do
             if (temp%words(m)%count == 0) then
                temp%words(m) = kNuc%words(i)
                exit
             end if
             m = merge(1, m+1, m == temp%maxWords)
          end do
       end if
    end do

    kNuc = temp
  end subroutine resize_table

  pure function keys_of_given_len(kNuc, length)
    type(table), intent(in) :: kNuc
    integer,     intent(in) :: length
    type(key), dimension(4**length) :: keys_of_given_len

    integer :: i, n

    n = 1
    do i = 1, kNuc%maxWords
       if (len_trim(kNuc%words(i)%word) == length) then
          keys_of_given_len(n) = kNuc%words(i)
          n = n + 1
          if (n > size(keys_of_given_len)) exit
       end if
    end do
  end function keys_of_given_len

  integer function cnt(kNuc, string)
    type(table), intent(in)      :: kNuc
    character(len=*), intent(in) :: string

    integer :: m

    m = hash_value(string, kNuc%maxWords)
    do
       if (kNuc%words(m)%word == string .or. kNuc%words(m)%count == 0) then
          cnt = kNuc%words(m)%count
          exit
       end if
       m = merge(1, m+1, m == kNuc%maxWords)
    end do
  end function cnt


  integer function hash_value(key, range)
    character(len=*), intent(in) :: key
    integer,          intent(in) :: range

    integer :: i,  tmp
    
    tmp = ieor(2166136261 * 16777619, iachar(key(1:1)))

    do i=2,len(key)
       tmp = tmp * 16777619
       tmp = ieor(tmp,iachar(key(i:i)))
    end do
    hash_value = iand(tmp, range - 1) + 1
        
  end function hash_value

end module knuc_mod

program knucleotide
  use knuc_mod
  implicit none

  integer, parameter :: LineLen = 60, InitialTableSize = 1

  integer :: bufferSize = 16384, istat, n = 0, i
  logical :: atThirdPart = .false.
  
  type(table) :: kn, kn1(7)
  ! having just one kn1 should work too (I think!!) --
  !  but for some reason for me it does not (with 12.0.3).
  !  Perhaps bugs in the code are to blame.
  !  Seems to work with 12.1.0, but better safe than sorry.
  
  character(len=LineLen) :: line
  character(len=1) :: line1(LineLen)
  equivalence(line,line1)
  character, dimension(:), allocatable :: buffer, tempBuffer

  character, dimension(1:116), parameter :: &
       Codes = (/ (" ",i = 1,64), "A", " ", "C",  &
       (" ", i = 68, 70), "G", (" ", i = 72, 83), "T", (" ", i = 85, 96),  &
       "A", " ", "C", (" ", i = 100, 102), "G", (" ", i = 104, 115), "T" /)

  character(len=40), dimension(5) :: lines, lines1
  integer :: lenghts(5)
  
  allocate(buffer(bufferSize))

  ! Read FASTA file line-by-line, extracting sequence three, and converting to
  ! uppercase.
  ! This is very slow and can certainly be optimized a lot further.
  do
     read(*, "(a)", iostat=istat) line
     if (istat /= 0) exit
     if (.not. atThirdPart) then
        atThirdPart = line(1:3) == ">TH"
     else
        if (n+LineLen > bufferSize) then
           allocate(tempBuffer(2*bufferSize))
           tempBuffer(1:bufferSize) = buffer
           tempBuffer(bufferSize+1 : ) = " "
           call move_alloc(tempBuffer, buffer)
           bufferSize = 2*bufferSize
        end if
        buffer(n+1 : n+LineLen) = Codes(iachar(line1))
        n = n + LineLen
     end if
  end do
  n = minloc(iachar(buffer),1) - 1


  call init_table(kn, InitialTableSize)

  lines(1) = "GGT"
  lines(2) = "GGTA"
  lines(3) = "GGTATT"
  lines(4) = "GGTATTTTAATT"
  lines(5) = "GGTATTTTAATTTATAGT"
  lenghts = [3, 4, 6, 12, 18]

  !$omp  parallel default(firstprivate) &
  !$omp& shared(lines, lines1, kn, lenghts)

  !$omp do schedule(dynamic,1)  
  do i=5,1,-1
     kn1(i) = kn  ! not sure if an array is really needed here...
     call write_count(kn1(i), lines(i)(1:lenghts(i)), lines1(i))
  end do
  !$omp end do nowait

  !$omp do schedule(dynamic,1)
  do i=1,2
     kn1(i+5) = kn
     call write_frequencies(kn1(i+5),i)
  end do
  !$omp end do nowait

  !$omp end parallel

  do i=1,5
     write(*,'(a)') trim(adjustl(lines1(i)))
  end do

contains

  subroutine write_frequencies(kn, length)
    integer, intent(in) :: length
    type(table) :: kn

    integer :: numNuc, i, j
    type(key), dimension(4**length) :: nucleotides
    type(key) :: temp

    numNuc = n - length + 1

    call read_frame(buffer, numNuc, length, kn)

    nucleotides = keys_of_given_len(kn, length)

    ! Insertion sort
    do i = 2, size(nucleotides)
       temp = nucleotides(i)
       do j = i, 2, -1
          if (nucleotides(j-1)%count > temp%count .or.  &
               nucleotides(j-1)%count == temp%count .and.  &
               nucleotides(j-1)%word < temp%word) exit
          nucleotides(j) = nucleotides(j-1)
       end do
       nucleotides(j) = temp
    end do

    do i = 1, size(nucleotides)
       write(*, "(a2,f6.3)") nucleotides(i)%word(1:2),  &
            100. * nucleotides(i)%count / real(numNuc)
    end do
    write(*, "(a)") ""
  end subroutine write_frequencies

  subroutine write_count(kn, string, string1)
    character(len=*), intent(in) :: string
    character(len=40), intent(out) :: string1
    type(table) :: kn

    character, parameter :: tab = achar(9)
    integer :: length, numNuc

    length = len(string)
    numNuc = n - length + 1

    call read_frame(buffer, numNuc, length, kn)

    write(string1, "(i0,a)") cnt(kn, string), tab//string
  end subroutine write_count

end program knucleotide

 make, command-line, and program output logs

Tue, 15 Jan 2013 07:03:40 GMT

MAKE:
/usr/local/src/intel/bin/ifort -O3 -fast knucleotide.ifc-2.f90 -o knucleotide.ifc-2.ifc_run
ipo: remark #11001: performing single-file optimizations
ipo: remark #11006: generating object file /tmp/ipo_ifortM2EfXr.o
rm knucleotide.ifc-2.f90
0.60s to complete and log all make actions

COMMAND LINE:
./knucleotide.ifc-2.ifc_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT

Revised BSD license

  Home   Conclusions   License   Play