/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this C++ g++ program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,000Make Error  2106   

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

gcc version 4.8.1 (Ubuntu/Linaro 4.8.1-10ubuntu8)

 k-nucleotide C++ g++ program source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/

// Copy task division idea from Java entry, contributed by James McIlree
// Contributed by The Anh Tran

#include <omp.h>
#include <sched.h>

#include <algorithm>
#include <vector>
#include <iostream>
#include <sstream>
#include <stdio.h>

//#include <ext/hash_map>
//#include <boost/unordered_map.hpp>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/hash_policy.hpp>

#include <boost/algorithm/string/case_conv.hpp>
#include <boost/lambda/lambda.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/format.hpp>
#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH


typedef unsigned int   uint;

int const MAX_CORE = 16;
uint const SEED = 183; //183 193 405 <= zero collision for hashing algorithm


// Hash_table key type, with key's length = reading_frame_size
template <int frm_sz>
struct Key_T
{
   uint   hash_value;
   char   key[frm_sz +1];

   Key_T()             {   memset(this, 0, sizeof(*this));      }
   Key_T(Key_T const& k)   {   memcpy(this, &k, sizeof(*this));   }
   Key_T(char const * str)   {   ReHash (str);   }

   void 
   ReHash(char const *str)
   {
      // naive hashing algorithm.
      hash_value = 0;

      for (int i = 0; i < frm_sz; ++i)
      {
         key[i] = str[i];
         hash_value = (hash_value * SEED) + str[i];
      }
   }


   // Hash functor Hash<HKey_T>
   uint 
   operator() (const Key_T &k) const   {   return k.hash_value;   }


   // Comparison functor equal_to<HKey_T>(Left, Right)
   bool 
   operator() (const Key_T &k1, const Key_T &k2) const
   {
      if (k1.hash_value == k2.hash_value)
      {
         for (int i = 0; i < frm_sz; ++i)
         {
            if ( __builtin_expect((k1.key[i] != k2.key[i]), false) )
            {
               //++collision;
               return false;   
            }
         }
         return true;
      }
      return false;
   }
};


// Game's rule: function to update hashtable
template <int hash_len, bool MT, typename Input_T, typename HTable_T>
void 
calculate_frequency(Input_T const &input, HTable_T& hash_table)
{
   hash_table.clear();
   int   const total_length = static_cast<int>(input.size() - hash_len +1);

   typedef typename Input_T::const_pointer   Ite_T;
   Ite_T const   ite_beg   = &(input[0]);
   Ite_T const   ite_end   = &(input[0]) + total_length;

   typename HTable_T::key_type key;

   if (MT)
   {
      static int char_done[hash_len] = {0};
      int const chunk_sz = std::max(512, std::min(1024*1024, total_length / omp_get_num_threads() / 128));
      int ichunk;

      for(int offset = 0; offset < hash_len; ++offset)
      {
         // Fetch task. Each thread hashes a block, which block size = chunk
         while ( (ichunk = __sync_fetch_and_add(char_done + offset, chunk_sz)) < total_length )
         {
            Ite_T ite   = ite_beg + ichunk + offset;
            Ite_T end   = std::min(ite_beg + ichunk + chunk_sz, ite_end);
         
            for (; ite < end; ite += hash_len)
            {
               key.ReHash(ite);
               ++(hash_table[key]);
            }
         }
      }
   }
   else
   {
      for(int offset = 0; offset < hash_len; ++offset)
      {
         for (Ite_T index = ite_beg + offset; index < ite_end; index += hash_len)
         {
            key.ReHash(index);
            ++(hash_table[key]);
         }
      }
   }
}


// Build a hash_table, count all key with hash_len = 1, 2
// write the code and percentage frequency
template <int hash_len, typename Input_T>
void 
write_frequencies(Input_T const &input, std::ostream &output)
{
   typedef Key_T<hash_len>         HKey_T;

   //typedef __gnu_cxx::hash_map <
   //typedef boost::unordered_map <
   typedef __gnu_pbds::cc_hash_table   <
                                 HKey_T,   // key type
                                 uint,   // map type
                                 HKey_T,   // hash functor
                                 HKey_T   // equal_to functor
                              >    HTable_T;


   static HTable_T hash_table[MAX_CORE];

   // parallel hashing. Each thread updates its own hash_table.
   if (omp_get_num_threads() > 1)
      calculate_frequency<hash_len, true>(input, hash_table[omp_get_thread_num()]);
   else
      calculate_frequency<hash_len, false>(input, hash_table[omp_get_thread_num()]);


   // only the last thread, reaching this code block, to process result
   static int thread_passed = 0;
   if (__sync_add_and_fetch(&thread_passed, 1) == omp_get_num_threads())
   {
      // merge thread local results to main hash_table
      HTable_T &merge_table (hash_table[0]);

      for (int i = 1; i < omp_get_num_threads(); ++i)
      {
         foreach (typename HTable_T::value_type const & e, hash_table[i])
            merge_table[e.first] += e.second;
      }
      
   
      typedef std::pair<HKey_T, uint>   HValue_T;
      typedef std::vector<HValue_T>    List_T;

      // Copy results from hash_table to list
      List_T order_table(merge_table.begin(), merge_table.end());

      {
         // Sort with descending frequency
         using namespace boost::lambda;
         std::sort(   order_table.begin(), order_table.end(),
            ( !(bind(&HValue_T::second, _1) < bind(&HValue_T::second, _2)) )   );
      }

      float const total_char = static_cast<float>(input.size() - hash_len +1);
      boost::format fmt("%|1$s| %|2$0.3f|\n");

      foreach(typename List_T::value_type &e, order_table)
      {
         e.first.key[hash_len] = 0; // ensure proper null terminated
         boost::to_upper(e.first.key);

         float percent = static_cast<float>(e.second) * 100.0f / total_char;
         fmt % e.first.key % percent;

         output << fmt;
      }

      output << std::endl;
      thread_passed = 0;
   }
}


// Build a hash_table, count all key with hash_len = 3, 4, 6, 12, 18
// Then print a specific sequence's count
template <int hash_len, typename Input_T>
void 
write_frequencies(Input_T const &input, std::ostream &output, char const *specific)
{
   typedef Key_T<hash_len>      HKey_T;
   typedef __gnu_pbds::cc_hash_table   <
                                 HKey_T,   // key type
                                 uint,   // map type
                                 HKey_T,   // hash functor
                                 HKey_T   // equal_to functor
                              >    HTable_T;

   HTable_T local_table;   // private for each thread
   if (omp_get_num_threads() > 1)
      calculate_frequency<hash_len, true>(input, local_table);   // parallel hash
   else
      calculate_frequency<hash_len, false>(input, local_table);   // parallel hash

   // Build hash key for searching
   HKey_T printkey(specific);

   // count how many matched for specific sequence
   static uint total_matched = 0;
   
   {
      // parallel look up
      uint match = local_table[printkey];
      #pragma omp atomic
      total_matched += match;
   }

   // The last thread, reaching this code block, will print result
   static int thread_passed = 0;
   if (__sync_add_and_fetch(&thread_passed, 1) == omp_get_num_threads())
   {
      printkey.key[hash_len] = 0; // null terminated
      boost::to_upper(printkey.key);

      boost::format fmt("%1%\t%2%\n");
      fmt % total_matched % printkey.key;
      output << fmt;

      thread_passed = 0;
      total_matched = 0;
   }
}

int 
GetThreadCount()
{
   cpu_set_t cs;
   CPU_ZERO(&cs);
   sched_getaffinity(0, sizeof(cs), &cs);

   int count = 0;
   for (int i = 0; i < MAX_CORE; ++i)
   {
      if (CPU_ISSET(i, &cs))
         ++count;
   }
   return count;
}

int 
main()
{
   typedef std::vector<char> Input_T;
   Input_T input;
   input.reserve(256*1024*1024); // 256MB

   char buffer[64];

   // rule: read line-by-line
   while (fgets(buffer, sizeof(buffer), stdin))
   {
      if(strncmp(buffer, ">THREE", 6) == 0)
         break;
   }

   std::back_insert_iterator<Input_T> back_ite (input);
   while (fgets(buffer, sizeof(buffer), stdin))
   {
      size_t sz = strlen(buffer);
      if (buffer[sz -1] == '\n')
         --sz;

      std::copy(buffer, buffer + sz, back_ite);
   }

   std::ostringstream output[7];
   #pragma omp parallel num_threads(GetThreadCount()) default(shared)
   {
      write_frequencies<18>( input, output[6], "ggtattttaatttatagt" );
      write_frequencies<12>( input, output[5], "ggtattttaatt" );
      write_frequencies< 6>( input, output[4], "ggtatt" );
      write_frequencies< 4>( input, output[3], "ggta" );
      write_frequencies< 3>( input, output[2], "ggt" );
      write_frequencies< 2>( input, output[1] );
      write_frequencies< 1>( input, output[0] );
   }

   foreach(std::ostringstream const& s, output)
      std::cout << s.str();
      
   return 0;
}

 make, command-line, and program output logs

Sun, 27 Oct 2013 00:52:58 GMT

MAKE:
/usr/bin/g++ -c -pipe -O3 -fomit-frame-pointer -march=native  -std=c++0x knucleotide.c++ -o knucleotide.c++.o &&  \
        /usr/bin/g++ knucleotide.c++.o -o knucleotide.gpp_run -lpthread 
knucleotide.c++:21:48: fatal error: boost/algorithm/string/case_conv.hpp: No such file or directory
 #include <boost/algorithm/string/case_conv.hpp>
                                                ^
compilation terminated.
make: [knucleotide.gpp_run] Error 1 (ignored)
rm knucleotide.c++
0.74s to complete and log all make actions

COMMAND LINE:
./knucleotide.gpp_run 0 < knucleotide-input250000.txt

MAKE ERROR 

Revised BSD license

  Home   Conclusions   License   Play