/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this C++ g++ program with a particular command-line input value N.

 N          CPU secs  Elapsed secs  Memory KB  Code B  ≈ CPU Load
 50,000     0.29      0.21          428        844     82% 0% 43% 23%
 500,000    2.72      1.87          28,596     844     2% 52% 94% 2%
 5,000,000  27.30     17.33         178,296    844     64% 1% 0% 100%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

gcc version 4.8.1 (Ubuntu/Linaro 4.8.1-10ubuntu8)

 regex-dna C++ g++ #3 program source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/

   contributed by Alexey Zolotov
   modified by Vaclav Zeman
*/

#include <boost/regex.hpp>
#include <cassert>
#include <iostream>
#include <cstdio>

using namespace std;

// Buffer size constant (presumably a byte count for I/O buffering).
std::size_t const BUFSIZE = 1024;

// Syntax flags handed to every boost::regex constructed in this file:
// Perl-style regular expression grammar.
boost::regex::flag_type const re_flags = boost::regex::perl;

/*
   Entry point of the regex-dna benchmark.

   Steps:
     1. Read all of standard input into memory.
     2. Remove FASTA header lines (">...\n") and every newline.
     3. Concurrently (OpenMP sections):
          a. count the matches of each of the nine variant patterns;
          b. expand each IUPAC ambiguity code into an explicit alternation
             on a separate copy of the sequence.
     4. Print the per-pattern counts, then the original length, the
        cleaned length and the length after expansion.

   Returns 0 on success, 1 if no input could be read.
*/
int main(void)
{
    // ---- 1. Load stdin ---------------------------------------------------
    std::string str;

    // Fast path: when stdin is a seekable file, size the buffer up front so
    // the whole input arrives with a single read and no reallocation.
    if (std::fseek(stdin, 0, SEEK_END) == 0)
    {
        long const total = std::ftell(stdin);   // long, not int: large files
        std::rewind(stdin);
        if (total > 0)
        {
            str.resize(static_cast<std::size_t>(total));
            std::size_t const got = std::fread(&str[0], 1, str.size(), stdin);
            // Drop any unread tail so a short read cannot leave NUL padding
            // that would be counted and matched as if it were input.
            str.resize(got);
        }
    }
    else
    {
        // Pipes and terminals cannot be seeked; discard any stream state
        // left by the failed attempt and read sequentially instead.
        std::clearerr(stdin);
    }

    // General path: append whatever is still unread (everything for a pipe,
    // nothing for a fully read file) until end of input.
    {
        char chunk[65536];
        for (;;)
        {
            std::size_t const got = std::fread(chunk, 1, sizeof chunk, stdin);
            str.append(chunk, got);
            if (got < sizeof chunk)
                break;
        }
    }

    if (std::ferror(stdin) || str.empty())
    {
        std::cerr << "regexdna: unable to read input from stdin\n";
        return 1;
    }

    // ---- Patterns --------------------------------------------------------
    // Each entry pairs an 8-mer variant with its reverse complement.
    char const * pattern1[] = {
        "agggtaaa|tttaccct",
        "[cgt]gggtaaa|tttaccc[acg]",
        "a[act]ggtaaa|tttacc[agt]t",
        "ag[act]gtaaa|tttac[agt]ct",
        "agg[act]taaa|ttta[agt]cct",
        "aggg[acg]aaa|ttt[cgt]ccct",
        "agggt[cgt]aa|tt[acg]accct",
        "agggta[cgt]a|t[acg]taccct",
        "agggtaa[cgt]|[acg]ttaccct"
    };

    const int pattern1_count =
        static_cast<int>(sizeof(pattern1) / sizeof(pattern1[0]));

    // Flat list of (code, replacement) pairs: even index is the pattern to
    // search for, the following odd index is the text substituted for it.
    std::string const pattern2[] = {
        "B", "(c|g|t)", "D", "(a|g|t)", "H", "(a|c|t)", "K", "(g|t)",
        "M", "(a|c)", "N", "(a|c|g|t)", "R", "(a|g)", "S", "(c|g)",
        "V", "(a|c|g)", "W", "(a|t)", "Y", "(c|t)"
    };

    const int pattern2_count =
        static_cast<int>(sizeof(pattern2) / sizeof(pattern2[0]));

    // ---- 2. Strip headers and newlines -----------------------------------
    std::size_t const len1 = str.length();
    boost::regex re1 (">[^\\n]+\\n|[\\n]", re_flags);
    boost::regex_replace (str, re1, "").swap (str);
    std::size_t const len2 = str.length();

    // The substitution pass mutates its input, so it works on a private copy
    // while the counting pass only reads `str`.
    std::string out = str;
    int counts[pattern1_count] = { 0 };

    // ---- 3. Count and substitute concurrently ----------------------------
    #pragma omp parallel sections
    {
    #pragma omp section
        // Nested parallel region: whether it actually receives extra threads
        // depends on the OpenMP runtime's nested-parallelism setting.
        #pragma omp parallel for
        for (int i = 0; i < pattern1_count; i++)
        {
            boost::regex pat(pattern1[i], re_flags);
            boost::smatch m;
            std::string::const_iterator start = str.begin ();
            std::string::const_iterator const end = str.end ();

            // Count in a local so the hot loop does not repeatedly write to
            // neighbouring slots of the shared array.
            int hits = 0;
            while (boost::regex_search (start, end, m, pat))
            {
                ++hits;
                start = m[0].second;    // resume right after this match
            }
            counts[i] = hits;
        }
        #pragma omp section
        for (int i = 0; i < pattern2_count; i += 2)
        {
            boost::regex re (pattern2[i], re_flags);
            boost::regex_replace (out, re, pattern2[i + 1]).swap (out);
        }
    }

    // ---- 4. Report -------------------------------------------------------
    for (int i = 0; i != pattern1_count; ++i)
      std::cout << pattern1[i] << " " << counts[i] << "\n";

    std::cout << "\n";
    std::cout << len1 << "\n";
    std::cout << len2 << "\n";
    std::cout << out.length() << std::endl;

    return 0;
}

 make, command-line, and program output logs

Mon, 28 Oct 2013 00:14:21 GMT

MAKE:
/usr/bin/g++ -c -pipe -O3 -fomit-frame-pointer -march=native  -fopenmp regexdna.gpp-3.c++ -o regexdna.gpp-3.c++.o &&  \
        /usr/bin/g++ regexdna.gpp-3.c++.o -o regexdna.gpp-3.gpp_run -fopenmp -lboost_regex 
rm regexdna.gpp-3.c++
3.64s to complete and log all make actions

COMMAND LINE:
./regexdna.gpp-3.gpp_run 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214

Revised BSD license

  Home   Conclusions   License   Play