/mobile Handheld Friendly website

 performance measurements

Each table row shows performance measurements for this C CINT program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
50,000  Bad Output  1101

Read the ↓ make, command line, and program output logs to see how this program was run.

Read regex-dna benchmark to see what this program should do.

 notes

cint 5.18.00, July 2, 2010

 regex-dna C CINT program source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
   contributed by Josh Goldfoot
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <regex.h>

/* Ensure *buf can hold at least `needed` bytes, growing geometrically.
   Returns 1 on success; on failure returns 0 and leaves *buf untouched.  */
static int
regsub2_reserve (char **buf, size_t *capacity, size_t needed)
{
  size_t newCap;
  char *grown;

  if (needed <= *capacity)
    return 1;
  newCap = *capacity;
  while (newCap < needed)
    newCap = newCap + newCap / 2 + 16;
  grown = realloc (*buf, newCap);
  if (!grown)
    return 0;
  *buf = grown;
  *capacity = newCap;
  return 1;
}

/* Replace every match of the extended regex `searchFor` in `buffer` with
   the literal string `replaceWith`, writing the result to a fresh heap
   buffer.

   buffer       NUL-terminated input text (regexec scans up to the NUL).
   searchFor    POSIX extended regex, compiled with REG_NEWLINE.
   replaceWith  literal replacement text (no back-references).
   buflen       number of bytes of `buffer` to treat as input.
   returnBuf    receives the malloc'd, NUL-terminated result, which the
                caller must free; set to NULL if the pattern does not
                compile or memory runs out.
   newLen       receives the result length, excluding the terminator;
                set to 0 on failure.

   The output grows on demand, so replacements much longer than the text
   they replace cannot overrun it.  */
void regsub2 (char *buffer, char *searchFor, char *replaceWith,
	 unsigned long buflen, char **returnBuf, unsigned long* newLen)
{
  regex_t preg;
  regmatch_t pmatch;
  size_t replaceLen, capacity, used, start, prefix, tail;
  char *out;
  int ok;

  *returnBuf = NULL;
  *newLen = 0;

  if (regcomp (&preg, searchFor, REG_EXTENDED | REG_NEWLINE) != 0)
    return;

  replaceLen = strlen (replaceWith);
  capacity = buflen + buflen / 2 + 1;	/* initial guess; grown as needed */
  out = malloc (capacity);
  ok = (out != NULL);
  used = 0;
  start = 0;

  while (ok && start <= buflen
	 && regexec (&preg, buffer + start, 1, &pmatch, 0) == 0)
    {
      prefix = (size_t) pmatch.rm_so;

      /* The extra byte leaves room for the character copied after an
         empty match below.  */
      if (!regsub2_reserve (&out, &capacity, used + prefix + replaceLen + 1))
	{
	  ok = 0;
	  break;
	}

      /* Unmatched text before the match, then the replacement.  */
      memcpy (out + used, buffer + start, prefix);
      used += prefix;
      memcpy (out + used, replaceWith, replaceLen);
      used += replaceLen;
      start += (size_t) pmatch.rm_eo;

      /* A zero-length match consumes nothing; step over one input byte
         so the scan always makes progress.  */
      if (pmatch.rm_eo == pmatch.rm_so)
	{
	  if (buffer[start] == '\0')
	    break;
	  out[used++] = buffer[start++];
	}
    }

  if (ok)
    {
      /* Copy whatever follows the last match and terminate the result so
         it can be handed straight back to regexec.  */
      tail = start < buflen ? buflen - start : 0;
      ok = regsub2_reserve (&out, &capacity, used + tail + 1);
      if (ok)
	{
	  memcpy (out + used, buffer + start, tail);
	  used += tail;
	  out[used] = '\0';
	}
    }

  regfree (&preg);

  if (!ok)
    {
      free (out);
      return;
    }
  *returnBuf = out;
  *newLen = used;
}

/* In-place convenience wrapper around regsub2: substitutes every match of
   `searchFor` in *bufHandle with `replaceWith`, then swaps the caller's
   buffer for the newly allocated result.

   bufHandle    in/out: heap buffer holding the text; the old buffer is
                freed and replaced by the result on success.
   searchFor    regex pattern passed through to regsub2.
   replaceWith  replacement text passed through to regsub2.
   buflen       in/out: length of the text in *bufHandle.

   If regsub2 hands back no buffer (it leaves the result pointer NULL when
   its allocation fails), *bufHandle and *buflen are left untouched so the
   caller never ends up holding a NULL buffer with a stale length.  */
void regsub(char** bufHandle, char* searchFor, char* replaceWith, unsigned long *buflen)
{
  /* Initialised so they have defined values even if regsub2 returns
     early without writing to them.  */
  char* tmp = NULL;
  unsigned long newlen = 0;

  regsub2(*bufHandle, searchFor, replaceWith, *buflen, &tmp, &newlen);
  if (tmp == NULL)
    return;

  free(*bufHandle);
  *bufHandle = tmp;
  *buflen = newlen;
}

/* Count the non-overlapping matches of the POSIX extended regex
   `searchFor` in the NUL-terminated string `buffer`, scanning left to
   right and resuming after the end of each match.

   buffer     NUL-terminated text to scan.
   searchFor  POSIX extended regular expression.
   buflen     length of `buffer`; only used as the return value when the
              pattern fails to compile (behaviour kept from the original).

   Returns the number of matches, or `buflen` if regcomp rejects the
   pattern.  */
unsigned long
count_matches (char *buffer, char *searchFor, unsigned long buflen)
{
  regex_t preg;
  regmatch_t pmatch;
  unsigned long matches, start;

  if (regcomp (&preg, searchFor, REG_EXTENDED) != 0)
    return buflen;

  matches = 0;
  start = 0;
  while (regexec (&preg, buffer + start, 1, &pmatch, 0) == 0)
    {
      matches++;
      start += (unsigned long) pmatch.rm_eo;

      /* A zero-length match would leave `start` unchanged and spin
         forever; step one byte forward, or stop at the terminator.  */
      if (pmatch.rm_eo == pmatch.rm_so)
	{
	  if (buffer[start] == '\0')
	    break;
	  start++;
	}
    }

  /* Release the compiled pattern; it was previously leaked on every call. */
  regfree (&preg);
  return matches;
}


/* regex-dna benchmark driver.

   Reads all of stdin into memory, strips FASTA header lines (">...") and
   newlines, prints the match count for each of nine pattern variants,
   expands the IUB ambiguity codes into explicit alternatives, and finally
   prints three lengths: the raw input, the stripped sequence, and the
   expanded sequence.

   Returns 0 on success and EXIT_FAILURE if memory runs out or stdin
   reports a read error.  */
int main ()
{
  /* IUB ambiguity code and the alternation that replaces it, applied in
     this order.  */
  struct iub_code { char *code; char *alternatives; };
  static struct iub_code iub[] = {
     { "B", "(c|g|t)" }, { "D", "(a|g|t)" }, { "H", "(a|c|t)" },
     { "K", "(g|t)" }, { "M", "(a|c)" }, { "N", "(a|c|g|t)" },
     { "R", "(a|g)" }, { "S", "(c|g)" }, { "V", "(a|c|g)" },
     { "W", "(a|t)" }, { "Y", "(c|t)" }
     };
  char variants[9][27] = {
     "agggtaaa|tttaccct", "[cgt]gggtaaa|tttaccc[acg]",
     "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct",
     "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct",
     "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct",
     "agggtaa[cgt]|[acg]ttaccct"
     };
  char *x, *buffer, *tmp;
  unsigned long buflen, seqlen, clen, rlen;
  size_t readlen;
  size_t i;

  seqlen = 0;
  buflen = 206848;
  buffer = malloc (buflen + 1);
  if (!buffer)
    return EXIT_FAILURE;
  x = buffer;

  /* Read stdin in chunks.  Each request leaves room for the terminating
     NUL, and the buffer is enlarged whenever more input may follow.  */
  while ((readlen = fread (x, 1, buflen - seqlen - 1, stdin)) != 0)
    {
      seqlen = seqlen + readlen;
      if (!feof (stdin))
	{
	  buflen = buflen + 40960;
	  tmp = realloc (buffer, buflen + 1);
	  if (tmp == NULL)
	    {
	      free (buffer);
	      return EXIT_FAILURE;
	    }
	  buffer = tmp;
	  x = &(buffer[seqlen]);
	}
    }
  if (ferror (stdin))
    {
      free (buffer);
      return EXIT_FAILURE;
    }
  buffer[seqlen] = 0;

  /* Strip FASTA description lines and all newlines.  */
  clen = seqlen;
  regsub (&buffer, ">.*|\n", "", &clen);
  if (!buffer)
    return EXIT_FAILURE;

  for (i = 0; i < sizeof variants / sizeof variants[0]; i++)
     printf ("%s %lu\n", variants[i], count_matches (buffer, variants[i], clen));

  /* Expand every IUB code into its explicit alternation.  */
  rlen = clen;
  for (i = 0; i < sizeof iub / sizeof iub[0]; i++)
    {
      regsub (&buffer, iub[i].code, iub[i].alternatives, &rlen);
      if (!buffer)
	return EXIT_FAILURE;
    }

  printf ("\n%lu\n%lu\n%lu\n", seqlen, clen, rlen);
  free (buffer);
  return 0;
}

 make, command-line, and program output logs

Thu, 21 Apr 2011 14:21:03 GMT

COMMAND LINE:
/usr/local/src/cint/bin/cint -K -p -E -E -J0 -l/usr/lib/libpcre.so regexdna.cint 0 < regexdna-input50000.txt

UNEXPECTED OUTPUT 

0a1,13
> agggtaaa|tttaccct 3
> [cgt]gggtaaa|tttaccc[acg] 12
> a[act]ggtaaa|tttacc[agt]t 43
> ag[act]gtaaa|tttac[agt]ct 27
> agg[act]taaa|ttta[agt]cct 58
> aggg[acg]aaa|ttt[cgt]ccct 16
> agggt[cgt]aa|tt[acg]accct 15
> agggta[cgt]a|t[acg]taccct 18
> agggtaa[cgt]|[acg]ttaccct 20
> 
> 508411
> 500000
> 668262

PROGRAM OUTPUT:

Error: can not call private or protected function FILE:regexdna.cint LINE:14
  regex.h     15 regex_t regex_t::regex_t();
Calling : regex_t::regex_t();
Match rank: file     line  signature
*        0 regex.h     15 regex_t regex_t::regex_t();
!!! return from main() function

Revised BSD license

  Home   Conclusions   License   Play