The Computer Language
Benchmarks Game

regex-dna C# Mono LLVM #2 program

source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
 *
 * contributed by Jimmy Tang
 * modified by Sindhudweep Narayan Sarkar
*/

using System;
using System.Linq;
using System.Threading;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

class regexdna {
   // Reads all of standard input, strips it down to the bare sequence text,
   // prints how often each of nine patterns matches, then expands a set of
   // single-letter codes and prints three lengths: raw input, stripped
   // sequence, and expanded sequence.
   static void Main() {
      string rawInput = Console.In.ReadToEnd();
      int rawLength = rawInput.Length;

      // Drop everything from a '>' to the end of its line, plus every
      // remaining newline character.
      string dna = Regex.Replace(rawInput, ">.*\n|\n", "");
      int strippedLength = dna.Length;

      string[] variants = {
         "agggtaaa|tttaccct",
         "[cgt]gggtaaa|tttaccc[acg]",
         "a[act]ggtaaa|tttacc[agt]t",
         "ag[act]gtaaa|tttac[agt]ct",
         "agg[act]taaa|ttta[agt]cct",
         "aggg[acg]aaa|ttt[cgt]ccct",
         "agggt[cgt]aa|tt[acg]accct",
         "agggta[cgt]a|t[acg]taccct",
         "agggtaa[cgt]|[acg]ttaccct"
      };

      // One report line per pattern. Each parallel iteration writes only its
      // own slot, so the lines come out in the declared pattern order.
      var reportLines = new string[variants.Length];
      Parallel.For(0, variants.Length, slot => {
         reportLines[slot] = DescribeVariant(variants[slot], dna);
      });
      Console.WriteLine(string.Join("\n", reportLines));

      string expanded = ExpandCodes(dna);
      Console.WriteLine("\n{0}\n{1}\n{2}", rawLength, strippedLength, expanded.Length);
   }

   // Builds the "<pattern> <match count>" line for one pattern.
   static string DescribeVariant(string variant, string text) {
      var matcher = new Regex(variant, RegexOptions.Compiled);
      int hits = matcher.Matches(text).Count;
      return variant + " " + hits;
   }

   // Replaces each of the upper-case letters B, D, H, K, M, N, R, S, V, W, Y
   // with its parenthesised alternation of lower-case letters.
   static string ExpandCodes(string text) {
      var expansions = new Dictionary<string, string>();
      expansions.Add("B", "(c|g|t)");
      expansions.Add("D", "(a|g|t)");
      expansions.Add("H", "(a|c|t)");
      expansions.Add("K", "(g|t)");
      expansions.Add("M", "(a|c)");
      expansions.Add("N", "(a|c|g|t)");
      expansions.Add("R", "(a|g)");
      expansions.Add("S", "(c|g)");
      expansions.Add("V", "(a|c|g)");
      expansions.Add("W", "(a|t)");
      expansions.Add("Y", "(c|t)");

      var codeMatcher = new Regex("[WYKMSRBDVHN]", RegexOptions.Compiled);
      return codeMatcher.Replace(text, hit => expansions[hit.Value]);
   }
}
    

notes, command-line, and program output

NOTES:
32-bit Ubuntu one core
Mono JIT compiler version 4.5.1 (master/3e844dd Fri May  6 19:24:07 PDT 2016)
	LLVM:          yes(3.6.0svn-mono-master/9f79399)
	GC:            sgen



Tue, 28 Jun 2016 20:01:39 GMT

MAKE:
mv regexdna.csharpllvm-2.csharpllvm regexdna.csharpllvm-2.cs
/usr/local/bin/mcs  -optimize+ -platform:x86 -out:regexdna.csharpllvm-2.csharpllvm_run regexdna.csharpllvm-2.cs
rm regexdna.csharpllvm-2.cs
0.27s to complete and log all make actions

COMMAND LINE:
/usr/local/bin/mono --llvm --gc=sgen regexdna.csharpllvm-2.csharpllvm_run 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214