The Computer Language
Benchmarks Game

regex-redux C# .NET Core #3 program

source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
 *
 * contributed by Jimmy Tang
 * modified by Josh Goldfoot (2016)
 */

using System;
using System.Text;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

/// <summary>
/// regex-redux benchmark program: reads a FASTA file from standard input,
/// counts matches of nine DNA 8-mer patterns (and their reverse complements),
/// applies the benchmark's ordered substitution patterns, and prints the
/// match counts followed by three lengths (raw input, cleaned sequence,
/// sequence after substitution).
/// </summary>
class regexredux
{
    // Ordered (pattern, replacement) pairs applied one after another to the
    // cleaned sequence. Order matters: later patterns consume the "<n>"
    // markers and "|" separators produced by earlier replacements.
    static readonly string[][] substitutions = {
        new[] { "tHa[Nt]", "<4>" },
        new[] { "aND|caN|Ha[DS]|WaS", "<3>" },
        new[] { "a[NSt]|BY", "<2>" },
        new[] { "<[^>]*>", "|" },
        new[] { "\\|[^|][^|]*\\|", "-" }
    };

    // The 8-mer variants whose occurrences are counted and reported.
    static readonly string[] variants = {
       "agggtaaa|tttaccct"
      ,"[cgt]gggtaaa|tttaccc[acg]"
      ,"a[act]ggtaaa|tttacc[agt]t"
      ,"ag[act]gtaaa|tttac[agt]ct"
      ,"agg[act]taaa|ttta[agt]cct"
      ,"aggg[acg]aaa|ttt[cgt]ccct"
      ,"agggt[cgt]aa|tt[acg]accct"
      ,"agggta[cgt]a|t[acg]taccct"
      ,"agggtaa[cgt]|[acg]ttaccct"
    };

    /// <summary>
    /// Reads all of standard input, dropping FASTA description lines (those
    /// starting with '>') and line terminators.
    /// </summary>
    /// <param name="seqLength">Length of the returned, cleaned sequence.</param>
    /// <param name="inputLength">
    /// Length of the raw input, computed as the cleaned length plus the
    /// removed characters. Every line is counted as ending in exactly one
    /// newline character.
    /// </param>
    /// <returns>The concatenated sequence lines.</returns>
    static string readStdIn(out int seqLength, out int inputLength)
    {
        StringBuilder sb = new StringBuilder(10000000);
        int removedLength = 0;
        String line;

        while ((line = Console.ReadLine()) != null)
        {
            // Check the length first: a blank line has no line[0] and would
            // otherwise throw IndexOutOfRangeException.
            if (line.Length > 0 && line[0] == '>')
            {
                // Whole description line plus its newline is discarded.
                removedLength += line.Length + 1;
            }
            else
            {
                sb.Append(line);
                // Only the newline is discarded (also true for a blank line).
                removedLength += 1;
            }
        }
        seqLength = sb.Length;
        inputLength = seqLength + removedLength;
        return sb.ToString();
    }

    // Applies every substitution in order and returns the resulting length.
    static int substitutedLength(string sequence)
    {
        string current = sequence;
        foreach (var pair in substitutions)
        {
            current = new Regex(pair[0]).Replace(current, pair[1]);
        }
        return current.Length;
    }

    // Counts the non-overlapping matches of pattern in sequence.
    static int countMatches(string pattern, string sequence)
    {
        Regex r = new Regex(pattern);
        int count = 0;
        for (Match m = r.Match(sequence); m.Success; m = m.NextMatch())
        {
            count++;
        }
        return count;
    }

    /// <summary>
    /// Program entry point: runs the substitution pass on a background task
    /// while the variant counts are computed in parallel, then prints the
    /// counts in declaration order followed by the three lengths.
    /// </summary>
    static void Main()
    {
        int seqLength, initialLength;
        var sequence = readStdIn(out seqLength, out initialLength);

        // Started first so it overlaps with the variant counting below.
        var newSequenceLength = Task.Run(() => substitutedLength(sequence));

        // Each iteration writes only its own slot, so the output order is
        // the declaration order regardless of scheduling.
        var output = new string[variants.Length];
        Parallel.ForEach(variants, (v, _, i) => {
            output[i] = v + " " + countMatches(v, sequence);
        });

        foreach (var s in output)
        {
            Console.WriteLine(s);
        }

        // Result blocks until the background task has finished.
        Console.WriteLine("\n{0}\n{1}\n{2}", initialLength, seqLength, newSequenceLength.Result);
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
dotnet 1.0.1 005db40cd1
"System.GC.Server": true


Sun, 16 Apr 2017 16:25:11 GMT

MAKE:
cp regexredux.csharpcore-3.csharpcore Program.cs
cp Include/csharpcore/tmp.csproj .
cp Include/csharpcore/runtimeconfig.template.json .
mkdir obj
cp Include/csharpcore/tmp.csproj.nuget.g.props ./obj
cp Include/csharpcore/tmp.csproj.nuget.g.targets ./obj
/usr/bin/dotnet build -c Release
Microsoft (R) Build Engine version 15.1.548.43366
Copyright (C) Microsoft Corporation. All rights reserved.

  tmp -> /home/dunham/benchmarksgame_quadcore/regexredux/tmp/bin/Release/netcoreapp1.1/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:03.61
4.10s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp1.1/tmp.dll 0 < regexredux-input50000.txt

UNEXPECTED OUTPUT 

13c13
< 668262
---
> 273927

PROGRAM OUTPUT:
agggtaaa|tttaccct 3
[cgt]gggtaaa|tttaccc[acg] 12
a[act]ggtaaa|tttacc[agt]t 43
ag[act]gtaaa|tttac[agt]ct 27
agg[act]taaa|ttta[agt]cct 58
aggg[acg]aaa|ttt[cgt]ccct 16
agggt[cgt]aa|tt[acg]accct 15
agggta[cgt]a|t[acg]taccct 18
agggtaa[cgt]|[acg]ttaccct 20

508411
500000
668262