The Computer Language
Benchmarks Game

regex-dna Rust #2 program

source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by BurntSushi

extern crate regex;

use std::io::{self, Read};
use std::sync::Arc;
use std::thread;

// Compiles the given pattern expression into a `regex::Regex`.
// Panics (through `unwrap`) if the pattern fails to compile.
macro_rules! regex {
    ($pattern:expr) => {
        ::regex::Regex::new($pattern).unwrap()
    };
}

// Program entry point.
//
// Reads all of stdin into memory, then:
//   1. removes every `>` header (from `>` through the end of its line) and
//      every remaining newline,
//   2. counts the matches of nine pattern variants in the cleaned sequence,
//      spawning one thread per variant,
//   3. rewrites the single-letter codes B, D, H, K, M, N, R, S, V, W and Y
//      into explicit alternation groups, one pass per code,
// and finally prints each variant with its match count, followed by the
// byte lengths of the raw input, the cleaned sequence and the rewritten
// sequence.
//
// Panics if stdin cannot be read as UTF-8 text, if a pattern fails to
// compile, or if one of the counting threads panicked.
fn main() {
    // Pre-size the buffer (50 MiB) so reading a large input does not
    // repeatedly reallocate.
    let mut seq = String::with_capacity(50 * (1 << 20));
    io::stdin()
        .read_to_string(&mut seq)
        .expect("failed to read input from stdin");
    let ilen = seq.len();

    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
    let clen = seq.len();
    // Move the cleaned sequence into the Arc instead of cloning it: the
    // counting threads and the substitution pass below only need shared,
    // read-only access, so a second full copy of the text is unnecessary.
    let seq_arc = Arc::new(seq);

    let variants = vec![
        regex!("agggtaaa|tttaccct"),
        regex!("[cgt]gggtaaa|tttaccc[acg]"),
        regex!("a[act]ggtaaa|tttacc[agt]t"),
        regex!("ag[act]gtaaa|tttac[agt]ct"),
        regex!("agg[act]taaa|ttta[agt]cct"),
        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
        regex!("agggt[cgt]aa|tt[acg]accct"),
        regex!("agggta[cgt]a|t[acg]taccct"),
        regex!("agggtaa[cgt]|[acg]ttaccct"),
    ];
    // Each entry pairs the pattern's textual form (kept for printing) with
    // the handle of the thread that counts its matches.
    let mut counts = vec![];
    for variant in variants {
        let seq = seq_arc.clone();
        let restr = variant.to_string();
        let future = thread::spawn(move || variant.find_iter(&seq).count());
        counts.push((restr, future));
    }

    let substs = vec![
        (regex!("B"), "(c|g|t)"),
        (regex!("D"), "(a|g|t)"),
        (regex!("H"), "(a|c|t)"),
        (regex!("K"), "(g|t)"),
        (regex!("M"), "(a|c)"),
        (regex!("N"), "(a|c|g|t)"),
        (regex!("R"), "(a|g)"),
        (regex!("S"), "(c|g)"),
        (regex!("V"), "(a|c|g)"),
        (regex!("W"), "(a|t)"),
        (regex!("Y"), "(c|t)"),
    ];
    // The substitutions run on the main thread while the counting threads
    // are still working. The first pass reads straight from the shared
    // sequence; every later pass reads the output of the previous one.
    let mut expanded: Option<String> = None;
    for (re, replacement) in substs {
        let next = re.replace_all(
            expanded.as_ref().map_or(seq_arc.as_str(), |s| s.as_str()),
            replacement,
        );
        expanded = Some(next);
    }
    // With no substitutions applied the rewritten text would equal the
    // cleaned sequence, hence the `clen` fallback.
    let rlen = expanded.map_or(clen, |s| s.len());

    // Joining in insertion order keeps the report in the same order as the
    // variant list, regardless of which thread finishes first.
    for (variant, count) in counts {
        println!(
            "{} {}",
            variant,
            count.join().expect("match-counting thread panicked")
        );
    }
    println!("\n{}\n{}\n{}", ilen, clen, rlen);
}
    

notes, command-line, and program output

NOTES:
32-bit Ubuntu one core
rustc 1.13.0 (2c6933acc 2016-11-07)


Fri, 11 Nov 2016 20:43:40 GMT

MAKE:
/usr/local/src/rust-1.13.0/bin/rustc -C opt-level=3 -C target-cpu=core2 -L /usr/local/src/rust-libs regexdna.rs -o regexdna.rust-2.rust_run
3.31s to complete and log all make actions

COMMAND LINE:
./regexdna.rust-2.rust_run 0 < regexdna-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
66800214