The Computer Language
Benchmarks Game

regex-redux Swift #3 program

source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// regex-dna program contributed by Daniel Muellenborn
// parallelized using Dispatch by Dave Grove
// converted from regex-dna program

import Foundation
import Dispatch

// `NSRegularExpression` is the class name on every supported platform. The
// un-prefixed `RegularExpression` spelling was only available in pre-release
// Swift 3 toolchains on macOS, so a platform-specific alias is not needed.
typealias RegExT = NSRegularExpression

/// Compiles `pattern` into a regular expression using the default options.
///
/// Every pattern used by this program is a string literal, so a pattern that
/// fails to compile is a programming error: the process stops with a message
/// naming the offending pattern instead of an anonymous `try!` trap.
///
/// - Parameter pattern: The regular expression source text.
/// - Returns: The compiled expression.
func regex(_ pattern: String) -> RegExT {
    do {
        return try RegExT(pattern: pattern, options: [])
    } catch {
        fatalError("Invalid regular expression \"\(pattern)\": \(error)")
    }
}

// Read all of stdin. The raw byte count is kept because it is reported as the
// first length in the final output.
let inputData = FileHandle.standardInput.readDataToEndOfFile()
let inputLength = inputData.count
guard let decodedInput = String(data: inputData, encoding: .utf8) else {
    fatalError("Input on stdin is not valid UTF-8")
}
var sequence = decodedInput

// Remove sequence descriptions and linefeeds.
// NSRange is measured in UTF-16 code units, so the range is taken from the
// decoded string rather than from the byte count of the raw input.
sequence = regex(">[^\n]*\n|\n").stringByReplacingMatches(
    in: sequence,
    options: [],
    range: NSRange(location: 0, length: sequence.utf16.count),
    withTemplate: "")
let cleanedInput = sequence.utf8
let codeLength = cleanedInput.count

// Count matches
// Each entry pairs an 8-mer pattern with the pattern for the opposite strand.
let variants = [
   "agggtaaa|tttaccct",
   "[cgt]gggtaaa|tttaccc[acg]",
   "a[act]ggtaaa|tttacc[agt]t",
   "ag[act]gtaaa|tttac[agt]ct",
   "agg[act]taaa|ttta[agt]cct",
   "aggg[acg]aaa|ttt[cgt]ccct",
   "agggt[cgt]aa|tt[acg]accct",
   "agggta[cgt]a|t[acg]taccct",
   "agggtaa[cgt]|[acg]ttaccct",
   ]

// One result slot per variant. A raw buffer lets every task store into its own
// slot without synchronisation: task `i` only ever writes `counts[i]`.
let counts = UnsafeMutablePointer<Int>.allocate(capacity: variants.count)
counts.initialize(repeating: 0, count: variants.count)

// NSRange is measured in UTF-16 code units, so the search range is derived
// from the string's UTF-16 view rather than from its UTF-8 byte count.
let range = NSRange(location: 0, length: sequence.utf16.count)
let dg = DispatchGroup()
for i in variants.indices {
    DispatchQueue.global().async(group: dg) {
        counts[i] = regex(variants[i]).numberOfMatches(in: sequence, options: [], range: range)
    }
}
// Block until every variant has been counted before reading the results.
dg.wait()

// output regex and counts
for (i, variant) in variants.enumerated() {
    print(variant, counts[i])
}

// The tallies have been printed; release the manually allocated buffer.
// `counts` must not be read after this point.
counts.deallocate()

// Substitutions applied in order; each pattern is paired with its template.
let replacements = [
  (regex("tHa[Nt]"), "<4>"),
  (regex("aND|caN|Ha[DS]|WaS"), "<3>"),
  (regex("a[NSt]|BY"), "<2>"),
  (regex("<[^>]*>"), "|"),
  (regex("\\|[^|][^|]*\\|"), "-"),
  ]

for (re, replacement) in replacements {
    // Every pass changes the length of `sequence`, so the range must be
    // recomputed from the current string. Reusing a range measured before the
    // first substitution would extend past the end of the shortened string on
    // all later passes.
    let currentRange = NSRange(location: 0, length: sequence.utf16.count)
    sequence = re.stringByReplacingMatches(in: sequence, options: [], range: currentRange, withTemplate: replacement)
}

let resultLength = sequence.utf8.count

// Blank line, then raw input length, cleaned length and substituted length.
print("", inputLength, codeLength, resultLength, separator: "\n")


    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 4.1-dev (LLVM 3fa7aed3df, Clang 529966cf07, Swift 607f23d66d)
Target: x86_64-unknown-linux-gnu




Wed, 15 Nov 2017 19:40:31 GMT

MAKE:
/opt/src/swift-4.1-DEVELOPMENT-SNAPSHOT-2017-11-06-a-ubuntu16.10/usr/bin/swiftc regexredux.swift-3.swift -Ounchecked  -o regexredux.swift-3.swift_run

1.60s to complete and log all make actions

COMMAND LINE:
./regexredux.swift-3.swift_run 0 < regexredux-input50000.txt

UNEXPECTED OUTPUT 

13c13
< 499935
---
> 273927

PROGRAM OUTPUT:
agggtaaa|tttaccct 3
[cgt]gggtaaa|tttaccc[acg] 12
a[act]ggtaaa|tttacc[agt]t 43
ag[act]gtaaa|tttac[agt]ct 27
agg[act]taaa|ttta[agt]cct 58
aggg[acg]aaa|ttt[cgt]ccct 16
agggt[cgt]aa|tt[acg]accct 15
agggta[cgt]a|t[acg]taccct 18
agggtaa[cgt]|[acg]ttaccct 20

508411
500000
499935