The Computer Language
Benchmarks Game

regex-redux Swift program

source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by Francois Green


import Foundation
import Dispatch

// Read everything available on standard input as raw bytes and remember how
// many bytes were read.
let input = FileHandle.standardInput.readDataToEndOfFile()
let inputLength = input.count

// Decode the bytes as UTF-8 text. The force-unwrap traps if the bytes are not
// valid UTF-8. Declared `var` because the text is rewritten in place later.
var sequence = String(data: input, encoding: .utf8)!

/// Compiles `pattern` into an `NSRegularExpression` with no options set.
///
/// Compilation failure is not recoverable here: `try!` traps if the pattern
/// is not a valid regular expression.
let regex: (String) -> NSRegularExpression = {
  try! NSRegularExpression(pattern: $0, options: [])
}

// Remove every span that runs from a '>' through the end of its line, plus
// all remaining newline characters, leaving only the sequence text.
//
// NSRange is measured in UTF-16 code units, so the range is sized from the
// string's UTF-16 view. The raw input byte count only equals that length for
// pure-ASCII input; for anything else it overruns the end of the string.
sequence = regex(">[^\n]*\n|\n").stringByReplacingMatches(
  in: sequence,
  options: [],
  range: NSRange(location: 0, length: sequence.utf16.count),
  withTemplate: "")

// Size of the cleaned sequence in UTF-8 bytes.
let codeLength = sequence.utf8.count

// UTF-8 byte length of the sequence after all substitutions below have been
// applied. Written once by the background task; stays nil until it finishes.
var resultLength: Int?

// Tracks completion of the background substitution pass.
let group = DispatchGroup()

// Register the work with the group BEFORE dispatching it. If `enter()` ran
// inside the closure, a `group.wait()` reached before the closure started
// would see an empty group and return immediately, leaving `resultLength`
// still nil for whoever reads it next.
group.enter()
DispatchQueue.global(qos: .background).async {
  // Applied strictly in order: each pass operates on the previous pass's output.
  let substitutions: [(pattern: String, template: String)] = [
    (pattern: "tHa[Nt]",            template: "<4>"),
    (pattern: "aND|caN|Ha[DS]|WaS", template: "<3>"),
    (pattern: "a[NSt]|BY",          template: "<2>"),
    (pattern: "<[^>]*>",            template: "|"),
    (pattern: "\\|[^|][^|]*\\|",    template: "-")
  ]
  resultLength = substitutions.reduce(sequence) { buffer, step in
    // NSRange counts UTF-16 code units, hence the UTF-16 view of the buffer.
    let wholeBuffer = NSRange(location: 0, length: buffer.utf16.count)
    return regex(step.pattern).stringByReplacingMatches(in: buffer, options: [], range: wholeBuffer, withTemplate: step.template)
  }.utf8.count
  // Balances the `enter()` above and releases anyone blocked in `group.wait()`.
  group.leave()
}

// NSRange is measured in UTF-16 code units, so the search range is sized from
// the UTF-16 view rather than the UTF-8 byte count (the two differ for any
// non-ASCII text). It is computed once because `sequence` is not modified
// while the variants are being counted.
let variantSearchRange = NSRange(location: 0, length: sequence.utf16.count)

// For each variant pattern, print the pattern followed by the number of
// matches found in the cleaned sequence.
[
  "agggtaaa|tttaccct",
  "[cgt]gggtaaa|tttaccc[acg]",
  "a[act]ggtaaa|tttacc[agt]t",
  "ag[act]gtaaa|tttac[agt]ct",
  "agg[act]taaa|ttta[agt]cct",
  "aggg[acg]aaa|ttt[cgt]ccct",
  "agggt[cgt]aa|tt[acg]accct",
  "agggta[cgt]a|t[acg]taccct",
  "agggtaa[cgt]|[acg]ttaccct"
].forEach { variant in
  print(variant, regex(variant).numberOfMatches(in: sequence, options: [], range: variantSearchRange))
}

// Block until the background substitution pass has signalled completion,
// then print a blank line followed by the three lengths, one per line.
group.wait()
let summaryLines = [inputLength, codeLength, resultLength!].map { String($0) }
print("", summaryLines.joined(separator: "\n"), separator: "\n")
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 4.1-dev (LLVM 3fa7aed3df, Clang 529966cf07, Swift 607f23d66d)
Target: x86_64-unknown-linux-gnu




Wed, 15 Nov 2017 19:50:15 GMT

MAKE:
/opt/src/swift-4.1-DEVELOPMENT-SNAPSHOT-2017-11-06-a-ubuntu16.10/usr/bin/swiftc regexredux.swift -Ounchecked  -o regexredux.swift_run

1.21s to complete and log all make actions

COMMAND LINE:
./regexredux.swift_run 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361