performance measurements

Each table row shows performance measurements for this Rust program with a particular command-line input value N.

 N      CPU secs  Elapsed secs  Memory KB  Code B  ≈ CPU Load
 500    0.08      0.03          ?          1020    60% 67% 67% 67%
 3,000  2.45      0.84          5,108      1020    75% 86% 67% 61%
 5,500  7.87      2.06          5,112      1020    95% 96% 96% 96%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read spectral-norm benchmark to see what this program should do.

 notes

rustc 0.12.0 (ba4081a5a 2014-10-07 13:44:41 -0700)

 spectral-norm Rust #3 program source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi

#![allow(non_snake_case)]
#![feature(unboxed_closures, overloaded_calls)]

use std::iter::AdditiveIterator;
use std::mem;
use std::os;
use std::raw::Repr;
use std::simd::f64x2;

fn main() {
    let args = os::args();
    let answer = spectralnorm(from_str(args[1].as_slice()).unwrap());
    println!("{:.9f}", answer);
}

// Approximates the spectral norm for an `n`-by-`n` problem.
//
// Starting from all-ones vectors, `mult_AtAv` is applied alternately
// (`u` -> `v`, then `v` -> `u`) for ten rounds; the result is
// `sqrt(dot(u, v) / dot(v, v))`.
//
// `n` must be even: `mult` reads its input vector two elements at a time.
fn spectralnorm(n: uint) -> f64 {
    assert!(n % 2 == 0, "only even lengths are accepted");

    let mut u = Vec::from_elem(n, 1.0);
    let mut v = Vec::from_elem(n, 1.0);
    // Scratch space for the intermediate product inside `mult_AtAv`.
    let mut scratch = Vec::from_elem(n, 1.0);

    for _ in range(0u, 10) {
        mult_AtAv(u.as_slice(), v.as_mut_slice(), scratch.as_mut_slice());
        mult_AtAv(v.as_slice(), u.as_mut_slice(), scratch.as_mut_slice());
    }

    let numerator = dot(u.as_slice(), v.as_slice());
    let denominator = dot(v.as_slice(), v.as_slice());
    (numerator / denominator).sqrt()
}

// Applies `mult_Av` followed by `mult_Atv` to `v`, writing the final result
// into `out`.
//
// `tmp` receives the intermediate result of `mult_Av` and is then read by
// `mult_Atv`; its previous contents are overwritten.
fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
    // First stage: tmp = (matrix) * v.
    mult_Av(v, tmp);
    // Second stage: out = (transposed matrix) * tmp.
    mult_Atv(tmp, out);
}

// Fills `out` so that `out[i]` is the sum over `j` of `v[j] / A(i, j)`.
//
// `out` is split into sub-slices by `parallel`; each sub-slice is filled by
// `mult`, which is told the index `start` of its first row.
fn mult_Av(v: &[f64], out: &mut [f64]) {
    parallel(out, |&: start, out| mult(v, out, start, |i, j| A(i, j)));
}

// Fills `out` so that `out[i]` is the sum over `j` of `v[j] / A(j, i)`.
//
// Identical to `mult_Av` except that the row and column indices handed to `A`
// are swapped, i.e. the transposed matrix is used.
fn mult_Atv(v: &[f64], out: &mut [f64]) {
    parallel(out, |&: start, out| mult(v, out, start, |i, j| A(j, i)));
}

// Computes one band of a matrix-vector product.
//
// For every slot of `out`, the row index is `i = start + offset`, and the slot
// receives the sum over all `j` of `v[j] / a(i, j)`.
//
// Columns are handled two at a time with `f64x2`, so `v` must have an even
// length: an odd-length `v` would leave a final chunk without a `chunk[1]`.
fn mult(v: &[f64], out: &mut [f64], start: uint, a: |uint, uint| -> f64) {
    for (offset, slot) in out.iter_mut().enumerate() {
        let i = start + offset;
        let mut acc = f64x2(0.0, 0.0);
        for (pair, chunk) in v.chunks(2).enumerate() {
            // Column index of the first element of this pair.
            let j = 2 * pair;
            let numer = f64x2(chunk[0], chunk[1]);
            let denom = f64x2(a(i, j), a(i, j + 1));
            acc += numer / denom;
        }
        // Fold the two SIMD lanes into the scalar result.
        let f64x2(lo, hi) = acc;
        *slot = lo + hi;
    }
}

// Returns `(i + j) * (i + j + 1) / 2 + i + 1` as a float.
//
// The arithmetic is done in unsigned integers (the division by two is an
// integer division) and only the final value is converted to `f64`. Callers
// in this file divide by the returned value (see `mult`).
fn A(i: uint, j: uint) -> f64 {
    let diagonal = i + j;
    let triangle = diagonal * (diagonal + 1) / 2;
    (triangle + i + 1) as f64
}

// Returns the dot product of `v` and `u`.
//
// Elements are paired up with `zip`, so if the slices differ in length the
// extra elements of the longer one are ignored. Products are accumulated
// left to right starting from zero.
fn dot(v: &[f64], u: &[f64]) -> f64 {
    let mut total = 0.0f64;
    for (x, y) in v.iter().zip(u.iter()) {
        total += *x * *y;
    }
    total
}

// Executes a closure in parallel over the given mutable slice.
//
// `v` is cut into consecutive sub-slices of `size` elements (the last one may
// be shorter) and one task is spawned per sub-slice. Each task calls `f` with
// the index within `v` at which its sub-slice starts, plus the sub-slice
// itself. The function only returns once the completion channel below has
// been closed by every task.
fn parallel<'a, T, F>(v: &'a mut [T], f: F)
                      where T: Send + Sync,
                            F: Fn(uint, &'a mut [T]) + Sync {
    // No value is ever sent on this channel; it is used purely as a completion
    // barrier: every task owns a clone of `tx` and drops it when done.
    let (tx, rx) = channel();
    // Sub-slice length. The `+ 1` keeps `size` non-zero even when `v` has
    // fewer elements than there are CPUs, and yields at most `num_cpus()`
    // sub-slices.
    let size = v.len() / os::num_cpus() + 1;

    for (i, chunk) in v.chunks_mut(size).enumerate() {
        let tx = tx.clone();

        // Need to convert `f` and `chunk` to something that can cross the task
        // boundary: `f` is smuggled as a type-erased raw pointer and `chunk`
        // as its raw slice representation, both rebuilt inside the task.
        let f = &f as *const _ as *const uint;
        let raw = chunk.repr();
        spawn(proc() {
            // Restore the real closure type that was erased above.
            let f = f as *const F;
            // NOTE(review): soundness relies on `parallel` not returning
            // before this task is finished, so that `f` and the memory behind
            // `raw` stay alive; the wait on `rx` at the end of the function is
            // what is meant to guarantee that. The sub-slices come from
            // `chunks_mut`, so they do not overlap.
            unsafe { (*f)(i * size, mem::transmute(raw)) }
            // Signal completion by releasing this task's sender.
            drop(tx)
        });
    }
    // Release the original sender so that only the tasks keep the channel open.
    drop(tx);
    // Block until every sender is gone, i.e. until all tasks have finished.
    for () in rx.iter() {}
}

 make, command-line, and program output logs

Sat, 18 Oct 2014 17:21:08 GMT

MAKE:
/usr/local/src/rust/bin/rustc --opt-level=3 -C target-cpu=core2 -C lto spectralnorm.rs -o spectralnorm.rust-3.rust_run
12.97s to complete and log all make actions

COMMAND LINE:
./spectralnorm.rust-3.rust_run 5500

PROGRAM OUTPUT:
1.274224153

Revised BSD license

  Home   Conclusions   License   Play