performance measurements

Each table row shows performance measurements for this C gcc program with a particular command-line input value N.

 N  CPU secs Elapsed secs Memory KB Code B ≈ CPU Load
250,0000.660.253162439  96% 46% 42% 92%
2,500,0005.421.8452,9402439  97% 52% 54% 94%
25,000,00053.4416.99163,7882439  64% 97% 61% 97%

Read the ↓ make, command line, and program output logs to see how this program was run.

Read k-nucleotide benchmark to see what this program should do.

 notes

gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)

 k-nucleotide C gcc #6 program source code

/* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 
   Based on bit encoding idea of C++ contribution of Andrew Moon
   Copy task division idea from Java entry, contributed by James McIlree
   Contributed by Petr Prokhorenkov
*/

//#include "simple_hash3.h"

#include <ctype.h>
#include <malloc.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#define HT_SIZE 2000000

typedef unsigned char uint8_t;

/* Thread pool implementation */

struct tp_entry {
    void *job;
    void *param;
};

struct tp {
    struct tp_entry *jobs;
    int capacity;
    int size;
    pthread_mutex_t mutex;
};

struct tp *
tp_create(int max_jobs) {
    struct tp *pool = malloc(sizeof(*pool));

    pool->jobs = malloc(sizeof(struct tp_entry)*max_jobs);
    pool->capacity = max_jobs;
    pool->size = 0;
    pthread_mutex_init(&pool->mutex, 0);

    return pool;
}

void
tp_destroy(struct tp *pool) {
    free(pool->jobs);
    pthread_mutex_destroy(&pool->mutex);
    free(pool);
}

void
tp_add_job(struct tp *pool, void *job, void *param) {
    if (pool->size < pool->capacity) {
        pool->jobs[pool->size].job = job;
        pool->jobs[pool->size].param = param;
        ++pool->size;
    }
}

void *
_tp_run(void *param) {
    struct tp *pool = param;

    for (;;) {
        void (*job)(void *) = 0;
        void *param = 0;

        pthread_mutex_lock(&pool->mutex);
        if (pool->size > 0) {
            job = pool->jobs[pool->size - 1].job;
            param = pool->jobs[pool->size - 1].param;
            --pool->size;
        }
        pthread_mutex_unlock(&pool->mutex);
        if (job == 0) {
            return 0;
        } else {
            job(param);
        }
    }
}

void
tp_run(struct tp *pool, int max_threads) {
    pthread_t threads[max_threads];
    for (int i = 0; i < max_threads; i++) {
        if (pthread_create(&threads[i], 0, &_tp_run, pool) < 0) {
            perror("pthread_create");
            exit(1);
        }
    }

    for (int i = 0; i < max_threads; i++) {
        pthread_join(threads[i], 0);
    }
}

char *
read_stdin(int *stdin_size) {
    struct stat stat;
    if (fstat(0, &stat) < 0) {
        perror("fstat");
        exit(1);
    }

    char *result = malloc(stat.st_size);

    do {
        fgets_unlocked(result, stat.st_size, stdin);
    } while (strncmp(result, ">THREE", 6));

    int read = 0;
    while (fgets_unlocked(result + read, stat.st_size - read, stdin)) {
        int len = strlen(result + read);
        if (len == 0 || result[read] == '>') {
            break;
        }
        read += len;
        if (result[read - 1] == '\n') {
            read--;
        }
    }

    result[read++] = '>';
    result = realloc(result, read);
    *stdin_size = read;

    return result;
}

static
inline uint8_t
pack_symbol(char c) {
    switch (c) {
        case 'a':
        case 'A':
            return 0;
        case 'c':
        case 'C':
            return 1;
        case 'g':
        case 'G':
            return 2;
        case 't':
        case 'T':
            return 3;
    }
    fprintf(stderr, "Unexpected: %c\n", c);
    exit(-1);
}

char unpack_symbol(uint8_t c) {
    static char table[] = {'A', 'C', 'G', 'T'};
    
    return table[c];
}

static
inline char *
next_char(char *p) {
    do {
        ++p;
    } while (isspace(*p));

    return p;
}

static
inline uint64_t
push_char(uint64_t cur, uint8_t c) {
    return (cur << 2) + pack_symbol(c);
}

static
inline uint64_t
rotate_code(uint64_t cur, uint8_t c, int frame) {
    return push_char(cur, c) & ((1ull << 2*frame) - 1);
}

uint64_t
pack_key(char *key, int len) {
    uint64_t code = 0;
    for (int i = 0; i < len; i++) {
        code = push_char(code, *key);
        key = next_char(key);
    }

    return code;
}

void
unpack_key(uint64_t key, int length, char *buffer) {
    int i;

    for (i = 0; i < length; i++) {
        buffer[i] = unpack_symbol(key & 3);
        key >>= 2;
    }
    buffer[i] = 0;

    // Reverse string.
    for (int j = (i - 1)/2; j >= 0; j--) {
        char c = buffer[j];
        buffer[j] = buffer[i - 1 - j];
        buffer[i - 1 - j] = c;
    }
}

void
generate_seqences(char *start, int length, int frame, struct ht_ht *ht) {
    uint64_t code = 0;
    char *p = start;
    char *end = start + length;

    // Pull first frame.
    for (int i = 0; i < frame; i++) {
        code = push_char(code, *p);
        p = next_char(p);
    }
    ht_find_new(ht, code)->val++;

    while (*p != '>' && p < end) {
        code = rotate_code(code, *p, frame);
        ht_find_new(ht, code)->val++;
        p = next_char(p);
    }
}

int
key_count_cmp(const void *l, const void *r) {
    const struct ht_node *lhs = l, *rhs = r;

    if (lhs->val != rhs->val) {
        return rhs->val - lhs->val;
    } else {
        // Overflow is possible here,
        // so use comparisons instead of subtraction.
        if (lhs->key < rhs->key) {
            return -1;
        } else if (lhs->key > rhs->key) {
            return 1;
        } else {
            return 0;
        }
    }
}

struct print_freqs_param {
    char *start;
    int length;
    int frame;
    char *output;
    int output_size;
};

struct ht_node *
ht_values_as_vector(struct ht_ht *ht) {
    struct ht_node *v = malloc(ht->items*sizeof(struct ht_node));
    struct ht_node *n = ht_first(ht);

    for (int i = 0; i < ht->items; i++) {
        v[i] = *n;
        n = ht_next(ht);
    }

    return v;
}

void
print_freqs(struct print_freqs_param *param) {
    char *start = param->start;
    int length = param->length;
    int frame = param->frame;
    char *output = param->output;
    int output_size = param->output_size;

    struct ht_ht *ht = ht_create(32);
    char buffer[frame + 1];
    int output_pos = 0;

    generate_seqences(start, length, frame, ht);
    
    struct ht_node *counts = ht_values_as_vector(ht);
    int size = ht->items;

    qsort(counts, size, sizeof(struct ht_node), &key_count_cmp);

    int total_count = 0;
    for (int i = 0; i < size; i++) {
        total_count += counts[i].val;
    }

    for (int i = 0; i < size; i++) {
        unpack_key(counts[i].key, frame, buffer);
        output_pos += snprintf(output + output_pos, output_size - output_pos,
                "%s %.3f\n", buffer, counts[i].val*100.0f/total_count);
    }

    free(counts);
    ht_destroy(ht);
}

struct print_occurences_param {
    char *start;
    int length;
    char *nuc_seq;
    char *output;
    int output_size;
};

void
print_occurences(struct print_occurences_param *param) {
    char *start = param->start;
    int length = param->length;
    char *nuc_seq = param->nuc_seq;
    char *output = param->output;
    int output_size = param->output_size;
    int nuc_seq_len = strlen(nuc_seq);
    struct ht_ht *ht = ht_create(HT_SIZE);

    generate_seqences(start, length, nuc_seq_len, ht);

    uint64_t key = pack_key(nuc_seq, nuc_seq_len);
    int count = ht_find_new(ht, key)->val;
    snprintf(output, output_size, "%d\t%s\n", count, nuc_seq);
    
    ht_destroy(ht);
}

int
get_cpu_count(void) {
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    sched_getaffinity(0, sizeof(cpu_set), &cpu_set);

    return CPU_COUNT(&cpu_set);
}

#define MAX_OUTPUT 1024

int
main(void) {
    int stdin_size;
    char *stdin_mem = read_stdin(&stdin_size);
    int cpu_count = get_cpu_count();

    char output_buffer[7][MAX_OUTPUT];

#   define DECLARE_PARAM(o, n) {\
    .start = stdin_mem, \
    .length = stdin_size, \
    .frame = n,\
    .output = output_buffer[o],\
    .output_size = MAX_OUTPUT }

    struct print_freqs_param freq_params[2] = {
        DECLARE_PARAM(0, 1),
        DECLARE_PARAM(1, 2)
    }; 

#   undef DECLARE_PARAM

#   define DECLARE_PARAM(o, s) {\
    .start = stdin_mem, \
    .length = stdin_size, \
    .nuc_seq = s,\
    .output = output_buffer[o],\
    .output_size = MAX_OUTPUT }

    struct print_occurences_param occurences_params[5] = {
        DECLARE_PARAM(2, "GGT"),
        DECLARE_PARAM(3, "GGTA"),
        DECLARE_PARAM(4, "GGTATT"),
        DECLARE_PARAM(5, "GGTATTTTAATT"),
        DECLARE_PARAM(6, "GGTATTTTAATTTATAGT")
    };

#   undef DECLARE_PARAM

    struct tp *tp = tp_create(7);

    for (int i = 0 ; i < 2; i++) {
        tp_add_job(tp, &print_freqs, &freq_params[i]);
    }
    for (int i = 0 ;i <  5; i++) {
        tp_add_job(tp, &print_occurences, &occurences_params[i]);
    }

    tp_run(tp, cpu_count + 1);

    tp_destroy(tp);

    for (int i = 0; i < 2; i++) {
        printf("%s\n", output_buffer[i]);
    }
    for (int i = 2; i < 7; i++) {
        printf("%s", output_buffer[i]);
    }

    free(stdin_mem);

    return 0;
}

 make, command-line, and program output logs

Wed, 23 Apr 2014 23:07:24 GMT

MAKE:
/usr/bin/gcc -pipe -Wall -O3 -fomit-frame-pointer -march=native -std=c99 -include Include/simple_hash3.h -pthread knucleotide.gcc-6.c -o knucleotide.gcc-6.gcc_run 
In file included from <command-line>:0:0:
./Include/simple_hash3.h:195:31: warning: ‘ht_node_create’ is static but used in inline function ‘ht_find_new’ which is not static [enabled by default]
   return(ht->tbl[hash_code] = ht_node_create(key));
                               ^
./Include/simple_hash3.h:191:24: warning: ‘ht_node_create’ is static but used in inline function ‘ht_find_new’ which is not static [enabled by default]
   return (prev->next = ht_node_create(key));
                        ^
./Include/simple_hash3.h:173:21: warning: ‘ht_hashcode’ is static but used in inline function ‘ht_find_new’ which is not static [enabled by default]
     int hash_code = ht_hashcode(ht, key);
                     ^
./Include/simple_hash3.h:156:21: warning: ‘ht_hashcode’ is static but used in inline function ‘ht_find’ which is not static [enabled by default]
     int hash_code = ht_hashcode(ht, key);
                     ^
knucleotide.gcc-6.c: In function ‘read_stdin’:
knucleotide.gcc-6.c:117:23: warning: ignoring return value of ‘fgets_unlocked’, declared with attribute warn_unused_result [-Wunused-result]
         fgets_unlocked(result, stat.st_size, stdin);
                       ^
rm knucleotide.gcc-6.c
0.34s to complete and log all make actions

COMMAND LINE:
./knucleotide.gcc-6.gcc_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT

Revised BSD license

  Home   Conclusions   License   Play