This converts the "pointer" to a 64-bit type (unsigned long long, typedef'd
as 'pointer') divided into two 32-bit halves (ptr and count), instead of
using a union of a struct and an 'unsigned long'.
All atomics are seq_cst for now. Once the code is in better shape,
I'll relax these to more appropriate memory orderings.
#include "main.h"
extern unsigned iterations;
#include "main.h"
extern unsigned iterations;
-extern unsigned multi;
-extern unsigned initial_nodes;
-extern unsigned repetitions;
-extern unsigned work;
void parse_args(int argc, char **argv)
{
void parse_args(int argc, char **argv)
{
- while ((c = getopt(argc, argv, "i:m:n:p:r:w:")) != EOF)
+ while ((c = getopt(argc, argv, "i:p:")) != EOF)
switch(c) {
case 'i': iterations = atoi(optarg); break;
switch(c) {
case 'i': iterations = atoi(optarg); break;
- case 'm': multi = atoi(optarg); break;
- case 'n': initial_nodes = atoi(optarg); break;
case 'p': procs = atoi(optarg); break;
case 'p': procs = atoi(optarg); break;
- case 'r': repetitions = atoi(optarg); break;
- case 'w': work = atoi(optarg); break;
#include "main.h"
#include <stdlib.h>
#include "main.h"
#include <stdlib.h>
-#define NUM_PROCESSORS 12
-
-struct tms tim;
-struct tms tim1;
-
-int shmid;
-
-unsigned pid;
-char* name = "";
-unsigned procs = 1;
-unsigned multi = 1;
-unsigned initial_nodes = 0;
-unsigned repetitions = 1;
-unsigned work = 0;
private_t private;
shared_mem_t *smp;
private_t private;
shared_mem_t *smp;
+static void main_task(void *param)
- unsigned i,j;
- struct tms time_val;
- clock_t t1, t2;
+ int pid = *((int *)param);
- if(pid==0) {
- init_queue();
- }
- init_private();
- for(i=0;i<iterations;i++) {
+ init_private(pid);
+ for (i = 0; i < iterations; i++) {
val = private.value;
enqueue(val);
val = private.value;
enqueue(val);
-void main_task()
-{
- unsigned processor;
- unsigned i;
-
- processor = (pid/multi)+1;
- processor %= NUM_PROCESSORS;
- for (i=0; i<repetitions; i++) {
- time_test();
- }
-}
-
int user_main(int argc, char **argv)
{
int i, num_threads;
thrd_t *t;
int user_main(int argc, char **argv)
{
int i, num_threads;
thrd_t *t;
- name = argv[0];
- iterations = (iterations + ((procs*multi)>>1))/(procs*multi);
+ iterations = (iterations + (procs >> 1)) / procs;
smp = (shared_mem_t *)calloc(1, sizeof(shared_mem_t));
assert(smp);
smp = (shared_mem_t *)calloc(1, sizeof(shared_mem_t));
assert(smp);
- num_threads = procs * multi;
t = malloc(num_threads * sizeof(thrd_t));
t = malloc(num_threads * sizeof(thrd_t));
+ param = malloc(num_threads * sizeof(*param));
- for (i = 0; i < num_threads; i++)
- thrd_create(&t[i], main_task, NULL);
+ init_queue();
+ for (i = 0; i < num_threads; i++) {
+ param[i] = i;
+ thrd_create(&t[i], main_task, &param[i]);
+ }
for (i = 0; i < num_threads; i++)
thrd_join(t[i]);
for (i = 0; i < num_threads; i++)
thrd_join(t[i]);
-extern unsigned pid;
-extern unsigned iterations;
-extern unsigned initial_nodes;
+extern unsigned int iterations;
extern private_t private;
extern private_t private;
-extern shared_mem_t* smp;
+extern shared_mem_t *smp;
+void init_private(int pid)
- private.node = 2 + initial_nodes + pid;
- private.value = 1 + initial_nodes + (pid * iterations);
+ private.node = 2 + pid;
+ private.value = 1 + (pid * iterations);
-static unsigned new_node()
+static unsigned int new_node()
-static void reclaim(unsigned node)
+static void reclaim(unsigned int node)
{
private.node = node;
}
void init_queue()
{
{
private.node = node;
}
void init_queue()
{
+ unsigned int i;
+ pointer head;
+ pointer tail;
+ pointer next;
- smp->head.sep.ptr = 1;
- smp->head.sep.count = 0;
- smp->tail.sep.ptr = 1;
- smp->tail.sep.count = 0;
- smp->nodes[1].next.sep.ptr = NULL;
- smp->nodes[1].next.sep.count = 0;
+ head = MAKE_POINTER(1, 0);
+ tail = MAKE_POINTER(1, 0);
+ next = MAKE_POINTER(0, 0); // (NULL, 0)
+
+ atomic_init(&smp->nodes[0].next, 0); // assumed initialized in original example
+
+ atomic_store(&smp->head, head);
+ atomic_store(&smp->tail, tail);
+ atomic_store(&smp->nodes[1].next, next);
/* initialize avail list */
/* initialize avail list */
- for (i=2; i<MAX_NODES; i++) {
- smp->nodes[i].next.sep.ptr = i+1;
- smp->nodes[i].next.sep.count = 0;
- }
- smp->nodes[MAX_NODES].next.sep.ptr = NULL;
- smp->nodes[MAX_NODES].next.sep.count = 0;
-
- /* initialize queue contents */
- if (initial_nodes > 0) {
- for (i=2; i<initial_nodes+2; i++) {
- smp->nodes[i].value = i;
- smp->nodes[i-1].next.sep.ptr = i;
- smp->nodes[i].next.sep.ptr = NULL;
- }
- smp->head.sep.ptr = 1;
- smp->tail.sep.ptr = 1 + initial_nodes;
+ for (i = 2; i < MAX_NODES; i++) {
+ next = MAKE_POINTER(i + 1, 0);
+ atomic_store(&smp->nodes[i].next, next);
+
+ next = MAKE_POINTER(0, 0); // (NULL, 0)
+ atomic_store(&smp->nodes[MAX_NODES].next, next);
-void enqueue(unsigned val)
+void enqueue(unsigned int val)
- unsigned success;
- unsigned node;
- pointer_t tail;
- pointer_t next;
+ unsigned int success = 0;
+ unsigned int node;
+ pointer tail;
+ pointer next;
+ pointer tmp;
node = new_node();
smp->nodes[node].value = val;
node = new_node();
smp->nodes[node].value = val;
- smp->nodes[node].next.sep.ptr = NULL;
+ tmp = atomic_load(&smp->nodes[node].next);
+ set_ptr(&tmp, 0); // NULL
+ atomic_store(&smp->nodes[node].next, tmp);
- for (success = FALSE; success == FALSE; ) {
- tail.con = smp->tail.con;
- next.con = smp->nodes[tail.sep.ptr].next.con;
- if (tail.con == smp->tail.con) {
- if (next.sep.ptr == NULL) {
- success = cas(&smp->nodes[tail.sep.ptr].next,
- next.con,
- MAKE_LONG(node, next.sep.count+1));
+ while (!success) {
+ tail = atomic_load(&smp->tail);
+ next = atomic_load(&smp->nodes[get_ptr(tail)].next);
+ if (tail == atomic_load(&smp->tail)) {
+ if (get_ptr(next) == 0) { // == NULL
+ pointer val = MAKE_POINTER(node, get_count(next) + 1);
+ success = atomic_compare_exchange_weak(&smp->nodes[get_ptr(tail)].next,
+ &next,
+ val);
- if (success == FALSE) {
- cas(&smp->tail,
- tail.con,
- MAKE_LONG(smp->nodes[tail.sep.ptr].next.sep.ptr,
- tail.sep.count+1));
+ if (!success) {
+ unsigned int ptr = get_ptr(atomic_load(&smp->nodes[get_ptr(tail)].next));
+ pointer val = MAKE_POINTER(ptr,
+ get_count(tail) + 1);
+ atomic_compare_exchange_strong(&smp->tail,
+ &tail,
+ val);
- cas(&smp->tail,
- tail.con,
- MAKE_LONG(node, tail.sep.count+1));
+ atomic_compare_exchange_strong(&smp->tail,
+ &tail,
+ MAKE_POINTER(node, get_count(tail) + 1));
- unsigned value;
- unsigned success;
- pointer_t head;
- pointer_t tail;
- pointer_t next;
+ unsigned int value;
+ unsigned int success;
+ pointer head;
+ pointer tail;
+ pointer next;
for (success = FALSE; success == FALSE; ) {
for (success = FALSE; success == FALSE; ) {
- head.con = smp->head.con;
- tail.con = smp->tail.con;
- next.con = smp->nodes[head.sep.ptr].next.con;
- if (smp->head.con == head.con) {
- if (head.sep.ptr == tail.sep.ptr) {
- if (next.sep.ptr == NULL) {
- return NULL;
+ head = atomic_load(&smp->head);
+ tail = atomic_load(&smp->tail);
+ next = atomic_load(&smp->nodes[get_ptr(head)].next);
+ if (atomic_load(&smp->head) == head) {
+ if (get_ptr(head) == get_ptr(tail)) {
+ if (get_ptr(next) == 0) { // NULL
+ return 0; // NULL
- cas(&smp->tail,
- tail.con,
- MAKE_LONG(next.sep.ptr, tail.sep.count+1));
+ atomic_compare_exchange_weak(&smp->tail,
+ &tail,
+ MAKE_POINTER(get_ptr(next), get_count(tail) + 1));
- value = smp->nodes[next.sep.ptr].value;
- success = cas(&smp->head,
- head.con,
- MAKE_LONG(next.sep.ptr, head.sep.count+1));
+ value = smp->nodes[get_ptr(next)].value;
+ success = atomic_compare_exchange_weak(&smp->head,
+ &head,
+ MAKE_POINTER(get_ptr(next), get_count(head) + 1));
if (success == FALSE) {
thrd_yield();
}
}
}
}
if (success == FALSE) {
thrd_yield();
}
}
}
}
+ reclaim(get_ptr(head));
#define TRUE 1
#define FALSE 0
#define TRUE 1
#define FALSE 0
-#define MAKE_LONG(lo, hi) ((hi)<<16)+(lo)
+typedef unsigned long long pointer;
+typedef atomic_ullong pointer_t;
-typedef union pointer {
- struct {
- volatile unsigned short count;
- volatile unsigned short ptr;
- } sep;
- atomic_ulong con;
-} pointer_t;
+/*
+ * A 'pointer' is a single 64-bit word holding two 32-bit fields:
+ *   bits  0..31  ptr   (node index; 0 is used as NULL)
+ *   bits 32..63  count
+ * MAKE_POINTER, the setters and the getters must all agree on this layout.
+ * The masks are unsigned so that building COUNT_MASK does not left-shift a
+ * signed value past the range of long long.
+ */
+#define PTR_MASK 0xffffffffULL
+#define COUNT_MASK (0xffffffffULL << 32)
+#define MAKE_POINTER(ptr, count) ((((pointer)(count)) << 32) | (pointer)(ptr))
+
+/* Replace the count field (high 32 bits) of *p, leaving ptr untouched. */
+static inline void set_count(pointer *p, unsigned int val) { *p = (*p & ~COUNT_MASK) | ((pointer)val << 32); }
+/* Replace the ptr field (low 32 bits) of *p, leaving count untouched. */
+static inline void set_ptr(pointer *p, unsigned int val) { *p = (*p & ~PTR_MASK) | val; }
+/* Return the count field (high 32 bits) of p. */
+static inline unsigned int get_count(pointer p) { return (unsigned int)((p & COUNT_MASK) >> 32); }
+/* Return the ptr field (low 32 bits) of p. */
+static inline unsigned int get_ptr(pointer p) { return (unsigned int)(p & PTR_MASK); }
} node_t;
typedef struct private {
} node_t;
typedef struct private {
- unsigned node;
- unsigned value;
- unsigned serial[MAX_SERIAL];
+ unsigned int node;
+ unsigned int value;
+ unsigned int serial[MAX_SERIAL];
} private_t;
typedef struct shared_mem {
pointer_t head;
} private_t;
typedef struct shared_mem {
pointer_t head;
node_t nodes[MAX_NODES+1];
node_t nodes[MAX_NODES+1];
+void init_private(int pid);
void init_memory();
void init_queue();
void init_memory();
void init_queue();