From 8dfa47da8cb33ebaf7aae6db6548e75ed86e8f1e Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Thu, 27 May 2010 10:10:18 -0700 Subject: [PATCH] Atomic/SMP update, part 2. Added new atomic functions, renamed some old ones. Some #defines have been added for backward compatibility. Merged the pre- and post-ARMv6 implementations into a single file. Renamed the semi-private __android_membar_full_smp to USE_SCREAMING_CAPS since that's more appropriate for a macro. Added lots of comments. Note Mac OS X primitives have not been tested. Change-Id: If827260750aeb61ad5c2b760e30658e29dbb26f2 --- include/cutils/atomic-inline.h | 18 +- include/cutils/atomic.h | 107 ++++++-- libcutils/atomic-android-arm.S | 448 +++++++++++++++++++++++++------ libcutils/atomic-android-armv6.S | 174 ------------ libcutils/atomic-android-sh.c | 52 +++- libcutils/atomic.c | 142 ++++++---- 6 files changed, 576 insertions(+), 365 deletions(-) delete mode 100644 libcutils/atomic-android-armv6.S diff --git a/include/cutils/atomic-inline.h b/include/cutils/atomic-inline.h index 4f5ddf761..1c23be9f1 100644 --- a/include/cutils/atomic-inline.h +++ b/include/cutils/atomic-inline.h @@ -27,6 +27,12 @@ * * Anything that does include this file must set ANDROID_SMP to either * 0 or 1, indicating compilation for UP or SMP, respectively. + * + * Macros defined in this header: + * + * void ANDROID_MEMBAR_FULL(void) + * Full memory barrier. Provides a compiler reordering barrier, and + * on SMP systems emits an appropriate instruction. */ #if !defined(ANDROID_SMP) @@ -55,17 +61,17 @@ extern "C" { * This will fail on plain 16-bit Thumb. */ #if defined(__ARM_HAVE_DMB) -# define __android_membar_full_smp() \ +# define _ANDROID_MEMBAR_FULL_SMP() \ do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0) #else -# define __android_membar_full_smp() ARM_SMP_defined_but_no_DMB() +# define _ANDROID_MEMBAR_FULL_SMP() ARM_SMP_defined_but_no_DMB() #endif #elif defined(__i386__) || defined(__x86_64__) /* * For recent x86, we can use the SSE2 mfence instruction. */ -# define __android_membar_full_smp() \ +# define _ANDROID_MEMBAR_FULL_SMP() \ do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0) #else @@ -73,7 +79,7 @@ extern "C" { * Implementation not defined for this platform. Hopefully we're building * in uniprocessor mode. */ -# define __android_membar_full_smp() SMP_barrier_not_defined_for_platform() +# define _ANDROID_MEMBAR_FULL_SMP() SMP_barrier_not_defined_for_platform() #endif @@ -88,9 +94,9 @@ extern "C" { * be stale. Other CPUs may do less, but the end result is equivalent. */ #if ANDROID_SMP != 0 -# define android_membar_full() __android_membar_full_smp() +# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP() #else -# define android_membar_full() \ +# define ANDROID_MEMBAR_FULL() \ do { __asm__ __volatile__ ("" ::: "memory"); } while (0) #endif diff --git a/include/cutils/atomic.h b/include/cutils/atomic.h index 8e12902b0..0200709e1 100644 --- a/include/cutils/atomic.h +++ b/include/cutils/atomic.h @@ -25,51 +25,102 @@ extern "C" { #endif /* - * Unless otherwise noted, the operations below perform a full fence before - * the atomic operation on SMP systems ("release" semantics). + * A handful of basic atomic operations. The appropriate pthread + * functions should be used instead of these whenever possible. 
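[Editor's aside: to illustrate how the new ANDROID_MEMBAR_FULL() macro is meant
to be used. This is an illustrative sketch only, not part of the patch; the
publish/consume functions and globals are hypothetical, and the build is
assumed to define ANDROID_SMP as 0 or 1 before the include, as the header
requires.]

    #include <cutils/atomic-inline.h>

    static int32_t g_payload;             /* data handed from writer to reader */
    static volatile int32_t g_ready = 0;  /* publication flag */

    void publish(int32_t v) {
        g_payload = v;
        ANDROID_MEMBAR_FULL();   /* keep the payload write from sinking below
                                    the flag write (compiler and CPU) */
        g_ready = 1;
    }

    int32_t consume(void) {
        while (g_ready == 0) { } /* spin until the flag is set */
        ANDROID_MEMBAR_FULL();   /* keep the payload read from hoisting above
                                    the flag read */
        return g_payload;
    }

[On a uniprocessor build the macro degrades to a pure compiler barrier, which
is exactly what this pattern still needs.]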
+ * + * The "acquire" and "release" terms can be defined intuitively in terms + * of the placement of memory barriers in a simple lock implementation: + * - wait until compare-and-swap(lock-is-free --> lock-is-held) succeeds + * - barrier + * - [do work] + * - barrier + * - store(lock-is-free) + * In very crude terms, the initial (acquire) barrier prevents any of the + * "work" from happening before the lock is held, and the later (release) + * barrier ensures that all of the work happens before the lock is released. + * (Think of cached writes, cache read-ahead, and instruction reordering + * around the CAS and store instructions.) + * + * The barriers must apply to both the compiler and the CPU. Note it is + * legal for instructions that occur before an "acquire" barrier to be + * moved down below it, and for instructions that occur after a "release" + * barrier to be moved up above it. + * + * The ARM-driven implementation we use here is short on subtlety, + * and actually requests a full barrier from the compiler and the CPU. + * The only difference between acquire and release is in whether they + * are issued before or after the atomic operation with which they + * are associated. To ease the transition to C/C++ atomic intrinsics, + * you should not rely on this, and instead assume that only the minimal + * acquire/release protection is provided. + * + * NOTE: all int32_t* values are expected to be aligned on 32-bit boundaries. + * If they are not, atomicity is not guaranteed. */ -void android_atomic_write(int32_t value, volatile int32_t* addr); - /* - * all these atomic operations return the previous value + * Basic arithmetic and bitwise operations. These all provide a + * barrier with "release" ordering, and return the previous value. + * + * These have the same characteristics (e.g. what happens on overflow) + * as the equivalent non-atomic C operations. */ - int32_t android_atomic_inc(volatile int32_t* addr); int32_t android_atomic_dec(volatile int32_t* addr); - int32_t android_atomic_add(int32_t value, volatile int32_t* addr); int32_t android_atomic_and(int32_t value, volatile int32_t* addr); int32_t android_atomic_or(int32_t value, volatile int32_t* addr); -int32_t android_atomic_swap(int32_t value, volatile int32_t* addr); - /* - * cmpxchg returns zero if the new value was successfully written. This - * will only happen when *addr == oldvalue. + * Perform an atomic load with "acquire" or "release" ordering. * - * (The return value is inverted from implementations on other platforms, but - * matches the ARM ldrex/strex sematics. Note also this is a compare-and-set - * operation, not a compare-and-exchange operation, since we don't return - * the original value.) + * This is only necessary if you need the memory barrier. A 32-bit read + * from a 32-bit aligned address is atomic on all supported platforms. */ -int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, +int32_t android_atomic_acquire_load(volatile int32_t* addr); +int32_t android_atomic_release_load(volatile int32_t* addr); + +/* + * Perform an atomic store with "acquire" or "release" ordering. + * + * This is only necessary if you need the memory barrier. A 32-bit write + * to a 32-bit aligned address is atomic on all supported platforms. + */ +void android_atomic_acquire_store(int32_t value, volatile int32_t* addr); +void android_atomic_release_store(int32_t value, volatile int32_t* addr); + +/* + * Unconditional swap operation with "acquire" or "release" ordering. 
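[Editor's aside: the lock recipe described in the comment above maps directly
onto the new functions. A minimal spin lock sketch, illustrative only --
spin_lock/spin_unlock are hypothetical names, and 0 = free / 1 = held is an
assumed convention:]

    #include <cutils/atomic.h>

    void spin_lock(volatile int32_t* lock) {
        /* the acquire CAS returns 0 on success; its barrier keeps the
           protected "work" from being observed before the lock is held */
        while (android_atomic_acquire_cas(0 /*free*/, 1 /*held*/, lock) != 0) {
            /* spin until we transition lock-is-free -> lock-is-held */
        }
    }

    void spin_unlock(volatile int32_t* lock) {
        /* the release store's barrier makes all of the work above visible
           before the lock is seen as free again */
        android_atomic_release_store(0 /*free*/, lock);
    }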
+ *
+ * Stores the new value at *addr, and returns the previous value.
+ */
+int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr);
+int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr);
+
+/*
+ * Compare-and-set operation with "acquire" or "release" ordering.
+ *
+ * This returns zero if the new value was successfully stored, which will
+ * only happen when *addr == oldvalue.
+ *
+ * (The return value is inverted from implementations on other platforms,
+ * but matches the ARM ldrex/strex result.)
+ *
+ * Implementations that use the release CAS in a loop may be less efficient
+ * than possible, because we re-issue the memory barrier on each iteration.
+ */
+int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr);
+int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr);
 
 /*
- * Same basic operation as android_atomic_cmpxchg, but with "acquire"
- * semantics. The memory barrier, if required, is performed after the
- * new value is stored. Useful for acquiring a spin lock.
+ * Aliases for code using an older version of this header. These are now
+ * deprecated and should not be used. The definitions will be removed
+ * in a future release.
  */
-int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr);
-
-/*
- * Perform an atomic store with "release" semantics. The memory barrier,
- * if required, is performed before the store instruction. Useful for
- * releasing a spin lock.
- */
-#define android_atomic_release_store android_atomic_write
+#define android_atomic_write android_atomic_release_store
+#define android_atomic_cmpxchg android_atomic_release_cas
 
 #ifdef __cplusplus
 } // extern "C"
 
diff --git a/libcutils/atomic-android-arm.S b/libcutils/atomic-android-arm.S
index f918990c8..d8ee15cc6 100644
--- a/libcutils/atomic-android-arm.S
+++ b/libcutils/atomic-android-arm.S
@@ -14,68 +14,353 @@
  * limitations under the License.
  */
 
-/* TODO: insert memory barriers on SMP */
-
 #include <machine/cpu-features.h>
 
+    .text
+    .align
+
+    .global android_atomic_acquire_load
+    .type android_atomic_acquire_load, %function
+    .global android_atomic_release_load
+    .type android_atomic_release_load, %function
+
+    .global android_atomic_acquire_store
+    .type android_atomic_acquire_store, %function
+    .global android_atomic_release_store
+    .type android_atomic_release_store, %function
+
+    .global android_atomic_inc
+    .type android_atomic_inc, %function
+    .global android_atomic_dec
+    .type android_atomic_dec, %function
+
+    .global android_atomic_add
+    .type android_atomic_add, %function
+    .global android_atomic_and
+    .type android_atomic_and, %function
+    .global android_atomic_or
+    .type android_atomic_or, %function
+
+    .global android_atomic_release_swap
+    .type android_atomic_release_swap, %function
+    .global android_atomic_acquire_swap
+    .type android_atomic_acquire_swap, %function
+
+    .global android_atomic_release_cas
+    .type android_atomic_release_cas, %function
+    .global android_atomic_acquire_cas
+    .type android_atomic_acquire_cas, %function
+
+/* must be on or off; cannot be left undefined */
+#if !defined(ANDROID_SMP)
+# error "ANDROID_SMP not defined"
+#endif
+
+
+#if defined(__ARM_HAVE_LDREX_STREX)
 /*
- * NOTE: these atomic operations are SMP safe on all architectures.
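[Editor's aside: since the compatibility #defines above simply alias the old
names onto the release flavors, new code should spell out the variant it
wants. A sketch of the usual compare-and-set retry idiom, leaning on the
zero-on-success return convention -- atomic_max is a hypothetical helper,
not part of this patch:]

    #include <cutils/atomic.h>

    /* Atomically raise *addr to at least `value`; returns the previous value. */
    static int32_t atomic_max(int32_t value, volatile int32_t* addr) {
        int32_t old;
        do {
            old = *addr;
            if (old >= value)
                break;              /* already big enough; nothing to store */
        } while (android_atomic_release_cas(old, value, addr) != 0);
        return old;
    }

[As the header notes, a loop like this re-issues the release barrier on every
retry: correct, but potentially slower than a hand-written assembly loop.]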
+ * ===========================================================================
+ * ARMv6+ implementation
+ * ===========================================================================
+ *
+ * These functions use the LDREX/STREX instructions to perform atomic
+ * operations ("LL/SC" approach). On an SMP build they will include
+ * an appropriate memory barrier.
  */
 
-    .text
-    .align
-
-    .global android_atomic_write
-    .type android_atomic_write, %function
-
-    .global android_atomic_inc
-    .type android_atomic_inc, %function
-    .global android_atomic_dec
-    .type android_atomic_dec, %function
-
-    .global android_atomic_add
-    .type android_atomic_add, %function
-    .global android_atomic_and
-    .type android_atomic_and, %function
-    .global android_atomic_or
-    .type android_atomic_or, %function
-
-    .global android_atomic_swap
-    .type android_atomic_swap, %function
-
-    .global android_atomic_cmpxchg
-    .type android_atomic_cmpxchg, %function
-    .global android_atomic_acquire_cmpxchg
-    .type android_atomic_acquire_cmpxchg, %function
+/* generate the memory barrier instruction when the build requires it */
+#if ANDROID_SMP == 1
+# if defined(__ARM_HAVE_DMB)
+#  define SMP_DMB dmb
+# else
+   /* Data Memory Barrier operation, initiated by writing a value into a
+      specific register with the Move to Coprocessor instruction. We
+      arbitrarily use r0 here. */
+#  define SMP_DMB mcr p15, 0, r0, c7, c10, 5
+# endif
+#else
+# define SMP_DMB
+#endif
 
 /*
- * ----------------------------------------------------------------------------
- * int __kernel_cmpxchg(int oldval, int newval, int *ptr)
- * clobbered: r3, ip, flags
- * return 0 if a swap was made, non-zero otherwise.
- */
-
-    .equ    kernel_cmpxchg, 0xFFFF0FC0
-    .equ    kernel_atomic_base, 0xFFFF0FFF
+ * Sidebar: do we need to use the -EX instructions for atomic load/store?
+ *
+ * Consider the following situation (time advancing downward):
+ *
+ *    P1                  P2
+ *                        val = LDREX(mem)
+ *                        val = val + 1
+ *    STR(mem, otherval)
+ *                        STREX(mem, val)
+ *
+ * If these instructions issue on separate cores, the STREX will correctly
+ * fail because of the intervening store from the other core. If this same
+ * sequence of instructions executes in two threads on the same core, the
+ * STREX will incorrectly succeed.
+ *
+ * There are two ways to fix this:
+ * (1) Use LDREX/STREX for the atomic store operations. This doesn't
+ *     prevent the program from doing a non-exclusive store, but at least
+ *     this way if they always use atomic ops to access the memory location
+ *     there won't be any problems.
+ * (2) Have the kernel clear the LDREX reservation on thread context switch.
+ *     This will sometimes clear the reservation unnecessarily, but guarantees
+ *     correct behavior.
+ *
+ * The Android kernel performs a CLREX (v7) or dummy STREX (pre-v7), so we
+ * can get away with a non-exclusive store here.
+ *
+ * -----
+ *
+ * It's worth noting that using non-exclusive LDR and STR means the "load"
+ * and "store" operations aren't quite the same as read-modify-write or
+ * swap operations. By definition those must read and write memory in a
+ * way that is coherent across all cores, whereas our non-exclusive load
+ * and store have no such requirement.
+ *
+ * In practice this doesn't matter, because the only guarantees we make
+ * about who sees what when are tied to the acquire/release semantics.
+ * Other cores may not see our atomic releasing store as soon as they would
+ * if the code used LDREX/STREX, but a store-release operation doesn't make
+ * any guarantees as to how soon the store will be visible. It's allowable
+ * for operations that happen later in program order to become visible
+ * before the store. For an acquiring store we issue a full barrier after
+ * the store, ensuring that other processors see events in the proper order.
+ */
 
 /*
- * ----------------------------------------------------------------------------
- * android_atomic_write
- * input: r0=value, r1=address
+ * android_atomic_acquire_load / android_atomic_release_load
+ * input: r0 = address
+ * output: r0 = value
+ */
+android_atomic_acquire_load:
+    .fnstart
+    ldr     r0, [r0]
+    SMP_DMB
+    bx      lr
+    .fnend
+
+android_atomic_release_load:
+    .fnstart
+    SMP_DMB
+    ldr     r0, [r0]
+    bx      lr
+    .fnend
+
+
+/*
+ * android_atomic_acquire_store / android_atomic_release_store
+ * input: r0 = value, r1 = address
  * output: void
  */
-
-android_atomic_write:
+android_atomic_acquire_store:
+    .fnstart
     str     r0, [r1]
-    bx      lr;
+    SMP_DMB
+    bx      lr
+    .fnend
+
+android_atomic_release_store:
+    .fnstart
+    SMP_DMB
+    str     r0, [r1]
+    bx      lr
+    .fnend
+
+/*
+ * Common sequence for read-modify-write operations.
+ *
+ * input: r1 = address
+ * output: r0 = original value, returns to caller
+ */
+    .macro  RMWEX   op, arg
+1:  ldrex   r0, [r1]                    @ load current value into r0
+    \op     r2, r0, \arg                @ generate new value into r2
+    strex   r3, r2, [r1]                @ try to store new value; result in r3
+    cmp     r3, #0                      @ success?
+    bxeq    lr                          @ yes, return
+    b       1b                          @ no, retry
+    .endm
+
+
+/*
+ * android_atomic_inc
+ * input: r0 = address
+ * output: r0 = old value
+ */
+android_atomic_inc:
+    .fnstart
+    SMP_DMB
+    mov     r1, r0
+    RMWEX   add, #1
+    .fnend
+
+
+/*
+ * android_atomic_dec
+ * input: r0 = address
+ * output: r0 = old value
+ */
+android_atomic_dec:
+    .fnstart
+    SMP_DMB
+    mov     r1, r0
+    RMWEX   sub, #1
+    .fnend
+
+
+/*
+ * android_atomic_add
+ * input: r0 = value, r1 = address
+ * output: r0 = old value
+ */
+android_atomic_add:
+    .fnstart
+    SMP_DMB
+    mov     ip, r0
+    RMWEX   add, ip
+    .fnend
+
+
+/*
+ * android_atomic_and
+ * input: r0 = value, r1 = address
+ * output: r0 = old value
+ */
+android_atomic_and:
+    .fnstart
+    SMP_DMB
+    mov     ip, r0
+    RMWEX   and, ip
+    .fnend
+
+
+/*
+ * android_atomic_or
+ * input: r0 = value, r1 = address
+ * output: r0 = old value
+ */
+android_atomic_or:
+    .fnstart
+    SMP_DMB
+    mov     ip, r0
+    RMWEX   orr, ip
+    .fnend
+
+
+/*
+ * android_atomic_acquire_swap / android_atomic_release_swap
+ * input: r0 = value, r1 = address
+ * output: r0 = old value
+ */
+android_atomic_acquire_swap:
+    .fnstart
+1:  ldrex   r2, [r1]                    @ load current value into r2
+    strex   r3, r0, [r1]                @ store new value
+    teq     r3, #0                      @ strex success?
+    bne     1b                          @ no, loop
+    mov     r0, r2                      @ return old value
+    SMP_DMB
+    bx      lr
+    .fnend
+
+android_atomic_release_swap:
+    .fnstart
+    SMP_DMB
+1:  ldrex   r2, [r1]
+    strex   r3, r0, [r1]
+    teq     r3, #0
+    bne     1b
+    mov     r0, r2
+    bx      lr
+    .fnend
+
+
+/*
+ * android_atomic_acquire_cas / android_atomic_release_cas
+ * input: r0 = oldvalue, r1 = newvalue, r2 = address
+ * output: r0 = 0 (xchg done) or non-zero (xchg not done)
+ */
+android_atomic_acquire_cas:
+    .fnstart
+1:  mov     ip, #2                      @ ip=2 means "new != old"
+    ldrex   r3, [r2]                    @ load current value into r3
+    teq     r0, r3                      @ new == old?
+    strexeq ip, r1, [r2]                @ yes, try store, set ip to 0 or 1
+    teq     ip, #1                      @ strex failure?
+ beq 1b @ yes, retry + mov r0, ip @ return 0 on success, 2 on failure + SMP_DMB + bx lr + .fnend + +android_atomic_release_cas: + .fnstart + SMP_DMB +1: mov ip, #2 + ldrex r3, [r2] + teq r0, r3 + strexeq ip, r1, [r2] + teq ip, #1 + beq 1b + mov r0, ip + bx lr + .fnend + + +#else /*not defined __ARM_HAVE_LDREX_STREX*/ +/* + * =========================================================================== + * Pre-ARMv6 implementation + * =========================================================================== + * + * These functions call through the kernel cmpxchg facility, or use the + * (now deprecated) SWP instruction. They are not SMP-safe. + */ +#if ANDROID_SMP == 1 +# error "SMP defined, but LDREX/STREX not available" +#endif + +/* + * int __kernel_cmpxchg(int oldval, int newval, int *ptr) + * clobbered: r3, ip, flags + * return 0 if a swap was made, non-zero otherwise. + */ + .equ kernel_cmpxchg, 0xFFFF0FC0 + .equ kernel_atomic_base, 0xFFFF0FFF + + +/* + * android_atomic_acquire_load / android_atomic_release_load + * input: r0 = address + * output: r0 = value + */ +android_atomic_acquire_load: +android_atomic_release_load: + .fnstart + ldr r0, [r0] + bx lr + .fnend + + +/* + * android_atomic_acquire_store / android_atomic_release_store + * input: r0 = value, r1 = address + * output: void + */ +android_atomic_acquire_store: +android_atomic_release_store: + .fnstart + str r0, [r1] + bx lr + .fnend + /* - * ---------------------------------------------------------------------------- * android_atomic_inc * input: r0 = address * output: r0 = old value */ - android_atomic_inc: .fnstart .save {r4, lr} @@ -99,14 +384,13 @@ android_atomic_inc: ldmia sp!, {r4, lr} bx lr .fnend - + + /* - * ---------------------------------------------------------------------------- * android_atomic_dec - * input: r0=address + * input: r0 = address * output: r0 = old value */ - android_atomic_dec: .fnstart .save {r4, lr} @@ -130,14 +414,13 @@ android_atomic_dec: ldmia sp!, {r4, lr} bx lr .fnend - + + /* - * ---------------------------------------------------------------------------- * android_atomic_add - * input: r0=value, r1=address + * input: r0 = value, r1 = address * output: r0 = old value */ - android_atomic_add: .fnstart .save {r4, lr} @@ -162,19 +445,17 @@ android_atomic_add: ldmia sp!, {r4, lr} bx lr .fnend - - + + /* - * ---------------------------------------------------------------------------- * android_atomic_and - * input: r0=value, r1=address + * input: r0 = value, r1 = address * output: r0 = old value */ - android_atomic_and: .fnstart - .save {r4, r5, lr} - stmdb sp!, {r4, r5, lr} + .save {r4, r5, ip, lr} /* include ip for 64-bit stack alignment */ + stmdb sp!, {r4, r5, ip, lr} mov r2, r1 /* r2 = address */ mov r4, r0 /* r4 = the value */ 1: @ android_atomic_and @@ -194,21 +475,20 @@ android_atomic_and: #endif bcc 1b mov r0, r5 - ldmia sp!, {r4, r5, lr} + ldmia sp!, {r4, r5, ip, lr} bx lr .fnend - + + /* - * ---------------------------------------------------------------------------- * android_atomic_or - * input: r0=value, r1=address + * input: r0 = value, r1 = address * output: r0 = old value */ - android_atomic_or: .fnstart - .save {r4, r5, lr} - stmdb sp!, {r4, r5, lr} + .save {r4, r5, ip, lr} /* include ip for 64-bit stack alignment */ + stmdb sp!, {r4, r5, ip, lr} mov r2, r1 /* r2 = address */ mov r4, r0 /* r4 = the value */ 1: @ android_atomic_or @@ -228,40 +508,31 @@ android_atomic_or: #endif bcc 1b mov r0, r5 - ldmia sp!, {r4, r5, lr} + ldmia sp!, {r4, r5, ip, lr} bx lr .fnend + /* - * 
---------------------------------------------------------------------------- - * android_atomic_swap - * input: r0=value, r1=address + * android_atomic_acquire_swap / android_atomic_release_swap + * input: r0 = value, r1 = address * output: r0 = old value */ - -/* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */ -android_atomic_swap: -#if defined (__ARM_HAVE_LDREX_STREX) -1: ldrex r2, [r1] - strex r3, r0, [r1] - teq r3, #0 - bne 1b - mov r0, r2 - mcr p15, 0, r0, c7, c10, 5 /* or, use dmb */ -#else +android_atomic_acquire_swap: +android_atomic_release_swap: + .fnstart swp r0, r0, [r1] -#endif bx lr + .fnend + /* - * ---------------------------------------------------------------------------- - * android_atomic_cmpxchg - * input: r0=oldvalue, r1=newvalue, r2=address + * android_atomic_acquire_cas / android_atomic_release_cas + * input: r0 = oldvalue, r1 = newvalue, r2 = address * output: r0 = 0 (xchg done) or non-zero (xchg not done) */ - -android_atomic_acquire_cmpxchg: -android_atomic_cmpxchg: +android_atomic_acquire_cas: +android_atomic_release_cas: .fnstart .save {r4, lr} stmdb sp!, {r4, lr} @@ -287,3 +558,4 @@ android_atomic_cmpxchg: bx lr .fnend +#endif /*not defined __ARM_HAVE_LDREX_STREX*/ diff --git a/libcutils/atomic-android-armv6.S b/libcutils/atomic-android-armv6.S deleted file mode 100644 index 1574c9c92..000000000 --- a/libcutils/atomic-android-armv6.S +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - - .text - .align - - .global android_atomic_write - .type android_atomic_write, %function - - .global android_atomic_inc - .type android_atomic_inc, %function - .global android_atomic_dec - .type android_atomic_dec, %function - - .global android_atomic_add - .type android_atomic_add, %function - .global android_atomic_and - .type android_atomic_and, %function - .global android_atomic_or - .type android_atomic_or, %function - - .global android_atomic_swap - .type android_atomic_swap, %function - - .global android_atomic_cmpxchg - .type android_atomic_cmpxchg, %function - - - -/* FIXME: On SMP systems memory barriers may be needed */ -#warning "this file is not safe with SMP systems" - - -/* - * ---------------------------------------------------------------------------- - * android_atomic_write - * input: r0=value, r1=address - * output: void - */ - -android_atomic_write: - str r0, [r1] - bx lr; - -/* - * ---------------------------------------------------------------------------- - * android_atomic_inc - * input: r0 = address - * output: r0 = old value - */ - -android_atomic_inc: - mov r12, r0 -1: ldrex r0, [r12] - add r2, r0, #1 - strex r1, r2, [r12] - cmp r1, #0 - bxeq lr - b 1b - -/* - * ---------------------------------------------------------------------------- - * android_atomic_dec - * input: r0=address - * output: r0 = old value - */ - -android_atomic_dec: - mov r12, r0 -1: ldrex r0, [r12] - sub r2, r0, #1 - strex r1, r2, [r12] - cmp r1, #0 - bxeq lr - b 1b - - -/* - * ---------------------------------------------------------------------------- - * android_atomic_add - * input: r0=value, r1=address - * output: r0 = old value - */ - -android_atomic_add: - mov r12, r0 -1: ldrex r0, [r1] - add r2, r0, r12 - strex r3, r2, [r1] - cmp r3, #0 - bxeq lr - b 1b - -/* - * ---------------------------------------------------------------------------- - * android_atomic_and - * input: r0=value, r1=address - * output: r0 = old value - */ - -android_atomic_and: - mov r12, r0 -1: ldrex r0, [r1] - and r2, r0, r12 - strex r3, r2, [r1] - cmp r3, #0 - bxeq lr - b 1b - - -/* - * ---------------------------------------------------------------------------- - * android_atomic_or - * input: r0=value, r1=address - * output: r0 = old value - */ - -android_atomic_or: - mov r12, r0 -1: ldrex r0, [r1] - orr r2, r0, r12 - strex r3, r2, [r1] - cmp r3, #0 - bxeq lr - b 1b - -/* - * ---------------------------------------------------------------------------- - * android_atomic_swap - * input: r0=value, r1=address - * output: r0 = old value - */ - -android_atomic_swap: - swp r0, r0, [r1] - bx lr - -/* - * ---------------------------------------------------------------------------- - * android_atomic_cmpxchg - * input: r0=oldvalue, r1=newvalue, r2=address - * output: r0 = 0 (xchg done) or non-zero (xchg not done) - */ - -android_atomic_cmpxchg: - mov r12, r1 - ldrex r3, [r2] - eors r0, r0, r3 - strexeq r0, r12, [r2] - bx lr - - - -/* - * ---------------------------------------------------------------------------- - * android_atomic_cmpxchg_64 - * input: r0-r1=oldvalue, r2-r3=newvalue, arg4 (on stack)=address - * output: r0 = 0 (xchg done) or non-zero (xchg not done) - */ -/* TODO: NEED IMPLEMENTATION FOR THIS ARCHITECTURE */ diff --git a/libcutils/atomic-android-sh.c b/libcutils/atomic-android-sh.c index d95b02bdb..abe7d25fb 100644 --- a/libcutils/atomic-android-sh.c +++ b/libcutils/atomic-android-sh.c @@ -35,6 +35,9 @@ * ARM implementation, in this file above. 
 * We follow the fact that the initializer for mutex is a simple zero
 * value.
+ *
+ * (3) These operations are NOT safe for SMP, as there is currently no
+ * definition for a memory barrier operation.
  */
 
 #include <pthread.h>
 
@@ -46,18 +49,35 @@ static pthread_mutex_t  _swap_locks[SWAP_LOCK_COUNT];
 
    &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
 
-void android_atomic_write(int32_t value, volatile int32_t* addr) {
+int32_t android_atomic_acquire_load(volatile int32_t* addr)
+{
+    return *addr;
+}
+
+int32_t android_atomic_release_load(volatile int32_t* addr)
+{
+    return *addr;
+}
+
+void android_atomic_acquire_store(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, value, addr));
+    } while (android_atomic_release_cas(oldValue, value, addr));
+}
+
+void android_atomic_release_store(int32_t value, volatile int32_t* addr) {
+    int32_t oldValue;
+    do {
+        oldValue = *addr;
+    } while (android_atomic_release_cas(oldValue, value, addr));
 }
 
 int32_t android_atomic_inc(volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, oldValue+1, addr));
+    } while (android_atomic_release_cas(oldValue, oldValue+1, addr));
     return oldValue;
 }
 
@@ -65,7 +85,7 @@ int32_t android_atomic_dec(volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, oldValue-1, addr));
+    } while (android_atomic_release_cas(oldValue, oldValue-1, addr));
     return oldValue;
 }
 
@@ -73,7 +93,7 @@ int32_t android_atomic_add(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, oldValue+value, addr));
+    } while (android_atomic_release_cas(oldValue, oldValue+value, addr));
     return oldValue;
 }
 
@@ -81,7 +101,7 @@ int32_t android_atomic_and(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, oldValue&value, addr));
+    } while (android_atomic_release_cas(oldValue, oldValue&value, addr));
     return oldValue;
 }
 
@@ -89,11 +109,15 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, oldValue|value, addr));
+    } while (android_atomic_release_cas(oldValue, oldValue|value, addr));
     return oldValue;
 }
 
-int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
+int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr) {
+    return android_atomic_release_swap(value, addr);
+}
+
+int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
@@ -101,7 +125,12 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
     return oldValue;
 }
 
-int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
+int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
+                               volatile int32_t* addr) {
+    return android_atomic_release_cas(oldvalue, newvalue, addr);
+}
+
+int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue,
                            volatile int32_t* addr) {
     int result;
     pthread_mutex_t*  lock = SWAP_LOCK(addr);
@@ -118,8 +147,3 @@
     return result;
 }
 
-int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
-                           volatile int32_t* addr) {
-    return android_atomic_cmpxchg(oldValue, newValue, addr);
-}
-
diff --git a/libcutils/atomic.c b/libcutils/atomic.c
index d81890614..4cefa6b93 100644
--- a/libcutils/atomic.c
+++ b/libcutils/atomic.c
@@ -27,11 +27,25 @@
 
 #include <libkern/OSAtomic.h>
 
-void android_atomic_write(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (OSAtomicCompareAndSwap32Barrier(oldValue, value, (int32_t*)addr) == 0);
+int32_t android_atomic_acquire_load(volatile int32_t* addr) {
+    int32_t value = *addr;
+    OSMemoryBarrier();
+    return value;
+}
+
+int32_t android_atomic_release_load(volatile int32_t* addr) {
+    OSMemoryBarrier();
+    return *addr;
+}
+
+void android_atomic_acquire_store(int32_t value, volatile int32_t* addr) {
+    *addr = value;
+    OSMemoryBarrier();
+}
+
+void android_atomic_release_store(int32_t value, volatile int32_t* addr) {
+    OSMemoryBarrier();
+    *addr = value;
 }
 
 int32_t android_atomic_inc(volatile int32_t* addr) {
@@ -47,74 +61,81 @@ int32_t android_atomic_add(int32_t value, volatile int32_t* addr) {
 }
 
 int32_t android_atomic_and(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (OSAtomicCompareAndSwap32Barrier(oldValue, oldValue&value, (int32_t*)addr) == 0);
-    return oldValue;
+    return OSAtomicAnd32OrigBarrier(value, (int32_t*)addr);
 }
 
 int32_t android_atomic_or(int32_t value, volatile int32_t* addr) {
+    return OSAtomicOr32OrigBarrier(value, (int32_t*)addr);
+}
+
+int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (OSAtomicCompareAndSwap32Barrier(oldValue, oldValue|value, (int32_t*)addr) == 0);
+    } while (android_atomic_acquire_cas(oldValue, value, addr));
     return oldValue;
 }
 
-int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
+int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr) {
     int32_t oldValue;
     do {
         oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, value, addr));
+    } while (android_atomic_release_cas(oldValue, value, addr));
     return oldValue;
 }
 
-int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
+int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
     /* OS X CAS returns zero on failure; invert to return zero on success */
     return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
 }
 
-int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
+int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
                            volatile int32_t* addr) {
     int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
-    if (!result) {
+    if (result == 0) {
+        /* success, perform barrier */
         OSMemoryBarrier();
     }
+    return result;
 }
 
/*****************************************************************************/
 
 #elif defined(__i386__) || defined(__x86_64__)
 
-void android_atomic_write(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_cmpxchg(oldValue, value, addr));
+int32_t android_atomic_acquire_load(volatile int32_t* addr) {
+    int32_t value = *addr;
+    ANDROID_MEMBAR_FULL();
+    return value;
+}
+
+int32_t android_atomic_release_load(volatile int32_t* addr) {
+    ANDROID_MEMBAR_FULL();
+    return *addr;
+}
+
+void android_atomic_acquire_store(int32_t value, volatile int32_t* addr) {
+    *addr = value;
+    ANDROID_MEMBAR_FULL();
+}
+
+void android_atomic_release_store(int32_t value, volatile int32_t* addr) {
+    ANDROID_MEMBAR_FULL();
+    *addr = value;
 }
 
 int32_t android_atomic_inc(volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while
(android_atomic_cmpxchg(oldValue, oldValue+1, addr)); - return oldValue; + return android_atomic_add(1, addr); } int32_t android_atomic_dec(volatile int32_t* addr) { - int32_t oldValue; - do { - oldValue = *addr; - } while (android_atomic_cmpxchg(oldValue, oldValue-1, addr)); - return oldValue; + return android_atomic_add(-1, addr); } int32_t android_atomic_add(int32_t value, volatile int32_t* addr) { int32_t oldValue; do { oldValue = *addr; - } while (android_atomic_cmpxchg(oldValue, oldValue+value, addr)); + } while (android_atomic_release_cas(oldValue, oldValue+value, addr)); return oldValue; } @@ -122,7 +143,7 @@ int32_t android_atomic_and(int32_t value, volatile int32_t* addr) { int32_t oldValue; do { oldValue = *addr; - } while (android_atomic_cmpxchg(oldValue, oldValue&value, addr)); + } while (android_atomic_release_cas(oldValue, oldValue&value, addr)); return oldValue; } @@ -130,20 +151,13 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr) { int32_t oldValue; do { oldValue = *addr; - } while (android_atomic_cmpxchg(oldValue, oldValue|value, addr)); + } while (android_atomic_release_cas(oldValue, oldValue|value, addr)); return oldValue; } -int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) { - int32_t oldValue; - do { - oldValue = *addr; - } while (android_atomic_cmpxchg(oldValue, value, addr)); - return oldValue; -} - -int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) { - android_membar_full(); +/* returns 0 on successful swap */ +static inline int cas(int32_t oldvalue, int32_t newvalue, + volatile int32_t* addr) { int xchg; asm volatile ( @@ -156,18 +170,36 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* return xchg; } -int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue, +int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr) { + int32_t oldValue; + do { + oldValue = *addr; + } while (cas(oldValue, value, addr)); + ANDROID_MEMBAR_FULL(); + return oldValue; +} + +int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr) { + ANDROID_MEMBAR_FULL(); + int32_t oldValue; + do { + oldValue = *addr; + } while (cas(oldValue, value, addr)); + return oldValue; +} + +int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) { - int xchg; - asm volatile - ( - " lock; cmpxchg %%ecx, (%%edx);" - " setne %%al;" - " andl $1, %%eax" - : "=a" (xchg) - : "a" (oldvalue), "c" (newvalue), "d" (addr) - ); - android_membar_full(); + int xchg = cas(oldvalue, newvalue, addr); + if (xchg == 0) + ANDROID_MEMBAR_FULL(); + return xchg; +} + +int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue, + volatile int32_t* addr) { + ANDROID_MEMBAR_FULL(); + int xchg = cas(oldvalue, newvalue, addr); return xchg; }
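[Editor's aside: for comparison with the hand-rolled x86 cas() helper above,
the same zero-on-success convention can be expressed with GCC's __sync
intrinsic. This is an illustrative sketch only; note the builtin also implies
a full barrier, so the explicit ANDROID_MEMBAR_FULL() calls in the
acquire/release wrappers would become redundant with it.]

    #include <stdint.h>

    /* Returns 0 if the swap happened, non-zero otherwise (inverted from
       the builtin's boolean result, matching android_atomic_*_cas). */
    static inline int cas_sync(int32_t oldvalue, int32_t newvalue,
                               volatile int32_t* addr) {
        return __sync_bool_compare_and_swap(addr, oldvalue, newvalue) == 0;
    }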