#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H
/*
* Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
* Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
* Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2009 Mathieu Desnoyers
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
* OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program
* for any purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is granted,
* provided the above notices are retained, and a notice that the code was
* modified is included with the above copyright notice.
*
* Code inspired by libuatomic_ops-1.2, inherited in part from the
* Boehm-Demers-Weiser conservative garbage collector.
*/
#include <urcu/compiler.h>
#include <urcu/system.h>
#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT
#ifdef __cplusplus
extern "C" {
#endif
/*
* Derived from AO_compare_and_swap() and AO_test_and_set_full().
*/
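/*
 * The asm memory operands below go through a cast to a large dummy
 * structure, so that the compiler passes the object at "addr" as a
 * memory operand without assuming a particular access width. This is
 * an explanatory note added here; the trick itself comes from the
 * AO_* code referenced above.
 */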
struct __uatomic_dummy {
unsigned long v[10];
};
#define __hp(x) ((struct __uatomic_dummy *)(x))
#define _uatomic_set(addr, v) ((void) CMM_STORE_SHARED(*(addr), (v)))
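/*
 * Note: _uatomic_set() is a plain CMM_STORE_SHARED(): naturally aligned
 * stores up to the native word size are atomic on x86, and any ordering
 * requirement is left to the caller.
 */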
/* cmpxchg */
static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
unsigned long _new, int len)
{
switch (len) {
case 1:
{
unsigned char result = old;
__asm__ __volatile__(
"lock; cmpxchgb %2, %1"
: "+a"(result), "+m"(*__hp(addr))
: "q"((unsigned char)_new)
: "memory");
return result;
}
case 2:
{
unsigned short result = old;
__asm__ __volatile__(
"lock; cmpxchgw %2, %1"
: "+a"(result), "+m"(*__hp(addr))
: "r"((unsigned short)_new)
: "memory");
return result;
}
case 4:
{
unsigned int result = old;
__asm__ __volatile__(
"lock; cmpxchgl %2, %1"
: "+a"(result), "+m"(*__hp(addr))
: "r"((unsigned int)_new)
: "memory");
return result;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
unsigned long result = old;
__asm__ __volatile__(
"lock; cmpxchgq %2, %1"
: "+a"(result), "+m"(*__hp(addr))
: "r"((unsigned long)_new)
: "memory");
return result;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return 0;
}
#define _uatomic_cmpxchg(addr, old, _new) \
((__typeof__(*(addr))) __uatomic_cmpxchg((addr), \
caa_cast_long_keep_sign(old), \
caa_cast_long_keep_sign(_new),\
sizeof(*(addr))))
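/*
 * Usage sketch (illustrative only; "count" is a hypothetical variable
 * and init_once() a hypothetical helper):
 *
 *	static unsigned long count;
 *	...
 *	if (uatomic_cmpxchg(&count, 0, 1) == 0)
 *		init_once();
 *
 * The return value is the content of *addr observed by the instruction;
 * the swap took place if and only if it equals "old". The lock-prefixed
 * cmpxchg also acts as a full memory barrier.
 */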
/* xchg */
static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
/*
 * Note: the "xchg" instruction does not need a "lock" prefix:
 * it is implicitly locked when one of its operands is in memory.
 */
switch (len) {
case 1:
{
unsigned char result;
__asm__ __volatile__(
"xchgb %0, %1"
: "=q"(result), "+m"(*__hp(addr))
: "0" ((unsigned char)val)
: "memory");
return result;
}
case 2:
{
unsigned short result;
__asm__ __volatile__(
"xchgw %0, %1"
: "=r"(result), "+m"(*__hp(addr))
: "0" ((unsigned short)val)
: "memory");
return result;
}
case 4:
{
unsigned int result;
__asm__ __volatile__(
"xchgl %0, %1"
: "=r"(result), "+m"(*__hp(addr))
: "0" ((unsigned int)val)
: "memory");
return result;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
unsigned long result;
__asm__ __volatile__(
"xchgq %0, %1"
: "=r"(result), "+m"(*__hp(addr))
: "0" ((unsigned long)val)
: "memory");
return result;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return 0;
}
#define _uatomic_xchg(addr, v) \
((__typeof__(*(addr))) __uatomic_exchange((addr), \
caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
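/*
 * Usage sketch (illustrative only; "current_state", "old_state" and
 * NEW_STATE are hypothetical):
 *
 *	old_state = uatomic_xchg(&current_state, NEW_STATE);
 *
 * The previous content of *addr is returned. Since xchg with a memory
 * operand is implicitly locked, it also acts as a full memory barrier.
 */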
/* uatomic_add_return */
static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
int len)
{
switch (len) {
case 1:
{
unsigned char result = val;
__asm__ __volatile__(
"lock; xaddb %1, %0"
: "+m"(*__hp(addr)), "+q" (result)
:
: "memory");
return result + (unsigned char)val;
}
case 2:
{
unsigned short result = val;
__asm__ __volatile__(
"lock; xaddw %1, %0"
: "+m"(*__hp(addr)), "+r" (result)
:
: "memory");
return result + (unsigned short)val;
}
case 4:
{
unsigned int result = val;
__asm__ __volatile__(
"lock; xaddl %1, %0"
: "+m"(*__hp(addr)), "+r" (result)
:
: "memory");
return result + (unsigned int)val;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
unsigned long result = val;
__asm__ __volatile__(
"lock; xaddq %1, %0"
: "+m"(*__hp(addr)), "+r" (result)
:
: "memory");
return result + (unsigned long)val;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return 0;
}
#define _uatomic_add_return(addr, v) \
((__typeof__(*(addr))) __uatomic_add_return((addr), \
caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
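/*
 * Usage sketch (illustrative only; "hits" is a hypothetical counter):
 *
 *	unsigned long new_total = uatomic_add_return(&hits, 1);
 *
 * The returned value is the content of *addr after the addition
 * (old value + v). The lock-prefixed xadd acts as a full memory
 * barrier.
 */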
/* uatomic_and */
static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
switch (len) {
case 1:
{
__asm__ __volatile__(
"lock; andb %1, %0"
: "=m"(*__hp(addr))
: "iq" ((unsigned char)val)
: "memory");
return;
}
case 2:
{
__asm__ __volatile__(
"lock; andw %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned short)val)
: "memory");
return;
}
case 4:
{
__asm__ __volatile__(
"lock; andl %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned int)val)
: "memory");
return;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
__asm__ __volatile__(
"lock; andq %1, %0"
: "=m"(*__hp(addr))
: "er" ((unsigned long)val)
: "memory");
return;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return;
}
#define _uatomic_and(addr, v) \
(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
/* uatomic_or */
static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
switch (len) {
case 1:
{
__asm__ __volatile__(
"lock; orb %1, %0"
: "=m"(*__hp(addr))
: "iq" ((unsigned char)val)
: "memory");
return;
}
case 2:
{
__asm__ __volatile__(
"lock; orw %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned short)val)
: "memory");
return;
}
case 4:
{
__asm__ __volatile__(
"lock; orl %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned int)val)
: "memory");
return;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
__asm__ __volatile__(
"lock; orq %1, %0"
: "=m"(*__hp(addr))
: "er" ((unsigned long)val)
: "memory");
return;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return;
}
#define _uatomic_or(addr, v) \
(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
/* uatomic_add */
static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
switch (len) {
case 1:
{
__asm__ __volatile__(
"lock; addb %1, %0"
: "=m"(*__hp(addr))
: "iq" ((unsigned char)val)
: "memory");
return;
}
case 2:
{
__asm__ __volatile__(
"lock; addw %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned short)val)
: "memory");
return;
}
case 4:
{
__asm__ __volatile__(
"lock; addl %1, %0"
: "=m"(*__hp(addr))
: "ir" ((unsigned int)val)
: "memory");
return;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
__asm__ __volatile__(
"lock; addq %1, %0"
: "=m"(*__hp(addr))
: "er" ((unsigned long)val)
: "memory");
return;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return;
}
#define _uatomic_add(addr, v) \
(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
/* uatomic_inc */
static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
switch (len) {
case 1:
{
__asm__ __volatile__(
"lock; incb %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
case 2:
{
__asm__ __volatile__(
"lock; incw %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
case 4:
{
__asm__ __volatile__(
"lock; incl %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
__asm__ __volatile__(
"lock; incq %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return;
}
#define _uatomic_inc(addr) (__uatomic_inc((addr), sizeof(*(addr))))
/* uatomic_dec */
static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
switch (len) {
case 1:
{
__asm__ __volatile__(
"lock; decb %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
case 2:
{
__asm__ __volatile__(
"lock; decw %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
case 4:
{
__asm__ __volatile__(
"lock; decl %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
#if (CAA_BITS_PER_LONG == 64)
case 8:
{
__asm__ __volatile__(
"lock; decq %0"
: "=m"(*__hp(addr))
:
: "memory");
return;
}
#endif
}
/*
 * Unsupported size: generate an illegal instruction at run time.
 * This case cannot be caught with linker tricks when optimizations
 * are disabled.
 */
__asm__ __volatile__("ud2");
return;
}
#define _uatomic_dec(addr) (__uatomic_dec((addr), sizeof(*(addr))))
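/*
 * Note: _uatomic_and/or/add/inc/dec above return nothing; when the
 * updated value is needed, use _uatomic_add_return() or
 * _uatomic_cmpxchg() instead. Each of these operations uses a
 * lock-prefixed instruction, which implies a full memory barrier on
 * x86; this is why the cmm_smp_mb__before/after_uatomic_*() helpers
 * defined at the end of this file are compiler barriers only.
 */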
#if ((CAA_BITS_PER_LONG != 64) && defined(CONFIG_RCU_COMPAT_ARCH))
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);
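/*
 * On 32-bit builds with CONFIG_RCU_COMPAT_ARCH, __rcu_cas_avail is > 0
 * when the CPU supports cmpxchg, 0 when it does not, and < 0 while
 * undetermined; __rcu_cas_init() performs the detection.
 * UATOMIC_COMPAT() dispatches each operation either to the native
 * _uatomic_*() implementation or to the compat_uatomic_*() fallback
 * accordingly.
 */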
#define UATOMIC_COMPAT(insn) \
((caa_likely(__rcu_cas_avail > 0)) \
? (_uatomic_##insn) \
: ((caa_unlikely(__rcu_cas_avail < 0) \
? ((__rcu_cas_init() > 0) \
? (_uatomic_##insn) \
: (compat_uatomic_##insn)) \
: (compat_uatomic_##insn))))
/*
 * _compat_uatomic_set() keeps returning a value so that the ABI is
 * preserved, but the wrapper macro below discards it, removing the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
unsigned long _new, int len);
#define compat_uatomic_set(addr, _new) \
((void) _compat_uatomic_set((addr), \
caa_cast_long_keep_sign(_new), \
sizeof(*(addr))))
extern unsigned long _compat_uatomic_xchg(void *addr,
unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new) \
((__typeof__(*(addr))) _compat_uatomic_xchg((addr), \
caa_cast_long_keep_sign(_new), \
sizeof(*(addr))))
extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new) \
((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr), \
caa_cast_long_keep_sign(old), \
caa_cast_long_keep_sign(_new), \
sizeof(*(addr))))
extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v) \
(_compat_uatomic_and((addr), \
caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v) \
(_compat_uatomic_or((addr), \
caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
extern unsigned long _compat_uatomic_add_return(void *addr,
unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v) \
((__typeof__(*(addr))) _compat_uatomic_add_return((addr), \
caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
#define compat_uatomic_add(addr, v) \
((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr) \
(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr) \
(compat_uatomic_add((addr), -1))
#else
#define UATOMIC_COMPAT(insn) (_uatomic_##insn)
#endif
/*
 * Read is atomic even in compat mode, so uatomic_read() (provided by
 * <urcu/uatomic/generic.h>) needs no UATOMIC_COMPAT wrapper.
 */
#define uatomic_set(addr, v) \
UATOMIC_COMPAT(set(addr, v))
#define uatomic_cmpxchg(addr, old, _new) \
UATOMIC_COMPAT(cmpxchg(addr, old, _new))
#define uatomic_xchg(addr, v) \
UATOMIC_COMPAT(xchg(addr, v))
#define uatomic_and(addr, v) \
UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and() cmm_barrier()
#define cmm_smp_mb__after_uatomic_and() cmm_barrier()
#define uatomic_or(addr, v) \
UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or() cmm_barrier()
#define cmm_smp_mb__after_uatomic_or() cmm_barrier()
#define uatomic_add_return(addr, v) \
UATOMIC_COMPAT(add_return(addr, v))
#define uatomic_add(addr, v) UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add() cmm_barrier()
#define cmm_smp_mb__after_uatomic_add() cmm_barrier()
#define uatomic_inc(addr) UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc() cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc() cmm_barrier()
#define uatomic_dec(addr) UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec() cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec() cmm_barrier()
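/*
 * Usage sketch of the public API defined above (illustrative only;
 * "refcount" and release_object() are hypothetical):
 *
 *	static unsigned long refcount = 1;
 *	...
 *	uatomic_inc(&refcount);
 *	...
 *	if (uatomic_add_return(&refcount, -1) == 0)
 *		release_object();
 */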
#ifdef __cplusplus
}
#endif
#include <urcu/uatomic/generic.h>
#endif /* _URCU_ARCH_UATOMIC_X86_H */