谁贴一个ARM版本的 atomic_cas2 上来, 独得160, 其他平分..

mLee79 2010-03-04 06:04:58

atomic_xxx 系列函数 ( incl , decl , add , exchg , cas ) 都找到了, 就缺这个....
很久没散分了, 如果最后是我自己找到了, 就把帖子转到非技术区去, 分数就大家平分....

...全文

637 61 打赏收藏转发到动态举报

写回复

用AI写文章

61 条回复

切换为时间正序

请发表友善的回复…

发表回复

jiayucunyan 2010-03-08

打赏
举报

UP!!!!!!!!!!!!!!!!!!!!!!

mLee79 2010-03-07

打赏
举报

头晕了, 好像应该是这样:

	.align	2
	.global	_XPEER2_atomic_CAS
	.type	_XPEER2_atomic_CAS, %function

@-----------------------------------------------------------------------
@ 32-bit compare-and-swap built on LDREX/STREX (ARM state, GNU as).
@ Presumably: int _XPEER2_atomic_CAS(volatile int *target,
@                                    int exchange, int compare)
@ (the C prototype is not shown here -- confirm against the header).
@
@ In:    r0 = address of the word to update
@        r1 = value to store when the comparison succeeds
@        r2 = value the word is expected to hold
@ Out:   r0 = 1 if the word equalled r2 and r1 was stored, 0 otherwise
@ Clobb: r3, ip, flags
@
@ The store is retried until STREX reports success, so a 0 result
@ always means the observed value differed from r2.
@ No memory barrier is issued here.
@-----------------------------------------------------------------------
_XPEER2_atomic_CAS:
1:	mov	ip, #0			@ status = 0: a skipped STREX must not look like a failed one
	ldrex	r3, [r0]		@ r3 = *target, tagged for exclusive access
	teq	r3, r2			@ does it hold the expected value?
	strexeq	ip, r1, [r0]		@ yes: try the store, ip = 0 on success / 1 on failure
	teq	ip, #0
	bne	1b			@ exclusive store failed -> reload and retry
	teq	r3, r2			@ re-derive the result (flags were reused above)
	moveq	r0, #1
	movne	r0, #0
	bx	lr
	.size	_XPEER2_atomic_CAS, .-_XPEER2_atomic_CAS



	.align	2
	.global	_XPEER2_atomic_CAS2
	.type	_XPEER2_atomic_CAS2, %function

@-----------------------------------------------------------------------
@ 64-bit (two-word) compare-and-swap built on LDREXD/STREXD.
@ Presumably: int _XPEER2_atomic_CAS2(volatile int target[2],
@                                     int exchange[2], int compare[2])
@ (the C prototype is not shown here -- confirm against the header).
@
@ In:    r0 = address of the two-word target
@        r1 = address of the two replacement words
@        r2 = address of the two expected words
@ Out:   r0 = 1 if both target words matched and were replaced, else 0
@ Clobb: r1-r3, flags (r4-r7 are saved and restored)
@
@ NOTE(review): LDREXD/STREXD expect a doubleword-aligned address;
@ confirm that callers align `target` to 8 bytes.
@ No memory barrier is issued here.
@-----------------------------------------------------------------------
_XPEER2_atomic_CAS2:
	stmfd	sp!, {r4-r7, lr}
	ldr	r4, [r1, #0]		@ r4:r5 = exchange[0], exchange[1]
	ldr	r5, [r1, #4]
	ldr	r6, [r2, #0]		@ r6:r7 = compare[0], compare[1]
	ldr	r7, [r2, #4]
1:	mov	r1, #0			@ status = 0: a skipped STREXD must not look like a failed one
	ldrexd	r2, r3, [r0]		@ r2:r3 = target[0], target[1], tagged exclusive
	teq	r2, r6
	teqeq	r3, r7			@ EQ only if both words match
	strexdeq r1, r4, r5, [r0]	@ match: try the store, r1 = 0 on success / 1 on failure
	teq	r1, #0
	bne	1b			@ exclusive store failed -> reload and retry
	teq	r2, r6			@ re-derive the result (flags were reused above)
	teqeq	r3, r7			@ was the misspelt "reqeq", which is not an instruction
	moveq	r0, #1
	movne	r0, #0
	ldmfd	sp!, {r4-r7, pc}
	.size	_XPEER2_atomic_CAS2, .-_XPEER2_atomic_CAS2

Bug_Debug 2010-03-07

打赏
举报

顶

mLee79 2010-03-07

打赏
举报

还是不对, 得重新写过...

mLee79 2010-03-07

打赏
举报

好像是有点不对, ldrex 前得加个 mov r1 , #1

mLee79 2010-03-07

打赏
举报

引用 50 楼 zenny_chen 的回复:

引用 17 楼 mlee79 的回复:
现学,不知道对不对, 先贴出来.....

C/C++ code
.text
/*
 * int ARMv6_atomic_CAS2( volatile int target[2] , int exchange[2] , int compare[2] )
 * {
 *     if( target[0] == compare[0] && target[1] == compare[1] )
 *     {
 *         target[0] = exchange[0];
 *         target[1] = exchange[1];
 *         return 1;
 *     }
 *     return 0;
 * }
 *
 * Single LDREXD/STREXD attempt, no retry loop: the result is 0 both when
 * the comparison fails and when the exclusive store itself is refused,
 * so callers are expected to retry as needed.
 *
 * In:    r0 = target, r1 = exchange, r2 = compare
 * Out:   r0 = 1 if the two words were replaced, 0 otherwise
 * Clobb: r1-r3, flags (r4-r7 are saved and restored)
 */
	.align	2
	.global	ARMv6_atomic_CAS2
ARMv6_atomic_CAS2:
	stmfd	sp!, {r4, r5, r6, r7, lr}
	ldr	r4, [r1, #0]		@ r4:r5 = exchange[0], exchange[1]
	ldr	r5, [r1, #4]
	ldr	r6, [r2, #0]		@ r6:r7 = compare[0], compare[1]
	ldr	r7, [r2, #4]
	mov	r1, #1			@ default status = failed; r1 would otherwise still be the
					@ exchange pointer when STREXDEQ is skipped on a mismatch
	ldrexd	r2, r3, [r0]		@ r2:r3 = target[0], target[1], tagged exclusive
	teq	r2, r6
	teqeq	r3, r7			@ EQ only if both words match
	strexdeq r1, r4, r5, [r0]	@ match: r1 = 0 if stored, 1 if the store was refused
	eor	r0, r1, #1		@ status 0 -> return 1, status 1 -> return 0
	ldmfd	sp!, {r4, r5, r6, r7, pc}

虽然逻辑上没啥问题，但是根据ARM手册，这貌似有些不太规范。主要是当失败的话STREXD指令不会被执行，而标准方式是最终需要执行一条STREXD操作将target针对当前处理器的Exclusive状态转为Open状态。尽管用户调用此函数时应该要轮询操作，呵呵。
不过这点也是LL/SC比较麻烦的地方。有很多异常状态需要处理。

好像这样是没有问题的, eq 条件没有满足也算是执行了 strex 操作, 一开始的时候我看 linux 的代码也觉得困惑, 后来才明白, 这个是 linux 上的 cas ...

/*
 * Compare-and-swap on ptr->counter using an ARM LDREX/STREX sequence.
 *
 * Loads the counter exclusively; if it equals `old`, attempts to store
 * `new`. The sequence is repeated for as long as the exclusive store
 * reports a non-zero status in `res`.
 *
 * Returns the value that was loaded from the counter (oldval). The store
 * was attempted only when that value equals `old`, so the caller can
 * compare the result with `old` to tell whether the swap happened.
 */
static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)

{

	/* oldval: value read by LDREX; res: STREX status (0 = stored) */
	unsigned long oldval, res;



	do {

		__asm__ __volatile__("@ atomic_cmpxchg\n"

		/* %1 (oldval) = counter, address tagged for exclusive access */
		"ldrex	%1, [%2]\n"

		/* %0 (res) = 0, so the loop also ends when STREXEQ is skipped */
		"mov	%0, #0\n"

		/* compare the loaded value with %3 (old); sets the flags */
		"teq	%1, %3\n"

		/* equal only: try to store %4 (new); res receives the status */
		"strexeq %0, %4, [%2]\n"

		    /* outputs are early-clobber: written before all inputs are consumed */
		    : "=&r" (res), "=&r" (oldval)

		    : "r" (&ptr->counter), "Ir" (old), "r" (new)

		    /* TEQ modifies the condition flags */
		    : "cc");

	} while (res);	/* non-zero status: the exclusive store did not take place, retry */



	return oldval;

}

xiaocai 2010-03-07

打赏
举报

..................................................

lxb316 2010-03-07

打赏
举报

UP

UP

UP

zenny_chen 2010-03-07

打赏
举报

呵呵。基本上没什么问题。
这个其实可以参考19楼贴的Linux中的atomic_cmpxchg，稍微改改就行。

zenny_chen 2010-03-07

打赏
举报

引用 17 楼 mlee79 的回复:

现学,不知道对不对, 先贴出来.....

C/C++ code
.text
/*
 * int ARMv6_atomic_CAS2( volatile int target[2] , int exchange[2] , int compare[2] )
 * {
 *     if( target[0] == compare[0] && target[1] == compare[1] )
 *     {
 *         target[0] = exchange[0];
 *         target[1] = exchange[1];
 *         return 1;
 *     }
 *     return 0;
 * }
 *
 * Single LDREXD/STREXD attempt, no retry loop: the result is 0 both when
 * the comparison fails and when the exclusive store itself is refused,
 * so callers are expected to retry as needed.
 *
 * In:    r0 = target, r1 = exchange, r2 = compare
 * Out:   r0 = 1 if the two words were replaced, 0 otherwise
 * Clobb: r1-r3, flags (r4-r7 are saved and restored)
 */
	.align	2
	.global	ARMv6_atomic_CAS2
ARMv6_atomic_CAS2:
	stmfd	sp!, {r4, r5, r6, r7, lr}
	ldr	r4, [r1, #0]		@ r4:r5 = exchange[0], exchange[1]
	ldr	r5, [r1, #4]
	ldr	r6, [r2, #0]		@ r6:r7 = compare[0], compare[1]
	ldr	r7, [r2, #4]
	mov	r1, #1			@ default status = failed; r1 would otherwise still be the
					@ exchange pointer when STREXDEQ is skipped on a mismatch
	ldrexd	r2, r3, [r0]		@ r2:r3 = target[0], target[1], tagged exclusive
	teq	r2, r6
	teqeq	r3, r7			@ EQ only if both words match
	strexdeq r1, r4, r5, [r0]	@ match: r1 = 0 if stored, 1 if the store was refused
	eor	r0, r1, #1		@ status 0 -> return 1, status 1 -> return 0
	ldmfd	sp!, {r4, r5, r6, r7, pc}

虽然逻辑上没啥问题，但是根据ARM手册，这貌似有些不太规范。主要是当失败的话STREXD指令不会被执行，而标准方式是最终需要执行一条STREXD操作将target针对当前处理器的Exclusive状态转为Open状态。尽管用户调用此函数时应该要轮询操作，呵呵。
不过这点也是LL/SC比较麻烦的地方。有很多异常状态需要处理。

zenny_chen 2010-03-07

打赏
举报

呵呵，写这东东应该在此之前先用C等高级编程语言或时序描述语言来编写，然后翻成汇编，再进行优化。
嘿嘿。

现在有事，等会儿再详细看看55楼的代码。

r11222 2010-03-07

打赏
举报

JF 加学习
回复内容太短了！

mLee79 2010-03-06

打赏
举报

我感觉17楼贴出来的 cas2 应该是没有问题的, 不过我这暂时没有可以跑的板子, 唉, 先用自旋锁吧...

等几天先找个模拟器试试... iPhone 应该是可以跑的... 可以的话帮我把其他的也试试:





	.file	"ARMv6Atomic.S"



	.text



	.align	2
	.global	ARMv6_atomic_incl
	.type	ARMv6_atomic_incl, %function

@ Atomically add 1 to the word at [r0].
@ In:    r0 = address of the word
@ Out:   r0 = 0 if the incremented value is zero, 1 otherwise
@ Clobb: r2, r3, flags
ARMv6_atomic_incl:
.Lincl_retry:
	ldrex	r2, [r0]		@ r2 = current value, tagged exclusive
	add	r2, r2, #1
	strex	r3, r2, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Lincl_retry		@ store refused -> start over
	movs	r0, r2			@ r0 = new value, Z set when it is zero
	movne	r0, #1			@ collapse any non-zero value to 1
	bx	lr





	.align	2
	.global	ARMv6_atomic_decl
	.type	ARMv6_atomic_decl, %function

@ Atomically subtract 1 from the word at [r0].
@ In:    r0 = address of the word
@ Out:   r0 = 0 if the decremented value is zero, 1 otherwise
@ Clobb: r2, r3, flags
ARMv6_atomic_decl:
.Ldecl_retry:
	ldrex	r2, [r0]		@ r2 = current value, tagged exclusive
	sub	r2, r2, #1
	strex	r3, r2, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Ldecl_retry		@ store refused -> start over
	movs	r0, r2			@ r0 = new value, Z set when it is zero
	movne	r0, #1			@ collapse any non-zero value to 1
	bx	lr



	.align	2
	.global	ARMv6_atomic_incl_
	.type	ARMv6_atomic_incl_, %function

@ Atomically add 1 to the word at [r0] and hand back the result.
@ In:    r0 = address of the word
@ Out:   r0 = the incremented value
@ Clobb: r2, r3, flags
ARMv6_atomic_incl_:
.Lincl_value_retry:
	ldrex	r2, [r0]		@ r2 = current value, tagged exclusive
	add	r2, r2, #1
	strex	r3, r2, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Lincl_value_retry	@ store refused -> start over
	mov	r0, r2
	bx	lr





	.align	2
	.global	ARMv6_atomic_decl_
	.type	ARMv6_atomic_decl_, %function

@ Atomically subtract 1 from the word at [r0] and hand back the result.
@ In:    r0 = address of the word
@ Out:   r0 = the decremented value
@ Clobb: r2, r3, flags
ARMv6_atomic_decl_:
.Ldecl_value_retry:
	ldrex	r2, [r0]		@ r2 = current value, tagged exclusive
	sub	r2, r2, #1
	strex	r3, r2, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Ldecl_value_retry	@ store refused -> start over
	mov	r0, r2
	bx	lr





	.align	2
	.global	ARMv6_atomic_add
	.type	ARMv6_atomic_add, %function

@ Atomically add r1 to the word at [r0] and hand back the sum.
@ In:    r0 = address of the word
@        r1 = amount to add
@ Out:   r0 = the value after the addition
@ Clobb: r2, r3, flags
ARMv6_atomic_add:
.Ladd_retry:
	ldrex	r2, [r0]		@ r2 = current value, tagged exclusive
	add	r2, r2, r1
	strex	r3, r2, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Ladd_retry		@ store refused -> start over
	mov	r0, r2
	bx	lr



	.align	2
	.global	ARMv6_atomic_exchange
	.type	ARMv6_atomic_exchange, %function

@ Atomically replace the word at [r0] with r1.
@ In:    r0 = address of the word
@        r1 = value to store
@ Out:   r0 = the value that was replaced
@ Clobb: r2, r3, flags
ARMv6_atomic_exchange:
.Lexchange_retry:
	ldrex	r2, [r0]		@ r2 = previous value, tagged exclusive
	strex	r3, r1, [r0]		@ r3 = 0 when the store took place
	cmp	r3, #0
	bne	.Lexchange_retry	@ store refused -> start over
	mov	r0, r2
	bx	lr



	.align	2
	.global	ARMv6_atomic_CAS
	.type	ARMv6_atomic_CAS, %function

@-----------------------------------------------------------------------
@ 32-bit compare-and-swap, single LDREX/STREX attempt (no retry loop).
@ Presumably: int ARMv6_atomic_CAS(volatile int *target,
@                                  int exchange, int compare)
@ (the C prototype is not shown here -- confirm against the header).
@
@ In:    r0 = address of the word to update
@        r1 = value to store when the comparison succeeds
@        r2 = value the word is expected to hold
@ Out:   r0 = 1 if r1 was stored, 0 otherwise
@ Clobb: r3, flags
@
@ A 0 result covers both "value differed" and "exclusive store was
@ refused", so callers are expected to retry as needed.
@-----------------------------------------------------------------------
ARMv6_atomic_CAS:
	ldrex	r3, [r0]		@ r3 = *target, tagged for exclusive access
	teq	r3, r2
	movne	r3, #1			@ mismatch: force status = failed; previously r3 kept the
					@ loaded value and the function returned value ^ 1
	strexeq	r3, r1, [r0]		@ match: r3 = 0 if stored, 1 if the store was refused
	eor	r0, r3, #1		@ status 0 -> return 1, status 1 -> return 0
	bx	lr



	.align	2
	.global	ARMv6_atomic_CAS2
	.type	ARMv6_atomic_CAS2, %function

@-----------------------------------------------------------------------
@ 64-bit (two-word) compare-and-swap, single LDREXD/STREXD attempt
@ (no retry loop).
@ Presumably: int ARMv6_atomic_CAS2(volatile int target[2],
@                                   int exchange[2], int compare[2])
@ (the C prototype is not shown here -- confirm against the header).
@
@ In:    r0 = address of the two-word target
@        r1 = address of the two replacement words
@        r2 = address of the two expected words
@ Out:   r0 = 1 if both words were replaced, 0 otherwise
@ Clobb: r1-r3, flags (r4-r7 are saved and restored)
@
@ A 0 result covers both "value differed" and "exclusive store was
@ refused", so callers are expected to retry as needed.
@ NOTE(review): LDREXD/STREXD expect a doubleword-aligned address;
@ confirm that callers align `target` to 8 bytes.
@-----------------------------------------------------------------------
ARMv6_atomic_CAS2:
	stmfd	sp!, {r4-r7, lr}
	ldr	r4, [r1, #0]		@ r4:r5 = exchange[0], exchange[1]
	ldr	r5, [r1, #4]
	ldr	r6, [r2, #0]		@ r6:r7 = compare[0], compare[1]
	ldr	r7, [r2, #4]
	mov	r1, #1			@ default status = failed; r1 would otherwise still be the
					@ exchange pointer when STREXDEQ is skipped on a mismatch
	ldrexd	r2, r3, [r0]		@ r2:r3 = target[0], target[1], tagged exclusive
	teq	r2, r6
	teqeq	r3, r7			@ EQ only if both words match
	strexdeq r1, r4, r5, [r0]	@ match: r1 = 0 if stored, 1 if the store was refused
	eor	r0, r1, #1		@ status 0 -> return 1, status 1 -> return 0
	ldmfd	sp!, {r4-r7, pc}

zenny_chen 2010-03-06

打赏
举报

稍微看了一下LDREX和STREX，这对东东有些复杂，要稍微花些时间研究一下，呵呵。

mLee79 2010-03-06

打赏
举报

单核多线程也很有意义, 可以减少加锁的开销和死锁的风险, 并且现在 lock-free 很流行, 搞个比较拉风...
跟 x86 的 cmpxchg cmpxchg8b cmpxchg16b 的意义是一样的, 单核的 x86 也同样用这些指令, 只是用的时候不用加 lock 前缀锁定总线...
SWP 只能搞个自旋锁, 貌似 ARMv6 版本以下的 linux 内核用的是自旋锁, 看来对 v6 以下的也只好这样了...

文档上貌似是这样写的:

LDREX



LDREX loads data from memory.



    * If the physical address has the Shared TLB attribute, LDREX tags the physical address as exclusive access for the current processor, and clears any exclusive access tag for this processor for any other physical address.

    * Otherwise, it tags the fact that the executing processor has an outstanding tagged physical address.



STREX



STREX performs a conditional store to memory. The conditions are as follows:



    * If the physical address does not have the Shared TLB attribute, and the executing processor has an outstanding tagged physical address, the store takes place, the tag is cleared, and the value 0 is returned in Rd.

    * If the physical address does not have the Shared TLB attribute, and the executing processor does not have an outstanding tagged physical address, the store does not take place, and the value 1 is returned in Rd.

    * If the physical address has the Shared TLB attribute, and the physical address is tagged as exclusive access for the executing processor, the store takes place, the tag is cleared, and the value 0 is returned in Rd.

    * If the physical address has the Shared TLB attribute, and the physical address is not tagged as exclusive access for the executing processor, the store does not take place, and the value 1 is returned in Rd.

引用 28 楼 zenny_chen 的回复:

我的理解是：当核0对地址x使用了LDREX后，并在对x使用STREX之前，如果核1对地址x使用LDREX，那么核1会被系统锁住，呵呵。关于这个行为文档中虽然没有明确地提到，但是在状态图中，在初始状态为“互斥”时，没有列出LDREX(t, !n)的情况。也就是说不可能存在这一状态。因此对于LDREX-STREX而言，LDREX就是上锁，STREX是释放锁。

稍微有点问题, 核1不会被锁住, 但是 strex 会失败, 因此才需要不断的重试( bne 1b )

zenny_chen 2010-03-06

打赏
举报

引用 48 楼 sinservice 的回复:

3楼就是正解，用带前缀的64位比较交换指令就可以了。

楼主你要找的就是这个。

晕死，你连Intel指令集和ARM指令集都不分啊。

「已注销」 2010-03-06

打赏
举报

3楼就是正解，用带前缀的64位比较交换指令就可以了。

楼主你要找的就是这个。

mLee79 2010-03-06

打赏
举报

引用 46 楼 zenny_chen 的回复:

引用 42 楼 mlee79 的回复:
cas 的好处不止这点, 比如上面的 atomic_incl , 每次的 cas 操作都可以确保一个CPU成功的完成一次功能, 而自旋锁却很有可能会原地打转半天才会完成一次成功的操作, 并且如果一旦一个低优先级的线程获取了自旋锁, 那就真的杯具了....
如果在应用程序端使用这种锁的话一般会前后加开关中断，以防止杯具的发生。

ring3 不能关中断啊, 只能像 win 的 cs 一样, 头转晕了就歇会....

zenny_chen 2010-03-06