This is the mail archive of the cygwin-developers mailing list for the Cygwin project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Bad codegen in pthread_mutex causing 100% cpu spin loop related to inline asm ilockcmpexchg


Dave Korn wrote:

>   This new version changes the approach to use an "m" constraint to refer
> directly to the contents of *t, and not hope the compiler can infer the
> relationship between the address of t in operand 2 and the content of t in
> operand 1.  It requires rewriting the instruction template, and will generate
> more different addressing modes than the original, which would only create
> register-indirect addressing:
> 
> extern __inline__ long
> ilockcmpexch (volatile long *t, long v, long c)
> {
>   register int __res;
>   __asm__ __volatile__ ("\n\
> 	lock cmpxchgl %2,%1\n\
> 	": "=a" (__res), "+m" (*t) : "q" (v), "0" (c) : "memory", "cc");
>   return __res;
> }

  Then I thought, why not go the whole hog and get rid of the matching
constraints altogether by making __res an in/out operand and passing 'c' in
it.  So I tried this:

Index: winsup/cygwin/winbase.h
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/winbase.h,v
retrieving revision 1.14
diff -p -u -r1.14 winbase.h
--- winsup/cygwin/winbase.h     12 Jul 2008 18:09:17 -0000      1.14
+++ winsup/cygwin/winbase.h     28 May 2009 20:33:21 -0000
@@ -38,21 +38,21 @@ ilockdecr (volatile long *m)
 extern __inline__ long
 ilockexch (volatile long *t, long v)
 {
-  register int __res;
+  register long __res = *t;
   __asm__ __volatile__ ("\n\
-1:     lock    cmpxchgl %3,(%1)\n\
+1:     lock    cmpxchgl %2,%1\n\
        jne 1b\n\
-       ": "=a" (__res), "=q" (t): "1" (t), "q" (v), "0" (*t): "cc");
+       ": "+a" (__res), "+m" (*t): "q" (v) : "memory", "cc");
   return __res;
 }

 extern __inline__ long
 ilockcmpexch (volatile long *t, long v, long c)
 {
-  register int __res;
+  register long __res = c;
   __asm__ __volatile__ ("\n\
-       lock cmpxchgl %3,(%1)\n\
-       ": "=a" (__res), "=q" (t) : "1" (t), "q" (v), "0" (c): "cc");
+       lock cmpxchgl %2,%1\n\
+       ": "+a" (__res), "+m" (*t) : "q" (v) : "memory", "cc");
   return __res;
 }


  The ilockexch function proved interesting.  There was some needless register
motion generated:

/* Second half of user shared initialization: Initialize content. */
void
user_shared_initialize ()
{
  DWORD sversion = (DWORD) InterlockedExchange ((LONG *)
&user_shared->version, USER_VERSION_MAGIC);


translates into:

	movl	$779157505, %edx	 #, tmp65
	movl	_user_shared, %ecx	 # user_shared, D.25215
	movl	(%ecx), %ebx	 #* D.25215, __res
	movl	%ebx, %eax	 # __res,
/APP
 # 42 "/gnu/winsup/src/winsup/cygwin/winbase.h" 1
1:	lock	cmpxchgl %edx,(%ecx)	 # tmp65,* D.25215
	jne 1b
 # 0 "" 2
/NO_APP
	testl	%eax, %eax	 # __res
	movl	%eax, %ebx	 #, __res

... where (%ecx) is moved via %ebx into %eax, and then %eax stored back into
%ebx.  The compiler isn't clever enough to allocate __res and sversion both in
%eax and automatically satisfy the asm constraints, so it assigns __res to
%ebx and has to copy it back and forth.  So I'm going to try this slight
modification:

extern __inline__ long
ilockexch (volatile long *t, long v)
{
  register long __res __asm__ ("%eax") = *t;
  __asm__ __volatile__ ("\n\
1:	lock	cmpxchgl %2,%1\n\
	jne 1b\n\
 	": "+a" (__res), "+m" (*t): "q" (v) : "memory", "cc");
  return __res;
}

extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
  register long __res __asm ("%eax") = c;
  __asm__ __volatile__ ("\n\
	lock cmpxchgl %2,%1\n\
	": "+a" (__res), "+m" (*t) : "q" (v) : "memory", "cc");
  return __res;
}

which at least gets me this healthier-looking output:

	movl	$779157505, %ecx	 #, tmp66
	movl	_user_shared, %edx	 # user_shared, D.25216
	movl	(%edx), %eax	 #* D.25216, __res.0

1:	lock	cmpxchgl %ecx,(%edx)	 # tmp66,* D.25216
	jne 1b

	testl	%eax, %eax	 # sversion
	movl	%eax, %ebx	 # __res, sversion

  I'm running with this change in my local cygwin DLL now and I'll see how it
goes over the next few days.

    cheers,
      DaveK


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]