patch-2.4.4 linux/arch/ia64/lib/strlen_user.S

Next file: linux/arch/ia64/lib/strncpy_from_user.S
Previous file: linux/arch/ia64/lib/strlen.S
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.3/linux/arch/ia64/lib/strlen_user.S linux/arch/ia64/lib/strlen_user.S
@@ -6,9 +6,9 @@
  *
  * Outputs:
  *	ret0	0 in case of fault, strlen(buffer)+1 otherwise
- * 
- * Copyright (C) 1998, 1999 Hewlett-Packard Co
- * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
  *
  * 01/19/99 S.Eranian heavily enhanced version (see details below)
@@ -24,8 +24,8 @@
 //	- length of string + 1
 //	- 0 in case an exception is raised
 //
-// This is an enhanced version of the basic strlen_user. it includes a 
-// combination of compute zero index (czx), parallel comparisons, speculative 
+// This is an enhanced version of the basic strlen_user. It includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
 // loads and loop unroll using rotating registers.
 //
 // General Ideas about the algorithm:
@@ -34,7 +34,7 @@
 //	  string may not be 8-byte aligned. In this case we load the 8byte
 //	  quantity which includes the start of the string and mask the unused
 //	  bytes with 0xff to avoid confusing czx.
-//	  We use speculative loads and software pipelining to hide memory 
+//	  We use speculative loads and software pipelining to hide memory
 //	  latency and do read ahead safely. This way we defer any exception.
 //
 //	  Because we don't want the kernel to be relying on particular
@@ -45,7 +45,7 @@
 //	  The fact that speculation may fail can be caused, for instance, by
 //	  the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
 //	  a NaT bit will be set if the translation is not present. The normal
-//	  load, on the other hand, will cause the translation to be inserted 
+//	  load, on the other hand, will cause the translation to be inserted
 //	  if the mapping exists.
 //
 //	  It should be noted that we execute recovery code only when we need
@@ -53,30 +53,21 @@
 //	  recovery code on pure read ahead data.
 //
 // Remarks:
-//	- the cmp r0,r0 is used as a fast way to initialize a predicate 
+//	- the cmp r0,r0 is used as a fast way to initialize a predicate
 //	  register to 1. This is required to make sure that we get the parallel
 //	  compare correct.
 //
 //	- we don't use the epilogue counter to exit the loop but we need to set
 //	  it to zero beforehand.
 //
-//	- after the loop we must test for Nat values because neither the 
+//	- after the loop we must test for NaT values because neither the
 //	  czx nor cmp instruction raise a NaT consumption fault. We must be
-//	  careful not to look too far for a Nat for which we don't care. 
+//	  careful not to look too far for a NaT for which we don't care.
 //	  For instance we don't need to look at a NaT in val2 if the zero byte
 //	  was in val1.
 //
 //	- Clearly performance tuning is required.
 //
-// 
-//
-
-#define EX(y,x...)				\
-	.section __ex_table,"a";		\
-	data4 @gprel(99f);			\
-	data4 y-99f;				\
-	.previous;				\
-99:	x
 
 #define saved_pfs	r11
 #define	tmp		r10
@@ -89,15 +80,9 @@
 #define val1		r22
 #define val2		r23
 
-
-	.text
-	.psr abi64
-	.psr lsb
-	.lsb
-
 GLOBAL_ENTRY(__strlen_user)
-	UNW(.prologue)
-	UNW(.save ar.pfs, saved_pfs)
+	.prologue
+	.save ar.pfs, saved_pfs
 	alloc saved_pfs=ar.pfs,11,0,0,8
 
 	.rotr v[2], w[2]	// declares our 4 aliases
@@ -105,7 +90,7 @@
 	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
 	mov orig=in0		// keep trackof initial byte address
 	dep src=0,in0,0,3	// src=8byte-aligned in0 address
-	UNW(.save pr, saved_pr)
+	.save pr, saved_pr
 	mov saved_pr=pr		// preserve predicates (rotation)
 	;;
 
@@ -127,8 +112,8 @@
 	or v[1]=v[1],mask	// now we have a safe initial byte pattern
 	;;
 1:
-	ld8.s v[0]=[src],8	// speculatively load next 
-	czx1.r val1=v[1]	// search 0 byte from right 
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
 	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
 	;;
 	ld8.s w[0]=[src],8	// speculatively load next to next
@@ -144,11 +129,7 @@
 	//	- there must be a better way of doing the test
 	//
 	cmp.eq  p8,p9=8,val1	// p6 = val1 had zero (disambiguate)
-#ifdef notyet
 	tnat.nz p6,p7=val1	// test NaT on val1
-#else
-	tnat.z p7,p6=val1	// test NaT on val1
-#endif
 (p6)	br.cond.spnt.few recover// jump to recovery if val1 is NaT
 	;;
 	//
@@ -193,7 +174,7 @@
 2:
 	EX(.Lexit1, (p6) ld8 val=[base],8)
 	;;
-	czx1.r val1=val		// search 0 byte from right 
+	czx1.r val1=val		// search 0 byte from right
 	;;
 	cmp.eq p6,p0=8,val1	// val1==8 ?
 (p6)	br.wtop.dptk.few 2b	// loop until p6 == 0

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)