patch-2.3.41 linux/arch/sparc64/lib/checksum.S

diff -u --recursive --new-file v2.3.40/linux/arch/sparc64/lib/checksum.S linux/arch/sparc64/lib/checksum.S
@@ -2,7 +2,7 @@
  *
  *  Copyright(C) 1995 Linus Torvalds
  *  Copyright(C) 1995 Miguel de Icaza
- *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1996, 2000 David S. Miller
  *  Copyright(C) 1997 Jakub Jelinek
  *
  * derived from:
@@ -263,6 +263,238 @@
 	 srl	%o0, 0, %o0
 cpc_end:
 
+	/* Now the version with userspace as the destination */
+#define CSUMCOPY_LASTCHUNK_USER(off, t0, t1)						\
+	ldx		[%src - off - 0x08], t0;					\
+	ldx		[%src - off - 0x00], t1;					\
+	nop; nop;									\
+	addcc		t0, %sum, %sum;							\
+	stwa		t0, [%dst - off - 0x04] %asi;					\
+	srlx		t0, 32, t0;							\
+	bcc,pt		%xcc, 51f;							\
+	 stwa		t0, [%dst - off - 0x08] %asi;					\
+	add		%sum, 1, %sum;							\
+51:	addcc		t1, %sum, %sum;							\
+	stwa		t1, [%dst - off + 0x04] %asi;					\
+	srlx		t1, 32, t1;							\
+	bcc,pt		%xcc, 52f;							\
+	 stwa		t1, [%dst - off - 0x00] %asi;					\
+	add		%sum, 1, %sum;							\
+52:
+
+cpc_user_start:
+cc_user_end_cruft:
+	andcc		%g7, 8, %g0		! IEU1	Group
+	be,pn		%icc, 1f		! CTI
+	 and		%g7, 4, %g5		! IEU0
+	ldx		[%src + 0x00], %g2	! Load	Group
+	add		%dst, 8, %dst		! IEU0
+	add		%src, 8, %src		! IEU1
+	addcc		%g2, %sum, %sum		! IEU1	Group + 2 bubbles
+	stwa		%g2, [%dst - 0x04] %asi	! Store
+	srlx		%g2, 32, %g2		! IEU0
+	bcc,pt		%xcc, 1f		! CTI	Group
+	 stwa		%g2, [%dst - 0x08] %asi	! Store
+	add		%sum, 1, %sum		! IEU0
+1:	brz,pt		%g5, 1f			! CTI	Group
+	 clr		%g2			! IEU0
+	lduw		[%src + 0x00], %g2	! Load
+	add		%dst, 4, %dst		! IEU0	Group
+	add		%src, 4, %src		! IEU1
+	stwa		%g2, [%dst - 0x04] %asi	! Store	Group + 2 bubbles
+	sllx		%g2, 32, %g2		! IEU0
+1:	andcc		%g7, 2, %g0		! IEU1
+	be,pn		%icc, 1f		! CTI	Group
+	 clr		%o4			! IEU1
+	lduh		[%src + 0x00], %o4	! Load
+	add		%src, 2, %src		! IEU0	Group
+	add		%dst, 2, %dst		! IEU1
+	stha		%o4, [%dst - 0x2] %asi	! Store Group + 2 bubbles
+	sll		%o4, 16, %o4		! IEU0
+1:	andcc		%g7, 1, %g0		! IEU1
+	be,pn		%icc, 1f		! CTI	Group
+	 clr		%o5			! IEU0
+	ldub		[%src + 0x00], %o5	! Load
+	stba		%o5, [%dst + 0x00] %asi	! Store	Group + 2 bubbles
+	sll		%o5, 8, %o5		! IEU0
+1:	or		%g2, %o4, %o4		! IEU1
+	or		%o5, %o4, %o4		! IEU0	Group
+	addcc		%o4, %sum, %sum		! IEU1
+	bcc,pt		%xcc, ccuserfold	! CTI
+	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0	Group
+	b,pt		%xcc, ccuserfold	! CTI
+	 add		%sum, 1, %sum		! IEU1
+
+cc_user_fixit:
+	cmp		%len, 6			! IEU1	Group
+	bl,a,pn		%icc, ccuserte		! CTI
+	 andcc		%len, 0xf, %g7		! IEU1	Group
+	andcc		%src, 2, %g0		! IEU1	Group
+	be,pn		%icc, 1f		! CTI
+	 andcc		%src, 0x4, %g0		! IEU1	Group
+	lduh		[%src + 0x00], %g4	! Load
+	sub		%len, 2, %len		! IEU0
+	add		%src, 2, %src		! IEU0	Group
+	add		%dst, 2, %dst		! IEU1
+	sll		%g4, 16, %g3		! IEU0	Group + 1 bubble
+	addcc		%g3, %sum, %sum		! IEU1
+	bcc,pt		%xcc, 0f		! CTI
+	 srl		%sum, 16, %g3		! IEU0	Group
+	add		%g3, 1, %g3		! IEU0	4 clocks (mispredict)
+0:	andcc		%src, 0x4, %g0		! IEU1	Group
+	stha		%g4, [%dst - 0x2] %asi	! Store
+	sll		%sum, 16, %sum		! IEU0
+	sll		%g3, 16, %g3		! IEU0	Group
+	srl		%sum, 16, %sum		! IEU0	Group
+	or		%g3, %sum, %sum		! IEU0	Group (regdep)
+1:	be,pt		%icc, ccusermerge	! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1
+	lduw		[%src + 0x00], %g4	! Load	Group
+	sub		%len, 4, %len		! IEU0
+	add		%src, 4, %src		! IEU1
+	add		%dst, 4, %dst		! IEU0	Group
+	addcc		%g4, %sum, %sum		! IEU1	Group + 1 bubble
+	stwa		%g4, [%dst - 0x4] %asi	! Store
+	bcc,pt		%xcc, ccusermerge	! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1	Group
+	b,pt		%xcc, ccusermerge	! CTI	4 clocks (mispredict)
+	 add		%sum, 1, %sum		! IEU0
+
+	.align		32
+	.globl		csum_partial_copy_user_sparc64
+csum_partial_copy_user_sparc64:			/* %o0=src, %o1=dest, %o2=len, %o3=sum */
+	xorcc		%src, %dst, %o4		! IEU1	Group
+	srl		%sum, 0, %sum		! IEU0
+	andcc		%o4, 3, %g0		! IEU1	Group
+	srl		%len, 0, %len		! IEU0
+	bne,pn		%icc, ccuserslow	! CTI
+	 andcc		%src, 1, %g0		! IEU1	Group
+	bne,pn		%icc, ccuserslow	! CTI
+	 cmp		%len, 256		! IEU1	Group
+	bgeu,pt		%icc, csum_partial_copy_user_vis ! CTI
+	 andcc		%src, 7, %g0		! IEU1	Group
+	bne,pn		%icc, cc_user_fixit	! CTI
+	 andcc		%len, 0xf0, %g1		! IEU1	Group
+ccusermerge:
+	be,pn		%icc, ccuserte		! CTI
+	 andcc		%len, 0xf, %g7		! IEU1	Group
+	sll		%g1, 2, %o4		! IEU0
+13:	sethi		%hi(12f), %o5		! IEU0	Group
+	add		%src, %g1, %src		! IEU1	
+	sub		%o5, %o4, %o5		! IEU0	Group
+	jmpl		%o5 + %lo(12f), %g0	! CTI	Group brk forced
+	 add		%dst, %g1, %dst		! IEU0	Group
+ccusertbl:
+	CSUMCOPY_LASTCHUNK_USER(0xe8,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0xd8,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0xc8,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0xb8,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0xa8,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x98,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x88,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x78,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x68,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x58,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x48,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x38,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x28,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x18,%g2,%g3)
+	CSUMCOPY_LASTCHUNK_USER(0x08,%g2,%g3)
+12:
+	andcc		%len, 0xf, %g7		! IEU1	Group
+ccuserte:
+	bne,pn		%icc, cc_user_end_cruft	! CTI
+	 sethi		%uhi(PAGE_OFFSET), %g4	! IEU0
+ccuserfold:
+	sllx		%sum, 32, %o0		! IEU0	Group
+	addcc		%sum, %o0, %o0		! IEU1	Group (regdep)
+	srlx		%o0, 32, %o0		! IEU0	Group (regdep)
+	bcs,a,pn	%xcc, 1f		! CTI
+	 add		%o0, 1, %o0		! IEU1	4 clocks (mispredict)
+1:	retl					! CTI	Group brk forced
+	 sllx		%g4, 32, %g4		! IEU0	Group
+
+ccuserslow:
+	mov	0, %g5
+	brlez,pn %len, 4f
+	 andcc	%src, 1, %o5		
+	be,a,pt	%icc, 1f
+	 srl	%len, 1, %g7		
+	sub	%len, 1, %len	
+	ldub [%src], %g5
+	add	%src, 1, %src	
+	stba	%g5, [%dst] %asi
+	srl	%len, 1, %g7
+	add	%dst, 1, %dst
+1:	brz,a,pn %g7, 3f
+	 andcc	%len, 1, %g0
+	andcc	%src, 2, %g0	
+	be,a,pt	%icc, 1f
+	 srl	%g7, 1, %g7
+	lduh [%src], %o4
+	sub	%len, 2, %len	
+	srl	%o4, 8, %g2
+	sub	%g7, 1, %g7	
+	stba	%g2, [%dst] %asi
+	add	%o4, %g5, %g5
+	stba	%o4, [%dst + 1] %asi
+	add	%src, 2, %src	
+	srl	%g7, 1, %g7
+	add	%dst, 2, %dst
+1:	brz,a,pn %g7, 2f		
+	 andcc	%len, 2, %g0
+	lduw	[%src], %o4
+5:	srl	%o4, 24, %g2
+	srl	%o4, 16, %g3
+	stba	%g2, [%dst] %asi
+	srl	%o4, 8, %g2
+	stba	%g3, [%dst + 1] %asi
+	add	%src, 4, %src
+	stba	%g2, [%dst + 2] %asi
+	addcc	%o4, %g5, %g5
+	stba	%o4, [%dst + 3] %asi
+	addc	%g5, %g0, %g5
+	add	%dst, 4, %dst
+	subcc	%g7, 1, %g7
+	bne,a,pt %icc, 5b
+	 lduw [%src], %o4
+	sll	%g5, 16, %g2
+	srl	%g5, 16, %g5
+	srl	%g2, 16, %g2
+	andcc	%len, 2, %g0
+	add	%g2, %g5, %g5 
+2:	be,a,pt	%icc, 3f		
+	 andcc	%len, 1, %g0
+	lduh [%src], %o4
+	andcc	%len, 1, %g0
+	srl	%o4, 8, %g2
+	add	%src, 2, %src	
+	stba	%g2, [%dst] %asi
+	add	%g5, %o4, %g5
+	stba	%o4, [%dst + 1] %asi
+	add	%dst, 2, %dst
+3:	be,a,pt	%icc, 1f		
+	 sll	%g5, 16, %o4
+	ldub [%src], %g2
+	sll	%g2, 8, %o4	
+	stba 	%g2, [%dst] %asi
+	add	%g5, %o4, %g5
+	sll	%g5, 16, %o4
+1:	addcc	%o4, %g5, %g5
+	srl	%g5, 16, %o4
+	addc	%g0, %o4, %g5
+	brz,pt	%o5, 4f
+	 srl	%g5, 8, %o4
+	and	%g5, 0xff, %g2
+	and	%o4, 0xff, %o4
+	sll	%g2, 8, %g2
+	or	%g2, %o4, %g5
+4:	addcc	%sum, %g5, %sum
+	addc	%g0, %sum, %o0
+	retl	
+	 srl	%o0, 0, %o0
+cpc_user_end:
+
 	.globl	cpc_handler
 cpc_handler:
 	ldx	[%sp + 0x7ff + 128], %g1
@@ -277,5 +509,5 @@
 
 	.section __ex_table
 	.align  4
-	.word  cpc_start, 0, cpc_end, cpc_handler
-
+	.word	cpc_start, 0, cpc_end, cpc_handler
+	.word	cpc_user_start, 0, cpc_user_end, cpc_handler
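
For readers of the patch, a minimal C sketch (not part of the patch; the helper name fold64 is illustrative) of the fold performed at the ccuserfold label above: the two 32-bit halves of the 64-bit running sum are added with end-around carry before the routine returns, mirroring the sllx/addcc/srlx/bcs sequence.

#include <stdint.h>

/* Illustrative only: mirrors the instruction sequence at ccuserfold. */
static uint32_t fold64(uint64_t sum)
{
	uint64_t t = (sum << 32) + sum;		/* sllx %sum, 32, %o0; addcc %sum, %o0, %o0 */
	uint32_t folded = (uint32_t)(t >> 32);	/* srlx %o0, 32, %o0 */

	if (t < (sum << 32))			/* carry set in %xcc */
		folded++;			/* bcs,a: add %o0, 1, %o0 */
	return folded;
}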
