powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
author Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tue, 17 Dec 2013 22:29:57 +0000 (09:29 +1100)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Mon, 30 Dec 2013 03:02:30 +0000 (14:02 +1100)
The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle
unaligned invocations.  However, these shifts were designed for
big-endian systems: On little-endian systems, they must shift in the
opposite direction.

This commit relies on the C preprocessor to insert the correct shifts
into the assembly code.

[ This is a rare but nasty LE issue. Most of the time we use the POWER7
optimised __copy_tofrom_user_power7 loop, but when it hits an exception
we fall back to the base __copy_tofrom_user loop. - Anton ]

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/lib/copyuser_64.S

index d73a5901490018486fed19115fce46d88084f701..596a285c07554d65dfd561367a52ffed47aba55d 100644 (file)
@@ -9,6 +9,14 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define sLd sld                /* Shift towards low-numbered address. */
+#define sHd srd                /* Shift towards high-numbered address. */
+#else
+#define sLd srd                /* Shift towards low-numbered address. */
+#define sHd sld                /* Shift towards high-numbered address. */
+#endif
+
        .align  7
 _GLOBAL(__copy_tofrom_user)
 BEGIN_FTR_SECTION
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 24:    ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
 25:    ld      r0,8(r4)
-       sld     r6,r9,r10
+       sLd     r6,r9,r10
 26:    ldu     r9,16(r4)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
 27:    ld      r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 28:    ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
 29:    ldu     r9,8(r4)
-       sld     r8,r0,r10
+       sLd     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
 30:    ld      r0,8(r4)
-       srd     r12,r9,r11
-       sld     r6,r9,r10
+       sHd     r12,r9,r11
+       sLd     r6,r9,r10
 31:    ldu     r9,16(r4)
        or      r12,r8,r12
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f
 
 1:     or      r7,r7,r6
 32:    ld      r0,8(r4)
 76:    std     r12,8(r3)
-2:     srd     r12,r9,r11
-       sld     r6,r9,r10
+2:     sHd     r12,r9,r11
+       sLd     r6,r9,r10
 33:    ldu     r9,16(r4)
        or      r12,r8,r12
 77:    stdu    r7,16(r3)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        bdnz    1b
 
 78:    std     r12,8(r3)
        or      r7,r7,r6
 79:    std     r7,16(r3)
-5:     srd     r12,r9,r11
+5:     sHd     r12,r9,r11
        or      r12,r8,r12
 80:    std     r12,24(r3)
        bne     6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        blr
 6:     cmpwi   cr1,r5,8
        addi    r3,r3,32
-       sld     r9,r9,r10
+       sLd     r9,r9,r10
        ble     cr1,7f
 34:    ld      r0,8(r4)
-       srd     r7,r0,r11
+       sHd     r7,r0,r11
        or      r9,r7,r9
 7:
        bf      cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,32
+#endif
 94:    stw     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,32
+#endif
        addi    r3,r3,4
 1:     bf      cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,16
+#endif
 95:    sth     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,16
+#endif
        addi    r3,r3,2
 2:     bf      cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,8
+#endif
 96:    stb     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,8
+#endif
 3:     li      r3,0
        blr