powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
[linux-drm-fsl-dcu.git] / arch / powerpc / lib / copyuser_64.S
index d73a5901490018486fed19115fce46d88084f701..596a285c07554d65dfd561367a52ffed47aba55d 100644 (file)
@@ -9,6 +9,14 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define sLd sld                /* Shift towards low-numbered address. */
+#define sHd srd                /* Shift towards high-numbered address. */
+#else
+#define sLd srd                /* Shift towards low-numbered address. */
+#define sHd sld                /* Shift towards high-numbered address. */
+#endif
+
        .align  7
 _GLOBAL(__copy_tofrom_user)
 BEGIN_FTR_SECTION
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 24:    ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
 25:    ld      r0,8(r4)
-       sld     r6,r9,r10
+       sLd     r6,r9,r10
 26:    ldu     r9,16(r4)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
 27:    ld      r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 28:    ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
 29:    ldu     r9,8(r4)
-       sld     r8,r0,r10
+       sLd     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
 30:    ld      r0,8(r4)
-       srd     r12,r9,r11
-       sld     r6,r9,r10
+       sHd     r12,r9,r11
+       sLd     r6,r9,r10
 31:    ldu     r9,16(r4)
        or      r12,r8,r12
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f
 
 1:     or      r7,r7,r6
 32:    ld      r0,8(r4)
 76:    std     r12,8(r3)
-2:     srd     r12,r9,r11
-       sld     r6,r9,r10
+2:     sHd     r12,r9,r11
+       sLd     r6,r9,r10
 33:    ldu     r9,16(r4)
        or      r12,r8,r12
 77:    stdu    r7,16(r3)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        bdnz    1b
 
 78:    std     r12,8(r3)
        or      r7,r7,r6
 79:    std     r7,16(r3)
-5:     srd     r12,r9,r11
+5:     sHd     r12,r9,r11
        or      r12,r8,r12
 80:    std     r12,24(r3)
        bne     6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        blr
 6:     cmpwi   cr1,r5,8
        addi    r3,r3,32
-       sld     r9,r9,r10
+       sLd     r9,r9,r10
        ble     cr1,7f
 34:    ld      r0,8(r4)
-       srd     r7,r0,r11
+       sHd     r7,r0,r11
        or      r9,r7,r9
 7:
        bf      cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,32
+#endif
 94:    stw     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,32
+#endif
        addi    r3,r3,4
 1:     bf      cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,16
+#endif
 95:    sth     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,16
+#endif
        addi    r3,r3,2
 2:     bf      cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,8
+#endif
 96:    stb     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,8
+#endif
 3:     li      r3,0
        blr