arch/blackfin/lib/udivsi3.S

   1 /*
   2  * File:         arch/blackfin/lib/udivsi3.S
   3  * Based on:
   4  * Author:
   5  *
   6  * Created:
   7  * Description:
   8  *
   9  * Modified:
  10  *               Copyright 2004-2006 Analog Devices Inc.
  11  *
  12  * Bugs:         Enter bugs at http://blackfin.uclinux.org/
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License, or
  17  * (at your option) any later version.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program; if not, see the file COPYING, or write
  26  * to the Free Software Foundation, Inc.,
  27  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  28  */
  29
  30 #include <linux/linkage.h>
  31
  32 #define CARRY AC0
  33
  34 #ifdef CONFIG_ARITHMETIC_OPS_L1
  35 .section .l1.text
  36 #else
  37 .text
  38 #endif
  39
  40
  41 ENTRY(___udivsi3)
  42
  43   CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
  44   IF CC JUMP .Lreturn_ident;
  45
  46   R2 = R1 << 16;
  47   CC = R2 <= R0 (IU);
  48   IF CC JUMP .Lidents;
  49
  50   R2 = R0 >> 31;       /* if X is a 31-bit number */
  51   R3 = R1 >> 15;       /* and Y is a 15-bit number */
  52   R2 = R2 | R3;        /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
  53   CC = R2;
  54   IF CC JUMP .Ly_16bit;
  55
  56 /* METHOD 1: FAST DIVQ
  57    We know we have a 31-bit dividend, and 15-bit divisor so we can use the
  58    simple divq approach (first setting AQ to 0 - implying unsigned division,
  59    then 16 DIVQ's).
  60 */
  61
  62   AQ = CC;             /* Clear AQ (CC==0) */
  63
  64 /* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
  65    we need to shift the dividend one bit to the left.
  66    We have already checked that we have a 31-bit number so we are safe to do
  67    that.
  68 */
  69   R0 <<= 1;
  70   DIVQ(R0, R1); // 1
  71   DIVQ(R0, R1); // 2
  72   DIVQ(R0, R1); // 3
  73   DIVQ(R0, R1); // 4
  74   DIVQ(R0, R1); // 5
  75   DIVQ(R0, R1); // 6
  76   DIVQ(R0, R1); // 7
  77   DIVQ(R0, R1); // 8
  78   DIVQ(R0, R1); // 9
  79   DIVQ(R0, R1); // 10
  80   DIVQ(R0, R1); // 11
  81   DIVQ(R0, R1); // 12
  82   DIVQ(R0, R1); // 13
  83   DIVQ(R0, R1); // 14
  84   DIVQ(R0, R1); // 15
  85   DIVQ(R0, R1); // 16
  86   R0 = R0.L (Z);
  87   RTS;
  88
  89 .Ly_16bit:
  90   /* We know that the upper 17 bits of Y might have bits set,
  91   ** or that the sign bit of X might have a bit. If Y is a
  92   ** 16-bit number, but not bigger, then we can use the builtins
  93   ** with a post-divide correction.
  94   ** R3 currently holds Y>>15, which means R3's LSB is the
  95   ** bit we're interested in.
  96   */
  97
  98   /* According to the ISR, to use the Divide primitives for
  99   ** unsigned integer divide, the useable range is 31 bits
 100   */
 101   CC = ! BITTST(R0, 31);
 102
 103   /* IF condition is true we can scale our inputs and use the divide primitives,
 104   ** with some post-adjustment
 105   */
 106   R3 += -1;             /* if so, Y is 0x00008nnn */
 107   CC &= AZ;
 108
 109   /* If condition is true we can scale our inputs and use the divide primitives,
 110   ** with some post-adjustment
 111   */
 112   R3 = R1 >> 1;         /* Pre-scaled divisor for primitive case */
 113   R2 = R0 >> 16;
 114
 115   R2 = R3 - R2;         /* shifted divisor < upper 16 bits of dividend */
 116   CC &= CARRY;
 117   IF CC JUMP .Lshift_and_correct;
 118
 119   /* Fall through to the identities */
 120
 121 /* METHOD 2: identities and manual calculation
 122    We are not able to use the divide primites, but may still catch some special
 123    cases.
 124 */
 125 .Lidents:
 126   /* Test for common identities. Value to be returned is placed in R2. */
 127   CC = R0 == 0;        /* 0/Y => 0 */
 128   IF CC JUMP .Lreturn_r0;
 129   CC = R0 == R1;       /* X==Y => 1 */
 130   IF CC JUMP .Lreturn_ident;
 131   CC = R1 == 1;        /* X/1 => X */
 132   IF CC JUMP .Lreturn_ident;
 133
 134   R2.L = ONES R1;
 135   R2 = R2.L (Z);
 136   CC = R2 == 1;
 137   IF CC JUMP .Lpower_of_two;
 138
 139   [--SP] = (R7:5);                /* Push registers R5-R7 */
 140
 141   /* Idents don't match. Go for the full operation. */
 142
 143
 144   R6 = 2;                         /* assume we'll shift two */
 145   R3 = 1;
 146
 147   P2 = R1;
 148                                   /* If either R0 or R1 have sign set, */
 149                                   /* divide them by two, and note it's */
 150                                   /* been done. */
 151   CC = R1 < 0;
 152   R2 = R1 >> 1;
 153   IF CC R1 = R2;                  /* Possibly-shifted R1 */
 154   IF !CC R6 = R3;                 /* R1 doesn't, so at most 1 shifted */
 155
 156   P0 = 0;
 157   R3 = -R1;
 158   [--SP] = R3;
 159   R2 = R0 >> 1;
 160   R2 = R0 >> 1;
 161   CC = R0 < 0;
 162   IF CC P0 = R6;                  /* Number of values divided */
 163   IF !CC R2 = R0;                 /* Shifted R0 */
 164
 165                                   /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
 166
 167                                   /* r2 holds Copy dividend  */
 168   R3 = 0;                         /* Clear partial remainder */
 169   R7 = 0;                         /* Initialise quotient bit */
 170
 171   P1 = 32;                        /* Set loop counter */
 172   LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
 173 .Lulst:  R6 = R2 >> 31;             /* R6 = sign bit of R2, for carry */
 174        R2 = R2 << 1;              /* Shift 64 bit dividend up by 1 bit */
 175        R3 = R3 << 1 || R5 = [SP];
 176        R3 = R3 | R6;              /* Include any carry */
 177        CC = R7 < 0;               /* Check quotient(AQ) */
 178                                   /* If AQ==0, we'll sub divisor */
 179        IF CC R5 = R1;             /* and if AQ==1, we'll add it. */
 180        R3 = R3 + R5;              /* Add/sub divsor to partial remainder */
 181        R7 = R3 ^ R1;              /* Generate next quotient bit */
 182
 183        R5 = R7 >> 31;             /* Get AQ */
 184        BITTGL(R5, 0);             /* Invert it, to get what we'll shift */
 185 .Lulend: R2 = R2 + R5;              /* and "shift" it in. */
 186
 187   CC = P0 == 0;                   /* Check how many inputs we shifted */
 188   IF CC JUMP .Lno_mult;            /* if none... */
 189   R6 = R2 << 1;
 190   CC = P0 == 1;
 191   IF CC R2 = R6;                  /* if 1, Q = Q*2 */
 192   IF !CC R1 = P2;                 /* if 2, restore stored divisor */
 193
 194   R3 = R2;                        /* Copy of R2 */
 195   R3 *= R1;                       /* Q * divisor */
 196   R5 = R0 - R3;                   /* Z = (dividend - Q * divisor) */
 197   CC = R1 <= R5 (IU);             /* Check if divisor <= Z? */
 198   R6 = CC;                        /* if yes, R6 = 1 */
 199   R2 = R2 + R6;                   /* if yes, add one to quotient(Q) */
 200 .Lno_mult:
 201   SP += 4;
 202   (R7:5) = [SP++];                /* Pop registers R5-R7 */
 203   R0 = R2;                        /* Store quotient */
 204   RTS;
 205
 206 .Lreturn_ident:
 207   CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
 208   R2 = 0;
 209   IF CC JUMP .Ltrue_return_ident;
 210   R2 = -1 (X);         /* X/0 => 0xFFFFFFFF */
 211   CC = R1 == 0;
 212   IF CC JUMP .Ltrue_return_ident;
 213   R2 = -R2;            /* R2 now 1 */
 214   CC = R0 == R1;       /* X==Y => 1 */
 215   IF CC JUMP .Ltrue_return_ident;
 216   R2 = R0;             /* X/1 => X */
 217   /*FALLTHRU*/
 218
 219 .Ltrue_return_ident:
 220   R0 = R2;
 221 .Lreturn_r0:
 222   RTS;
 223
 224 .Lpower_of_two:
 225   /* Y has a single bit set, which means it's a power of two.
 226   ** That means we can perform the division just by shifting
 227   ** X to the right the appropriate number of bits
 228   */
 229
 230   /* signbits returns the number of sign bits, minus one.
 231   ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
 232   ** to shift right n-signbits spaces. It also means 0x80000000
 233   ** is a special case, because that *also* gives a signbits of 0
 234   */
 235
 236   R2 = R0 >> 31;
 237   CC = R1 < 0;
 238   IF CC JUMP .Ltrue_return_ident;
 239
 240   R1.l = SIGNBITS R1;
 241   R1 = R1.L (Z);
 242   R1 += -30;
 243   R0 = LSHIFT R0 by R1.L;
 244   RTS;
 245
 246 /* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
 247   Two scaling operations are required to use the divide primitives with a
 248   divisor > 0x7FFFF.
 249   Firstly (as in method 1) we need to shift the dividend 1 to the left for
 250   integer division.
 251   Secondly we need to shift both the divisor and dividend 1 to the right so
 252   both are in range for the primitives.
 253   The left/right shift of the dividend does nothing so we can skip it.
 254 */
 255 .Lshift_and_correct:
 256   R2 = R0;
 257   // R3 is already R1 >> 1
 258   CC=!CC;
 259   AQ = CC;                        /* Clear AQ, got here with CC = 0 */
 260   DIVQ(R2, R3); // 1
 261   DIVQ(R2, R3); // 2
 262   DIVQ(R2, R3); // 3
 263   DIVQ(R2, R3); // 4
 264   DIVQ(R2, R3); // 5
 265   DIVQ(R2, R3); // 6
 266   DIVQ(R2, R3); // 7
 267   DIVQ(R2, R3); // 8
 268   DIVQ(R2, R3); // 9
 269   DIVQ(R2, R3); // 10
 270   DIVQ(R2, R3); // 11
 271   DIVQ(R2, R3); // 12
 272   DIVQ(R2, R3); // 13
 273   DIVQ(R2, R3); // 14
 274   DIVQ(R2, R3); // 15
 275   DIVQ(R2, R3); // 16
 276
 277   /* According to the Instruction Set Reference:
 278      To divide by a divisor > 0x7FFF,
 279      1. prescale and perform divide to obtain quotient (Q) (done above),
 280      2. multiply quotient by unscaled divisor (result M)
 281      3. subtract the product from the divident to get an error (E = X - M)
 282      4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
 283    */
 284   R3 = R2.L (Z);                /* Q = X' / Y' */
 285   R2 = R3;              /* Preserve Q */
 286   R2 *= R1;             /* M = Q * Y */
 287   R2 = R0 - R2;         /* E = X - M */
 288   R0 = R3;              /* Copy Q into result reg */
 289
 290 /* Correction: If result of the multiply is negative, we overflowed
 291    and need to correct the result by subtracting 1 from the result.*/
 292   R3 = 0xFFFF (Z);
 293   R2 = R2 >> 16;                /* E >> 16 */
 294   CC = R2 == R3;
 295   R3 = 1 ;
 296   R1 = R0 - R3;
 297   IF CC R0 = R1;
 298   RTS;