MIPS: math-emu: Add support for the MIPS R6 MSUBF FPU instruction
[linux-drm-fsl-dcu.git] / arch / mips / math-emu / sp_msubf.c
1 /*
2  * IEEE754 floating point arithmetic
3  * single precision: MSUB.f (Fused Multiply Subtract)
4  * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
5  *
6  * MIPS floating point support
7  * Copyright (C) 2015 Imagination Technologies, Ltd.
8  * Author: Markos Chandras <markos.chandras@imgtec.com>
9  *
10  *  This program is free software; you can distribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; version 2 of the License.
13  */
14
15 #include "ieee754sp.h"
16
17 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
18                                 union ieee754sp y)
19 {
20         int re;
21         int rs;
22         unsigned rm;
23         unsigned short lxm;
24         unsigned short hxm;
25         unsigned short lym;
26         unsigned short hym;
27         unsigned lrm;
28         unsigned hrm;
29         unsigned t;
30         unsigned at;
31         int s;
32
33         COMPXSP;
34         COMPYSP;
35         u32 zm; int ze; int zs __maybe_unused; int zc;
36
37         EXPLODEXSP;
38         EXPLODEYSP;
39         EXPLODESP(z, zc, zs, ze, zm)
40
41         FLUSHXSP;
42         FLUSHYSP;
43         FLUSHSP(z, zc, zs, ze, zm);
44
45         ieee754_clearcx();
46
47         switch (zc) {
48         case IEEE754_CLASS_SNAN:
49                 ieee754_setcx(IEEE754_INVALID_OPERATION);
50                 return ieee754sp_nanxcpt(z);
51         case IEEE754_CLASS_DNORM:
52                 SPDNORMx(zm, ze);
53         /* QNAN is handled separately below */
54         }
55
56         switch (CLPAIR(xc, yc)) {
57         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
58         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
59         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
60         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
61         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
62                 return ieee754sp_nanxcpt(y);
63
64         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
65         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
66         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
67         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
68         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
69         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
70                 return ieee754sp_nanxcpt(x);
71
72         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
73         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
74         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
75         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
76                 return y;
77
78         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
79         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
80         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
81         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
82         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
83                 return x;
84
85         /*
86          * Infinity handling
87          */
88         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
89         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
90                 if (zc == IEEE754_CLASS_QNAN)
91                         return z;
92                 ieee754_setcx(IEEE754_INVALID_OPERATION);
93                 return ieee754sp_indef();
94
95         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
96         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
97         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
98         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
99         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
100                 if (zc == IEEE754_CLASS_QNAN)
101                         return z;
102                 return ieee754sp_inf(xs ^ ys);
103
104         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
105         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
106         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
107         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
108         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
109                 if (zc == IEEE754_CLASS_INF)
110                         return ieee754sp_inf(zs);
111                 /* Multiplication is 0 so just return z */
112                 return z;
113
114         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
115                 SPDNORMX;
116
117         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
118                 if (zc == IEEE754_CLASS_QNAN)
119                         return z;
120                 else if (zc == IEEE754_CLASS_INF)
121                         return ieee754sp_inf(zs);
122                 SPDNORMY;
123                 break;
124
125         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
126                 if (zc == IEEE754_CLASS_QNAN)
127                         return z;
128                 else if (zc == IEEE754_CLASS_INF)
129                         return ieee754sp_inf(zs);
130                 SPDNORMX;
131                 break;
132
133         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
134                 if (zc == IEEE754_CLASS_QNAN)
135                         return z;
136                 else if (zc == IEEE754_CLASS_INF)
137                         return ieee754sp_inf(zs);
138                 /* fall through to real compuation */
139         }
140
141         /* Finally get to do some computation */
142
143         /*
144          * Do the multiplication bit first
145          *
146          * rm = xm * ym, re = xe + ye basically
147          *
148          * At this point xm and ym should have been normalized.
149          */
150
151         /* rm = xm * ym, re = xe+ye basically */
152         assert(xm & SP_HIDDEN_BIT);
153         assert(ym & SP_HIDDEN_BIT);
154
155         re = xe + ye;
156         rs = xs ^ ys;
157
158         /* shunt to top of word */
159         xm <<= 32 - (SP_FBITS + 1);
160         ym <<= 32 - (SP_FBITS + 1);
161
162         /*
163          * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
164          */
165         lxm = xm & 0xffff;
166         hxm = xm >> 16;
167         lym = ym & 0xffff;
168         hym = ym >> 16;
169
170         lrm = lxm * lym;        /* 16 * 16 => 32 */
171         hrm = hxm * hym;        /* 16 * 16 => 32 */
172
173         t = lxm * hym; /* 16 * 16 => 32 */
174         at = lrm + (t << 16);
175         hrm += at < lrm;
176         lrm = at;
177         hrm = hrm + (t >> 16);
178
179         t = hxm * lym; /* 16 * 16 => 32 */
180         at = lrm + (t << 16);
181         hrm += at < lrm;
182         lrm = at;
183         hrm = hrm + (t >> 16);
184
185         rm = hrm | (lrm != 0);
186
187         /*
188          * Sticky shift down to normal rounding precision.
189          */
190         if ((int) rm < 0) {
191                 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
192                     ((rm << (SP_FBITS + 1 + 3)) != 0);
193                 re++;
194         } else {
195                 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
196                      ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
197         }
198         assert(rm & (SP_HIDDEN_BIT << 3));
199
200         /* And now the subtraction */
201
202         /* Flip sign of r and handle as add */
203         rs ^= 1;
204
205         assert(zm & SP_HIDDEN_BIT);
206
207         /*
208          * Provide guard,round and stick bit space.
209          */
210         zm <<= 3;
211
212         if (ze > re) {
213                 /*
214                  * Have to shift y fraction right to align.
215                  */
216                 s = ze - re;
217                 SPXSRSYn(s);
218         } else if (re > ze) {
219                 /*
220                  * Have to shift x fraction right to align.
221                  */
222                 s = re - ze;
223                 SPXSRSYn(s);
224         }
225         assert(ze == re);
226         assert(ze <= SP_EMAX);
227
228         if (zs == rs) {
229                 /*
230                  * Generate 28 bit result of adding two 27 bit numbers
231                  * leaving result in zm, zs and ze.
232                  */
233                 zm = zm + rm;
234
235                 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
236                         SPXSRSX1(); /* shift preserving sticky */
237                 }
238         } else {
239                 if (zm >= rm) {
240                         zm = zm - rm;
241                 } else {
242                         zm = rm - zm;
243                         zs = rs;
244                 }
245                 if (zm == 0)
246                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
247
248                 /*
249                  * Normalize in extended single precision
250                  */
251                 while ((zm >> (SP_MBITS + 3)) == 0) {
252                         zm <<= 1;
253                         ze--;
254                 }
255
256         }
257         return ieee754sp_format(zs, ze, zm);
258 }