1462 lines
62 KiB
C
1462 lines
62 KiB
C
/* hp2100_fp1.c: HP 1000 multiple-precision floating point routines
|
|
|
|
Copyright (c) 2005-2016, J. David Bryan
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a
|
|
copy of this software and associated documentation files (the "Software"),
|
|
to deal in the Software without restriction, including without limitation
|
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
and/or sell copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
Except as contained in this notice, the name of the author shall not be used
|
|
in advertising or otherwise to promote the sale, use or other dealings in
|
|
this Software without prior written authorization from the author.
|
|
|
|
16-May-16 JDB Reformulated the definitions of op_mask
|
|
24-Dec-14 JDB Added casts for explicit downward conversions
|
|
Changed fp_ucom return from uint32 to uint16
|
|
18-Mar-13 JDB Changed type of mantissa masks array to to unsigned
|
|
06-Feb-12 JDB Added missing precision on constant "one" in fp_trun
|
|
21-Jun-11 JDB Completed the comments for divide; no code changes
|
|
08-Jun-08 JDB Quieted bogus gcc warning in fp_exec
|
|
10-May-08 JDB Fixed uninitialized return in fp_accum when setting
|
|
19-Mar-08 JDB Reworked "complement" to avoid inlining bug in gcc-4.x
|
|
01-Dec-06 JDB Reworked into generalized multiple-precision ops for FPP
|
|
12-Oct-06 JDB Altered x_trun for F-Series FFP compatibility
|
|
Added F-Series ..TCM FFP helpers
|
|
|
|
Primary references:
|
|
- HP 1000 M/E/F-Series Computers Engineering and Reference Documentation
|
|
(92851-90001, Mar-1981)
|
|
- HP 1000 M/E/F-Series Computers Technical Reference Handbook
|
|
(5955-0282, Mar-1980)
|
|
- DOS/RTE Relocatable Library Reference Manual
|
|
(24998-90001, Oct-1981)
|
|
|
|
|
|
This module implements multiple-precision floating-point operations to
|
|
support the 1000 F-Series hardware Floating Point Processor. It employs
|
|
64-bit integer arithmetic for speed and simplicity of implementation. The
|
|
host compiler must support 64-bit integers, and the HAVE_INT64 symbol must be
|
|
defined during compilation. If this symbol is not defined, then FPP support
|
|
is not available.
|
|
|
|
HP 2100/1000 computers used a proprietary floating-point format. The 2100
|
|
had optional firmware that provided two-word floating-point add, subtract,
|
|
multiply, and divide, as well as single-integer fix and float. The 1000-M/E
|
|
provided the same two-word firmware operations as standard equipment.
|
|
Three-word extended-precision instructions for the 2100 and 1000-M/E were
|
|
provided by the optional Fast FORTRAN Processor firmware.
|
|
|
|
The 1000-F substituted a hardware floating point processor for the firmware
|
|
in previous machines. In addition to the two- and three-word formats, the
|
|
F-Series introduced a four-word double-precision format. A five-word format
|
|
that provided extra range in the exponent by unpacking it from the mantissa
|
|
was also provided, although this capability was not documented in the user
|
|
manual. In addition, the FPP improved the accuracy of floating-point
|
|
calculations, as the firmware versions sacrificed a few bits of precision to
|
|
gain speed. Consequently, operations on the F-Series may return results that
|
|
differ slightly from the same operations on the M/E-Series or the 2100.
|
|
|
|
F-Series units after date code 1920 also provided two-word double-integer
|
|
instructions in firmware, as well as double-integer fix and float operations.
|
|
|
|
The original 32-bit floating-point format is as follows:
|
|
|
|
15 14 0
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|MS| mantissa high | : M
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa low | exponent |XS| : M + 1
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
15 8 7 1 0
|
|
|
|
Both 23-bit mantissa and 7-bit exponent are in twos-complement form. The
|
|
exponent sign bit has been rotated into the LSB of the second word.
|
|
|
|
The extended-precision floating-point format is a 48-bit extension of the
|
|
32-bit format used for single precision. A packed extended-precision value
|
|
consists of a 39-bit mantissa and a 7-bit exponent. The format is as
|
|
follows:
|
|
|
|
15 14 0
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|MS| mantissa high | : M
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa middle | : M + 1
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa low | exponent |XS| : M + 2
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
15 8 7 1 0
|
|
|
|
The double-precision floating-point format is similar to the 48-bit
|
|
extended-precision format, although with a 55-bit mantissa:
|
|
|
|
15 14 0
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|MS| mantissa high | : M
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa middle high | : M + 1
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa middle low | : M + 2
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa low | exponent |XS| : M + 3
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
15 8 7 1 0
|
|
|
|
The FPP also supports a special five-word expanded-exponent format:
|
|
|
|
15 14 0
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|MS| mantissa high | : M
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa middle high | : M + 1
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa middle low | : M + 2
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| mantissa low | : M + 3
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
| exponent |XS| : M + 4
|
|
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
15 8 7 1 0
|
|
|
|
The exponent is a full 16-bit twos-complement value, but the allowed range is
|
|
only 10 bits, i.e., -512 to +511.
|
|
|
|
In a normalized value, the sign and MSB of the mantissa differ. Zero is
|
|
represented by all words = 0.
|
|
|
|
Internally, unpacked floating-point values are contained in a structure
|
|
having a signed 64-bit mantissa and a signed 32-bit exponent. Mantissas are
|
|
left-justified with the unused bits masked to zero. Exponents are
|
|
right-justified. The precision is indicated by the value of a structure
|
|
field.
|
|
|
|
HP terminology for the three-word floating-point format is confused. Some
|
|
documents refer to it as "double precision," while others use "extended
|
|
precision." The instruction mnemonics begin with "X" (e.g., .XADD),
|
|
suggesting the extended-precision term.
|
|
|
|
HP apparently intended that the four-word double-precision format would be
|
|
called "triple-precision," as the instruction mnemonics begin with "T" (e.g.,
|
|
".TADD" for the four-word add instruction). The source files for the
|
|
software simulations of these instructions for the M/E-Series also explicitly
|
|
refer to "triple precision math." However, the engineering documentation and
|
|
the F-Series reference manual both use the double-precision term.
|
|
|
|
This module adopts the single/extended/double terminology and uses the
|
|
initial letters of the instructions (F/X/T) to indicate the precision used.
|
|
|
|
The FPP hardware consisted of two circuit boards that interfaced to the main
|
|
CPU via the Microprogrammable Processor Port (MPP) that had been introduced
|
|
with the 1000 E-Series. One board contained argument registers and ALUs,
|
|
split into separate mantissa and exponent parts. The other contained a state
|
|
machine sequencer. FPP results were copied automatically to the argument
|
|
registers in addition to being available over the MPP, so that chained
|
|
operations could be executed from these "accumulators" without reloading.
|
|
|
|
The FPP operated independently of the CPU. An opcode, specifying one of the
|
|
six operations (add, subtract, multiply, divide, fix, or float) was sent to
|
|
the FPP, and a start command was given. Operands of appropriate precision
|
|
were then supplied to the FPP. Once the operands were received, the FPP
|
|
would execute and set a flag when the operation was complete. The result
|
|
would then be retrieved from the FPP. The floating-point instruction
|
|
firmware in the CPU initiated the desired FPP operation and handled operand
|
|
reads from and result writes to main memory.
|
|
|
|
Under simulation, "fp_exec" provides the six arithmetic operations analogous
|
|
to FPP execution. The remainder of the functions are helpers that were
|
|
provided by firmware in the 1000-F but that can reuse code needed to simulate
|
|
the FPP hardware. As with the hardware, "fp_exec" retains the last result
|
|
in an internal accumulator that may be referenced in subsequent operations.
|
|
|
|
NOTE: this module also provides the floating-point support for the firmware
|
|
single-precision 1000-M/E base set and extended-precision FFP instructions.
|
|
Because the firmware and hardware implementations returned slightly different
|
|
results, particularly with respect to round-off, conditional checks are
|
|
implemented in the arithmetic routines. In some cases, entirely different
|
|
algorithms are used to ensure fidelity with the real machines. Functionally,
|
|
this means that the 2100/1000-M/E and 1000-F floating-point diagnostics are
|
|
not interchangeable, and failures are to be expected if a diagnostic is run
|
|
on the wrong machine.
|
|
*/
|
|
|
|
#include "hp2100_defs.h"
|
|
#include "hp2100_cpu.h"
|
|
#include "hp2100_cpu1.h"
|
|
#include "hp2100_fp1.h"
|
|
|
|
|
|
#if defined (HAVE_INT64) /* we need int64 support */
|
|
|
|
/* Field widths. */
|
|
|
|
#define IN_W_SIGN 1
|
|
#define IN_W_SMAGN 15
|
|
#define IN_W_DMAGN 31
|
|
|
|
#define FP_W_MSIGN 1
|
|
#define FP_W_FMANT 23
|
|
#define FP_W_XMANT 39
|
|
#define FP_W_TMANT 55
|
|
#define FP_W_EMANT 55
|
|
#define FP_W_EXPANDEXP 9
|
|
#define FP_W_EXP 7
|
|
#define FP_W_ESIGN 1
|
|
|
|
/* Starting bit numbers. */
|
|
|
|
#define IN_V_SIGN (64 - IN_W_SIGN)
|
|
#define IN_V_SNUM (64 - IN_W_SIGN - IN_W_SMAGN)
|
|
#define IN_V_DNUM (64 - IN_W_SIGN - IN_W_DMAGN)
|
|
|
|
#define FP_V_FNUM (64 - FP_W_MSIGN - FP_W_FMANT - FP_W_EXP - FP_W_ESIGN)
|
|
#define FP_V_XNUM (64 - FP_W_MSIGN - FP_W_XMANT - FP_W_EXP - FP_W_ESIGN)
|
|
#define FP_V_TNUM (64 - FP_W_MSIGN - FP_W_TMANT - FP_W_EXP - FP_W_ESIGN)
|
|
#define FP_V_ENUM (64 - FP_W_MSIGN - FP_W_EMANT - FP_W_EXP - FP_W_ESIGN)
|
|
|
|
#define FP_V_MSIGN (64 - FP_W_MSIGN)
|
|
#define FP_V_FMANT (64 - FP_W_MSIGN - FP_W_FMANT)
|
|
#define FP_V_XMANT (64 - FP_W_MSIGN - FP_W_XMANT)
|
|
#define FP_V_TMANT (64 - FP_W_MSIGN - FP_W_TMANT)
|
|
#define FP_V_EMANT (64 - FP_W_MSIGN - FP_W_EMANT)
|
|
#define FP_V_EXP 1
|
|
#define FP_V_ESIGN 0
|
|
|
|
/* Right-aligned field masks. */
|
|
|
|
#define IN_M_SIGN (((t_uint64) 1 << IN_W_SIGN) - 1)
|
|
#define IN_M_SMAGN (((t_uint64) 1 << IN_W_SMAGN) - 1)
|
|
#define IN_M_DMAGN (((t_uint64) 1 << IN_W_DMAGN) - 1)
|
|
|
|
#define FP_M_MSIGN (((t_uint64) 1 << FP_W_MSIGN) - 1)
|
|
#define FP_M_FMANT (((t_uint64) 1 << FP_W_FMANT) - 1)
|
|
#define FP_M_XMANT (((t_uint64) 1 << FP_W_XMANT) - 1)
|
|
#define FP_M_TMANT (((t_uint64) 1 << FP_W_TMANT) - 1)
|
|
#define FP_M_EMANT (((t_uint64) 1 << FP_W_EMANT) - 1)
|
|
|
|
#define FP_M_EXPANDEXP ((1 << FP_W_EXPANDEXP) - 1)
|
|
#define FP_M_EXP ((1 << FP_W_EXP) - 1)
|
|
#define FP_M_ESIGN ((1 << FP_W_ESIGN) - 1)
|
|
|
|
/* In-place field masks. */
|
|
|
|
#define IN_SIGN (IN_M_SIGN << IN_V_SIGN)
|
|
#define IN_SMAGN (IN_M_SMAGN << IN_V_SNUM)
|
|
#define IN_DMAGN (IN_M_DMAGN << IN_V_DNUM)
|
|
|
|
#define FP_MSIGN (FP_M_MSIGN << FP_V_MSIGN)
|
|
#define FP_FMANT (FP_M_FMANT << FP_V_FMANT)
|
|
#define FP_XMANT (FP_M_XMANT << FP_V_XMANT)
|
|
#define FP_TMANT (FP_M_TMANT << FP_V_TMANT)
|
|
#define FP_EMANT (FP_M_EMANT << FP_V_EMANT)
|
|
#define FP_EXP (FP_M_EXP << FP_V_EXP)
|
|
#define FP_ESIGN (FP_M_ESIGN << FP_V_ESIGN)
|
|
|
|
/* In-place record masks. */
|
|
|
|
#define IN_SSMAGN (IN_SIGN | IN_SMAGN)
|
|
#define IN_SDMAGN (IN_SIGN | IN_DMAGN)
|
|
|
|
#define FP_SFMANT (FP_MSIGN | FP_FMANT)
|
|
#define FP_SXMANT (FP_MSIGN | FP_XMANT)
|
|
#define FP_STMANT (FP_MSIGN | FP_TMANT)
|
|
#define FP_SEMANT (FP_MSIGN | FP_EMANT)
|
|
#define FP_SEXP (FP_ESIGN | FP_EXP)
|
|
|
|
/* Minima and maxima. */
|
|
|
|
#define FP_ONEHALF ((t_int64) 1 << (FP_V_MSIGN - 1)) /* mantissa = 0.5 */
|
|
#define FP_MAXPMANT ((t_int64) FP_EMANT) /* maximum pos mantissa */
|
|
#define FP_MAXNMANT ((t_int64) FP_MSIGN) /* maximum neg mantissa */
|
|
#define FP_MAXPEXP (FP_M_EXPANDEXP) /* maximum pos expanded exponent */
|
|
#define FP_MAXNEXP (-(FP_MAXPEXP + 1)) /* maximum neg expanded exponent */
|
|
|
|
/* Floating-point helpers. */
|
|
|
|
#define DENORM(x) ((((x) ^ (x) << 1) & FP_MSIGN) == 0)
|
|
|
|
#define TO_EXP(e) (int8) ((e >> FP_V_EXP & FP_M_EXP) | \
|
|
(e & FP_M_ESIGN ? ~FP_M_EXP : 0))
|
|
|
|
/* Property constants. */
|
|
|
|
static const t_int64 p_half_lsb[6] = { ((t_int64) 1 << IN_V_SNUM) - 1, /* different than FP! */
|
|
((t_int64) 1 << IN_V_DNUM) - 1, /* different than FP! */
|
|
(t_int64) 1 << (FP_V_FMANT - 1),
|
|
(t_int64) 1 << (FP_V_XMANT - 1),
|
|
(t_int64) 1 << (FP_V_TMANT - 1),
|
|
(t_int64) 1 << (FP_V_EMANT - 1) };
|
|
|
|
static const t_int64 n_half_lsb[6] = { 0,
|
|
0,
|
|
((t_int64) 1 << (FP_V_FMANT - 1)) - 1,
|
|
((t_int64) 1 << (FP_V_XMANT - 1)) - 1,
|
|
((t_int64) 1 << (FP_V_TMANT - 1)) - 1,
|
|
((t_int64) 1 << (FP_V_EMANT - 1)) - 1 };
|
|
|
|
static const uint32 op_start[6] = { IN_V_SNUM,
|
|
IN_V_DNUM,
|
|
FP_V_FMANT,
|
|
FP_V_XMANT,
|
|
FP_V_TMANT,
|
|
FP_V_EMANT };
|
|
|
|
static const t_uint64 mant_mask[6] = { IN_SSMAGN,
|
|
IN_SDMAGN,
|
|
FP_SFMANT,
|
|
FP_SXMANT,
|
|
FP_STMANT,
|
|
FP_SEMANT };
|
|
|
|
static const uint32 op_bits[6] = { IN_W_SMAGN,
|
|
IN_W_DMAGN,
|
|
FP_W_FMANT + FP_W_MSIGN,
|
|
FP_W_XMANT + FP_W_MSIGN,
|
|
FP_W_TMANT + FP_W_MSIGN,
|
|
FP_W_EMANT + FP_W_MSIGN };
|
|
|
|
static const t_int64 op_mask[6] = { ~(((t_int64) 1 << IN_V_SNUM) - 1),
|
|
~(((t_int64) 1 << IN_V_DNUM) - 1),
|
|
~(((t_int64) 1 << FP_V_FNUM) - 1),
|
|
~(((t_int64) 1 << FP_V_XNUM) - 1),
|
|
~(((t_int64) 1 << FP_V_TNUM) - 1),
|
|
~(((t_int64) 1 << FP_V_ENUM) - 1) };
|
|
|
|
static const uint32 int_p_max[2] = { IN_M_SMAGN,
|
|
IN_M_DMAGN };
|
|
|
|
|
|
/* Internal unpacked floating-point representation. */
|
|
|
|
typedef struct {
|
|
t_int64 mantissa;
|
|
int32 exponent;
|
|
OPSIZE precision;
|
|
} FPU;
|
|
|
|
|
|
|
|
/* Low-level helper routines. */
|
|
|
|
|
|
/* Arithmetic shift right for mantissa only.
|
|
|
|
Returns TRUE if any one-bits are shifted out (for F-series only).
|
|
*/
|
|
|
|
static t_bool asr (FPU *operand, int32 shift)
|
|
{
|
|
t_uint64 mask;
|
|
t_bool bits_lost;
|
|
|
|
if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */
|
|
mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */
|
|
bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */
|
|
}
|
|
else
|
|
bits_lost = FALSE;
|
|
|
|
operand->mantissa = operand->mantissa >> shift; /* mantissa is int, so ASR */
|
|
return bits_lost;
|
|
}
|
|
|
|
|
|
/* Logical shift right for mantissa and exponent.
|
|
|
|
Shifts mantissa and corrects exponent for mantissa overflow.
|
|
Returns TRUE if any one-bits are shifted out (for F-series only).
|
|
*/
|
|
|
|
static t_bool lsrx (FPU *operand, int32 shift)
|
|
{
|
|
t_uint64 mask;
|
|
t_bool bits_lost;
|
|
|
|
if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */
|
|
mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */
|
|
bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */
|
|
}
|
|
else
|
|
bits_lost = FALSE;
|
|
|
|
operand->mantissa = (t_uint64) operand->mantissa >> shift; /* uint, so LSR */
|
|
operand->exponent = operand->exponent + shift; /* correct exponent */
|
|
return bits_lost;
|
|
}
|
|
|
|
|
|
/* Unpack an operand into a long integer.
|
|
|
|
Returns a left-aligned integer or mantissa. Does not mask to precision; this
|
|
should be done subsequently if desired.
|
|
*/
|
|
|
|
static t_int64 unpack_int (OP packed, OPSIZE precision)
|
|
{
|
|
uint32 i;
|
|
t_uint64 unpacked = 0;
|
|
|
|
if (precision == in_s)
|
|
unpacked = (t_uint64) packed.word << 48; /* unpack single integer */
|
|
|
|
else if (precision == in_d)
|
|
unpacked = (t_uint64) packed.dword << 32; /* unpack double integer */
|
|
|
|
else {
|
|
if (precision == fp_e) /* five word operand? */
|
|
precision = fp_t; /* only four mantissa words */
|
|
|
|
for (i = 0; i < 4; i++) /* unpack fp 2 to 4 words */
|
|
if (i < TO_COUNT (precision))
|
|
unpacked = unpacked << 16 | packed.fpk[i];
|
|
else
|
|
unpacked = unpacked << 16;
|
|
}
|
|
|
|
return (t_int64) unpacked;
|
|
}
|
|
|
|
|
|
/* Unpack a packed operand.
|
|
|
|
The packed value is split into separate mantissa and exponent variables. The
|
|
multiple words of the mantissa are concatenated into a single 64-bit signed
|
|
value, and the exponent is shifted with recovery of the sign.
|
|
*/
|
|
|
|
static FPU unpack (OP packed, OPSIZE precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked.precision = precision; /* set value's precision */
|
|
|
|
unpacked.mantissa = /* unpack and mask mantissa */
|
|
unpack_int (packed, precision) & (t_int64) mant_mask[precision];
|
|
|
|
switch (precision) {
|
|
|
|
case fp_f:
|
|
case fp_x:
|
|
case fp_t:
|
|
unpacked.exponent = /* unpack exponent from correct word */
|
|
TO_EXP (packed.fpk[(uint32) precision - 1]);
|
|
break;
|
|
|
|
case fp_e:
|
|
unpacked.exponent = /* unpack expanded exponent */
|
|
(int16) (packed.fpk[4] >> FP_V_EXP | /* rotate sign into place */
|
|
(packed.fpk[4] & 1 ? SIGN : 0));
|
|
break;
|
|
|
|
case fp_a: /* no action for value in accum */
|
|
case in_s: /* integers don't use exponent */
|
|
case in_d: /* integers don't use exponent */
|
|
default:
|
|
unpacked.exponent = 0;
|
|
break;
|
|
}
|
|
|
|
return unpacked;
|
|
}
|
|
|
|
|
|
/* Pack a long integer into an operand. */
|
|
|
|
static OP pack_int (t_int64 unpacked, OPSIZE precision)
|
|
{
|
|
int32 i;
|
|
OP packed;
|
|
|
|
if (precision == in_s)
|
|
packed.word = (uint16) (unpacked >> 48) & DMASK; /* pack single integer */
|
|
|
|
else if (precision == in_d)
|
|
packed.dword = (uint32) (unpacked >> 32) & DMASK32; /* pack double integer */
|
|
|
|
else {
|
|
if (precision == fp_e) /* five word operand? */
|
|
precision = fp_t; /* only four mantissa words */
|
|
|
|
for (i = 3; i >= 0; i--) { /* pack fp 2 to 4 words */
|
|
packed.fpk[i] = (uint16) unpacked & DMASK;
|
|
unpacked = unpacked >> 16;
|
|
}
|
|
}
|
|
|
|
return packed;
|
|
}
|
|
|
|
|
|
/* Pack an unpacked floating-point number.
|
|
|
|
The 64-bit mantissa is split into the appropriate number of 16-bit words.
|
|
The exponent is rotated to incorporate the sign bit and merged into the
|
|
appropriate word.
|
|
*/
|
|
|
|
static OP pack (FPU unpacked)
|
|
{
|
|
OP packed;
|
|
uint8 exp;
|
|
|
|
packed = pack_int (unpacked.mantissa, unpacked.precision); /* pack mantissa */
|
|
|
|
exp = (uint8) (unpacked.exponent << FP_V_EXP | /* rotate exponent */
|
|
(unpacked.exponent < 0) << FP_V_ESIGN);
|
|
|
|
switch (unpacked.precision) { /* merge exponent into correct word */
|
|
|
|
case in_s: /* no action for integers */
|
|
case in_d:
|
|
break;
|
|
|
|
case fp_f: /* merge into last word */
|
|
case fp_x:
|
|
case fp_t:
|
|
packed.fpk[(uint32) unpacked.precision - 1] =
|
|
(packed.fpk[(uint32) unpacked.precision - 1] & ~FP_SEXP) | exp;
|
|
break;
|
|
|
|
case fp_e: /* place in separate word */
|
|
packed.fpk[4] = (uint16) (unpacked.exponent << FP_V_EXP |
|
|
(unpacked.exponent < 0) << FP_V_ESIGN);
|
|
break;
|
|
|
|
case fp_a: /* no action for value in accum */
|
|
break;
|
|
}
|
|
|
|
return packed;
|
|
}
|
|
|
|
|
|
/* Normalize an unpacked floating-point number.
|
|
|
|
Floating-point numbers are in normal form if the sign bit and the MSB of the
|
|
mantissa differ. Unnormalized numbers are shifted as needed with appropriate
|
|
exponent modification.
|
|
*/
|
|
|
|
static void normalize (FPU *unpacked)
|
|
{
|
|
|
|
if (unpacked->mantissa) /* non-zero? */
|
|
while (DENORM (unpacked->mantissa)) { /* normal form? */
|
|
unpacked->exponent = unpacked->exponent - 1; /* no, so left shift */
|
|
unpacked->mantissa = unpacked->mantissa << 1;
|
|
}
|
|
else
|
|
unpacked->exponent = 0; /* clean for zero */
|
|
return;
|
|
}
|
|
|
|
|
|
/* Round an unpacked floating-point number and check for overflow.
|
|
|
|
An unpacked floating-point number is rounded by adding one-half of the LSB
|
|
value, maintaining symmetry around zero. If rounding resulted in a mantissa
|
|
overflow, the result logically is shifted to the right with an appropriate
|
|
exponent modification. Finally, the result is checked for exponent underflow
|
|
or overflow, and the appropriate approximation (zero or infinity) is
|
|
returned.
|
|
|
|
Rounding in hardware involves a special mantissa extension register that
|
|
holds three "guard" bits and one "sticky" bit. These represent the value of
|
|
bits right-shifted out the mantissa register. Under simulation, we track
|
|
such right-shifts and utilize the lower eight bits of the 64-bit mantissa
|
|
value to simulate the extension register.
|
|
|
|
Overflow depends on whether the FPP expanded-exponent form is being used
|
|
(this expands the exponent range by two bits). If overflow is detected, the
|
|
value representing infinity is dependent on whether the operation is on
|
|
behalf of the Fast FORTRAN Processor. The F-Series FPP returns positive
|
|
infinity on both positive and negative overflow for all precisions. The 2100
|
|
and M/E-Series FFPs return negative infinity on negative overflow of
|
|
extended-precision values. Single-precision overflows on these machines
|
|
always return positive infinity.
|
|
|
|
The number to be rounded must be normalized upon entry.
|
|
*/
|
|
|
|
static uint32 roundovf (FPU *unpacked, t_bool expand)
|
|
{
|
|
uint32 overflow;
|
|
t_bool sign;
|
|
|
|
sign = (unpacked->mantissa < 0); /* save mantissa sign */
|
|
|
|
if (sign) /* round and mask the number */
|
|
unpacked->mantissa =
|
|
(unpacked->mantissa + n_half_lsb[unpacked->precision]) &
|
|
(t_int64) mant_mask[unpacked->precision];
|
|
else
|
|
unpacked->mantissa =
|
|
(unpacked->mantissa + p_half_lsb[unpacked->precision]) &
|
|
(t_int64) mant_mask[unpacked->precision];
|
|
|
|
if (sign != (unpacked->mantissa < 0)) /* mantissa overflow? */
|
|
lsrx (unpacked, 1); /* correct by shifting */
|
|
else
|
|
normalize (unpacked); /* renorm may be needed */
|
|
|
|
if (unpacked->mantissa == 0) { /* result zero? */
|
|
unpacked->mantissa = 0; /* return zero */
|
|
unpacked->exponent = 0;
|
|
overflow = 0; /* with overflow clear */
|
|
}
|
|
else if (unpacked->exponent < /* result underflow? */
|
|
(FP_MAXNEXP >> (expand ? 0 : 2))) {
|
|
unpacked->mantissa = 0; /* return zero */
|
|
unpacked->exponent = 0;
|
|
overflow = 1; /* and set overflow */
|
|
}
|
|
else if (unpacked->exponent > /* result overflow? */
|
|
(FP_MAXPEXP >> (expand ? 0 : 2))) {
|
|
if (sign && /* negative value? */
|
|
(unpacked->precision == fp_x) && /* extended precision? */
|
|
(UNIT_CPU_MODEL != UNIT_1000_F)) { /* not F-series? */
|
|
unpacked->mantissa = FP_MAXNMANT; /* return negative infinity */
|
|
unpacked->exponent = FP_MAXPEXP & FP_M_EXP;
|
|
}
|
|
else {
|
|
unpacked->mantissa = FP_MAXPMANT; /* return positive infinity */
|
|
unpacked->exponent = FP_MAXPEXP & FP_M_EXP;
|
|
}
|
|
overflow = 1; /* and set overflow */
|
|
}
|
|
else
|
|
overflow = 0; /* value is in range */
|
|
|
|
return overflow;
|
|
}
|
|
|
|
|
|
/* Normalize, round, and pack an unpacked floating-point number. */
|
|
|
|
static uint32 nrpack (OP *packed, FPU unpacked, t_bool expand)
|
|
{
|
|
uint32 overflow;
|
|
|
|
normalize (&unpacked); /* normalize for rounding */
|
|
overflow = roundovf (&unpacked, expand); /* round and check for overflow */
|
|
*packed = pack (unpacked); /* pack result */
|
|
|
|
return overflow;
|
|
}
|
|
|
|
|
|
|
|
/* Low-level arithmetic routines. */
|
|
|
|
|
|
/* Complement an unpacked number. */
|
|
|
|
static void complement (FPU *result)
|
|
{
|
|
if (result->mantissa == FP_MAXNMANT) { /* maximum negative? */
|
|
result->mantissa = FP_ONEHALF; /* complement of -1.0 * 2 ^ n */
|
|
result->exponent = result->exponent + 1; /* is 0.5 * 2 ^ (n + 1) */
|
|
}
|
|
else
|
|
result->mantissa = -result->mantissa; /* negate mantissa */
|
|
return;
|
|
}
|
|
|
|
|
|
/* Add two unpacked numbers.
|
|
|
|
The mantissas are first aligned if necessary by scaling the smaller of the
|
|
two operands. If the magnitude of the difference between the exponents is
|
|
greater than the number of significant bits, then the smaller number has been
|
|
scaled to zero (swamped), and so the sum is simply the larger operand.
|
|
Otherwise, the sum is computed and checked for overflow, which has occurred
|
|
if the signs of the operands are the same but differ from that of the result.
|
|
Scaling and renormalization is performed if overflow occurred.
|
|
*/
|
|
|
|
static void add (FPU *sum, FPU augend, FPU addend)
|
|
{
|
|
int32 magn;
|
|
t_bool bits_lost;
|
|
|
|
if (augend.mantissa == 0)
|
|
*sum = addend; /* X + 0 = X */
|
|
|
|
else if (addend.mantissa == 0)
|
|
*sum = augend; /* 0 + X = X */
|
|
|
|
else {
|
|
magn = augend.exponent - addend.exponent; /* difference exponents */
|
|
|
|
if (magn > 0) { /* addend smaller? */
|
|
*sum = augend; /* preset augend */
|
|
bits_lost = asr (&addend, magn); /* align addend */
|
|
}
|
|
else { /* augend smaller? */
|
|
*sum = addend; /* preset addend */
|
|
magn = -magn; /* make difference positive */
|
|
bits_lost = asr (&augend, magn); /* align augend */
|
|
}
|
|
|
|
if (magn <= (int32) op_bits[augend.precision]) { /* value swamped? */
|
|
sum->mantissa = /* no, add mantissas */
|
|
addend.mantissa + augend.mantissa;
|
|
|
|
if (((addend.mantissa < 0) == (augend.mantissa < 0)) && /* mantissa overflow? */
|
|
((addend.mantissa < 0) != (sum->mantissa < 0))) {
|
|
bits_lost = bits_lost | lsrx (sum, 1); /* restore value */
|
|
sum->mantissa = /* restore sign */
|
|
sum-> mantissa | (addend.mantissa & FP_MSIGN);
|
|
}
|
|
|
|
if (bits_lost) /* any bits lost? */
|
|
sum->mantissa = sum->mantissa | 1; /* include one for rounding */
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/* Multiply two unpacked numbers.
|
|
|
|
The single-precision firmware (FMP) operates differently from the firmware
|
|
extended-precision (.XMPY) and the hardware multiplies of any precision.
|
|
Firmware implementations use the MPY micro-order to form 16-bit x 16-bit =
|
|
32-bit partial products and sum them to form the result. The hardware uses a
|
|
series of shifts and adds. This means that firmware FMP and hardware FMP
|
|
return slightly different values, as may be seen by attempting to run the
|
|
firmware FMP diagnostic on the FPP.
|
|
|
|
The FMP microcode calls a signed multiply routine to calculate three partial
|
|
products (all but LSB * LSB). Because the LSBs are unsigned, i.e., all bits
|
|
significant, the two MSB * LSB products are calculated using LSB/2. The
|
|
unsigned right-shift ensures a positive LSB with no significant bits lost,
|
|
because the lower eight bits are unused (they held the vacated exponent). In
|
|
order to sum the partial products, the LSB of the result of MSB * MSB is also
|
|
right-shifted before addition. Note, though, that this loses a significant
|
|
bit. After summation, the result is left-shifted to correct for the original
|
|
right shifts.
|
|
|
|
The .XMPY microcode negates both operands as necessary to produce positive
|
|
values and then forms six of the nine 16-bit x 16-bit = 32-bit unsigned
|
|
multiplications required for a full 96-bit product. Given a 48-bit
|
|
multiplicand "a1a2a3" and a 48-bit multiplier "b1b2b3", the firmware performs
|
|
these calculations to develop a 48-bit product:
|
|
|
|
a1 a2 a3
|
|
+-------+-------+-------+
|
|
b1 b2 b3
|
|
+-------+-------+-------+
|
|
_________________________
|
|
|
|
a1 * b3 [p1]
|
|
+-------+-------+
|
|
a2 * b2 [p2]
|
|
+-------+-------+
|
|
a1 * b2 [p3]
|
|
+-------+-------+
|
|
a3 * b1 [p4]
|
|
+-------+-------+
|
|
a2 * b1 [p5]
|
|
+-------+-------+
|
|
a1 * b1 [p6]
|
|
+-------+-------+
|
|
_________________________________
|
|
|
|
product
|
|
+-------+-------+-------+
|
|
|
|
The least-significant words of partial products [p1], [p2], and [p4] are used
|
|
only to develop a carry bit into the 48-bit sum. The product is complemented
|
|
as necessary to restore the sign.
|
|
|
|
The basic FPP hardware algorithm scans the multiplier and adds a shifted copy
|
|
of the multiplicand whenever a one-bit is detected. To avoid successive adds
|
|
when a string of ones is encountered (because adds are more expensive than
|
|
shifts), the hardware instead adds the multiplicand shifted by N + 1 + P and
|
|
subtracts the multiplicand shifted by P to obtain the equivalent value with a
|
|
maximum of two operations.
|
|
|
|
Instead of implementing either the .XMPY firmware algorithm or the hardware
|
|
shift-and-add algorithm directly, it is more efficient under simulation to
|
|
use 32 x 32 = 64-bit multiplications, thereby reducing the number required
|
|
from six to four (64-bit "c1c2" x 64-bit "d1d2"):
|
|
|
|
ah al
|
|
+-------+-------+
|
|
bh bl
|
|
+-------+-------+
|
|
_________________
|
|
|
|
al * bl [ll]
|
|
+-------+-------+
|
|
ah * bl [hl]
|
|
+-------+-------+
|
|
al * bh [lh]
|
|
+-------+-------+
|
|
ah * bh [hh]
|
|
+-------+-------+
|
|
_________________________________
|
|
|
|
product
|
|
+-------+-------+
|
|
|
|
However, the FMP algorithm is implemented directly from the microcode to
|
|
preserve the fidelity of the simulation, i.e., to lose the same amount
|
|
of precision.
|
|
*/
|
|
|
|
static void multiply (FPU *product, FPU multiplicand, FPU multiplier)
|
|
{
|
|
uint32 ah, al, bh, bl, sign = 0;
|
|
t_uint64 hh, hl, lh, ll, carry;
|
|
int16 ch, cl, dh, dl;
|
|
t_bool firmware;
|
|
|
|
product->precision = multiplicand.precision; /* set precision */
|
|
|
|
if ((multiplicand.mantissa == 0) || /* 0 * X = 0 */
|
|
(multiplier.mantissa == 0)) /* X * 0 = 0 */
|
|
product->mantissa = product->exponent = 0;
|
|
|
|
else {
|
|
firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */
|
|
|
|
if (!firmware || (product->precision != fp_f)) { /* hardware? */
|
|
if (multiplicand.mantissa < 0) { /* negative? */
|
|
complement (&multiplicand); /* complement operand */
|
|
sign = ~sign; /* track sign */
|
|
}
|
|
if (multiplier.mantissa < 0) { /* negative? */
|
|
complement (&multiplier); /* complement operand */
|
|
sign = ~sign; /* track sign */
|
|
}
|
|
}
|
|
|
|
product->exponent = /* compute exponent */
|
|
multiplicand.exponent + multiplier.exponent + 1;
|
|
|
|
ah = (uint32) (multiplicand.mantissa >> 32); /* split multiplicand */
|
|
al = (uint32) (multiplicand.mantissa & DMASK32); /* into high and low parts */
|
|
bh = (uint32) (multiplier.mantissa >> 32); /* split multiplier */
|
|
bl = (uint32) (multiplier.mantissa & DMASK32); /* into high and low parts */
|
|
|
|
if (firmware && (product->precision == fp_f)) { /* single-precision firmware? */
|
|
ch = (int16) (ah >> 16) & DMASK; /* split 32-bit multiplicand */
|
|
cl = (int16) (ah & 0xfffe); /* into high and low parts */
|
|
dh = (int16) (bh >> 16) & DMASK; /* split 32-bit multiplier */
|
|
dl = (int16) (bh & 0xfffe); /* into high and low parts */
|
|
|
|
hh = (t_uint64) (((int32) ch * dh) & ~1); /* form cross products */
|
|
hl = (t_uint64) (((t_int64) ch * (t_int64) (uint16) dl +
|
|
(t_int64) dh * (t_int64) (uint16) cl) &
|
|
0xfffffffffffe0000);
|
|
|
|
product->mantissa = (t_uint64) (((t_int64) hh << 32) + /* sum partials */
|
|
((t_int64) hl << 16));
|
|
}
|
|
|
|
else {
|
|
hh = ((t_uint64) ah * bh); /* form four cross products */
|
|
hl = ((t_uint64) ah * bl); /* using 32 x 32 = */
|
|
lh = ((t_uint64) al * bh); /* 64-bit multiplies */
|
|
ll = ((t_uint64) al * bl);
|
|
|
|
carry = ((ll >> 32) + (uint32) hl + (uint32) lh) >> 32; /* form carry */
|
|
|
|
product->mantissa = hh + (hl >> 32) + (lh >> 32) + carry; /* sum partials */
|
|
|
|
if (sign) /* negate if required */
|
|
complement (product);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/* Divide two unpacked numbers.
|
|
|
|
As with multiply, the single-precision firmware (FDV) operates differently
|
|
from the firmware extended-precision (.XDIV) and the hardware divisions of
|
|
any precision. Firmware implementations use the DIV micro-order to form
|
|
32-bit / 16-bit = 16-bit quotients and 16-bit remainders. These are used in
|
|
a "divide and correct" algorithm, wherein the quotient is estimated and then
|
|
corrected by comparing the dividend to the product of the quotient and the
|
|
divisor. The hardware uses a series of shifts and subtracts. This means
|
|
that firmware FDV and hardware FDV once again return slightly different
|
|
values.
|
|
|
|
Under simulation, the classic divide-and-correct method is employed, using
|
|
64-bit / 32-bit = 32-bit divisions. This method considers the 64-bit
|
|
dividend and divisor each to consist of two 32-bit "digits." The 64-bit
|
|
dividend "a1a2a3a4" is divided by the first 32-bit digit "b1b2" of the 64-bit
|
|
divisor "b1b2b3b4", yielding a 32-bit trial quotient digit and a 32-bit
|
|
remainder digit. A correction is developed by subtracting the product of the
|
|
second 32-bit digit "b3b4" of the divisor and the trial quotient digit from
|
|
the remainder (we take advantage of the eight bits vacated by the exponent
|
|
during unpacking to ensure that this product will not overflow into the sign
|
|
bit). If the remainder is negative, the trial quotient is too large, so it
|
|
is decremented, and the (full 64-bit) divisor is added to the correction.
|
|
This is repeated until the correction is non-negative, indicating that the
|
|
first quotient digit is correct. The process is then repeated using the
|
|
remainder as the dividend to develop the second 32-bit digit of the quotient.
|
|
The two digits are then concatenated for produce the final 64-bit value.
|
|
|
|
(See, "Divide-and-Correct Methods for Multiple Precision Division" by Marvin
|
|
L. Stein, Communications of the ACM, August 1964 for background.)
|
|
|
|
The microcoded single-precision division avoids overflows by right-shifting
|
|
some values, which leads to a loss of precision in the LSBs. We duplicate
|
|
the firmware algorithm here to preserve the fidelity of the simulation.
|
|
*/
|
|
|
|
static void divide (FPU *quotient, FPU dividend, FPU divisor)
|
|
{
|
|
uint32 sign = 0;
|
|
t_int64 bh, bl, r1, r0, p1, p0;
|
|
t_uint64 q, q1, q0;
|
|
t_bool firmware;
|
|
int32 ah, div, cp;
|
|
int16 dh, dl, pq1, pq2, cq;
|
|
|
|
quotient->precision = dividend.precision; /* set precision */
|
|
|
|
if (divisor.mantissa == 0) { /* division by zero? */
|
|
if (dividend.mantissa < 0)
|
|
quotient->mantissa = FP_MSIGN; /* return minus infinity */
|
|
else
|
|
quotient->mantissa = ~FP_MSIGN; /* or plus infinity */
|
|
quotient->exponent = FP_MAXPEXP + 1;
|
|
}
|
|
|
|
else if (dividend.mantissa == 0) /* dividend zero? */
|
|
quotient->mantissa = quotient->exponent = 0; /* yes; result is zero */
|
|
|
|
else {
|
|
firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */
|
|
|
|
if (!firmware || (quotient->precision != fp_f)) { /* hardware or FFP? */
|
|
if (dividend.mantissa < 0) { /* negative? */
|
|
complement (÷nd); /* complement operand */
|
|
sign = ~sign; /* track sign */
|
|
}
|
|
if (divisor.mantissa < 0) { /* negative? */
|
|
complement (&divisor); /* complement operand */
|
|
sign = ~sign; /* track sign */
|
|
}
|
|
}
|
|
|
|
quotient->exponent = /* division subtracts exponents */
|
|
dividend.exponent - divisor.exponent;
|
|
|
|
bh = divisor.mantissa >> 32; /* split divisor */
|
|
bl = divisor.mantissa & DMASK32; /* into high and low parts */
|
|
|
|
if (firmware && (quotient->precision == fp_f)) { /* single-precision firmware? */
|
|
quotient->exponent = quotient->exponent + 1; /* fix exponent */
|
|
|
|
ah = (int32) (dividend.mantissa >> 32); /* split dividend */
|
|
dh = (int16) (bh >> 16); /* split divisor again */
|
|
dl = (int16) bh;
|
|
|
|
div = ah >> 2; /* ASR 2 to prevent overflow */
|
|
|
|
pq1 = (int16) (div / dh); /* form first partial quotient */
|
|
div = ((div % dh) & ~1) << 15; /* ASR 1, move rem to upper */
|
|
pq2 = (int16) (div / dh); /* form second partial quotient */
|
|
|
|
div = (uint16) dl << 13; /* move divisor LSB to upper, LSR 3 */
|
|
cq = (int16) (div / dh); /* form correction quotient */
|
|
cp = -cq * pq1; /* and correction product */
|
|
|
|
cp = (((cp >> 14) & ~3) + (int32) pq2) << 1; /* add corr prod and 2nd partial quo */
|
|
quotient->mantissa = /* add 1st partial quo and align */
|
|
(t_uint64) (((int32) pq1 << 16) + cp) << 32;
|
|
}
|
|
|
|
else { /* hardware or FFP */
|
|
q1 = (t_uint64) (dividend.mantissa / bh); /* form 1st trial quotient */
|
|
r1 = dividend.mantissa % bh; /* and remainder */
|
|
p1 = (r1 << 24) - (bl >> 8) * q1; /* calculate correction */
|
|
|
|
while (p1 < 0) { /* correction needed? */
|
|
q1 = q1 - 1; /* trial quotient too large */
|
|
p1 = p1 + (divisor.mantissa >> 8); /* increase remainder */
|
|
}
|
|
|
|
q0 = (t_uint64) ((p1 << 8) / bh); /* form 2nd trial quotient */
|
|
r0 = (p1 << 8) % bh; /* and remainder */
|
|
p0 = (r0 << 24) - (bl >> 8) * q0; /* calculate correction */
|
|
|
|
while (p0 < 0) { /* correction needed? */
|
|
q0 = q0 - 1; /* trial quotient too large */
|
|
p0 = p0 + (divisor.mantissa >> 8); /* increase remainder */
|
|
}
|
|
|
|
q = (q1 << 32) + q0; /* sum quotient digits */
|
|
|
|
if (q1 & 0xffffffff00000000) { /* did we lose MSB? */
|
|
q = (q >> 1) | 0x8000000000000000; /* shift right and replace bit */
|
|
quotient->exponent = quotient->exponent + 1;/* bump exponent for shift */
|
|
}
|
|
|
|
if (q & 0x8000000000000000) /* lose normalization? */
|
|
q = q >> 1; /* correct */
|
|
|
|
quotient->mantissa = (t_int64) q;
|
|
}
|
|
|
|
if (sign)
|
|
complement (quotient); /* negate if required */
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/* Fix an unpacked number.
|
|
|
|
A floating-point value is converted to an integer. The desired precision of
|
|
the result (single or double integer) must be set before calling.
|
|
|
|
Values less than 0.5 (i.e., with negative exponents) underflow to zero. If
|
|
the value exceeds the specified integer range, the maximum integer value is
|
|
returned and overflow is set. Otherwise, the floating-point value is
|
|
right-shifted to zero the exponent. The result is then rounded.
|
|
*/
|
|
|
|
static uint32 fix (FPU *result, FPU operand)
|
|
{
|
|
uint32 overflow;
|
|
t_bool bits_lost;
|
|
|
|
if (operand.exponent < 0) { /* value < 0.5? */
|
|
result->mantissa = 0; /* result rounds to zero */
|
|
overflow = 0; /* clear for underflow */
|
|
}
|
|
|
|
else if (operand.exponent > /* value > integer size? */
|
|
(int32) op_bits[result->precision]) {
|
|
result->mantissa = /* return max int value */
|
|
(t_uint64) int_p_max[result->precision] <<
|
|
op_start[result->precision];
|
|
overflow = 1; /* and set overflow */
|
|
}
|
|
|
|
else { /* value in range */
|
|
bits_lost = asr (&operand, /* shift to zero exponent */
|
|
op_bits[result->precision] - operand.exponent);
|
|
|
|
if (operand.mantissa < 0) { /* value negative? */
|
|
if (bits_lost) /* bits lost? */
|
|
operand.mantissa = operand.mantissa | 1; /* include one for rounding */
|
|
|
|
operand.mantissa = operand.mantissa + /* round result */
|
|
p_half_lsb[result->precision];
|
|
}
|
|
|
|
result->mantissa = operand.mantissa & /* mask to precision */
|
|
op_mask[result->precision];
|
|
overflow = 0;
|
|
}
|
|
|
|
result->exponent = 0; /* tidy up for integer value */
|
|
return overflow;
|
|
}
|
|
|
|
|
|
/* Float an integer to an unpacked number.
|
|
|
|
An integer is converted to a floating-point value. The desired precision of
|
|
the result must be set before calling.
|
|
|
|
Conversion is simply a matter of copying the integer value, setting an
|
|
exponent that reflects the right-aligned position of the bits, and
|
|
normalizing.
|
|
*/
|
|
|
|
static void ffloat (FPU *result, FPU operand)
|
|
{
|
|
result->mantissa = operand.mantissa; /* set value */
|
|
result->exponent = op_bits[operand.precision]; /* set exponent */
|
|
normalize (result); /* normalize */
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
/* High-level floating-point routines. */
|
|
|
|
|
|
/* Determine operand precisions.
|
|
|
|
The precisions of the operands and result are determined by decoding an
|
|
operation opcode and returned to the caller. Pass NULL for both of the
|
|
operands if only the result precision is wanted. Pass NULL for the result if
|
|
only the operand precisions are wanted.
|
|
|
|
Implementation note:
|
|
|
|
1. gcc-4.3.0 complains at -O3 that operand_l/r may not be initialized.
|
|
Because of the mask, the switch statement covers all cases, but gcc
|
|
doesn't realize this. The "default" case is redundant but eliminates the
|
|
warning.
|
|
*/
|
|
|
|
void fp_prec (uint16 opcode, OPSIZE *operand_l, OPSIZE *operand_r, OPSIZE *result)
|
|
{
|
|
OPSIZE fp_size, int_size;
|
|
|
|
fp_size = (OPSIZE) ((opcode & 0003) + 2); /* fp_f, fp_x, fp_t, fp_e */
|
|
int_size = (OPSIZE) ((opcode & 0004) >> 2); /* in_s, in_d */
|
|
|
|
if (operand_l && operand_r) { /* want operand precisions? */
|
|
switch (opcode & 0120) { /* mask out opcode bit 5 */
|
|
case 0000: /* add/mpy */
|
|
case 0020: /* sub/div */
|
|
*operand_l = fp_size; /* assume first op is fp */
|
|
|
|
if (opcode & 0004) /* operand internal? */
|
|
*operand_r = fp_a; /* second op is accum */
|
|
else
|
|
*operand_r = fp_size; /* second op is fp */
|
|
break;
|
|
|
|
case 0100: /* fix/accum as integer */
|
|
*operand_l = fp_size; /* first op is fp */
|
|
*operand_r = fp_a; /* second op is always null */
|
|
break;
|
|
|
|
case 0120: /* flt/accum as float */
|
|
default: /* keeps compiler quiet for uninit warning */
|
|
*operand_l = int_size; /* first op is integer */
|
|
*operand_r = fp_a; /* second op is always null */
|
|
break;
|
|
}
|
|
|
|
if (opcode & 0010) /* operand internal? */
|
|
*operand_l = fp_a; /* first op is accum */
|
|
}
|
|
|
|
if (result) /* want result precision? */
|
|
if ((opcode & 0120) == 0100) /* fix? */
|
|
*result = int_size; /* result is integer */
|
|
else /* all others */
|
|
*result = fp_size; /* result is fp */
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/* Floating Point Processor executor.
|
|
|
|
The executor simulates the MPP interface between the CPU and the FPP. The
|
|
operation to be performed is specified by the supplied opcode, which conforms
|
|
to the FPP hardware interface, as follows:
|
|
|
|
Bits Value Action
|
|
---- ----- ----------------------------------------------
|
|
7 0 Exponent range is standard (+/-127)
|
|
1 Exponent range is expanded (+/-511)
|
|
|
|
6-4 000 Add
|
|
001 Subtract
|
|
010 Multiply
|
|
011 Divide
|
|
100 Fix
|
|
101 Float
|
|
110 (diagnostic)
|
|
111 (diagnostic)
|
|
|
|
3 0 Left operand is supplied
|
|
1 Left operand in accumulator
|
|
|
|
2 0 Right operand is supplied (ADD/SUB/MPY/DIV)
|
|
Single integer operation (FIX/FLT)
|
|
1 Right operand in accumulator (ADD/SUB/MPY/DIV)
|
|
Double integer operation (FIX/FLT)
|
|
|
|
1-0 00 2-word operation
|
|
01 3-word operation
|
|
10 4-word operation
|
|
11 5-word operation
|
|
|
|
If the opcode specifies that the left (or right) operand is in the
|
|
accumulator, then the value supplied for that parameter is not used. All
|
|
results are automatically left in the accumulator. If the result is not
|
|
needed externally, then NULL may be passed for the result parameter.
|
|
|
|
To support accumulator set/get operations under simulation, the opcode is
|
|
expanded to include a special mode, indicated by bit 15 = 1. In this mode,
|
|
if the result parameter is NULL, then the accumulator is set from the value
|
|
passed as operand_l. If the result parameter is not null, then the
|
|
accumulator value is returned as the result, and operand_l is ignored. The
|
|
precision of the operation is performed as specified by the OPSIZE value
|
|
passed in bits 2-0 of the opcode.
|
|
|
|
The function returns 1 if the operation overflows and 0 if not.
|
|
*/
|
|
|
|
uint32 fp_exec (uint16 opcode, OP *result, OP operand_l, OP operand_r)
|
|
{
|
|
static FPU accumulator;
|
|
FPU uoperand_l, uoperand_r;
|
|
OPSIZE op_l_prec, op_r_prec, rslt_prec;
|
|
uint32 overflow;
|
|
|
|
if (opcode & SIGN) { /* accumulator mode? */
|
|
rslt_prec = (OPSIZE) (opcode & 0017); /* get operation precision */
|
|
|
|
if (result) { /* get accumulator? */
|
|
op_l_prec = accumulator.precision; /* save accum prec temp */
|
|
accumulator.precision = rslt_prec; /* set desired precision */
|
|
*result = pack (accumulator); /* pack accumulator */
|
|
accumulator.precision = op_l_prec; /* restore correct prec */
|
|
}
|
|
else /* set accumulator */
|
|
accumulator = unpack (operand_l, rslt_prec); /* unpack from operand */
|
|
|
|
return 0; /* no overflow from accum ops */
|
|
}
|
|
|
|
fp_prec (opcode, &op_l_prec, &op_r_prec, &rslt_prec); /* calc precs from opcode */
|
|
|
|
if (op_l_prec == fp_a) /* left operand in accum? */
|
|
uoperand_l = accumulator; /* copy it */
|
|
else /* operand supplied */
|
|
uoperand_l = unpack (operand_l, op_l_prec); /* unpack from parameter */
|
|
|
|
if (op_r_prec == fp_a) /* right operand in accum? */
|
|
uoperand_r = accumulator; /* copy it */
|
|
else /* operand supplied */
|
|
uoperand_r = unpack (operand_r, op_r_prec); /* unpack from parameter */
|
|
|
|
|
|
switch (opcode & 0160) { /* dispatch operation */
|
|
|
|
case 0000: /* add */
|
|
add (&accumulator, uoperand_l, uoperand_r);
|
|
break;
|
|
|
|
case 0020: /* subtract */
|
|
complement (&uoperand_r);
|
|
add (&accumulator, uoperand_l, uoperand_r);
|
|
break;
|
|
|
|
case 0040: /* multiply */
|
|
multiply (&accumulator, uoperand_l, uoperand_r);
|
|
break;
|
|
|
|
case 0060: /* divide */
|
|
divide (&accumulator, uoperand_l, uoperand_r);
|
|
break;
|
|
|
|
case 0100: /* fix */
|
|
accumulator.precision = rslt_prec;
|
|
overflow = fix (&accumulator, uoperand_l);
|
|
|
|
if (result) /* result wanted? */
|
|
*result = pack_int (accumulator.mantissa, /* pack integer */
|
|
rslt_prec);
|
|
return overflow;
|
|
|
|
case 0120: /* float */
|
|
accumulator.precision = rslt_prec;
|
|
ffloat (&accumulator, uoperand_l);
|
|
|
|
if (result) /* result wanted? */
|
|
*result = pack (accumulator); /* pack FP (FLT does not round) */
|
|
return 0;
|
|
|
|
case 0140: /* (diagnostic) */
|
|
case 0160: /* (diagnostic) */
|
|
return 0;
|
|
}
|
|
|
|
if (UNIT_CPU_MODEL != UNIT_1000_F) /* firmware implementation? */
|
|
accumulator.mantissa = accumulator.mantissa & /* mask to precision */
|
|
op_mask[accumulator.precision];
|
|
|
|
normalize (&accumulator); /* normalize */
|
|
overflow = roundovf (&accumulator, opcode & 0200); /* round and check for overflow */
|
|
|
|
if (result) /* result wanted? */
|
|
*result = pack (accumulator); /* pack result */
|
|
|
|
return overflow;
|
|
}
|
|
|
|
|
|
/* Set or get accumulator at desired precision.
|
|
|
|
This function provides access to the FPP accumulator. In hardware, the
|
|
accumulator may be read at a given precision by sending the FPP an opcode
|
|
encoded with the desired precision and then reading words from the FPP
|
|
/without/ initiating the operation, i.e., without starting the processor.
|
|
|
|
Under simulation, pass this function a NULL operand and the desired
|
|
precision to read the accumulator. Pass a pointer to an operand and the
|
|
desired precision to set the accumulator; the return value in this case is
|
|
not defined.
|
|
*/
|
|
|
|
OP fp_accum (const OP *operand, OPSIZE precision)
|
|
{
|
|
OP result = NOP;
|
|
uint16 opcode = (uint16) precision | SIGN; /* add special mode bit */
|
|
|
|
if (operand)
|
|
fp_exec (opcode, NULL, *operand, NOP); /* set accum */
|
|
else
|
|
fp_exec (opcode, &result, NOP, NOP); /* get accum */
|
|
return result;
|
|
}
|
|
|
|
|
|
/* Pack an unpacked floating-point number.
|
|
|
|
An unpacked mantissa is passed as a "packed" number with an unused exponent.
|
|
The mantissa and separately-passed exponent are packed into the in-memory
|
|
floating-point format. Note that all bits are significant in the mantissa
|
|
(no masking is done).
|
|
*/
|
|
|
|
uint32 fp_pack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */
|
|
unpacked.exponent = exponent; /* set exponent */
|
|
unpacked.precision = precision; /* set precision */
|
|
*result = pack (unpacked); /* pack them */
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Normalize, round, and pack an unpacked floating-point number.
|
|
|
|
An unpacked mantissa is passed as a "packed" number with an unused exponent.
|
|
The mantissa and separately-passed exponent are normalized, rounded, and
|
|
packed into the in-memory floating-point format. Note that all bits are
|
|
significant in the mantissa (no masking is done).
|
|
*/
|
|
|
|
uint32 fp_nrpack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */
|
|
unpacked.exponent = exponent; /* set exponent */
|
|
unpacked.precision = precision; /* set precision */
|
|
return nrpack (result, unpacked, FALSE); /* norm/rnd/pack them */
|
|
}
|
|
|
|
|
|
/* Unpack a packed floating-point number.
|
|
|
|
A floating-point number, packed into the in-memory format, is unpacked into
|
|
separate mantissa and exponent values. The unpacked mantissa is returned in
|
|
a "packed" structure with an exponent of zero. Mantissa or exponent may be
|
|
null if that part isn't wanted.
|
|
*/
|
|
|
|
uint32 fp_unpack (OP *mantissa, int32 *exponent, OP packed, OPSIZE precision)
|
|
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked = unpack (packed, precision); /* unpack mantissa and exponent */
|
|
|
|
if (exponent) /* exponent wanted? */
|
|
*exponent = unpacked.exponent; /* return exponent */
|
|
|
|
if (mantissa) /* mantissa wanted? */
|
|
*mantissa = pack_int (unpacked.mantissa, fp_t); /* return full-size mantissa */
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Complement an unpacked mantissa.
|
|
|
|
An unpacked mantissa is passed as a "packed" number with a zero exponent.
|
|
The exponent increment, i.e., either zero or one, depending on whether a
|
|
renormalization was required, is returned. Note that all bits are
|
|
significant in the mantissa.
|
|
*/
|
|
|
|
uint16 fp_ucom (OP *mantissa, OPSIZE precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked.mantissa = unpack_int (*mantissa, precision); /* unpack mantissa */
|
|
unpacked.exponent = 0; /* clear undefined exponent */
|
|
unpacked.precision = precision; /* set precision */
|
|
complement (&unpacked); /* negate it */
|
|
*mantissa = pack_int (unpacked.mantissa, precision); /* replace mantissa */
|
|
return (uint16) unpacked.exponent; /* return exponent increment */
|
|
}
|
|
|
|
|
|
/* Complement a floating-point number. */
|
|
|
|
uint32 fp_pcom (OP *packed, OPSIZE precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked = unpack (*packed, precision); /* unpack the number */
|
|
complement (&unpacked); /* negate it */
|
|
return nrpack (packed, unpacked, FALSE); /* and norm/rnd/pack */
|
|
}
|
|
|
|
|
|
/* Truncate a floating-point number. */
|
|
|
|
uint32 fp_trun (OP *result, OP source, OPSIZE precision)
|
|
{
|
|
t_bool bits_lost;
|
|
FPU unpacked;
|
|
FPU one = { FP_ONEHALF, 1, fp_t }; /* 0.5 * 2 ** 1 = 1.0 */
|
|
OP zero = { { 0, 0, 0, 0, 0 } }; /* 0.0 */
|
|
t_uint64 mask = mant_mask[precision] & ~FP_MSIGN;
|
|
|
|
unpacked = unpack (source, precision);
|
|
if (unpacked.exponent < 0) /* number < 0.5? */
|
|
*result = zero; /* return 0 */
|
|
else if (unpacked.exponent >= (int32) op_bits[precision]) /* no fractional bits? */
|
|
*result = source; /* already integer */
|
|
else {
|
|
mask = (mask >> unpacked.exponent) & mask; /* mask fractional bits */
|
|
bits_lost = ((unpacked.mantissa & mask) != 0); /* flag if bits lost */
|
|
unpacked.mantissa = unpacked.mantissa & ~mask; /* mask off fraction */
|
|
if ((unpacked.mantissa < 0) && bits_lost) /* negative? */
|
|
add (&unpacked, unpacked, one); /* truncate toward zero */
|
|
nrpack (result, unpacked, FALSE); /* (overflow cannot occur) */
|
|
}
|
|
return 0; /* clear overflow on return */
|
|
}
|
|
|
|
|
|
/* Convert a floating-point number from one precision to another. */
|
|
|
|
uint32 fp_cvt (OP *result, OPSIZE source_precision, OPSIZE dest_precision)
|
|
{
|
|
FPU unpacked;
|
|
|
|
unpacked = unpack (*result, source_precision);
|
|
unpacked.precision = dest_precision;
|
|
return nrpack (result, unpacked, FALSE); /* norm/rnd/pack */
|
|
}
|
|
|
|
|
|
#endif /* end of int64 support */
|