# HG changeset patch # Parent 2f9f68ed036b063948cd0e554011f886d16ee8f7 Correct remquo() quotient computation; cf. MinGW-Bug #41597 * mingwex/math/remquo_generic.sx: New file; it replaces... * mingwex/math/remquo.s mingwex/math/remquof.s: ...these, and also... * mingwex/math/remquol.s: ...this; delete them. * Makefile.in (libmingwex.a): Add "x87remquo.$OBJEXT" requirement. diff --git a/mingwrt/Makefile.in b/mingwrt/Makefile.in --- a/mingwrt/Makefile.in +++ b/mingwrt/Makefile.in @@ -418,11 +418,11 @@ libmingwex.a: $(addsuffix .$(OBJEXT), co lrintl llround llroundf llroundl lround lroundf lroundl modff modfl \ nearbyint nearbyintf nearbyintl nextafterf nextafterl nexttoward nexttowardf \ powf powl powi powif powil remainder remainderf remainderl remquo remquof \ remquol rint rintf rintl round roundf roundl scalbn scalbnf scalbnl signbit \ signbitf signbitl sqrtf sqrtl tgamma tgammaf tgammal trunc truncf truncl \ - x87cvt x87cvtf x87log x87log1p x87pow) + x87cvt x87cvtf x87log x87log1p x87pow x87remquo) # Replacement I/O functions in libmingwex.a, providing better POSIX # compatibility than their Microsoft equivalents. # vpath %.c ${mingwrt_srcdir}/mingwex/stdio diff --git a/mingwrt/mingwex/math/remquo.s b/mingwrt/mingwex/math/remquo.s deleted file mode 100644 --- a/mingwrt/mingwex/math/remquo.s +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Written by Ulrich Drepper . - * Based on e_remainder by J.T. Conklin . - * Removed header file dependency for use in libmingwex.a by - * Danny Smith + * Copyright (C) 2021, MinGW.org Project + * + * Adapted from original code written by J. T. Conklin . + * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ +.intel_syntax noprefix + +.text +.align 4 +.def ___x87remquo; .scl 2; .type 32; .endef + +#if defined _remquo_source +/* Preamble to load the FPU registers, and EDX register, from the + * arguments passed in any call to the function: + * + * double remquo (double, double, int *); + */ +.globl _remquo +.def _remquo; .scl 2; .type 32; .endef +.def ___x87cvt; .scl 2; .type 32; .endef + +_remquo: + fld QWORD ptr 4[esp] /* FPU TOS = x */ + fld QWORD ptr 12[esp] /* FPU TOS = y, x */ + mov edx, DWORD ptr 20[esp] /* EDX = *q */ + +/* Hand off the preloaded register set, to the shared computational + * back-end routine, before ultimately converting its REAL10 result + * to the required REAL8. + */ + call ___x87remquo /* compute REAL10 result */ + jmp ___x87cvt /* convert to REAL8 */ + +#elif defined _remquof_source +/* Preamble to load the FPU registers, and EDX register, from the + * arguments passed in any call to the function: + * + * float remquof (float, float, int *); + */ +.globl _remquof +.def _remquof; .scl 2; .type 32; .endef +.def ___x87cvtf; .scl 2; .type 32; .endef + +_remquof: + fld DWORD ptr 4[esp] /* FPU TOS = x */ + fld DWORD ptr 8[esp] /* FPU TOS = y, x */ + mov edx, DWORD ptr 12[esp] /* EDX = *q */ + +/* Hand off the preloaded register set, to the shared computational + * back-end routine, before ultimately converting its REAL10 result + * to the required REAL4. + */ + call ___x87remquo /* compute REAL10 result */ + jmp ___x87cvtf /* convert to REAL4 */ + +#elif defined _remquol_source +/* Preamble to load the FPU registers, and EDX register, from the + * arguments passed in any call to the function: + * + * long double remquo (long double, long double, int *); + */ +.globl _remquol +.def _remquol; .scl 2; .type 32; .endef + +_remquol: + fld TBYTE ptr 4[esp] /* FPU TOS = x */ + fld TBYTE ptr 16[esp] /* FPU TOS = y, x */ + mov edx, DWORD ptr 28[esp] /* EDX = *q */ + +/* Hand off the preloaded register set, to the shared computational + * back-end routine, to... + */ + jmp ___x87remquo /* ...compute REAL10 result */ + +#else +/* No specific function entry point identified; implement the generic + * back-end code, which is shared by all three entry points. + */ +.globl ___x87remquo + +___x87remquo: +/* Assuming that the entry point preamble has stored the pointer to the + * storage location for the returned integer quotient, in the EDX register, + * and has loaded the floating point divisor (y), and dividend (x), values + * into the FPU st(0) and st(1) registers, respectively, this computes the + * remainder, and floating point quotient, to the full IEEE-754 extended + * (80-bit) precision of the FPU, before ultimately reducing the quotient + * to an integer, storing as many of itsleast-significant bits as can be + * accommodated in an "int", at the address pointed to by EDX, and + * returning the remainder in FPU register st(0). + */ + fst st(2) /* save a copy of 'y'... */ + fld st(1) /* ...and of 'x' */ + +/* Computation of the remainder requires an iterative procedure... + */ +10: fprem1 /* compute interim result */ + fstsw ax /* copy resultant FPU status... */ + sahf /* ...into CPU flags, for testing... */ + jp 10b /* ...until completion */ + +/* We now have the computed remainder (r), and the original saved x and y, + * in FPU registers st(0), st(1), and st(2) respectively; the next step is + * to compute the floating point quotient, (leaving the remainder in place + * as a fractional part, to ensure that eventual truncation rounds in the + * correct direction)... + */ + fstp st(3) /* ...after saving 'r' for return... */ + fdivp st(1), st /* ...divide 'x' by 'y' */ + +/* This now leaves the integer-valued floating point quotient (q) in st(0), + * and the saved remainder in st(1); the computed value of the quotient may + * exceed the maximum which can be represented as an "int", so we reduce it + * modulo "INT_MAX + 1", to retain the least significant bits with absolute + * value not exceeding INT_MAX. + */ + fld DWORD ptr ___int_max_1 /* load equivalent of (INT_MAX + 1) */ + fxch st(1) /* bring 'q' to top of FPU stack */ +20: fprem /* compute interim modulus value */ + fstsw ax /* copy resultant FPU status... */ + sahf /* ...into CPU flags, for testing... */ + jp 20b /* ...until completion */ + +/* Finally, we are left with the residual quotient in st(0), the remainder + * in st(2), and st(1) still retaining the "INT_MAX + 1" value, (which is of + * no further use to us). + */ + fstp st(1) /* pop FPU stack, discarding st(1) */ + fistp DWORD ptr [edx] /* store reduced quotient, leaving... */ + ret /* ...just the remainder, to return */ + +.section .rdata, "dr" +.align 4 +___int_max_1: .long 1325400064 /* (1 + INT_MAX) as float */ +#endif + +/* vim: set autoindent filetype=asm formatoptions=croqlj: */ +/* $RCSfile$: end of file */ diff --git a/mingwrt/mingwex/math/remquof.s b/mingwrt/mingwex/math/remquof.s deleted file mode 100644 --- a/mingwrt/mingwex/math/remquof.s +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Written by Ulrich Drepper . - * Based on e_remainder by J.T. Conklin . - * Removed header file dependency for use in libmingwex.a by - * Danny Smith . - * Based on e_remainder by J.T. Conklin . - * Removed header file dependency for use in libmingwex.a by - * Danny Smith - * Public domain. - */ - .text - .align 4; -.globl _remquol; - _remquol: - fldt 4 +12(%esp) - fldt 4(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - movl %eax, %ecx - shrl $8, %eax - shrl $12, %ecx - andl $4, %ecx - andl $3, %eax - orl %eax, %ecx - movl $0xef2960, %eax - shrl %cl, %eax - andl $3, %eax - movl 4 +12 +12(%esp), %ecx - movl 4 +8(%esp), %edx - xorl 4 +12 +8(%esp), %edx - testl $0x8000, %edx - jz 1f - negl %eax -1: movl %eax, (%ecx) - - ret