Jon Beniston
2018-06-22 10:49:03 UTC
Hi,
Most functions in libm call __ieee754_sqrt when needing to perform a square
root. For most targets, this results in the s/w implementation in
math/e_sqrt.c being used, even if the target has a h/w sqrt instruction.
There are some targets that have machine-specific implementations in
machine/*/, but even if that is a single instruction, the code typically
doesn't get inlined.
The following patch is one possible way to allow a sqrt instruction to be
used and for the calls to be inlined. I've just done this for x86/arm for
now. I've put this in include/machine/ieeefp.h, rather than fdlibm.h, as
that's where most of the other target specific code seems to be.
Not sure if using the __IEEE754_INLINE_SQRT* macros is the best way to
prevent redefinition errors. Perhaps someone has a better idea?
Cheers,
Jon
diff --git a/newlib/libc/include/machine/ieeefp.h b/newlib/libc/include/machine/ieeefp.h
index 2fb2268ce..e917d74b0 100644
--- a/newlib/libc/include/machine/ieeefp.h
+++ b/newlib/libc/include/machine/ieeefp.h
@@ -87,6 +87,39 @@
# define __IEEE_BYTES_LITTLE_ENDIAN
# endif
#endif
+
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) /* presumably: double-precision FP hardware (ACLE __ARM_FP bit 3) and not a soft-float build -- confirm against ACLE */
+#define __IEEE754_INLINE_SQRT /* signals that __ieee754_sqrt is supplied inline here; fdlibm.h tests it to skip its extern prototype */
+static inline double
+__ieee754_sqrt(double x) /* returns the result of one vsqrt.f64 instruction applied to x */
+{
+ double result;
+#if __ARM_ARCH >= 6
+ __asm__ ("vsqrt.f64 %P0, %P1" : "=w" (result) : "w" (x)); /* no earlyclobber: result is allowed to share a register with x */
+#else
+ /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md": the output is marked earlyclobber ('&') so it is never allocated to the input's register */
+ __asm__ ("vsqrt.f64 %P0, %P1" : "=&w" (result) : "w" (x));
+#endif
+ return result;
+}
+#endif
+
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) /* presumably: single-precision FP hardware (ACLE __ARM_FP bit 2) and not a soft-float build -- confirm against ACLE */
+#define __IEEE754_INLINE_SQRTF /* signals that __ieee754_sqrtf is supplied inline here; fdlibm.h tests it to skip its extern prototype */
+static inline float
+__ieee754_sqrtf(float x) /* returns the result of one vsqrt.f32 instruction applied to x */
+{
+ float result;
+#if __ARM_ARCH >= 6
+ __asm__ ("vsqrt.f32 %0, %1" : "=w" (result) : "w" (x)); /* no earlyclobber: result is allowed to share a register with x */
+#else
+ /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md": the output is marked earlyclobber ('&') so it is never allocated to the input's register */
+ __asm__ ("vsqrt.f32 %0, %1" : "=&w" (result) : "w" (x));
+#endif
+ return result;
+}
+#endif
+
#endif
#if defined (__aarch64__)
@@ -189,6 +222,25 @@
#ifdef __i386__
#define __IEEE_LITTLE_ENDIAN
+
+#define __IEEE754_INLINE_SQRT /* signals that __ieee754_sqrt is supplied inline here. NOTE(review): enabled for every __i386__ build, with no soft-float check like the ARM variant has -- confirm */
+static inline double
+__ieee754_sqrt (double x) /* returns the result of the fsqrt instruction applied to x */
+{
+ double result;
+ __asm__ ("fsqrt" : "=t" (result) : "0" (x)); /* "0" places x in the same location as output operand 0 ("=t"), so fsqrt replaces x with its root in place */
+ return result;
+}
+
+#define __IEEE754_INLINE_SQRTF /* signals that __ieee754_sqrtf is supplied inline here. NOTE(review): enabled for every __i386__ build, with no soft-float check like the ARM variant has -- confirm */
+static inline float
+__ieee754_sqrtf (float x) /* returns the result of the fsqrt instruction applied to x */
+{
+ float result;
+ __asm__ ("fsqrt" : "=t" (result) : "0" (x)); /* "0" places x in the same location as output operand 0 ("=t"), so fsqrt replaces x with its root in place */
+ return result;
+}
+
#endif
#ifdef __riscv
diff --git a/newlib/libm/common/fdlibm.h b/newlib/libm/common/fdlibm.h
index 4523e8b2a..7eccce2b6 100644
--- a/newlib/libm/common/fdlibm.h
+++ b/newlib/libm/common/fdlibm.h
@@ -149,7 +149,9 @@ extern double significand __P((double));
extern long double __ieee754_hypotl __P((long double, long double));
/* ieee style elementary functions */
+#ifndef __IEEE754_INLINE_SQRT /* machine/ieeefp.h defines this together with a static inline __ieee754_sqrt; an extern prototype would then clash */
extern double __ieee754_sqrt __P((double));
+#endif /* !__IEEE754_INLINE_SQRT */
extern double __ieee754_acos __P((double));
extern double __ieee754_acosh __P((double));
extern double __ieee754_log __P((double));
@@ -195,7 +197,9 @@ extern float scalbf __P((float, float));
extern float significandf __P((float));
/* ieee style elementary float functions */
+#ifndef __IEEE754_INLINE_SQRTF /* machine/ieeefp.h defines this together with a static inline __ieee754_sqrtf; an extern prototype would then clash */
extern float __ieee754_sqrtf __P((float));
+#endif /* !__IEEE754_INLINE_SQRTF */
extern float __ieee754_acosf __P((float));
extern float __ieee754_acoshf __P((float));
extern float __ieee754_logf __P((float));
diff --git a/newlib/libm/machine/arm/e_sqrt.c b/newlib/libm/machine/arm/e_sqrt.c
index 6f3eb8301..8d50ae234 100644
--- a/newlib/libm/machine/arm/e_sqrt.c
+++ b/newlib/libm/machine/arm/e_sqrt.c
@@ -24,7 +24,7 @@
* SUCH DAMAGE.
*/
-#if (__ARM_FP & 0x8) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRT)
#include <math.h>
double
diff --git a/newlib/libm/machine/arm/ef_sqrt.c b/newlib/libm/machine/arm/ef_sqrt.c
index 3a1ba6cb4..3d8fd1191 100644
--- a/newlib/libm/machine/arm/ef_sqrt.c
+++ b/newlib/libm/machine/arm/ef_sqrt.c
@@ -24,7 +24,7 @@
* SUCH DAMAGE.
*/
-#if (__ARM_FP & 0x4) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRTF)
#include <math.h>
float
diff --git a/newlib/libm/math/e_sqrt.c b/newlib/libm/math/e_sqrt.c
index 78fc52417..313ae972c 100644
--- a/newlib/libm/math/e_sqrt.c
+++ b/newlib/libm/math/e_sqrt.c
@@ -83,6 +83,8 @@
#include "fdlibm.h"
+#ifndef __IEEE754_INLINE_SQRT /* this file defines the double-precision __ieee754_sqrt, so it is compiled out only when the inline double version exists */
+
#ifndef _DOUBLE_IS_32BITS
#ifdef __STDC__
@@ -194,6 +196,8 @@ static double one = 1.0, tiny=1.0e-300;
#endif /* defined(_DOUBLE_IS_32BITS) */
+#endif /* __IEEE754_INLINE_SQRT */
+
/*
Other methods (use floating-point arithmetic)
-------------
diff --git a/newlib/libm/math/ef_sqrt.c b/newlib/libm/math/ef_sqrt.c
index 80e7f360e..9940bad32 100644
--- a/newlib/libm/math/ef_sqrt.c
+++ b/newlib/libm/math/ef_sqrt.c
@@ -15,6 +15,8 @@
#include "fdlibm.h"
+#ifndef __IEEE754_INLINE_SQRTF /* this file defines the single-precision __ieee754_sqrtf, so it is compiled out only when the inline float version exists */
+
#ifdef __STDC__
static const float one = 1.0, tiny=1.0e-30;
#else
@@ -87,3 +89,5 @@ static float one = 1.0, tiny=1.0e-30;
SET_FLOAT_WORD(z,ix);
return z;
}
+
+#endif /* __IEEE754_INLINE_SQRTF */
Most functions in libm call __ieee754_sqrt when needing to perform a square
root. For most targets, this results in the s/w implementation in
math/e_sqrt.c being used, even if the target has a h/w sqrt instruction.
There are some targets that have machine-specific implementations in
machine/*/, but even if that is a single instruction, the code typically
doesn't get inlined.
The following patch is one possible way to allow a sqrt instruction to be
used and for the calls to be inlined. I've just done this for x86/arm for
now. I've put this in include/machine/ieeefp.h, rather than fdlibm.h, as
that's where most of the other target specific code seems to be.
Not sure if using the __IEEE754_INLINE_SQRT* macros is the best way to
prevent redefinition errors. Perhaps someone has a better idea?
Cheers,
Jon
diff --git a/newlib/libc/include/machine/ieeefp.h b/newlib/libc/include/machine/ieeefp.h
index 2fb2268ce..e917d74b0 100644
--- a/newlib/libc/include/machine/ieeefp.h
+++ b/newlib/libc/include/machine/ieeefp.h
@@ -87,6 +87,39 @@
# define __IEEE_BYTES_LITTLE_ENDIAN
# endif
#endif
+
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) /* presumably: double-precision FP hardware (ACLE __ARM_FP bit 3) and not a soft-float build -- confirm against ACLE */
+#define __IEEE754_INLINE_SQRT /* signals that __ieee754_sqrt is supplied inline here; fdlibm.h tests it to skip its extern prototype */
+static inline double
+__ieee754_sqrt(double x) /* returns the result of one vsqrt.f64 instruction applied to x */
+{
+ double result;
+#if __ARM_ARCH >= 6
+ __asm__ ("vsqrt.f64 %P0, %P1" : "=w" (result) : "w" (x)); /* no earlyclobber: result is allowed to share a register with x */
+#else
+ /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md": the output is marked earlyclobber ('&') so it is never allocated to the input's register */
+ __asm__ ("vsqrt.f64 %P0, %P1" : "=&w" (result) : "w" (x));
+#endif
+ return result;
+}
+#endif
+
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) /* presumably: single-precision FP hardware (ACLE __ARM_FP bit 2) and not a soft-float build -- confirm against ACLE */
+#define __IEEE754_INLINE_SQRTF /* signals that __ieee754_sqrtf is supplied inline here; fdlibm.h tests it to skip its extern prototype */
+static inline float
+__ieee754_sqrtf(float x) /* returns the result of one vsqrt.f32 instruction applied to x */
+{
+ float result;
+#if __ARM_ARCH >= 6
+ __asm__ ("vsqrt.f32 %0, %1" : "=w" (result) : "w" (x)); /* no earlyclobber: result is allowed to share a register with x */
+#else
+ /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md": the output is marked earlyclobber ('&') so it is never allocated to the input's register */
+ __asm__ ("vsqrt.f32 %0, %1" : "=&w" (result) : "w" (x));
+#endif
+ return result;
+}
+#endif
+
#endif
#if defined (__aarch64__)
@@ -189,6 +222,25 @@
#ifdef __i386__
#define __IEEE_LITTLE_ENDIAN
+
+#define __IEEE754_INLINE_SQRT /* signals that __ieee754_sqrt is supplied inline here. NOTE(review): enabled for every __i386__ build, with no soft-float check like the ARM variant has -- confirm */
+static inline double
+__ieee754_sqrt (double x) /* returns the result of the fsqrt instruction applied to x */
+{
+ double result;
+ __asm__ ("fsqrt" : "=t" (result) : "0" (x)); /* "0" places x in the same location as output operand 0 ("=t"), so fsqrt replaces x with its root in place */
+ return result;
+}
+
+#define __IEEE754_INLINE_SQRTF /* signals that __ieee754_sqrtf is supplied inline here. NOTE(review): enabled for every __i386__ build, with no soft-float check like the ARM variant has -- confirm */
+static inline float
+__ieee754_sqrtf (float x) /* returns the result of the fsqrt instruction applied to x */
+{
+ float result;
+ __asm__ ("fsqrt" : "=t" (result) : "0" (x)); /* "0" places x in the same location as output operand 0 ("=t"), so fsqrt replaces x with its root in place */
+ return result;
+}
+
#endif
#ifdef __riscv
diff --git a/newlib/libm/common/fdlibm.h b/newlib/libm/common/fdlibm.h
index 4523e8b2a..7eccce2b6 100644
--- a/newlib/libm/common/fdlibm.h
+++ b/newlib/libm/common/fdlibm.h
@@ -149,7 +149,9 @@ extern double significand __P((double));
extern long double __ieee754_hypotl __P((long double, long double));
/* ieee style elementary functions */
+#ifndef __IEEE754_INLINE_SQRT /* machine/ieeefp.h defines this together with a static inline __ieee754_sqrt; an extern prototype would then clash */
extern double __ieee754_sqrt __P((double));
+#endif /* !__IEEE754_INLINE_SQRT */
extern double __ieee754_acos __P((double));
extern double __ieee754_acosh __P((double));
extern double __ieee754_log __P((double));
@@ -195,7 +197,9 @@ extern float scalbf __P((float, float));
extern float significandf __P((float));
/* ieee style elementary float functions */
+#ifndef __IEEE754_INLINE_SQRTF /* machine/ieeefp.h defines this together with a static inline __ieee754_sqrtf; an extern prototype would then clash */
extern float __ieee754_sqrtf __P((float));
+#endif /* !__IEEE754_INLINE_SQRTF */
extern float __ieee754_acosf __P((float));
extern float __ieee754_acoshf __P((float));
extern float __ieee754_logf __P((float));
diff --git a/newlib/libm/machine/arm/e_sqrt.c b/newlib/libm/machine/arm/e_sqrt.c
index 6f3eb8301..8d50ae234 100644
--- a/newlib/libm/machine/arm/e_sqrt.c
+++ b/newlib/libm/machine/arm/e_sqrt.c
@@ -24,7 +24,7 @@
* SUCH DAMAGE.
*/
-#if (__ARM_FP & 0x8) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRT)
#include <math.h>
double
diff --git a/newlib/libm/machine/arm/ef_sqrt.c b/newlib/libm/machine/arm/ef_sqrt.c
index 3a1ba6cb4..3d8fd1191 100644
--- a/newlib/libm/machine/arm/ef_sqrt.c
+++ b/newlib/libm/machine/arm/ef_sqrt.c
@@ -24,7 +24,7 @@
* SUCH DAMAGE.
*/
-#if (__ARM_FP & 0x4) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRTF)
#include <math.h>
float
diff --git a/newlib/libm/math/e_sqrt.c b/newlib/libm/math/e_sqrt.c
index 78fc52417..313ae972c 100644
--- a/newlib/libm/math/e_sqrt.c
+++ b/newlib/libm/math/e_sqrt.c
@@ -83,6 +83,8 @@
#include "fdlibm.h"
+#ifndef __IEEE754_INLINE_SQRT /* this file defines the double-precision __ieee754_sqrt, so it is compiled out only when the inline double version exists */
+
#ifndef _DOUBLE_IS_32BITS
#ifdef __STDC__
@@ -194,6 +196,8 @@ static double one = 1.0, tiny=1.0e-300;
#endif /* defined(_DOUBLE_IS_32BITS) */
+#endif /* __IEEE754_INLINE_SQRT */
+
/*
Other methods (use floating-point arithmetic)
-------------
diff --git a/newlib/libm/math/ef_sqrt.c b/newlib/libm/math/ef_sqrt.c
index 80e7f360e..9940bad32 100644
--- a/newlib/libm/math/ef_sqrt.c
+++ b/newlib/libm/math/ef_sqrt.c
@@ -15,6 +15,8 @@
#include "fdlibm.h"
+#ifndef __IEEE754_INLINE_SQRTF /* this file defines the single-precision __ieee754_sqrtf, so it is compiled out only when the inline float version exists */
+
#ifdef __STDC__
static const float one = 1.0, tiny=1.0e-30;
#else
@@ -87,3 +89,5 @@ static float one = 1.0, tiny=1.0e-30;
SET_FLOAT_WORD(z,ix);
return z;
}
+
+#endif /* __IEEE754_INLINE_SQRTF */