CrystalSpace

Public API Reference

Main Page   Modules   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

qsqrt.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2000 by Andrew Zabolotny (Intel version)
00003     Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version)
00004     Fast computation of sqrt(x) and 1/sqrt(x)
00005   
00006     This library is free software; you can redistribute it and/or
00007     modify it under the terms of the GNU Library General Public
00008     License as published by the Free Software Foundation; either
00009     version 2 of the License, or (at your option) any later version.
00010   
00011     This library is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     Library General Public License for more details.
00015   
00016     You should have received a copy of the GNU Library General Public
00017     License along with this library; if not, write to the Free
00018     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019 */
00020 
00029 #ifndef __CS_QSQRT_H__
00030 #define __CS_QSQRT_H__
00031 
/**
 * Fast computation of sqrt(x).
 *
 * The implementation is selected by the preprocessor further down in this
 * header: x87 inline assembly for PROC_X86 + COMP_GCC, an frsqrte-based
 * version for PROC_POWERPC + COMP_GCC, and the <math.h> routines otherwise
 * (including whenever CS_NO_QSQRT is defined).
 *
 * @param x value whose square root is wanted.
 * @return the square root of x; the assembly variants compute it from an
 *         estimate refined twice rather than by calling the C library.
 */
00039 static inline float qsqrt (float x);
00040 
/**
 * Fast computation of 1/sqrt(x).
 *
 * Selected per platform in the same way as qsqrt().
 *
 * @param x value whose reciprocal square root is wanted.
 * @return the reciprocal square root of x; the assembly variants compute it
 *         from an estimate refined twice rather than by calling the C library.
 */
00048 static inline float qisqrt (float x);
00049 
00052 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC)
00053 
/**
 * x86 / GCC variant: sqrt(x) computed with x87 inline assembly.
 *
 * Mirrors the "Original C++ formulae" quoted in the body:
 *   1. build a first estimate `a` by integer arithmetic on the bit pattern
 *      of x: (0xbe6f0000 - bits) >> 1;
 *   2. refine it twice with  a *= 1.5 - a*a*h,  where h = 0.5 * x;
 *   3. return a * x.
 *
 * Operand mapping of the asm statement:
 *   %0 = ret  (output, top of the x87 stack, early-clobber "=&t")
 *   %1 = x    (read/write memory operand; it is overwritten with the
 *              integer-derived estimate and reloaded as a float)
 *   %2 = 0.5F, %3 = 1.5F (memory operands)
 * Declared clobbers: eax and st(1)..st(7).
 *
 * The trailing comments on the asm lines show the x87 stack contents after
 * each instruction, with st(0) written rightmost.
 *
 * NOTE(review): no special handling of zero or negative x is visible
 * here -- confirm the intended domain with callers.
 *
 * @param x input value (the local copy is modified by the asm).
 * @return the refined estimate multiplied by x.
 */
00070 static inline float qsqrt (float x)
00071 {
00072   float ret;
00073 
00074 // Original C++ formulae:
00075 // float tmp = x;
00076 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1;
00077 // double h = x * 0.5;
00078 // double a = tmp;
00079 // a *= 1.5 - a * a * h;
00080 // a *= 1.5 - a * a * h;
00081 // return a * x;
00082 
00083   // Use __volatile__ so that the compiler will not mess with this
00084   // code. Under some versions of gcc in combination with -O2 optimize
00085   // mode not using __volatile__ can cause errors.
00086   __asm__ __volatile__ (
                // Load x first: it stays at the bottom of the stack for the
                // final multiply, and %1 is about to be overwritten.
00087                 "flds   %1\n"                   // x
                // Integer step: eax = (0xbe6f0000 - bits(x)) >> 1, stored
                // back into x's memory slot as the first estimate.
00088                 "movl   $0xbe6f0000,%%eax\n"
00089                 "subl   %1,%%eax\n"
00090                 "shrl   $1,%%eax\n"
00091                 "movl   %%eax,%1\n"
                // h = 0.5 * x (x is kept: fmul without pop).
00092                 "flds   %2\n"                   // x 0.5
00093                 "fmul   %%st(1)\n"              // x h
00094                 "flds   %3\n"                   // x h 1.5
                // Reload %1, which now holds the estimate a.
00095                 "flds   %1\n"                   // x h 1.5 a
                // First refinement: a *= 1.5 - a*a*h (h and 1.5 preserved).
00096                 "fld    %%st\n"                 // x h 1.5 a a
00097                 "fmul   %%st\n"                 // x h 1.5 a a*a
00098                 "fmul   %%st(3)\n"              // x h 1.5 a a*a*h
00099                 "fsubr  %%st(2)\n"              // x h 1.5 a 1.5-a*a*h
00100                 "fmulp  %%st(1)\n"              // x h 1.5 a
                // Second refinement: same formula, but h and 1.5 are
                // consumed so the stack unwinds down to a single value.
00101                 "fld    %%st\n"                 // x h 1.5 a a
00102                 "fmul   %%st\n"                 // x h 1.5 a a*a
00103                 "fmulp  %%st(3)\n"              // x a*a*h 1.5 a
00104                 "fxch\n"                        // x a*a*h a 1.5
00105                 "fsubp  %%st,%%st(2)\n"         // x 1.5-a*a*h a
00106                 "fmulp  %%st(1)\n"              // x a
                // Final step: multiply the refined estimate by x.
00107                 "fmulp  %%st(1)\n"              // a
00108         : "=&t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00109         : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00110   );
00111   return ret;
00112 }
00113 
/**
 * x86 / GCC variant: 1/sqrt(x) computed with x87 inline assembly.
 *
 * Same scheme as the x86 qsqrt() in this header, minus the final multiply
 * by x:
 *   1. first estimate `a` from the bit pattern of x:
 *      (0xbe6f0000 - bits) >> 1;
 *   2. two refinements  a *= 1.5 - a*a*h,  where h = 0.5 * x;
 *   3. return a.
 *
 * Operand mapping of the asm statement:
 *   %0 = ret  (output, top of the x87 stack, "=t")
 *   %1 = x    (read/write memory operand; overwritten with the
 *              integer-derived estimate and reloaded as a float)
 *   %2 = 0.5F, %3 = 1.5F (memory operands)
 * Declared clobbers: eax and st(1)..st(7).
 *
 * The trailing comments on the asm lines show the x87 stack contents after
 * each instruction, with st(0) written rightmost.
 *
 * NOTE(review): the output constraint here is "=t" while the x86 qsqrt()
 * uses "=&t" -- confirm whether the difference is intentional.
 * NOTE(review): no special handling of zero or negative x is visible
 * here -- confirm the intended domain with callers.
 *
 * @param x input value (the local copy is modified by the asm).
 * @return the twice-refined estimate of 1/sqrt(x).
 */
00121 static inline float qisqrt (float x)
00122 {
00123   float ret;
00124   // Use __volatile__ so that the compiler will not mess with this
00125   // code. Under some versions of gcc in combination with -O2 optimize
00126   // mode not using __volatile__ can cause errors.
00127   __asm__ __volatile__ (
                // Load x before %1 is overwritten with the estimate.
00128                 "flds   %1\n"                   // x
                // Integer step: eax = (0xbe6f0000 - bits(x)) >> 1, stored
                // back into x's memory slot as the first estimate.
00129                 "movl   $0xbe6f0000,%%eax\n"
00130                 "subl   %1,%%eax\n"
00131                 "shrl   $1,%%eax\n"
00132                 "movl   %%eax,%1\n"
                // h = 0.5 * x; fmulp pops, so x itself is not kept.
00133                 "flds   %2\n"                   // x 0.5
00134                 "fmulp  %%st(1)\n"              // h
00135                 "flds   %3\n"                   // h 1.5
                // Reload %1, which now holds the estimate a.
00136                 "flds   %1\n"                   // h 1.5 a
                // First refinement: a *= 1.5 - a*a*h (h and 1.5 preserved).
00137                 "fld    %%st\n"                 // h 1.5 a a
00138                 "fmul   %%st\n"                 // h 1.5 a a*a
00139                 "fmul   %%st(3)\n"              // h 1.5 a a*a*h
00140                 "fsubr  %%st(2)\n"              // h 1.5 a 1.5-a*a*h
00141                 "fmulp  %%st(1)\n"              // h 1.5 a
                // Second refinement: h and 1.5 are consumed, leaving only
                // the result on the stack.
00142                 "fld    %%st\n"                 // h 1.5 a a
00143                 "fmul   %%st\n"                 // h 1.5 a a*a
00144                 "fmulp  %%st(3)\n"              // a*a*h 1.5 a
00145                 "fxch\n"                        // a*a*h a 1.5
00146                 "fsubp  %%st,%%st(2)\n"         // 1.5-a*a*h a
00147                 "fmulp  %%st(1)\n"              // a
00148         : "=t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00149         : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00150   );
00151   return ret;
00152 }
00153 
00154 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_POWERPC) && defined (COMP_GCC)
00155 
/**
 * PowerPC / GCC variant: sqrt(x) built on the frsqrte instruction.
 *
 * For x == 0 the function returns 0 without executing frsqrte. Otherwise it
 * takes the frsqrte result for x as a starting value y0, refines it twice
 * with  y0 = y0 * (1.5 - (x/2) * y0 * y0),  and multiplies by x.
 *
 * NOTE(review): the `;` after the closing braces of the if-block and of the
 * function are redundant; the one after the function body is an empty
 * file-scope declaration that pedantic compilers warn about.
 * NOTE(review): `0.0` literals are double-typed while the rest of the
 * arithmetic uses float literals.
 *
 * @param x input value.
 * @return 0 when x compares equal to 0, otherwise the refined estimate
 *         multiplied by x.
 */
00163 static inline float qsqrt(float x)
00164 {
00165   float y0 = 0.0;
00166 
  // Guard: zero input keeps the initial y0 == 0 and skips the estimate.
00167   if (x != 0.0)
00168   {
00169     float x0 = x * 0.5f;
00170 
    // Starting value straight from the hardware estimate instruction.
00171     __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00172     
    // Two refinement steps; the second one also folds in the final
    // multiplication by x.
00173     y0 = y0 * (1.5f - x0 * y0 * y0);
00174     y0 = (y0 * (1.5f - x0 * y0 * y0)) * x;
00175   };
00176     
00177   return y0;
00178 };
00179 
/**
 * PowerPC / GCC variant: 1/sqrt(x) built on the frsqrte instruction.
 *
 * Takes the frsqrte result for x as a starting value y0 and refines it
 * twice with  y0 = y0 * (1.5 - (x/2) * y0 * y0).
 *
 * NOTE(review): unlike the PowerPC qsqrt() in this header there is no
 * guard for x == 0; the result for zero depends on what frsqrte returns --
 * verify against callers' expectations.
 * NOTE(review): the `;` after the function's closing brace is an empty
 * file-scope declaration that pedantic compilers warn about.
 *
 * @param x input value.
 * @return the twice-refined estimate.
 */
00184 static inline float qisqrt(float x)
00185 {
00186   float x0 = x * 0.5f;
00187   float y0;
  // Starting value straight from the hardware estimate instruction.
00188   __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00189     
  // Two identical refinement steps.
00190   y0 = y0 * (1.5f - x0 * y0 * y0);
00191   y0 = y0 * (1.5f - x0 * y0 * y0);
00192 
00193   return y0;
00194 };
00195 
00196 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_VC)
00197 
00198 #include <math.h>
/* x86 / MSVC variant of qsqrt(): no hand-written assembly in this branch,
   the call is forwarded to sqrtf() from <math.h>. */
00199 static inline float qsqrt (float x) { return sqrtf(x); }
/* x86 / MSVC variant of qisqrt(): single-precision reciprocal of sqrtf(x);
   no hand-written assembly in this branch. */
00200 static inline float qisqrt(float x) { return 1.0f / sqrtf(x); }
00201 
00202 #else
00203 
00204 #include <math.h>
/* Generic fallback qsqrt(), used when none of the platform branches above
   applies (which includes CS_NO_QSQRT being defined): calls the
   double-precision sqrt() and converts the result back to float. */
00205 static inline float qsqrt (float x) { return (float)sqrt(x); }
/* Generic fallback qisqrt(), used when none of the platform branches above
   applies (which includes CS_NO_QSQRT being defined): evaluates
   1.0 / sqrt(x) in double precision and converts the result to float. */
00206 static inline float qisqrt(float x) { return (float)(1.0 / sqrt(x)); }
00207 
00208 #endif
00209 
00210 #endif // __CS_QSQRT_H__

Generated for Crystal Space by doxygen 1.2.14