mmcdara

787 Reputation

12 Badges

3 years, 133 days

MaplePrimes Activity


These are Posts that have been published by mmcdara

Seeking for fast approximate formulas to compute (a huge number of) quantiles of a Gaussian random variable (here the standard one, but its extension to any Gaussian RV is straightforward), I found a few of them in the Abramowitz and Stegun book, page 933, relations 26.2.22 and 26.2.23.
Each approximation model is expressed as a rational fraction, the second one being the more accurate.
Each model depends on (respectively 4 and 6) parameters that are estimated (I guess it was done this way) through a least-square-like method.

See here for an online access http://people.math.sfu.ca/~cbm/aands/page_933.htm.

These approximation, and specially the most accurate one (formula 26.2.23) seem to be still widely used today(1) (see for instance https://www.johndcook.com/blog/normal_cdf_inverse/ ).

As an amusement I decided to compute the best fit by using the Statistics:-NonLinearFit procedure and a sample of (probability, quantile) points where probability ranges in [0.5, 1-1/1000] (the range used in formulas 26.2.22 and 26.2.23 is (0, 0.5] but this is not a point).
Surprisingly Statistics:-NonLinearFit returned, for the two formulas, parameter estimations substantially different from the one given in the Abramowitz & Stegun's book. A reason could be that the points I used when I did the fits weren't the one they used (unfortunately they give no informations about this).

More interesting, whatever the formula I refitted,  NonLinearFit produced an approximation whose the absolute error was smaller by about two orders of magnitude to the onesprovided by Abramowitz and Stegun.
For instance they wrote that the most accurate formula (26.2.23) had an absolute approximation error less than 4.5*10-4 as I obtained a value around 10-6!

(1) To get an idea of the persistence of the use of the formula 26.2.23, just type the value 2.515517 of its parameter c[0] in any search engine.


In the plots below the gray rectangle refers to the region where the approximate ICDF is used for extrapolation.
 

restart:

with(Statistics):

cdf := unapply(evalf(CDF(Normal(0, 1), x)), x):
X   := [seq(0..5, 0.1)]:
A   := cdf~(X):
T  := alpha -> sqrt(-2*log(1-alpha)):
q  := Quantile~(Normal(0, 1), A):
Aq := convert([A,q], Matrix)^+:

r := 1:

J := z -> z - add(a__||k*z^k, k=0..r)/(1+add(b__||k*z^k, k=1..r+1)):


model  := J(T(alpha)):
NL_fit := unapply(NonlinearFit(model, Aq, alpha), alpha);


# these lines are for estimating the performances
B  := Sample(Uniform(0.5, 1), 10^4):
CodeTools:-Usage(Quantile~(Normal(0, 1), B)):
CodeTools:-Usage(Quantile~(Normal(0, 1), B, numeric)):
CodeTools:-Usage(NL_fit~(B)):
#-----------------------------------------------------
Y  := [seq(0..6, 0.01)]:
B  := cdf~(Y):
R1 := Quantile~(Normal(0, 1), B, numeric):
R2 := NL_fit~(B):

plots:-display(
  ScatterPlot(R1, log[10]~(abs~(R2-~R1)), legend=mmcdara, color=red, gridlines=true, size=[700, 400]),
  plottools:-rectangle([max(X), log[10]~(min(abs~(R2-~R1)))], [max(Y), log[10]~(max(abs~(R2-~R1)))], color=gray, transparency=0.6)
);

proc (alpha) options operator, arrow; (-2*ln(1-alpha))^(1/2)-(HFloat(2.5454311687345044)+HFloat(0.8058592540791468)*(-2*ln(1-alpha))^(1/2))/(1+HFloat(1.4689746699940707)*(-2*ln(1-alpha))^(1/2)-HFloat(0.34455942407858625)*ln(1-alpha)) end proc

 

memory used=170.31MiB, alloc change=76.01MiB, cpu time=3.06s, real time=3.05s, gc time=54.87ms

memory used=171.59MiB, alloc change=256.00MiB, cpu time=3.12s, real time=3.03s, gc time=154.77ms

memory used=8.24MiB, alloc change=0 bytes, cpu time=95.00ms, real time=95.00ms, gc time=0ns

 

 

r := 2:

 
J := z -> z - add(a__||k*z^k, k=0..r)/(1+add(b__||k*z^k, k=1..r+1)):


model  := J(T(alpha)):
NL_fit := unapply(NonlinearFit(model, Aq, alpha), alpha);


# these lines are for estimating the performances
B  := Sample(Uniform(0.5, 1), 10^4):
CodeTools:-Usage(Quantile~(Normal(0, 1), B)):
CodeTools:-Usage(Quantile~(Normal(0, 1), B, numeric)):
CodeTools:-Usage(NL_fit~(B)):
#-----------------------------------------------------


Y  := [seq(0..6, 0.01)]:
B  := cdf~(Y):
R1 := Quantile~(Normal(0, 1), B, numeric):
R2 := NL_fit~(B):

plots:-display(
  ScatterPlot(R1, log[10]~(abs~(R2-~R1)), legend=mmcdara, color=red, gridlines=true, size=[700, 400]),
  plottools:-rectangle([max(X), log[10]~(min(abs~(R2-~R1)))], [max(Y), log[10]~(max(abs~(R2-~R1)))], color=gray, transparency=0.6)
);

proc (alpha) options operator, arrow; (-2*ln(1-alpha))^(1/2)-(HFloat(2.9637294443959394)+HFloat(4.527738737327481)*(-2*ln(1-alpha))^(1/2)-HFloat(0.9571637188191973)*ln(1-alpha))/(1+HFloat(3.472400103322335)*(-2*ln(1-alpha))^(1/2)-HFloat(3.426536241250657)*ln(1-alpha)+HFloat(0.08875278252087411)*(-2*ln(1-alpha))^(3/2)) end proc

 

memory used=170.09MiB, alloc change=32.00MiB, cpu time=3.29s, real time=3.11s, gc time=268.60ms

memory used=170.85MiB, alloc change=0 bytes, cpu time=3.23s, real time=3.10s, gc time=201.52ms
memory used=10.76MiB, alloc change=0 bytes, cpu time=127.00ms, real time=127.00ms, gc time=0ns

 

 

# Optimized "r=2" computation

z_fit := simplify(subs(alpha=-exp(-(1/2)*z^2)+1, NL_fit(alpha))) assuming z > 0:
z_fit := unapply(convert~(%, horner), z);

p := proc(alpha)
  local z:
  z := sqrt(-2*log(1-alpha)):
  z_fit(z):
end proc:

R3 := CodeTools:-Usage(p~(B)):

plots:-display(
  ScatterPlot(R1, log[10]~(abs~(R2-~R1)), legend=mmcdara, color=red, gridlines=true, size=[700, 400]),
  plottools:-rectangle([max(X), log[10]~(min(abs~(R2-~R1)))], [max(Y), log[10]~(max(abs~(R2-~R1)))], color=gray, transparency=0.6)
);

proc (z) options operator, arrow; (-2.963729444+(-3.527738737+(2.993818244+(1.713268121+0.8875278252e-1*z)*z)*z)*z)/(1.+(3.472400103+(1.713268121+0.8875278252e-1*z)*z)*z) end proc

 

memory used=1.67MiB, alloc change=0 bytes, cpu time=14.00ms, real time=15.00ms, gc time=0ns

 

 


AS stands for Abramowith & Stegun

J_AS := unapply(normal(eval(J(t), [a__0=2.515517, a__1=0.802853, a__2=0.010328, b__1=1.432788, b__2=0.189269, b__3=0.001308])), t):
J_AS(t);


# for comparison:

print():
z_fit := simplify(subs(alpha=-exp(-(1/2)*z^2)+1, NL_fit(alpha))) assuming z > 0:
map(sort, %, z);

plot([z_fit(z), J_AS(z)], z=0.5..1, color=[blue, red], legend=[mmcdara, Abramowitz_Stegun], gridlines=true);

print():
R2_AS := CodeTools:-Usage(J_AS~(T~(B))):
print():


plots:-display(
  ScatterPlot(R1, log[10]~(abs~(R2_AS-~R1)), legend=Abramowitz_Stegun, gridlines=true, size=[700, 400]),
  ScatterPlot(R1, log[10]~(abs~(R2-~R1)), legend=mmcdara, color=red),
  plottools:-rectangle([max(X), log[10]~(min(abs~(R2-~R1)))], [max(Y), log[10]~(max(abs~(R2-~R1)))], color=gray, transparency=0.6)
);

(0.1308000000e-2*t^4+.1892690000*t^3+1.422460000*t^2+.1971470000*t-2.515517000)/(0.1308000000e-2*t^3+.1892690000*t^2+1.432788000*t+1.)

 

 

(0.8875278252e-1*z^4+1.713268121*z^3+2.993818244*z^2-3.527738737*z-2.963729444)/(0.8875278252e-1*z^3+1.713268121*z^2+3.472400103*z+1.)

 

 

 

memory used=2.92MiB, alloc change=0 bytes, cpu time=25.00ms, real time=25.00ms, gc time=0ns

 

 

 


 

Download InverseNormalCDF.mw

 

 

Page 1 of 1