Question: How to make this procedure more efficient?

Hi, 

This thread is more or less related to a previous one about the Statistics:-Sample procedure.
(see https://www.mapleprimes.com/questions/228421-A-Serious-Problem-With-StatisticsSample )
I've just implemented two variant of the Box-Muller procedure to sample normal rvs.
The source is "The art of computer programming", Donald E. Knuth, 2nd edition, p117 (aka "algorithm P").

The first implementation (BoxMuller_1) is basically what D.E. Knuth writes, except that I "vectorize" some operations in order to avoid using an if.. then..else structure (as a minor consequence I generate a little bit more numbers than required).
This procedure uses the build-in Maple's procedure select (please see the link above and acer's and carl love's replies).
It appears to be relatively slow.
More of this CodeTools:-Usage(BoxMuller_1(10^6)) generates a "conenction to kernel lost" for some unknown reason.

The variant named BoxMuller_2 uses sort and ListTools:-BinaryPlace instead of select.
Ir appears to be around 3 times faster than BoxMuller_1, but remains 10 time slower than Statistics:-Sample(..., method=envelope) (here again, see the link above to understand why method=envelope is needed).

I wonder how it could be possible to speed up this procedure. In particular acer showed in one of his reply (again see the link above) how using hfloats can improve the efficiency of a procedure, but I'm very incompetent on this point.
Does anyone have any idea?

Thanks in advance.

PS: this file has been written with Maple 2015.2


 

restart:

BoxMuller_1 := proc(N)
  local V, S, T, L, X1, X2:
  V  := Statistics:-Sample(Uniform(-1, 1), [ceil(2*N/3), 2]):
  S  := V[..,1]^~2 +~ V[..,2]^~2;
  T  := < subs(NULL=infinity, select(`<`, S, 1)) | V >;
  T[.., 1]  := (-2 *~ log~(T[.., 1]) /~ T[.., 1])^~(0.5);
  X1 := select[flatten](type, T[.., 1] *~  T[..,2], 'float');
  X2 := select[flatten](type, T[.., 1] *~  T[..,3], 'float');
  return <X1 , X2>:
end proc:


BoxMuller_2 := proc(N)
  local V1, V2, S, W, u, r, T, X1, X2:
  V1 := Statistics:-Sample(Uniform(-1, 1), ceil(2*N/3)):
  V2 := Statistics:-Sample(Uniform(-1, 1), ceil(2*N/3)):
  S  := V1^~2 +~ V2^~2;
  W  := sort(S, output = [sorted, permutation]):
  u  := ListTools:-BinaryPlace(W[1], 1);
  r  := [$1..u]:
  T  := (-2 *~ log~(W[1][r]) /~ W[1][r])^~(0.5);
  X1 := T *~ V1[W[2][r]];
  X2 := T *~ V2[W[2][r]];
  return <X1 | X2>^+:
end proc:  

CodeTools:-Usage(Statistics:-Sample(Uniform(-1, 1), 10^5)):
CodeTools:-Usage(Statistics:-Sample(Uniform(-1, 1), 10^5, method=envelope)):
# CodeTools:-Usage(BoxMuller_1(10^6)): # generates a "connection to kernel lost" error msg
CodeTools:-Usage(BoxMuller_2(10^6)):

print():
CodeTools:-Usage(BoxMuller_1(10^5)):
CodeTools:-Usage(BoxMuller_2(10^5)):
print():

S1 := BoxMuller_1(10^5):
S2 := BoxMuller_2(10^5):

memory used=1.17MiB, alloc change=0 bytes, cpu time=12.00ms, real time=12.00ms, gc time=0ns
memory used=3.32MiB, alloc change=32.00MiB, cpu time=64.00ms, real time=64.00ms, gc time=0ns

memory used=148.17MiB, alloc change=119.93MiB, cpu time=706.00ms, real time=637.00ms, gc time=14.63ms

 

 

memory used=67.43MiB, alloc change=186.80MiB, cpu time=946.00ms, real time=762.00ms, gc time=285.14ms
memory used=14.81MiB, alloc change=0 bytes, cpu time=67.00ms, real time=62.00ms, gc time=0ns

 

(1)

if false then
DocumentTools:-Tabulate(
  [
    plots:-display(
      Statistics:-Histogram(S1),
      plot(Statistics:-PDF(Normal(0, 1), x), x=-4..4, color=red,thickness=3)
    ),
    plots:-display(
      Statistics:-Histogram(S2),
      plot(Statistics:-PDF(Normal(0, 1), x), x=-4..4, color=red,thickness=3)
    )
  ], width=60
)
end if:
  

# carl love's procedure with slight modifications t

SampleCheck := proc(X, f, N::posint)
uses St= Statistics;
  local S, M, O, E:
  S:= f(N):
  M:= numelems(S):
  O:= <St:-TallyInto(S, <[$(floor@min-1..ceil@max)(S)]>)>:
  E:= St:-Probability~(X <~ (rhs@lhs)~(O), 'numeric') * M:
  <rhs~(O)[2..] | E[2..] - E[..-2]>
end proc:
 

interface(rtablesize=20):
SampleCheck(Statistics:-RandomVariable(Normal(0,1)), 'BoxMuller_1', 10^5),
SampleCheck(Statistics:-RandomVariable(Normal(0,1)), 'BoxMuller_2', 10^5);

Matrix(11, 2, {(1, 1) = 1, (1, 2) = HFloat(0.029943974977392658), (2, 1) = 5, (2, 2) = HFloat(3.28979552036377), (3, 1) = 121, (3, 2) = HFloat(138.1791685600983), (4, 1) = 2276, (4, 2) = HFloat(2243.2153196005097), (5, 1) = 14488, (5, 2) = HFloat(14245.846696531142), (6, 1) = 35630, (6, 2) = HFloat(35780.43897239682), (7, 1) = 35886, (7, 2) = HFloat(35780.43897239682), (8, 1) = 14071, (8, 2) = HFloat(14245.846696531138), (9, 1) = 2212, (9, 2) = HFloat(2243.2153196005092), (10, 1) = 130, (10, 2) = HFloat(138.1791685601056), (11, 1) = 2, (11, 2) = HFloat(3.2897955203516176)}), Matrix(10, 2, {(1, 1) = 1, (1, 2) = HFloat(3.293310594473029), (2, 1) = 127, (2, 2) = HFloat(138.32680996055558), (3, 1) = 2241, (3, 2) = HFloat(2245.6121457991635), (4, 1) = 14226, (4, 2) = HFloat(14261.068070193269), (5, 1) = 35632, (5, 2) = HFloat(35818.66958395649), (6, 1) = 35864, (6, 2) = HFloat(35818.66958395649), (7, 1) = 14451, (7, 2) = HFloat(14261.068070193272), (8, 1) = 2266, (8, 2) = HFloat(2245.6121457991685), (9, 1) = 125, (9, 2) = HFloat(138.32680996054842), (10, 1) = 1, (10, 2) = HFloat(3.293310594468494)})

(2)

 


 

Download Box-Muller.mw

Please Wait...