Skip to content

Commit e4acfd1

Browse files
author
luke
committed
New default algorithm for R_unif_rand, used in sample, to resolve PR
17494. Includes adding sample.kind to RNGkind to allow reverting to the old algorithm for reproducing previous results. Thanks to Duncan Murdoch for contributing the patch. git-svn-id: https://svn.r-project.org/R/trunk@76160 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 247ec80 commit e4acfd1

21 files changed

Lines changed: 341 additions & 263 deletions

doc/NEWS.Rd

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
CMD build} are in serialization format version 2, and resave by
2424
default produces files in serialization format version 2 (unless
2525
the original is already in format version 3).
26+
27+
\item The default method for generating from a discrete uniform
28+
distribution (used in \code{sample()}, for instance) has been
29+
changed. This addresses the fact, pointed out by Ottoboni and
30+
Stark, that the previous method made \code{sample()} noticeably
31+
non-uniform on large populations. See \PR{17494} for a
32+
discussion. The previous method can be requested using
33+
\code{RNGkind()} if necessary for reproduction of old results.
34+
Thanks to Duncan Murdoch for contributing the patch and Gabe
35+
Becker for further assistance.
2636
}
2737
}
2838

src/include/R_ext/Random.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* R : A Computer Language for Statistical Data Analysis
3-
* Copyright (C) 1998-2016 The R Core Team
3+
* Copyright (C) 1998-2019 The R Core Team
44
*
55
* This header file is free software; you can redistribute it and/or modify
66
* it under the terms of the GNU Lesser General Public License as published by
@@ -53,6 +53,11 @@ typedef enum {
5353
KINDERMAN_RAMAGE
5454
} N01type;
5555

56+
/* Different ways to generate discrete uniform samples */
57+
typedef enum {
58+
ROUNDING,
59+
REJECTION
60+
} Sampletype;
5661

5762
void GetRNGstate(void);
5863
void PutRNGstate(void);

src/include/Rmath.h0.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ double R_pow_di(double, int);
361361

362362
double norm_rand(void);
363363
double unif_rand(void);
364+
double R_unif_index(double);
364365
double exp_rand(void);
365366
#ifdef MATHLIB_STANDALONE
366367
void set_seed(unsigned int, unsigned int);

src/library/base/R/RNG.R

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# File src/library/base/R/RNG.R
22
# Part of the R package, https://www.R-project.org
33
#
4-
# Copyright (C) 1995-2014 The R Core Team
4+
# Copyright (C) 1995-2019 The R Core Team
55
#
66
# This program is free software; you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by
@@ -21,14 +21,15 @@
2121
## The available kinds are in
2222
## ../../../include/Random.h and ../../../main/RNG.c [RNG_Table]
2323
##
24-
RNGkind <- function(kind = NULL, normal.kind = NULL)
24+
RNGkind <- function(kind = NULL, normal.kind = NULL, sample.kind = NULL)
2525
{
2626
kinds <- c("Wichmann-Hill", "Marsaglia-Multicarry", "Super-Duper",
2727
"Mersenne-Twister", "Knuth-TAOCP", "user-supplied",
2828
"Knuth-TAOCP-2002", "L'Ecuyer-CMRG", "default")
2929
n.kinds <- c("Buggy Kinderman-Ramage", "Ahrens-Dieter", "Box-Muller",
3030
"user-supplied", "Inversion", "Kinderman-Ramage",
3131
"default")
32+
s.kinds <- c("Rounding", "Rejection", "default")
3233
do.set <- length(kind) > 0L
3334
if(do.set) {
3435
if(!is.character(kind) || length(kind) > 1L)
@@ -51,19 +52,34 @@ RNGkind <- function(kind = NULL, normal.kind = NULL)
5152
domain = NA)
5253
if(normal.kind == length(n.kinds) - 1L) normal.kind <- -1L
5354
}
54-
r <- 1L + .Internal(RNGkind(i.knd, normal.kind))
55-
r <- c(kinds[r[1L]], n.kinds[r[2L]])
56-
if(do.set || !is.null(normal.kind)) invisible(r) else r
55+
56+
if(!is.null(sample.kind)) {
57+
if(!is.character(sample.kind) || length(sample.kind) != 1L)
58+
stop("'sample.kind' must be a character string of length 1")
59+
sample.kind <- pmatch(sample.kind, s.kinds) - 1L
60+
if(is.na(sample.kind))
61+
stop(gettextf("'%s' is not a valid choice", sample.kind),
62+
domain = NA)
63+
if (sample.kind == 0L)
64+
warning("non-uniform 'Rounding' sampler used",
65+
domain = NA)
66+
if(sample.kind == length(s.kinds) - 1L) sample.kind <- -1L
67+
}
68+
r <- 1L + .Internal(RNGkind(i.knd, normal.kind, sample.kind))
69+
r <- c(kinds[r[1L]], n.kinds[r[2L]], s.kinds[r[3L]])
70+
if(do.set || !is.null(normal.kind) || !is.null(sample.kind))
71+
invisible(r) else r
5772
}
5873

59-
set.seed <- function(seed, kind = NULL, normal.kind = NULL)
74+
set.seed <- function(seed, kind = NULL, normal.kind = NULL, sample.kind = NULL)
6075
{
6176
kinds <- c("Wichmann-Hill", "Marsaglia-Multicarry", "Super-Duper",
6277
"Mersenne-Twister", "Knuth-TAOCP", "user-supplied",
6378
"Knuth-TAOCP-2002", "L'Ecuyer-CMRG", "default")
6479
n.kinds <- c("Buggy Kinderman-Ramage", "Ahrens-Dieter", "Box-Muller",
6580
"user-supplied", "Inversion", "Kinderman-Ramage",
6681
"default")
82+
s.kinds <- c("Rounding", "Rejection", "default")
6783
if(length(kind) ) {
6884
if(!is.character(kind) || length(kind) > 1L)
6985
stop("'kind' must be a character string of length 1 (RNG to be used).")
@@ -85,6 +101,18 @@ set.seed <- function(seed, kind = NULL, normal.kind = NULL)
85101
domain = NA)
86102
if(normal.kind == length(n.kinds) - 1L) normal.kind <- -1L
87103
}
104+
if(!is.null(sample.kind)) {
105+
if(!is.character(sample.kind) || length(sample.kind) != 1L)
106+
stop("'sample.kind' must be a character string of length 1")
107+
sample.kind <- pmatch(sample.kind, s.kinds) - 1L
108+
if(is.na(sample.kind))
109+
stop(gettextf("'%s' is not a valid choice", sample.kind),
110+
domain = NA)
111+
if (sample.kind == 0L)
112+
warning("non-uniform 'Rounding' sampler used",
113+
domain = NA)
114+
if(sample.kind == length(s.kinds) - 1L) sample.kind <- -1L
115+
}
88116
.Internal(set.seed(seed, i.knd, normal.kind))
89117
}
90118

@@ -96,9 +124,11 @@ RNGversion <- function(vstr)
96124
if (length(vnum) < 2L)
97125
stop("malformed version string")
98126
if (vnum[1L] == 0 && vnum[2L] < 99)
99-
RNGkind("Wichmann-Hill", "Buggy Kinderman-Ramage")
127+
RNGkind("Wichmann-Hill", "Buggy Kinderman-Ramage", "Rounding")
100128
else if (vnum[1L] == 0 || vnum[1L] == 1 && vnum[2L] <= 6)
101-
RNGkind("Marsaglia-Multicarry", "Buggy Kinderman-Ramage")
129+
RNGkind("Marsaglia-Multicarry", "Buggy Kinderman-Ramage", "Rounding")
130+
else if (vnum[1L] <= 2 || vnum[1L] == 3 && vnum[2L] <= 5)
131+
RNGkind("Mersenne-Twister", "Inversion", "Rounding")
102132
else
103-
RNGkind("Mersenne-Twister", "Inversion")
133+
RNGkind("Mersenne-Twister", "Inversion", "Rejection")
104134
}

src/library/base/man/Random.Rd

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/base/man/Random.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 1995-2018 R Core Team
3+
% Copyright 1995-2019 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{Random}
@@ -27,9 +27,9 @@
2727
\usage{
2828
\special{.Random.seed <- c(rng.kind, n1, n2, \dots)}
2929

30-
RNGkind(kind = NULL, normal.kind = NULL)
30+
RNGkind(kind = NULL, normal.kind = NULL, sample.kind = NULL)
3131
RNGversion(vstr)
32-
set.seed(seed, kind = NULL, normal.kind = NULL)
32+
set.seed(seed, kind = NULL, normal.kind = NULL, sample.kind = NULL)
3333
}
3434
\arguments{
3535
\item{kind}{character or \code{NULL}. If \code{kind} is a character
@@ -39,6 +39,10 @@ set.seed(seed, kind = NULL, normal.kind = NULL)
3939
\item{normal.kind}{character string or \code{NULL}. If it is a character
4040
string, set the method of Normal generation. Use \code{"default"}
4141
to return to the \R default. \code{NULL} makes no change.}
42+
\item{sample.kind}{character string or \code{NULL}. If it is a character
43+
string, set the method of discrete uniform generation (used in
44+
\code{\link{sample}}, for instance). Use \code{"default"} to return to
45+
the \R default. \code{NULL} makes no change.}
4246
\item{seed}{a single value, interpreted as an integer, or \code{NULL}
4347
(see \sQuote{Details}).}
4448
\item{vstr}{a character string containing a version number,
@@ -153,6 +157,12 @@ set.seed(seed, kind = NULL, normal.kind = NULL)
153157
whenever it is selected (even if it is the current normal generator)
154158
and when \code{kind} is changed.
155159
160+
\code{sample.kind} can be \code{"Rounding"} or \code{"Rejection"},
161+
or partial matches to these. The former was the default in versions
162+
prior to 3.6.0: it made \code{\link{sample}} noticeably non-uniform
163+
on large populations, and should only be used for reproduction of old
164+
results. See \PR{17494} for a discussion.
165+
156166
\code{set.seed} uses a single integer argument to set as many seeds
157167
as are required. It is intended as a simple way to get quite different
158168
seeds by specifying small integer arguments, and also as a way to get
@@ -163,7 +173,8 @@ set.seed(seed, kind = NULL, normal.kind = NULL)
163173
called with \code{seed = NULL} it re-initializes (see \sQuote{Note})
164174
as if no seed had yet been set.
165175
166-
The use of \code{kind = NULL} or \code{normal.kind = NULL} in
176+
The use of \code{kind = NULL}, \code{normal.kind = NULL} or
177+
\code{sample.kind = NULL} in
167178
\code{RNGkind} or \code{set.seed} selects the currently-used
168179
generator (including that used in the previous session if the
169180
workspace has been restored): if no generator has been used it selects
@@ -198,16 +209,17 @@ set.seed(seed, kind = NULL, normal.kind = NULL)
198209
element \emph{codes} the kind of RNG and normal generator. The lowest
199210
two decimal digits are in \code{0:(k-1)}
200211
where \code{k} is the number of available RNGs. The hundreds
201-
represent the type of normal generator (starting at \code{0}).
212+
represent the type of normal generator (starting at \code{0}), and
213+
the ten thousands represent the type of discrete uniform sampler.
202214

203215
In the underlying C, \code{.Random.seed[-1]} is \code{unsigned};
204216
therefore in \R \code{.Random.seed[-1]} can be negative, due to
205217
the representation of an unsigned integer by a signed integer.
206218

207-
\code{RNGkind} returns a two-element character vector of the RNG and
208-
normal kinds selected \emph{before} the call, invisibly if either
209-
argument is not \code{NULL}. A type starts a session as the default,
210-
and is selected either by a call to \code{RNGkind} or by setting
219+
\code{RNGkind} returns a three-element character vector of the RNG,
220+
normal and sample kinds selected \emph{before} the call, invisibly if
221+
either argument is not \code{NULL}. A type starts a session as the
222+
default, and is selected either by a call to \code{RNGkind} or by setting
211223
\code{.Random.seed} in the workspace.
212224

213225
\code{RNGversion} returns the same information as \code{RNGkind} about
@@ -287,7 +299,8 @@ set.seed(seed, kind = NULL, normal.kind = NULL)
287299
\bold{34}, 198 and \bold{35}, 89.
288300
\doi{10.2307/2347988}.
289301
}
290-
\author{of RNGkind: Martin Maechler. Current implementation, B. D. Ripley}
302+
\author{of RNGkind: Martin Maechler. Current implementation, B. D. Ripley
303+
with modifications by Duncan Murdoch.}
291304

292305
\seealso{
293306
\code{\link{sample}} for random sampling with and without replacement.

src/library/datasets/man/crimtab.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/datasets/man/crimtab.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 1995-2018 R Core Team
3+
% Copyright 1995-2019 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{crimtab}
@@ -121,7 +121,7 @@ zobs <- (h.mean - mean(d.hei[,"height"]))/h.sd
121121

122122
# 6) Replace infinite values by +/- 6 as in Student's paper:
123123

124-
zobs[infZ <- is.infinite(zobs)] # 3 of them
124+
zobs[infZ <- is.infinite(zobs)] # none of them
125125
zobs[infZ] <- 6 * sign(zobs[infZ])
126126

127127
# 7) Plot the distribution:

src/library/utils/R/example.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# File src/library/utils/R/example.R
22
# Part of the R package, https://www.R-project.org
33
#
4-
# Copyright (C) 1995-2018 The R Core Team
4+
# Copyright (C) 1995-2019 The R Core Team
55
#
66
# This program is free software; you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by
@@ -62,12 +62,12 @@ function(topic, package = NULL, lib.loc = NULL,
6262
add = TRUE)
6363
} else {
6464
oldRNG <- RNGkind()
65-
on.exit(RNGkind(oldRNG[1L], oldRNG[2L]), add = TRUE)
65+
on.exit(RNGkind(oldRNG[1L], oldRNG[2L], oldRNG[3L]), add = TRUE)
6666
}
6767
## set RNG
6868
if(is.logical(setRNG)) { # i.e. == TRUE: use the same as R CMD check
6969
## see share/R/examples-header.R
70-
RNGkind("default", "default")
70+
RNGkind("default", "default", "default")
7171
set.seed(1)
7272
} else eval(setRNG)
7373
}

src/library/utils/man/example.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/utils/man/example.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 1995-2015 R Core Team
3+
% Copyright 1995-2019 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{example}
@@ -44,7 +44,7 @@ example(topic, package = NULL, lib.loc = NULL,
4444
\code{setRNG = TRUE} sets the same state as
4545
\command{R CMD \link{check}} does for
4646
running a package's examples. This is currently equivalent to
47-
\code{setRNG = \{RNGkind("default", "default"); set.seed(1)\}}.}
47+
\code{setRNG = \{RNGkind("default", "default", "default"); set.seed(1)\}}.}
4848
\item{ask}{logical (or \code{"default"}) indicating if
4949
\code{\link{devAskNewPage}(ask = TRUE)} should be called
5050
before graphical output happens from the example code. The value

0 commit comments

Comments
 (0)