### Department of Industrial Psychology
### Stellenbosch University
### South Africa
### This code is adapted from examples published on Stack Overflow:
### https://stackoverflow.com/questions/31153186/r-split-data-into-2-parts-randomly
### Create a data frame for the example:
### n rows, two columns of independent standard-normal draws.
n <- 1000
mydata <- data.frame(x = rnorm(n), y = rnorm(n))

### Approach 1: specify the proportions of persons you want in each group.
### Every row is assigned independently, with probability 0.6 to the first
### group and 0.4 to the second. The realised group sizes are therefore only
### *about* 60% and 40% of the rows and differ from run to run (call
### set.seed() beforehand if you need a reproducible split).
### The number of draws is taken from the data frame itself so that it can
### never get out of step with the data.
select <- sample(
  c(TRUE, FALSE),
  size = nrow(mydata),
  replace = TRUE,
  prob = c(0.6, 0.4)
)
mydata1 <- mydata[select, ]
mydata2 <- mydata[!select, ]
nrow(mydata1)
## [1] 601   (example output from one run; varies between runs)
nrow(mydata2)
## [1] 399   (example output from one run; varies between runs)

### Approach 2: specify exactly how many persons you want in the first group;
### every remaining row goes to the second group.
### Here we want n = 601 in the first group, which leaves n = 399 in the second.
n_first <- 601
stopifnot(n_first >= 0, n_first <= nrow(mydata))
### v holds exactly n_first TRUE values followed by FALSE for the other rows;
### shuffling it with sample() assigns rows at random while keeping the
### group sizes fixed.
v <- rep(c(TRUE, FALSE), times = c(n_first, nrow(mydata) - n_first))
select <- sample(v)
mydata1 <- mydata[select, ]
mydata2 <- mydata[!select, ]
nrow(mydata1)
## [1] 601
nrow(mydata2)
## [1] 399