Overview

Post Date 2018-10-27
URL Reddit

I have the following:

set.seed(1)
parts = c("A", "B", "C", "D", "E")
stuff = c(  rep("A",5), rep("B", 5), rep("C",5),rep("D",5),rep("E",5) )
observations1 = c(rep(1,5), rep(2,5), rep(3,5), rep(4,5), rep(5,5))
df = data.frame(parts = stuff, obs = observations1)
df

What I want to do is

  1. choose a random value from A,B,C,D,E
set.seed(1)
rparts <- sample(parts, 1)
rparts
[1] "B"
  1. get rid of all rows in the dataframe that match that value
df1 <- df[df$parts != rparts, ]
df1
  1. for each remaining of the remaining values ( so A,B,D,E, or A,B,C,D etc, depending on what was dropped previously ) drop one value

I will then be left with four parts (where parts are A,B,C,D,E) and for each remaining parts there will be four values.

What’s the best way to go about this using base R?

set.seed(1)
rowsToKeep <- length(rownames(df1)) - 1
df2 <- df1[sample(rownames(df1), size = rowsToKeep, replace = FALSE), ]
df3 <- df2[order(df2$parts, df2$obs, rownames(df2)), ]
df3

As you can see, the “missing” row was

removedRowIndex <- setdiff(rownames(df1), rownames(df2))
removedRowIndex
[1] "18"
df1[removedRowIndex, ]
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogInRoYXVmYXMiDQotLS0NCg0KIyMgT3ZlcnZpZXcNCnwgICAgICAgICAgIHwgICAgICAgICAgICAgfA0KfDotLS0tLS0tLS0tfDotLS0tLS0tLS0tLTp8DQp8IFBvc3QgRGF0ZSB8IDIwMTgtMTAtMjcgIHwNCnwgVVJMICAgICAgIHwgW1JlZGRpdF0oaHR0cHM6Ly93d3cucmVkZGl0LmNvbS9yL1JsYW5ndWFnZS9jb21tZW50cy85cnR2cmgvaG93X3RvX3JhbmRvbWx5X2Ryb3BfZWxlbWVudHNfZnJvbV9hX2RhdGFmcmFtZS8pIHwNCg0KDQo+IEkgaGF2ZSB0aGUgZm9sbG93aW5nOg0KDQpgYGB7cn0NCg0Kc2V0LnNlZWQoMSkNCnBhcnRzID0gYygiQSIsICJCIiwgIkMiLCAiRCIsICJFIikNCnN0dWZmID0gYyggIHJlcCgiQSIsNSksIHJlcCgiQiIsIDUpLCByZXAoIkMiLDUpLHJlcCgiRCIsNSkscmVwKCJFIiw1KSApDQpvYnNlcnZhdGlvbnMxID0gYyhyZXAoMSw1KSwgcmVwKDIsNSksIHJlcCgzLDUpLCByZXAoNCw1KSwgcmVwKDUsNSkpDQpkZiA9IGRhdGEuZnJhbWUocGFydHMgPSBzdHVmZiwgb2JzID0gb2JzZXJ2YXRpb25zMSkNCmRmDQoNCmBgYA0KDQo+IFdoYXQgSSB3YW50IHRvIGRvIGlzDQoNCj4gMS4gY2hvb3NlIGEgcmFuZG9tIHZhbHVlIGZyb20gQSxCLEMsRCxFDQoNCmBgYHtyfQ0KDQpzZXQuc2VlZCgxKQ0KcnBhcnRzIDwtIHNhbXBsZShwYXJ0cywgMSkNCnJwYXJ0cw0KDQpgYGANCg0KDQo+IDIuIGdldCByaWQgb2YgYWxsIHJvd3MgaW4gdGhlIGRhdGFmcmFtZSB0aGF0IG1hdGNoIHRoYXQgdmFsdWUNCg0KYGBge3J9DQoNCmRmMSA8LSBkZltkZiRwYXJ0cyAhPSBycGFydHMsIF0NCmRmMQ0KDQoNCmBgYA0KDQoNCj4gMy4gZm9yIGVhY2ggcmVtYWluaW5nIG9mIHRoZSByZW1haW5pbmcgdmFsdWVzICggc28gQSxCLEQsRSwgb3IgQSxCLEMsRCBldGMsIGRlcGVuZGluZyBvbiB3aGF0IHdhcyBkcm9wcGVkIHByZXZpb3VzbHkgKSBkcm9wIG9uZSB2YWx1ZQ0KPg0KPiBJIHdpbGwgdGhlbiBiZSBsZWZ0IHdpdGggZm91ciBwYXJ0cyAod2hlcmUgcGFydHMgYXJlIEEsQixDLEQsRSkgYW5kIGZvciBlYWNoIHJlbWFpbmluZyBwYXJ0cyB0aGVyZSB3aWxsIGJlIGZvdXIgdmFsdWVzLg0KPg0KPiBXaGF0J3MgdGhlIGJlc3Qgd2F5IHRvIGdvIGFib3V0IHRoaXMgdXNpbmcgYmFzZSBSPyANCg0KYGBge3J9DQpzZXQuc2VlZCgxKQ0KDQpyb3dzVG9LZWVwIDwtIGxlbmd0aChyb3duYW1lcyhkZjEpKSAtIDENCmRmMiA8LSBkZjFbc2FtcGxlKHJvd25hbWVzKGRmMSksIHNpemUgPSByb3dzVG9LZWVwLCByZXBsYWNlID0gRkFMU0UpLCBdDQpkZjMgPC0gZGYyW29yZGVyKGRmMiRwYXJ0cywgZGYyJG9icywgcm93bmFtZXMoZGYyKSksIF0NCmRmMw0KDQpgYGANCg0KQXMgeW91IGNhbiBzZWUsIHRoZSAibWlzc2luZyIgcm93IHdhcw0KDQpgYGB7cn0NCg0KcmVtb3ZlZFJvd0luZGV4IDwtIHNldGRpZmYocm93bmFtZXMoZGYxKSwgcm93bmFtZXMoZGYyKSkNCnJlbW92ZWRSb3dJbmRleA0KDQpkZjFbcmVtb3ZlZFJvd0luZGV4LCBdDQoNCmBgYA0KDQo=