Overview
Post Date |
2018-10-27 |
URL |
Reddit |
I have the following:
set.seed(1)
parts = c("A", "B", "C", "D", "E")
stuff = c( rep("A",5), rep("B", 5), rep("C",5),rep("D",5),rep("E",5) )
observations1 = c(rep(1,5), rep(2,5), rep(3,5), rep(4,5), rep(5,5))
df = data.frame(parts = stuff, obs = observations1)
df
What I want to do is
- choose a random value from A,B,C,D,E
set.seed(1)
rparts <- sample(parts, 1)
rparts
[1] "B"
- get rid of all rows in the dataframe that match that value
df1 <- df[df$parts != rparts, ]
df1
- for each remaining of the remaining values ( so A,B,D,E, or A,B,C,D etc, depending on what was dropped previously ) drop one value
I will then be left with four parts (where parts are A,B,C,D,E) and for each remaining parts there will be four values.
What’s the best way to go about this using base R?
set.seed(1)
rowsToKeep <- length(rownames(df1)) - 1
df2 <- df1[sample(rownames(df1), size = rowsToKeep, replace = FALSE), ]
df3 <- df2[order(df2$parts, df2$obs, rownames(df2)), ]
df3
As you can see, the “missing” row was
removedRowIndex <- setdiff(rownames(df1), rownames(df2))
removedRowIndex
[1] "18"
df1[removedRowIndex, ]
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogInRoYXVmYXMiDQotLS0NCg0KIyMgT3ZlcnZpZXcNCnwgICAgICAgICAgIHwgICAgICAgICAgICAgfA0KfDotLS0tLS0tLS0tfDotLS0tLS0tLS0tLTp8DQp8IFBvc3QgRGF0ZSB8IDIwMTgtMTAtMjcgIHwNCnwgVVJMICAgICAgIHwgW1JlZGRpdF0oaHR0cHM6Ly93d3cucmVkZGl0LmNvbS9yL1JsYW5ndWFnZS9jb21tZW50cy85cnR2cmgvaG93X3RvX3JhbmRvbWx5X2Ryb3BfZWxlbWVudHNfZnJvbV9hX2RhdGFmcmFtZS8pIHwNCg0KDQo+IEkgaGF2ZSB0aGUgZm9sbG93aW5nOg0KDQpgYGB7cn0NCg0Kc2V0LnNlZWQoMSkNCnBhcnRzID0gYygiQSIsICJCIiwgIkMiLCAiRCIsICJFIikNCnN0dWZmID0gYyggIHJlcCgiQSIsNSksIHJlcCgiQiIsIDUpLCByZXAoIkMiLDUpLHJlcCgiRCIsNSkscmVwKCJFIiw1KSApDQpvYnNlcnZhdGlvbnMxID0gYyhyZXAoMSw1KSwgcmVwKDIsNSksIHJlcCgzLDUpLCByZXAoNCw1KSwgcmVwKDUsNSkpDQpkZiA9IGRhdGEuZnJhbWUocGFydHMgPSBzdHVmZiwgb2JzID0gb2JzZXJ2YXRpb25zMSkNCmRmDQoNCmBgYA0KDQo+IFdoYXQgSSB3YW50IHRvIGRvIGlzDQoNCj4gMS4gY2hvb3NlIGEgcmFuZG9tIHZhbHVlIGZyb20gQSxCLEMsRCxFDQoNCmBgYHtyfQ0KDQpzZXQuc2VlZCgxKQ0KcnBhcnRzIDwtIHNhbXBsZShwYXJ0cywgMSkNCnJwYXJ0cw0KDQpgYGANCg0KDQo+IDIuIGdldCByaWQgb2YgYWxsIHJvd3MgaW4gdGhlIGRhdGFmcmFtZSB0aGF0IG1hdGNoIHRoYXQgdmFsdWUNCg0KYGBge3J9DQoNCmRmMSA8LSBkZltkZiRwYXJ0cyAhPSBycGFydHMsIF0NCmRmMQ0KDQoNCmBgYA0KDQoNCj4gMy4gZm9yIGVhY2ggcmVtYWluaW5nIG9mIHRoZSByZW1haW5pbmcgdmFsdWVzICggc28gQSxCLEQsRSwgb3IgQSxCLEMsRCBldGMsIGRlcGVuZGluZyBvbiB3aGF0IHdhcyBkcm9wcGVkIHByZXZpb3VzbHkgKSBkcm9wIG9uZSB2YWx1ZQ0KPg0KPiBJIHdpbGwgdGhlbiBiZSBsZWZ0IHdpdGggZm91ciBwYXJ0cyAod2hlcmUgcGFydHMgYXJlIEEsQixDLEQsRSkgYW5kIGZvciBlYWNoIHJlbWFpbmluZyBwYXJ0cyB0aGVyZSB3aWxsIGJlIGZvdXIgdmFsdWVzLg0KPg0KPiBXaGF0J3MgdGhlIGJlc3Qgd2F5IHRvIGdvIGFib3V0IHRoaXMgdXNpbmcgYmFzZSBSPyANCg0KYGBge3J9DQpzZXQuc2VlZCgxKQ0KDQpyb3dzVG9LZWVwIDwtIGxlbmd0aChyb3duYW1lcyhkZjEpKSAtIDENCmRmMiA8LSBkZjFbc2FtcGxlKHJvd25hbWVzKGRmMSksIHNpemUgPSByb3dzVG9LZWVwLCByZXBsYWNlID0gRkFMU0UpLCBdDQpkZjMgPC0gZGYyW29yZGVyKGRmMiRwYXJ0cywgZGYyJG9icywgcm93bmFtZXMoZGYyKSksIF0NCmRmMw0KDQpgYGANCg0KQXMgeW91IGNhbiBzZWUsIHRoZSAibWlzc2luZyIgcm93IHdhcw0KDQpgYGB7cn0NCg0KcmVtb3ZlZFJvd0luZGV4IDwtIHNldGRpZmYocm93bmFtZXMoZGYxKSwgcm93bmFtZXMoZGYyKSkNCnJlbW92ZWRSb3dJbmRleA0KDQpkZjFbcmVtb3ZlZFJvd0luZGV4LCBdDQoNCmBgYA0KDQo=