Section-1
§ 1.1 What is the accuracy of the baseline method (always predicting the most frequent outcome, "not B") on the test set?
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(1000)
# NOTE: this name masks base R's built-in `letters` constant.
letters <- letters_ABPR
# Binary outcome: TRUE when the observation is the letter "B".
letters$isB <- as.factor(letters$letter == "B")
# Logical mask produced by sample.split(); TRUE rows go to training.
split <- sample.split(letters$isB, SplitRatio = 0.5)
train <- subset(letters, split)
test <- subset(letters, !split)
# Class counts in the test set, used for the baseline accuracy.
table(test$isB)
FALSE TRUE
1175 383
# Baseline accuracy: always predict the most frequent class in the test set.
# Derived from the table instead of hand-copied counts so the figure stays
# correct if the seed, split ratio, or data changes.
max(table(test$isB)) / nrow(test)
[1] 0.7542
§ 1.2 What is the accuracy of the CART model on the test set?
# Classification tree for isB using every column except the raw letter label.
CARTb <- rpart(isB ~ . - letter, data = train, method = "class")
# type = "class" requests predicted class labels for the test rows.
predb <- predict(CARTb, newdata = test, type = "class")
# Confusion matrix: rows are the actual isB values, columns the predictions.
table(test$isB, predb)
predb
FALSE TRUE
FALSE 1118 57
TRUE 43 340
# Test-set accuracy of the CART model: share of predictions that match the
# actual label. Computed from the predictions rather than from hand-copied
# confusion-matrix cells, so it cannot drift from the fitted model.
mean(predb == test$isB)
[1] 0.9358
§ 1.3 What is the accuracy of the random forest model on the test set?
# Seed the RNG before fitting so the forest is reproducible.
set.seed(1000)
# Random forest for isB using every column except the raw letter label.
forest1 <- randomForest(isB ~ . - letter, data = train)
predbforest <- predict(forest1, newdata = test)
# Confusion matrix: rows are the actual isB values, columns the predictions.
table(test$isB, predbforest)
predbforest
FALSE TRUE
FALSE 1163 12
TRUE 9 374
# Test-set accuracy of the random forest: share of predictions that match the
# actual label. Computed from the predictions rather than from hand-copied
# confusion-matrix cells.
mean(predbforest == test$isB)
[1] 0.9865
Section-2
§ 2.1 What is the baseline accuracy on the testing set?
# New seed for the multiclass split so it is reproducible.
set.seed(2000)
# The outcome must be a factor for classification.
letters$letter <- as.factor(letters$letter)
# Logical mask produced by sample.split(); TRUE rows go to training.
split <- sample.split(letters$letter, SplitRatio = 0.5)
train <- subset(letters, split)
test <- subset(letters, !split)
# Per-letter counts in the test set, used for the baseline accuracy.
table(test$letter)
A B P R
395 383 401 379
# Baseline accuracy: always predict the most common letter in the test set.
# Derived from the table instead of a hand-copied count so the figure stays
# correct if the seed, split ratio, or data changes.
max(table(test$letter)) / nrow(test)
[1] 0.2574
§ 2.2 What is the test set accuracy of your CART model?
# Classification tree for the letter, excluding the derived isB column
# (it is computed from the outcome itself).
CART1 <- rpart(letter ~ . - isB, data = train, method = "class")
# type = "class" requests predicted class labels for the test rows.
pred1 <- predict(CART1, newdata = test, type = "class")
# Confusion matrix: rows are the actual letters, columns the predictions.
table(test$letter, pred1)
pred1
A B P R
A 348 4 0 43
B 8 318 12 45
P 2 21 363 15
R 10 24 5 340
# Test-set accuracy of the multiclass CART model: share of predictions that
# match the actual letter. Computed from the predictions rather than from
# hand-copied confusion-matrix diagonal cells.
mean(pred1 == test$letter)
[1] 0.8787
§ 2.3 What is the test set accuracy of your random forest model?
# Seed the RNG before fitting so the forest is reproducible.
set.seed(1000)
# Random forest for the letter, excluding the derived isB column.
forest2 <- randomForest(letter ~ . - isB, data = train)
predbforest1 <- predict(forest2, newdata = test)
# Confusion matrix: rows are the actual letters, columns the predictions.
table(test$letter, predbforest1)
predbforest1
A B P R
A 391 0 3 1
B 0 380 1 2
P 0 6 394 1
R 3 14 0 362
# Test-set accuracy of the multiclass random forest: share of predictions that
# match the actual letter. Computed from the predictions rather than from
# hand-copied confusion-matrix diagonal cells.
mean(predbforest1 == test$letter)
[1] 0.9801
LS0tDQp0aXRsZTogIkFTNC0yIExldHRlciBSZWNvZ25pdGlvbiINCmF1dGhvcjogIumZs+ato+isgCBsb3Vpc2FuMTIzIDIwMTgvMDcvMTgiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCiMjIyMgIFNlY3Rpb24tMQ0KX1/CpyAxLjFfXyBXaGF0IGlzIHRoZSBhY2N1cmFjeSBvZiB0aGlzIGJhc2VsaW5lIG1ldGhvZCBvbiB0aGUgdGVzdCBzZXQ/DQpgYGB7cn0NCnNldC5zZWVkKDEwMDApDQpsZXR0ZXJzID0gbGV0dGVyc19BQlBSDQpsZXR0ZXJzJGlzQiA9IGFzLmZhY3RvcihsZXR0ZXJzJGxldHRlciA9PSAiQiIpDQpzcGxpdCA9IHNhbXBsZS5zcGxpdChsZXR0ZXJzJGlzQiwgU3BsaXRSYXRpbyA9IDAuNSkNCnRyYWluID0gc3Vic2V0KGxldHRlcnMsIHNwbGl0ID09IFRSVUUpDQp0ZXN0ID0gc3Vic2V0KGxldHRlcnMsIHNwbGl0ID09IEZBTFNFKQ0KdGFibGUodGVzdCRpc0IpDQoxMTc1LygxMTc1KzM4MykNCmBgYA0KIA0KX1/CpyAxLjJfXyBXaGF0IGlzIHRoZSBhY2N1cmFjeSBvZiB0aGUgQ0FSVCBtb2RlbCBvbiB0aGUgdGVzdCBzZXQ/DQogDQpgYGB7cn0NCkNBUlRiID0gcnBhcnQoaXNCIH4gLiAtIGxldHRlciwgZGF0YT10cmFpbiwgbWV0aG9kPSJjbGFzcyIpDQpwcmVkYiA9IHByZWRpY3QoQ0FSVGIsIG5ld2RhdGEgPSB0ZXN0LCB0eXBlID0gImNsYXNzIikNCnRhYmxlKHRlc3QkaXNCLCBwcmVkYikNCigxMTE4KzM0MCkvbnJvdyh0ZXN0KQ0KYGBgDQogDQpfX8KnIDEuM19fIFdoYXQgaXMgdGhlIGFjY3VyYWN5IG9mIHRoZSBtb2RlbCBvbiB0aGUgdGVzdCBzZXQ/DQpgYGB7cn0NCnNldC5zZWVkKDEwMDApDQpmb3Jlc3QxID0gcmFuZG9tRm9yZXN0KGlzQiB+IC4gLSBsZXR0ZXIsIGRhdGE9dHJhaW4pDQpwcmVkYmZvcmVzdCA9IHByZWRpY3QoZm9yZXN0MSwgbmV3ZGF0YSA9IHRlc3QpDQp0YWJsZSh0ZXN0JGlzQiwgcHJlZGJmb3Jlc3QpDQooMTE2MyszNzQpL25yb3codGVzdCkNCmBgYA0KIA0KIyMjIyAgU2VjdGlvbi0yIA0KX1/CpyAyLjFfXyAgV2hhdCBpcyB0aGUgYmFzZWxpbmUgYWNjdXJhY3kgb24gdGhlIHRlc3Rpbmcgc2V0Pw0KIA0KYGBge3J9DQpzZXQuc2VlZCgyMDAwKQ0KbGV0dGVycyRsZXR0ZXIgPSBhcy5mYWN0b3IoIGxldHRlcnMkbGV0dGVyKQ0Kc3BsaXQgPSBzYW1wbGUuc3BsaXQobGV0dGVycyRsZXR0ZXIsIFNwbGl0UmF0aW8gPSAwLjUpDQp0cmFpbiA9IHN1YnNldChsZXR0ZXJzLCBzcGxpdCA9PSBUUlVFKQ0KdGVzdCA9IHN1YnNldChsZXR0ZXJzLCBzcGxpdCA9PSBGQUxTRSkNCnRhYmxlKHRlc3QkbGV0dGVyKQ0KNDAxL25yb3codGVzdCkNCmBgYA0KIA0KX1/CpyAyLjJfXyBXaGF0IGlzIHRoZSB0ZXN0IHNldCBhY2N1cmFjeSBvZiB5b3VyIENBUlQgbW9kZWw/DQogDQpgYGB7cn0NCkNBUlQxID0gcnBhcnQobGV0dGVyIH4gLiAtIGlzQiwgZGF0YT10cmFpbiwgbWV0aG9kPSJjbGFzcyIpDQpwcmVkMSA9IHByZWRpY3QoQ0FSVDEsIG5ld2RhdGEgPSB0ZXN0LCB0eXBlID0gImNsYXNzIikN
CnRhYmxlKHRlc3QkbGV0dGVyLCBwcmVkMSkNCigzNDgrMzE4KzM2MyszNDApL25yb3codGVzdCkNCmBgYA0KIA0KX1/CpyAyLjNfXyBXaGF0IGlzIHRoZSB0ZXN0IHNldCBhY2N1cmFjeSBvZiB5b3VyIHJhbmRvbSBmb3Jlc3QgbW9kZWw/DQogDQpgYGB7cn0NCnNldC5zZWVkKDEwMDApDQpmb3Jlc3QyID0gcmFuZG9tRm9yZXN0KGxldHRlciB+IC4gLSBpc0IsIGRhdGE9dHJhaW4pDQpwcmVkYmZvcmVzdDEgPSBwcmVkaWN0KGZvcmVzdDIsIG5ld2RhdGEgPSB0ZXN0KQ0KdGFibGUodGVzdCRsZXR0ZXIsIHByZWRiZm9yZXN0MSkNCigzOTErMzgwKzM5NCszNjIpL25yb3codGVzdCkNCmBgYA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KIA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=