# Seed the random number generator once, before any model is fitted.
set.seed(13)
# Packages attached for the rest of the script.
library(party)
library(randomForest)
library(rpart)
library(rpart.plot)
# Read the prepared data set from disk and preview its first rows.
maxWAP_build <- read.csv("~/2810_uc3_R/T4C4/maxWAP_build.csv")
head(maxWAP_build)
# WAP clustering
## The clusters can either be rebuilt here or reused from the columns already saved in maxWAP_build by previous runs.
## This is not critical at this point, but for the sake of consistency it is probably better to keep using the saved clusters later on.
# matWAP <- as.matrix(WAPs) # create a WAP matrix to apply k-means clustering
# 3 clusters
#kmeansWAPS <- kmeans(matWAP, centers=3, iter.max = 1000, algorithm = c("Hartigan-Wong", "Lloyd", "Forgy","MacQueen"))
# K-means clustering with 3 clusters of sizes 8445, 8995, 2497
#maxWAP_build$cluster <- kmeansWAPS$cluster # save cluster vector as a variable in maxWAP_build
#head(maxWAP_build)
# repeat steps for 5 clusters.
# kmeansWAPS5 <- kmeans(matWAP, centers=5, iter.max = 1000, algorithm = c("Hartigan-Wong", "Lloyd", "Forgy","MacQueen"))
# maxWAP_build$cluster5 <- kmeansWAPS5$cluster
# Make sure cluster vectors c(1,2,3) and c(1,2,3,4,5), as well as buildingID are saved as factors in the maxWAP_build.
# Convert the response (df.BUILDINGID) and both cluster-label columns to
# factors in one pass, so the models treat them as categorical.
maxWAP_build[c("df.BUILDINGID", "cluster", "cluster5")] <- lapply(
  maxWAP_build[c("df.BUILDINGID", "cluster", "cluster5")],
  as.factor
)
# head(maxWAP_build)
# Model 1: df.maxVal is the only predictor of df.BUILDINGID.
# Tree settings: at least 20 observations in a node before a split is
# attempted, and a maximum tree depth of 7.
controls <- rpart.control(maxdepth = 7, minsplit = 20)

# Fit a single classification tree and draw it.
fitMVt <- rpart(
  formula = df.BUILDINGID ~ df.maxVal,
  data    = maxWAP_build,
  method  = "class",
  control = controls
)
rpart.plot(fitMVt)

# Fit a random forest on the same formula, using the package defaults.
fitMVrf <- randomForest(
  df.BUILDINGID ~ df.maxVal,
  data = maxWAP_build
)

# Print the forest summary (OOB error estimate and confusion matrix).
print(fitMVrf)
Call:
randomForest(formula = df.BUILDINGID ~ df.maxVal, data = maxWAP_build)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 1
OOB estimate of error rate: 50.83%
Confusion matrix:
1 2 3 class.error
1 376 111 4762 0.9283673
2 153 948 4095 0.8175520
3 294 718 8480 0.1066161
# Model 2: the 3-cluster label (cluster) is the only predictor of
# df.BUILDINGID.
# Tree settings: at least 20 observations in a node before a split is
# attempted, and a maximum tree depth of 7.
controls <- rpart.control(maxdepth = 7, minsplit = 20)

# Fit a single classification tree and draw it.
fit3t <- rpart(
  formula = df.BUILDINGID ~ cluster,
  data    = maxWAP_build,
  method  = "class",
  control = controls
)
rpart.plot(fit3t)

# Fit a random forest on the same formula, using the package defaults.
fit3rf <- randomForest(
  df.BUILDINGID ~ cluster,
  data = maxWAP_build
)

# Print the forest summary (OOB error estimate and confusion matrix).
print(fit3rf)
Call:
randomForest(formula = df.BUILDINGID ~ cluster, data = maxWAP_build)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 1
OOB estimate of error rate: 16.3%
Confusion matrix:
1 2 3 class.error
1 2497 2752 0 0.52429034
2 0 5196 0 0.00000000
3 0 497 8995 0.05235988
# Model 3: both cluster labels (cluster and cluster5) are used together as
# predictors of df.BUILDINGID.
# Tree settings: at least 20 observations in a node before a split is
# attempted, and a maximum tree depth of 7.
controls <- rpart.control(maxdepth = 7, minsplit = 20)

# Fit a single classification tree and draw it.
fit35t <- rpart(
  formula = df.BUILDINGID ~ cluster + cluster5,
  data    = maxWAP_build,
  method  = "class",
  control = controls
)
rpart.plot(fit35t)

# Fit a random forest on the same formula, using the package defaults.
fit35rf <- randomForest(
  df.BUILDINGID ~ cluster + cluster5,
  data = maxWAP_build
)

# Print the forest summary (OOB error estimate and confusion matrix).
print(fit35rf)
Call:
randomForest(formula = df.BUILDINGID ~ cluster + cluster5, data = maxWAP_build)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 1
OOB estimate of error rate: 3.92%
Confusion matrix:
1 2 3 class.error
1 4957 292 0 0.05562964
2 0 5196 0 0.00000000
3 0 490 9002 0.05162242
# Model 4: cluster, cluster5 and df.maxVal together as predictors of
# df.BUILDINGID.
#
# The predictors are spelled out instead of using `df.BUILDINGID ~ .`.
# With `.`, every other column of maxWAP_build becomes a predictor, so any
# extra column in the CSV (for example a row-index column) would silently
# enter the model and contradict the stated predictor set.
#
# Tree settings: at least 20 observations in a node before a split is
# attempted, and a maximum tree depth of 7.
controls <- rpart.control(minsplit = 20, maxdepth = 7)

# Fit a single classification tree and draw it.
fitt <- rpart(
  df.BUILDINGID ~ cluster + cluster5 + df.maxVal,
  data    = maxWAP_build,
  method  = "class",
  control = controls
)
rpart.plot(fitt)

# Fit a random forest on the same explicit formula, using package defaults.
fitrf <- randomForest(
  df.BUILDINGID ~ cluster + cluster5 + df.maxVal,
  data = maxWAP_build
)

# Print the forest summary (OOB error estimate and confusion matrix).
print(fitrf)
Call:
randomForest(formula = df.BUILDINGID ~ ., data = maxWAP_build)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 1
OOB estimate of error rate: 3.92%
Confusion matrix:
1 2 3 class.error
1 4957 292 0 0.05562964
2 0 5196 0 0.00000000
3 0 490 9002 0.05162242
LS0tCnRpdGxlOiAiQlVJTERJTkcgSUQgcHJlZGljdGlvbiB1c2luZyBvbmx5IFdBUHMiCnN1YnRpdGxlOiAia21lYW5zIGNsdXN0ZXJpbmcgbWFnaWsiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyIGRlY2lzaW9uIHRyZWUgd2l0aCBrbWVhbnN9CgpzZXQuc2VlZCgxMykKCiMgbG9hZCBsaWJyYXJpZXMKbGlicmFyeShwYXJ0eSkKbGlicmFyeShyYW5kb21Gb3Jlc3QpCmxpYnJhcnkocnBhcnQpCmxpYnJhcnkocnBhcnQucGxvdCkKCiNsb2FkIGRhdGEKbWF4V0FQX2J1aWxkIDwtIHJlYWQuY3N2KGZpbGU9In4vMjgxMF91YzNfUi9UNEM0L21heFdBUF9idWlsZC5jc3YiKQoKaGVhZChtYXhXQVBfYnVpbGQpCgojIFdBUCBjbHVzdGVyaW5nIAoKIyMgb25lIG1heSByZWJ1aWxkIHRoZSBjbHVzdGVycyBvciB1c2UgdGhlIG9uZXMgYWxyZWFkeSBzYXZlZCBpbiB0aGUgbWF4V0FQX2J1aWxkIGZyb20gcHJldmlvdXMgcnVucy4gCiMjIG5vdCBzdXBlciBpbXBvcnRhbnQgYXQgdGhpcyBwb2ludCwgcG9zc2libHkgbGF0ZXIgZm9yIHNha2Ugb2YgY29uc2lzdGVuY3kgd291bGQga2VlcCB0aGUgY2x1c3RlcnMgc2F2ZWQuCgojIG1hdFdBUCA8LSBhcy5tYXRyaXgoV0FQcykgIyBjcmVhdGUgYSBXQVAgbWF0cml4IHRvIGFwcGx5IGstbWVhbnMgY2x1c3RlcmluZwoKIyAzIGNsdXN0ZXJzCiNrbWVhbnNXQVBTIDwtIGttZWFucyhtYXRXQVAsIGNlbnRlcnM9MywgaXRlci5tYXggPSAxMDAwLCBhbGdvcml0aG0gPSBjKCJIYXJ0aWdhbi1Xb25nIiwgIkxsb3lkIiwgIkZvcmd5IiwiTWFjUXVlZW4iKSkKIyBLLW1lYW5zIGNsdXN0ZXJpbmcgd2l0aCAzIGNsdXN0ZXJzIG9mIHNpemVzIDg0NDUsIDg5OTUsIDI0OTcKI21heFdBUF9idWlsZCRjbHVzdGVyIDwtIGttZWFuc1dBUFMkY2x1c3RlciAjIHNhdmUgY2x1c3RlciB2ZWN0b3IgYXMgYSB2YXJpYWJsZSBpbiBtYXhXQVBfYnVpbGQKI2hlYWQobWF4V0FQX2J1aWxkKQoKIyByZXBlYXQgc3RlcHMgZm9yIDUgY2x1c3RlcnMuIAojIGttZWFuc1dBUFM1IDwtIGttZWFucyhtYXRXQVAsIGNlbnRlcnM9NSwgaXRlci5tYXggPSAxMDAwLCBhbGdvcml0aG0gPSBjKCJIYXJ0aWdhbi1Xb25nIiwgIkxsb3lkIiwgIkZvcmd5IiwiTWFjUXVlZW4iKSkKIyBtYXhXQVBfYnVpbGQkY2x1c3RlcjUgPC0ga21lYW5zV0FQUzUkY2x1c3RlcgoKIyBNYWtlIHN1cmUgY2x1c3RlciB2ZWN0b3JzIGMoMSwyLDMpIGFuZCBjKDEsMiwzLDQsNSksIGFzIHdlbGwgYXMgYnVpbGRpbmdJRCBhcmUgc2F2ZWQgYXMgZmFjdG9ycyBpbiB0aGUgbWF4V0FQX2J1aWxkLgptYXhXQVBfYnVpbGQkZGYuQlVJTERJTkdJRCA8LSBhcy5mYWN0b3IobWF4V0FQX2J1aWxkJGRmLkJVSUxESU5HSUQpCm1heFdBUF9idWlsZCRjbHVzdGVyIDwtIGFzLmZhY3RvcihtYXhXQVBfYnVpbGQkY2x1c3RlcikKbWF4V0FQX2J1aWxkJGNsdXN0ZXI1IDwtIGFzLmZhY3RvcihtYXhXQVBfYnVpbGQkY2x1c3RlcjUpCgojIGhlYWQobWF4V0FQX2J1aWxkKQoK
Y29udHJvbHMgPSBycGFydC5jb250cm9sKG1pbnNwbGl0PTIwLCBtYXhkZXB0aCA9IDcpICMgdXNpbmcgbWF4VmFsIGFzIHByZWRpY3RvcgpmaXRNVnQgPC0gcnBhcnQoZGYuQlVJTERJTkdJRH4gZGYubWF4VmFsLAogZGF0YSA9IG1heFdBUF9idWlsZCwKIG1ldGhvZD0iY2xhc3MiLAogY29udHJvbD0gY29udHJvbHMpCnJwYXJ0LnBsb3QoZml0TVZ0KQoKIyBDcmVhdGUgdGhlIGZvcmVzdC4KZml0TVZyZiA8LSByYW5kb21Gb3Jlc3QoZGYuQlVJTERJTkdJRCB+IGRmLm1heFZhbCwgCiAgICAgICAgICAgZGF0YSA9IG1heFdBUF9idWlsZCkKCiMgVmlldyB0aGUgZm9yZXN0IHJlc3VsdHMuCnByaW50KGZpdE1WcmYpIAoKCmNvbnRyb2xzID0gcnBhcnQuY29udHJvbChtaW5zcGxpdD0yMCwgbWF4ZGVwdGggPSA3KSAjIHVzaW5nIGNsdXN0ZXIgMyBhcyBwcmVkaWN0b3IKZml0M3QgPC0gcnBhcnQoZGYuQlVJTERJTkdJRH4gY2x1c3RlciwKIGRhdGEgPSBtYXhXQVBfYnVpbGQsCiBtZXRob2Q9ImNsYXNzIiwKIGNvbnRyb2w9IGNvbnRyb2xzKQpycGFydC5wbG90KGZpdDN0KQoKIyBDcmVhdGUgdGhlIGZvcmVzdC4KZml0M3JmIDwtIHJhbmRvbUZvcmVzdChkZi5CVUlMRElOR0lEIH4gY2x1c3RlciwgCiAgICAgICAgICAgZGF0YSA9IG1heFdBUF9idWlsZCkKCiMgVmlldyB0aGUgZm9yZXN0IHJlc3VsdHMuCnByaW50KGZpdDNyZikgCgoKY29udHJvbHMgPSBycGFydC5jb250cm9sKG1pbnNwbGl0PTIwLCBtYXhkZXB0aCA9IDcpICMgdXNpbmcgY2x1c3RlciAzIGFuZCBjbHVzdGVyIDUgYXMgcHJlZGljdG9ycwpmaXQzNXQgPC0gcnBhcnQoZGYuQlVJTERJTkdJRH4gY2x1c3RlcitjbHVzdGVyNSwKIGRhdGEgPSBtYXhXQVBfYnVpbGQsCiBtZXRob2Q9ImNsYXNzIiwKIGNvbnRyb2w9IGNvbnRyb2xzKQpycGFydC5wbG90KGZpdDM1dCkKCiMgQ3JlYXRlIHRoZSBmb3Jlc3QuCmZpdDM1cmYgPC0gcmFuZG9tRm9yZXN0KGRmLkJVSUxESU5HSUQgfiBjbHVzdGVyK2NsdXN0ZXI1LCAKICAgICAgICAgICBkYXRhID0gbWF4V0FQX2J1aWxkKQoKIyBWaWV3IHRoZSBmb3Jlc3QgcmVzdWx0cy4KcHJpbnQoZml0MzVyZikgCgoKY29udHJvbHMgPSBycGFydC5jb250cm9sKG1pbnNwbGl0PTIwLCBtYXhkZXB0aCA9IDcpICMgdXNpbmcgY2x1c3RlciAzLCBjbHVzdGVyIDUgYW5kIG1heFZhbCBhcyBwcmVkaWN0b3JzCmZpdHQgPC0gcnBhcnQoZGYuQlVJTERJTkdJRH4gLiwKIGRhdGEgPSBtYXhXQVBfYnVpbGQsCiBtZXRob2Q9ImNsYXNzIiwKIGNvbnRyb2w9IGNvbnRyb2xzKQpycGFydC5wbG90KGZpdHQpCgojIENyZWF0ZSB0aGUgZm9yZXN0LgpmaXRyZiA8LSByYW5kb21Gb3Jlc3QoZGYuQlVJTERJTkdJRCB+IC4sIAogICAgICAgICAgIGRhdGEgPSBtYXhXQVBfYnVpbGQpCgojIFZpZXcgdGhlIGZvcmVzdCByZXN1bHRzLgpwcmludChmaXRyZikgCgoKYGBg