Renew: 06/02 line 37~47 => Use to see the number of each outcometype line 50~58 => Not yet. line 61~168 => Breed line 174~202 => Age line 207~290
06/04 line 205~265 => Color
line 465~617 => Random Forest
共26729筆資料,9個解釋變數,包含狗以及貓。
library(dplyr)
library(ggplot2)
library(e1071)
library(randomForest)
library(caret)
# Read the raw training records from train.csv and preview the first rows.
trainInit <- read.csv(file = "train.csv")
head(trainInit)
## AnimalID Name DateTime OutcomeType OutcomeSubtype
## 1 A671945 Hambone 2014-02-12 18:22:00 Return_to_owner
## 2 A656520 Emily 2013-10-13 12:44:00 Euthanasia Suffering
## 3 A686464 Pearce 2015-01-31 12:28:00 Adoption Foster
## 4 A683430 2014-07-11 19:09:00 Transfer Partner
## 5 A667013 2013-11-15 12:52:00 Transfer Partner
## 6 A677334 Elsa 2014-04-25 13:04:00 Transfer Partner
## AnimalType SexuponOutcome AgeuponOutcome
## 1 Dog Neutered Male 1 year
## 2 Cat Spayed Female 1 year
## 3 Dog Neutered Male 2 years
## 4 Cat Intact Male 3 weeks
## 5 Dog Neutered Male 2 years
## 6 Dog Intact Female 1 month
## Breed Color
## 1 Shetland Sheepdog Mix Brown/White
## 2 Domestic Shorthair Mix Cream Tabby
## 3 Pit Bull Mix Blue/White
## 4 Domestic Shorthair Mix Blue Cream
## 5 Lhasa Apso/Miniature Poodle Tan
## 6 Cairn Terrier/Chihuahua Shorthair Black/Tan
刪除 AnimalID、Name 以及 OutcomeSubtype
# Drop the identifier and free-text columns that are not used as predictors.
# Selecting by name instead of by position (previously -c(1, 2, 5)) keeps the
# result correct if the column order of train.csv ever changes, and the guard
# fails loudly if one of the expected columns is missing.
drop_cols <- c("AnimalID", "Name", "OutcomeSubtype")
stopifnot(all(drop_cols %in% names(trainInit)))
train <- trainInit[, !(names(trainInit) %in% drop_cols), drop = FALSE]
head(train)
## DateTime OutcomeType AnimalType SexuponOutcome
## 1 2014-02-12 18:22:00 Return_to_owner Dog Neutered Male
## 2 2013-10-13 12:44:00 Euthanasia Cat Spayed Female
## 3 2015-01-31 12:28:00 Adoption Dog Neutered Male
## 4 2014-07-11 19:09:00 Transfer Cat Intact Male
## 5 2013-11-15 12:52:00 Transfer Dog Neutered Male
## 6 2014-04-25 13:04:00 Transfer Dog Intact Female
## AgeuponOutcome Breed Color
## 1 1 year Shetland Sheepdog Mix Brown/White
## 2 1 year Domestic Shorthair Mix Cream Tabby
## 3 2 years Pit Bull Mix Blue/White
## 4 3 weeks Domestic Shorthair Mix Blue Cream
## 5 2 years Lhasa Apso/Miniature Poodle Tan
## 6 1 month Cairn Terrier/Chihuahua Shorthair Black/Tan
# NOTE(review): attach() places a copy of `train`'s columns on the search path,
# so bare column names used further down resolve against this snapshot rather
# than against later modifications of `train`. Prefer train$<column>.
attach(train)
將狗和貓的data分散開來。
# Split the records by species. The membership test reads the column from
# `train` directly (not through attach()) and uses a logical mask: the previous
# `train[-which(...), ]` form returns zero rows when nothing matches, because
# negating an empty index selects nothing. `%in%` treats NA as "not Dog", so
# rows with a missing AnimalType still land in Cattrain, as before.
is_dog <- train$AnimalType %in% "Dog"
Dogtrain <- train[is_dog, ]
Cattrain <- train[!is_dog, ]
head(Dogtrain)
## DateTime OutcomeType AnimalType SexuponOutcome
## 1 2014-02-12 18:22:00 Return_to_owner Dog Neutered Male
## 3 2015-01-31 12:28:00 Adoption Dog Neutered Male
## 5 2013-11-15 12:52:00 Transfer Dog Neutered Male
## 6 2014-04-25 13:04:00 Transfer Dog Intact Female
## 9 2014-02-04 17:17:00 Adoption Dog Spayed Female
## 10 2014-05-03 07:48:00 Adoption Dog Spayed Female
## AgeuponOutcome Breed Color
## 1 1 year Shetland Sheepdog Mix Brown/White
## 3 2 years Pit Bull Mix Blue/White
## 5 2 years Lhasa Apso/Miniature Poodle Tan
## 6 1 month Cairn Terrier/Chihuahua Shorthair Black/Tan
## 9 5 months American Pit Bull Terrier Mix Red/White
## 10 1 year Cairn Terrier White
# NOTE(review): the columns of `Dogtrain` are attached as they are at this
# point. The script later removes rows from `Dogtrain` and rewrites its
# OutcomeType labels; bare-name lookups will not see those changes.
# Prefer Dogtrain$<column>.
attach(Dogtrain)
n = 15595 p=8
先看各個 OutcomeType 分別有幾筆資料。
## Return to owner = 4286
## Transfer = 3917
## Adoption = 6497
## Died = 50
## Euthanasia = 845
因為“死亡”的個數太少,所以我們決定把他拿掉。 而且,我們將本來的OutcomeType中的,Return_to_owner 改為 Return。 因為後面在做XGBoosting的時候,會出現錯誤,說Return_to_owner超過64位元…….
# Remove the rows indexed by `Died` (defined outside this excerpt).
# NOTE(review): if `Died` is ever an empty index vector, `-Died` selects zero
# rows instead of keeping everything -- confirm it is non-empty.
Dogtrain <- Dogtrain[-Died, ]
# dplyr was attached here in the original; kept so that later code relying on
# it is unaffected.
library(dplyr)
# Keep only the first word of each outcome label, e.g. "Return_to_owner"
# becomes "Return". vapply() pins the result to a character vector, whereas
# sapply() can hand back a list on empty or irregular input.
temp <- gsub("_", " ", Dogtrain$OutcomeType)
first_word <- vapply(
  strsplit(x = temp, split = " "),
  function(parts) parts[1],
  character(1)
)
Dogtrain$OutcomeType <- as.factor(first_word)
將“死亡”的資料,都換成“安樂死”
# summary(Dogtrain$OutcomeType)
# temp <- gsub("_"," ", Dogtrain$OutcomeType)
# temp <- gsub("Died","Euthanasia", temp)
# Dogtrain$OutcomeType<-as.factor(strsplit(x = temp, split = " ") %>% sapply(function(x){x[1]}))
# summary(Dogtrain$OutcomeType)
Recode AgeuponOutcome into three categories: “Puppy”, “AdultDog”, and “OldDog”.
## DateTime OutcomeType AnimalType SexuponOutcome
## 1 2014-02-12 18:22:00 Return Dog Neutered Male
## 3 2015-01-31 12:28:00 Adoption Dog Neutered Male
## 5 2013-11-15 12:52:00 Transfer Dog Neutered Male
## 6 2014-04-25 13:04:00 Transfer Dog Intact Female
## 9 2014-02-04 17:17:00 Adoption Dog Spayed Female
## 10 2014-05-03 07:48:00 Adoption Dog Spayed Female
## AgeuponOutcome Color breed
## 1 1 year Brown/White Other Breed
## 3 2 years Blue/White Pit Bull
## 5 2 years Tan Miniature Poodle
## 6 1 month Black/Tan Cairn Terrier
## 9 5 months Red/White Other Breed
## 10 1 year White Cairn Terrier
Recode each color into one of six categories: “Simple”, “Double”, “Tricolor”, “Brindle”, “Tick”, and “Merle”.
The “Brindle” category also includes colors that are both Brindle and Tick, or both Brindle and Merle.
However, some colors do not belong to Brindle, such as “Blue Tiger”, “Blue Cream”, and “Smoke”. For these I consider only the colors themselves and classify them as “Simple”, “Double”, or “Tricolor”.
Try to use SVM to fit model.
method 1 : 使training data中,各個OutcomeType的個數相等。 各個OutcomeType各取 500~600 隨機一個個數
# SVM, method 1: train on a roughly class-balanced subsample of the dog records
# and evaluate on every remaining row.
# NOTE(review): the original comment here read "remove DateTime & AnimalID",
# but no column is dropped in this chunk -- Dogtrain is copied unchanged.
Dogtrainsvm <- Dogtrain

# Class sizes in the full dog data (overwritten below with training counts).
AdoL <- sum(Dogtrainsvm$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(Dogtrainsvm$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(Dogtrainsvm$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(Dogtrainsvm$OutcomeType == "Transfer", na.rm = TRUE)

# For each outcome, draw a random size between 500 and 600 and then that many
# row indices without replacement. Outcomes are visited in the same order as
# before and each one makes the same two random draws (size, then rows).
choiceddata <- unlist(lapply(
  c("Adoption", "Return", "Euthanasia", "Transfer"),
  function(outcome) {
    outcome_rows <- which(Dogtrainsvm$OutcomeType == outcome)
    outcome_rows[sample.int(length(outcome_rows), sample(500:600, 1))]
  }
))
cat("The number of training data is ", length(choiceddata))
## The number of training data is 2199
cat("\n")

# Selected rows form the training set; everything else is the test set.
SVMtrain <- Dogtrainsvm[choiceddata, ]
SVMtest <- Dogtrainsvm[-choiceddata, ]

# Per-class counts inside the training subsample.
AdoL <- sum(SVMtrain$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(SVMtrain$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(SVMtrain$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(SVMtrain$OutcomeType == "Transfer", na.rm = TRUE)
cat("The number of Adoption in the training data is ", AdoL)
## The number of Adoption in the training data is 552
cat("\n")
# NOTE(review): the label below says "Return to transfer", but the value is the
# count of the "Return" class.
cat("The number of Return to transfer in the training data is", ReL)
## The number of Return to transfer in the training data is 552
cat("\n")
cat("The number of Euthanasia in the training data is", EuL)
## The number of Euthanasia in the training data is 509
cat("\n")
cat("The number of Transfer in the training data is", TrL)
## The number of Transfer in the training data is 586
cat("\n")

# Fit the SVM on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- svm(OutcomeType ~ ., data = SVMtrain)
toc <- Sys.time()
toc - tic
## Time difference of 1.644709 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
testEr <- predict(model, SVMtest[, -2])
trainEr <- predict(model, SVMtrain[, -2])
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = SVMtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 193 5 38 80
## Euthanasia 9 208 61 94
## Return 332 229 430 247
## Transfer 18 67 23 165
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = SVMtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 2172 2 246 440
## Euthanasia 41 135 505 522
## Return 3678 159 2819 1457
## Transfer 54 39 164 912
cat("\n")

# Misclassification rates: mismatches divided by the size of each set.
testErVa <- sum(SVMtest$OutcomeType != testEr, na.rm = TRUE) /
  (nrow(Dogtrainsvm) - length(choiceddata))
trainErVa <- sum(SVMtrain$OutcomeType != trainEr, na.rm = TRUE) /
  length(choiceddata)
cat("Training Error Rate =", trainErVa)
## Training Error Rate = 0.5470668
cat("\n")
cat("Testing Error Rate =", testErVa)
## Testing Error Rate = 0.5475459
cat("\n")
rm(Dogtrainsvm)
但是效果還是沒很好,懷疑是不是為了讓他balance,導致 training data 過少
method 2 : 補上安樂死的data 使他們balance 選出 10000 筆作為 training data
# SVM, method 2: oversample the Euthanasia class, then draw 10000 random rows
# of the enlarged data as the training set.
# NOTE(review): the original comment here read "remove DateTime & AnimalID",
# but no column is dropped in this chunk -- Dogtrain is copied unchanged.
Dogtrainsvm <- Dogtrain

# Class sizes before oversampling (only EuL is used before being overwritten).
AdoL <- sum(Dogtrainsvm$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(Dogtrainsvm$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(Dogtrainsvm$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(Dogtrainsvm$OutcomeType == "Transfer", na.rm = TRUE)

# Append six batches of randomly chosen Euthanasia rows, 500-600 rows each.
# NOTE(review): the duplication happens before the train/test split below, so
# copies of one record can end up on both sides and the Euthanasia test results
# are likely optimistic. Consider splitting first and oversampling SVMtrain
# only.
eu_rows <- which(Dogtrainsvm$OutcomeType == "Euthanasia")
eu_copies <- lapply(seq_len(6), function(batch) {
  Dogtrainsvm[eu_rows[sample.int(EuL, sample(500:600, 1))], ]
})
Dogtrainsvm <- do.call(rbind, c(list(Dogtrainsvm), eu_copies))

# Random 10000-row training set; the remaining rows are the test set.
choiceddata <- sample.int(nrow(Dogtrainsvm), 10000)
SVMtrain <- Dogtrainsvm[choiceddata, ]
SVMtest <- Dogtrainsvm[-choiceddata, ]

# Per-class counts inside the training set.
AdoL <- sum(SVMtrain$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(SVMtrain$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(SVMtrain$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(SVMtrain$OutcomeType == "Transfer", na.rm = TRUE)
cat("The number of Adoption in the training data is ", AdoL)
## The number of Adoption in the training data is 3473
cat("\n")
# NOTE(review): the label below says "Return to transfer", but the value is the
# count of the "Return" class.
cat("The number of Return to transfer in the training data is", ReL)
## The number of Return to transfer in the training data is 2248
cat("\n")
cat("The number of Euthanasia in the training data is", EuL)
## The number of Euthanasia in the training data is 2269
cat("\n")
cat("The number of Transfer in the training data is", TrL)
## The number of Transfer in the training data is 2010
cat("\n")

# Fit the SVM on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- svm(OutcomeType ~ ., data = SVMtrain)
toc <- Sys.time()
toc - tic
## Time difference of 30.6969 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
testEr <- predict(model, SVMtest[, -2])
trainEr <- predict(model, SVMtrain[, -2])
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = SVMtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 3084 649 1574 1020
## Euthanasia 266 1412 574 490
## Return 0 0 0 0
## Transfer 123 208 100 500
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = SVMtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 2665 577 1451 924
## Euthanasia 258 1200 495 471
## Return 0 0 0 0
## Transfer 101 197 92 512
cat("\n")

# Misclassification rates: mismatches divided by the size of each set.
testErVa <- sum(SVMtest$OutcomeType != testEr, na.rm = TRUE) /
  (nrow(Dogtrainsvm) - length(choiceddata))
trainErVa <- sum(SVMtrain$OutcomeType != trainEr, na.rm = TRUE) /
  length(choiceddata)
cat("Training Error Rate =", trainErVa)
## Training Error Rate = 0.5004
cat("\n")
cat("Testing Error Rate =", testErVa)
## Testing Error Rate = 0.5105669
cat("\n")
rm(Dogtrainsvm)
method 3 : 補上安樂死的data 使他們balance。並使training data也balance NOT FIX!!!!!!!!!!!!!!!!!!
# SVM, method 3: oversample the Euthanasia class, then draw a roughly
# class-balanced training set (2000-2500 rows per outcome) from the enlarged
# data.
# NOTE(review): the original comment here read "remove DateTime & AnimalID",
# but no column is dropped in this chunk -- Dogtrain is copied unchanged.
Dogtrainsvm <- Dogtrain

# Class sizes before oversampling (only EuL is used before being overwritten).
AdoL <- sum(Dogtrainsvm$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(Dogtrainsvm$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(Dogtrainsvm$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(Dogtrainsvm$OutcomeType == "Transfer", na.rm = TRUE)

# Append six batches of randomly chosen Euthanasia rows, 500-600 rows each.
# NOTE(review): the duplication happens before the train/test split below, so
# copies of one record can end up on both sides and the Euthanasia test results
# are likely optimistic. Consider splitting first and oversampling SVMtrain
# only.
eu_rows <- which(Dogtrainsvm$OutcomeType == "Euthanasia")
eu_copies <- lapply(seq_len(6), function(batch) {
  Dogtrainsvm[eu_rows[sample.int(EuL, sample(500:600, 1))], ]
})
Dogtrainsvm <- do.call(rbind, c(list(Dogtrainsvm), eu_copies))

# Class sizes after oversampling, printed as they are computed.
AdoL <- sum(Dogtrainsvm$OutcomeType == "Adoption", na.rm = TRUE)
AdoL
## [1] 6497
ReL <- sum(Dogtrainsvm$OutcomeType == "Return", na.rm = TRUE)
ReL
## [1] 4286
EuL <- sum(Dogtrainsvm$OutcomeType == "Euthanasia", na.rm = TRUE)
EuL
## [1] 4313
TrL <- sum(Dogtrainsvm$OutcomeType == "Transfer", na.rm = TRUE)
TrL
## [1] 3917

# For each outcome, draw a random size between 2000 and 2500 and then that many
# row indices without replacement. Outcomes are visited in the same order as
# before and each one makes the same two random draws (size, then rows).
choiceddata <- unlist(lapply(
  c("Adoption", "Return", "Euthanasia", "Transfer"),
  function(outcome) {
    outcome_rows <- which(Dogtrainsvm$OutcomeType == outcome)
    outcome_rows[sample.int(length(outcome_rows), sample(2000:2500, 1))]
  }
))
SVMtrain <- Dogtrainsvm[choiceddata, ]
SVMtest <- Dogtrainsvm[-choiceddata, ]

# Per-class counts inside the training set, printed as they are computed.
AdoL <- sum(SVMtrain$OutcomeType == "Adoption", na.rm = TRUE)
AdoL
## [1] 2286
ReL <- sum(SVMtrain$OutcomeType == "Return", na.rm = TRUE)
ReL
## [1] 2475
EuL <- sum(SVMtrain$OutcomeType == "Euthanasia", na.rm = TRUE)
EuL
## [1] 2202
TrL <- sum(SVMtrain$OutcomeType == "Transfer", na.rm = TRUE)
TrL
## [1] 2073

# Fit the SVM on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- svm(OutcomeType ~ ., data = SVMtrain)
toc <- Sys.time()
toc - tic
## Time difference of 25.44123 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
testEr <- predict(model, SVMtest[, -2])
trainEr <- predict(model, SVMtrain[, -2])
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = SVMtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 812 15 159 275
## Euthanasia 25 1006 347 377
## Return 1431 995 1883 896
## Transfer 18 186 86 525
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = SVMtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 1553 19 125 245
## Euthanasia 37 936 253 356
## Return 2578 945 1364 801
## Transfer 43 211 69 442
cat("\n")

# Misclassification rates: mismatches divided by the size of each set.
testErVa <- sum(SVMtest$OutcomeType != testEr, na.rm = TRUE) /
  (nrow(Dogtrainsvm) - length(choiceddata))
trainErVa <- sum(SVMtrain$OutcomeType != trainEr, na.rm = TRUE) /
  length(choiceddata)
cat("Training Error Rate =", trainErVa)
## Training Error Rate = 0.5323152
cat("\n")
cat("Testing Error Rate =", testErVa)
## Testing Error Rate = 0.5695099
cat("\n")
rm(Dogtrainsvm)
method 1 : Training data : 10000 ; Tree 600 & not adjust training data
# Random forest, method 1: 10000 random training rows, 600 trees, no
# rebalancing of the outcome classes.
DogtrainRF <- Dogtrain
choiceddata <- sample.int(nrow(DogtrainRF), 10000)
RFtrain <- DogtrainRF[choiceddata, ]
RFtest <- DogtrainRF[-choiceddata, ]

# Fit the forest on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- randomForest(OutcomeType ~ ., data = RFtrain, ntree = 600)
toc <- Sys.time()
toc - tic
## Time difference of 46.98847 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
# The call order (training set first) is kept as in the original.
trainEr <- predict(model, RFtrain[, -2])
testEr <- predict(model, RFtest[, -2])

# Error-rate curves against the number of trees, one line per err.rate column.
plot(model, ylim = c(0, 1))
legend("topright", colnames(model$err.rate), col = 1:6, fill = 1:6)
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = RFtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 3761 153 1455 1149
## Euthanasia 1 49 4 2
## Return 383 177 1069 342
## Transfer 67 137 218 1033
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = RFtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 1934 98 954 613
## Euthanasia 0 5 7 5
## Return 309 108 428 219
## Transfer 42 117 151 554
cat("\n")

# Misclassification rates: mismatches divided by the size of each set.
trainErVa <- sum(RFtrain$OutcomeType != trainEr, na.rm = TRUE) /
  length(choiceddata)
testErVa <- sum(RFtest$OutcomeType != testEr, na.rm = TRUE) /
  (nrow(DogtrainRF) - length(choiceddata))
cat("Training Error Rate =", trainErVa)
## Training Error Rate = 0.4088
cat("\n")
cat("Testing Error Rate =", testErVa)
## Testing Error Rate = 0.4731241
cat("\n")
#rm(DogtrainRF)
method 2 : 使training data中,各個OutcomeType的個數相等。 各個OutcomeType各取 500~600 隨機一個個數 分成 600 個 Tree
# Random forest, method 2: train 600 trees on a roughly class-balanced
# subsample (500-600 rows per outcome) and evaluate on every remaining row.
DogtrainRF <- Dogtrain

# Class sizes in the full dog data.
AdoL <- sum(DogtrainRF$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(DogtrainRF$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(DogtrainRF$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(DogtrainRF$OutcomeType == "Transfer", na.rm = TRUE)

# For each outcome, draw a random size between 500 and 600 and then that many
# row indices without replacement. Outcomes are visited in the same order as
# before and each one makes the same two random draws (size, then rows).
choiceddata <- unlist(lapply(
  c("Adoption", "Return", "Euthanasia", "Transfer"),
  function(outcome) {
    outcome_rows <- which(DogtrainRF$OutcomeType == outcome)
    outcome_rows[sample.int(length(outcome_rows), sample(500:600, 1))]
  }
))
RFtrain <- DogtrainRF[choiceddata, ]
RFtest <- DogtrainRF[-choiceddata, ]

# Fit the forest on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- randomForest(OutcomeType ~ ., data = RFtrain, ntree = 600)
toc <- Sys.time()
toc - tic
## Time difference of 21.12418 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
# The call order (test set first) is kept as in the original.
testEr <- predict(model, RFtest[, -2])
trainEr <- predict(model, RFtrain[, -2])

# Error-rate curves against the number of trees, one line per err.rate column.
plot(model, ylim = c(0, 1))
legend("topright", colnames(model$err.rate), col = 1:6, fill = 1:6)
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = RFtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 383 28 57 92
## Euthanasia 45 405 99 71
## Return 83 61 338 89
## Transfer 11 54 26 302
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = RFtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 3112 28 862 797
## Euthanasia 615 161 1024 789
## Return 1840 65 1520 750
## Transfer 408 42 360 1027
cat("\n")

# Misclassification rates: mismatches divided by the size of each set.
trainErVa <- sum(RFtrain$OutcomeType != trainEr, na.rm = TRUE) /
  length(choiceddata)
testErVa <- sum(RFtest$OutcomeType != testEr, na.rm = TRUE) /
  (nrow(DogtrainRF) - length(choiceddata))
cat("Training Error Rate =", trainErVa)
## Training Error Rate = 0.3339552
cat("\n")
cat("Testing Error Rate =", testErVa)
## Testing Error Rate = 0.5656716
cat("\n")
rm(DogtrainRF)
method 3 : Training data : 10000 ; Tree : 600 隨機選出10000筆的 training data 後,再使 training data 中的 OutcomeType balance。
# Random forest, method 3: split off 10000 random training rows first, then
# oversample the smaller outcome classes inside the training set only, so
# RFtest contains no duplicated rows.
DogtrainRF <- Dogtrain
choiceddata <- sample.int(nrow(DogtrainRF), 10000)
RFtrain <- DogtrainRF[choiceddata, ]
RFtest <- DogtrainRF[-choiceddata, ]

# Class sizes in the un-augmented training set; they bound the index draws.
AdoL <- sum(RFtrain$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(RFtrain$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(RFtrain$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(RFtrain$OutcomeType == "Transfer", na.rm = TRUE)

# Oversampling plan, applied class by class in this order. Each entry of
# `sizes` is one appended batch: its size is drawn from the given range, and
# that many rows of the class are drawn without replacement. `pool` is the
# pre-oversampling class size, which equals the number of candidate rows
# because each step only appends rows of its own class.
for (plan in list(
  list(outcome = "Euthanasia", pool = EuL, sizes = rep(list(400:500), 8)),
  list(outcome = "Transfer", pool = TrL, sizes = rep(list(500:600), 3)),
  list(outcome = "Return", pool = ReL,
       sizes = list(500:600, 500:600, 300:400))
)) {
  class_rows <- which(RFtrain$OutcomeType == plan$outcome)
  copies <- lapply(plan$sizes, function(size_range) {
    RFtrain[class_rows[sample.int(plan$pool, sample(size_range, 1))], ]
  })
  RFtrain <- do.call(rbind, c(list(RFtrain), copies))
}

# Per-class counts after oversampling.
AdoL <- sum(RFtrain$OutcomeType == "Adoption", na.rm = TRUE)
ReL <- sum(RFtrain$OutcomeType == "Return", na.rm = TRUE)
EuL <- sum(RFtrain$OutcomeType == "Euthanasia", na.rm = TRUE)
TrL <- sum(RFtrain$OutcomeType == "Transfer", na.rm = TRUE)
cat("The number of Adoption in the training data is ", AdoL)
## The number of Adoption in the training data is 4136
cat("\n")
# NOTE(review): the label below says "Return to transfer", but the value is the
# count of the "Return" class.
cat("The number of Return to transfer in the training data is", ReL)
## The number of Return to transfer in the training data is 4122
cat("\n")
cat("The number of Euthanasia in the training data is", EuL)
## The number of Euthanasia in the training data is 4122
cat("\n")
cat("The number of Transfer in the training data is", TrL)
## The number of Transfer in the training data is 4181
cat("\n")

# Fit the forest on all other columns and report the wall-clock fitting time.
tic <- Sys.time()
model <- randomForest(OutcomeType ~ ., data = RFtrain, ntree = 600)
toc <- Sys.time()
toc - tic
## Time difference of 48.91821 secs

# Predict with column 2 removed (presumably the OutcomeType column -- confirm).
# The call order (test set first) is kept as in the original.
testEr <- predict(model, RFtest[, -2])
trainEr <- predict(model, RFtrain[, -2])

# Error-rate curves against the number of trees, one line per err.rate column.
plot(model, ylim = c(0, 1))
legend("topright", colnames(model$err.rate), col = 1:6, fill = 1:6)
cat("The confusion Matrix of training data.")
## The confusion Matrix of training data.
confusionMatrix(data = trainEr, reference = RFtrain$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 2339 136 568 848
## Euthanasia 396 3264 912 752
## Return 1321 500 2450 1054
## Transfer 80 222 192 1527
cat("\n")
cat("The confusion Matrix of testing data.")
## The confusion Matrix of testing data.
confusionMatrix(data = testEr, reference = RFtest$OutcomeType)$table
## Reference
## Prediction Adoption Euthanasia Return Transfer
## Adoption 1196 20 324 298
## Euthanasia 263 161 375 302
## Return 846 72 710 352
## Transfer 56 42 114 413
cat("\n")
# Misclassification rates for random forest method 3. Each rate is the number
# of mismatches divided by the number of rows that were actually predicted.
# RFtrain was enlarged by oversampling after the split, so its size is
# nrow(RFtrain), not length(choiceddata); dividing by the latter overstated the
# training error. The test denominator is unchanged in value, because RFtest
# holds exactly the rows not listed in choiceddata.
testErVa <- sum(RFtest$OutcomeType != testEr, na.rm = TRUE) / nrow(RFtest)
trainErVa <- sum(RFtrain$OutcomeType != trainEr, na.rm = TRUE) / nrow(RFtrain)
cat("Training Error Rate =",trainErVa)
# NOTE(review): the value recorded below was produced with the old denominator
# (10000). The recorded training confusion matrix has 6981 off-diagonal
# predictions out of 16561 rows, i.e. a training error of about 0.4215.
## Training Error Rate = 0.6981
cat("\n")
cat("Testing Error Rate =",testErVa)
## Testing Error Rate = 0.5526696
cat("\n")
rm(DogtrainRF)