This report is designed to predict the brand preferences of customers with data from the Blackwell Electronics Analytical Team.
After using statistical techniques such as decision trees and KNN, we can say that 3106 customers would choose Sony and 1894 customers would choose Acer.
In the following analysis, I used the salary and age variables to make predictions and classifications.
# Session setup ----
# Deliberately no rm(list = ls()): it only removes workspace objects (attached
# packages and options persist) and destroys whatever the caller had loaded.
# Restart R when a clean session is needed.

# Load Data
# Directory that holds the survey workbook. Switch to it only when it exists,
# so the script does not abort on other machines; otherwise the workbook is
# looked up relative to the current working directory.
# (No trailing slash: directory checks on Windows may reject one.)
data_dir <- "C:/Users/xzhenning/Documents/R/spring 2019"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Data directory not found, staying in: ", getwd())
}

# Load Libraries: p_load can install, load, and update packages
# Uncomment to bootstrap pacman itself on a fresh machine:
# if (!requireNamespace("pacman", quietly = TRUE)) {
#   install.packages("pacman")
# }
pacman::p_load(caret, readxl, ggplot2, plyr, clusterSim, party)
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/3.5:
## cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/3.5/PACKAGES'
## package 'clusterSim' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\xzhenning\AppData\Local\Temp\Rtmpe2ywet\downloaded_packages
##
## clusterSim installed
## Warning: package 'clusterSim' was built under R version 3.5.3
## Warning in pacman::p_load(caret, readxl, ggplot2, plyr, clusterSim, party): Failed to install/load:
## clusterSim
# Read the survey responses from the second sheet of the workbook
Survey <- read_excel(
  "Survey_Key_and_Complete_Responses_excel.xlsx",
  sheet = 2
)
## readxl works best with a newer version of the tibble package.
## You currently have tibble v1.4.2.
## Falling back to column name repair from tibble <= v1.4.2.
## Message displays once per session.
# Data types: education level becomes an ordered factor; car and zipcode
# become plain (unordered) factors
Survey$elevel <- as.ordered(Survey$elevel)
for (factor_col in c("car", "zipcode")) {
  Survey[[factor_col]] <- as.factor(Survey[[factor_col]])
}
# Brand: recode 0 -> "Acer" and 1 -> "Sony", then store the labels as a factor
brand_label <- as.character(Survey$brand)
brand_label[brand_label == "0"] <- "Acer"
brand_label[brand_label == "1"] <- "Sony"
Survey$brand <- as.factor(brand_label)
# Checked by the author: sum(is.na(Survey)) is 0
# Relation brand-variables ----
# Draw one chart per column of Survey, filled by brand:
#   numeric column     -> stacked histogram with 10 bins
#   non-numeric column -> bar chart showing brand proportions per level
# The title line printed before each plot identifies the column.
for (i in seq_along(Survey)) {
  col_name <- colnames(Survey)[i]
  # Only the geometry differs between the two cases; everything else is shared
  if (is.numeric(Survey[[i]])) {
    chart_layer <- geom_histogram(color = "black", bins = 10)
  } else {
    chart_layer <- geom_bar(position = "fill")
  }
  p1 <- ggplot(Survey, aes(x = Survey[[i]], fill = brand)) +
    chart_layer +
    labs(x = col_name)
  print(paste("Plot Brand-", col_name))
  # Print inside the loop: the aesthetic refers to `i`, so the plot must be
  # rendered while `i` still points at this column
  print(p1)
}
## [1] "Plot Brand- salary"
## [1] "Plot Brand- age"
## [1] "Plot Brand- elevel"
## [1] "Plot Brand- car"
## [1] "Plot Brand- zipcode"
## [1] "Plot Brand- credit"
## [1] "Plot Brand- brand"
# Removing redundancy ----
# Pairwise association screen between all survey columns, stored in
# MatrixTest (rounded to 5 digits). NOTE: the entries are NOT on one scale:
#   numeric  vs numeric      -> correlation coefficient from cor()
#   numeric  vs non-numeric  -> p-value of a one-way ANOVA (numeric = response)
#   non-num. vs non-numeric  -> p-value of chisq.test()
# so a cell must be read according to the types of its row and column.

# Association statistic for a single pair of columns, as described above.
pair_association <- function(x, y) {
  x_num <- is.numeric(x)
  y_num <- is.numeric(y)
  if (x_num && y_num) {
    return(cor(x, y))
  }
  if (!x_num && !y_num) {
    return(chisq.test(x, y)$p.value)
  }
  # Exactly one of the two is numeric: it is the ANOVA response, the other
  # column is the grouping variable.
  if (x_num) {
    response <- x
    grouping <- y
  } else {
    response <- y
    grouping <- x
  }
  # First row of the ANOVA table is the grouping term; take its p-value
  summary(aov(response ~ grouping))[[1]][["Pr(>F)"]][1]
}

# Display labels, in the column order of Survey. matrix() fails loudly if the
# number of columns in Survey does not match the number of labels.
var_labels <- c(
  "Salary", "Age", "EducationalLevel", "Car", "ZipCode", "Credit", "Brand"
)
MatrixTest <- matrix(
  NA_real_,
  nrow = ncol(Survey),
  ncol = ncol(Survey),
  dimnames = list(var_labels, var_labels)
)
for (i in seq_along(Survey)) {
  for (j in seq_along(Survey)) {
    MatrixTest[i, j] <- pair_association(Survey[[i]], Survey[[j]])
  }
}
MatrixTest <- round(MatrixTest, digits = 5)
# _Create training and testing sets_
# Fix the RNG seed so the split is reproducible, then let createDataPartition
# pick 75% of the rows (sampling on Survey$brand) for training; the rows that
# were not picked form the 25% test set.
set.seed(314)
inTraining <- createDataPartition(y = Survey$brand, p = 0.75, list = FALSE)
train <- Survey[inTraining, ]
test <- Survey[-inTraining, ]
# _Create a predictive model: KNN_
# Resampling scheme shared by all KNN fits: 10-fold cross-validation,
# repeated 5 times
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

# Three KNN candidates that differ only in their predictors. Every fit centres
# and scales its inputs and uses tuneLength = 5.
# Model 1: all available predictors
KNNfit1 <- train(
  brand ~ .,
  data = train,
  method = "knn",
  trControl = fitControl,
  preProcess = c("center", "scale"),
  tuneLength = 5
)
# Model 2: salary only
KNNfit2 <- train(
  brand ~ salary,
  data = train,
  method = "knn",
  trControl = fitControl,
  preProcess = c("center", "scale"),
  tuneLength = 5
)
# Model 3: salary and age
KNNfit3 <- train(
  brand ~ salary + age,
  data = train,
  method = "knn",
  trControl = fitControl,
  preProcess = c("center", "scale"),
  tuneLength = 5
)
# Predictors actually used by each KNN fit (factors appear as dummy columns)
predictors(KNNfit1)
## [1] "salary" "age" "elevel.L" "elevel.Q" "elevel.C" "elevel^4"
## [7] "car2" "car3" "car4" "car5" "car6" "car7"
## [13] "car8" "car9" "car10" "car11" "car12" "car13"
## [19] "car14" "car15" "car16" "car17" "car18" "car19"
## [25] "car20" "zipcode1" "zipcode2" "zipcode3" "zipcode4" "zipcode5"
## [31] "zipcode6" "zipcode7" "zipcode8" "credit"
predictors(KNNfit2)
## [1] "salary"
predictors(KNNfit3)
## [1] "salary" "age"
# Predict brand for the held-out test rows with each fit
testPredKNN1 <- predict(object = KNNfit1, newdata = test)
testPredKNN2 <- predict(object = KNNfit2, newdata = test)
testPredKNN3 <- predict(object = KNNfit3, newdata = test)
# Performance measurement on the test set: accuracy and kappa
postResample(pred = testPredKNN1, obs = test$brand)
## Accuracy Kappa
## 0.6310524 0.1616260
postResample(pred = testPredKNN2, obs = test$brand)
## Accuracy Kappa
## 0.7034814 0.3693888
postResample(pred = testPredKNN3, obs = test$brand)
## Accuracy Kappa
## 0.9179672 0.8262601
# For each fit: predicted versus actual brand, then the fit's tuning profile
plot(testPredKNN1, test$brand)
plot(KNNfit1)

plot(testPredKNN2, test$brand)
plot(KNNfit2)

plot(testPredKNN3, test$brand)
plot(KNNfit3)
#### CREATE A PREDICTIVE MODEL: Decision Tree ####
# NOTE: method "parRF" is caret's parallel random forest (an ensemble of
# trees), not a single decision tree. No parallel backend is registered in
# this script, so the fits run sequentially.
# Resampling scheme for the forests: 10-fold cross-validation, repeated twice
fitControldt <- trainControl(method = "repeatedcv", number = 10, repeats = 2)

# Forest on all predictors: 50 trees per fit, progress traced every 10 trees
DTfit1 <- train(
  brand ~ .,
  data = train,
  method = "parRF",
  trControl = fitControldt,
  ntree = 50,
  do.trace = 10
)
## Warning: executing %dopar% sequentially: no parallel backend registered
## ntree OOB 1 2
## 10: 35.38% 68.44% 15.30%
## 20: 35.50% 83.04% 6.56%
## 30: 36.33% 90.41% 3.41%
## 40: 36.39% 93.38% 1.69%
## 50: 36.55% 94.52% 1.26%
## ntree OOB 1 2
## 10: 9.83% 11.86% 8.59%
## 20: 8.42% 10.14% 7.37%
## 30: 7.93% 9.67% 6.86%
## 40: 7.85% 9.55% 6.82%
## 50: 7.97% 9.75% 6.89%
## ntree OOB 1 2
## 10: 9.79% 11.76% 8.59%
## 20: 9.07% 11.28% 7.72%
## 30: 8.34% 10.49% 7.03%
## 40: 8.12% 10.26% 6.82%
## 50: 8.25% 10.42% 6.94%
## ntree OOB 1 2
## 10: 36.82% 77.98% 11.74%
## 20: 36.86% 89.86% 4.62%
## 30: 37.11% 94.52% 2.14%
## 40: 37.34% 96.67% 1.22%
## 50: 37.09% 96.87% 0.69%
## ntree OOB 1 2
## 10: 10.44% 12.47% 9.21%
## 20: 8.65% 10.61% 7.46%
## 30: 8.32% 10.37% 7.08%
## 40: 8.46% 10.80% 7.03%
## 50: 8.30% 10.29% 7.08%
## ntree OOB 1 2
## 10: 9.61% 12.23% 8.02%
## 20: 9.04% 11.51% 7.53%
## 30: 8.72% 11.27% 7.17%
## 40: 8.81% 11.15% 7.39%
## 50: 8.72% 10.88% 7.41%
## ntree OOB 1 2
## 10: 36.01% 78.77% 9.90%
## 20: 36.53% 88.49% 4.91%
## 30: 35.54% 89.15% 2.91%
## 40: 36.23% 92.13% 2.22%
## 50: 36.17% 92.68% 1.79%
## ntree OOB 1 2
## 10: 10.41% 12.61% 9.07%
## 20: 8.86% 10.38% 7.93%
## 30: 8.61% 10.57% 7.41%
## 40: 8.40% 10.30% 7.24%
## 50: 8.31% 10.18% 7.17%
## ntree OOB 1 2
## 10: 9.35% 11.39% 8.10%
## 20: 8.99% 11.35% 7.55%
## 30: 9.07% 11.59% 7.53%
## 40: 8.92% 11.32% 7.46%
## 50: 8.93% 11.12% 7.60%
## ntree OOB 1 2
## 10: 35.50% 78.02% 9.69%
## 20: 35.24% 84.93% 5.00%
## 30: 35.57% 87.47% 3.98%
## 40: 35.25% 88.21% 3.03%
## 50: 35.62% 91.19% 1.81%
## ntree OOB 1 2
## 10: 9.90% 12.73% 8.18%
## 20: 8.93% 11.51% 7.36%
## 30: 8.41% 10.81% 6.96%
## 40: 8.15% 10.34% 6.81%
## 50: 7.98% 10.10% 6.70%
## ntree OOB 1 2
## 10: 9.32% 11.51% 7.99%
## 20: 8.84% 11.08% 7.48%
## 30: 8.74% 10.69% 7.55%
## 40: 8.75% 10.69% 7.58%
## 50: 8.52% 10.38% 7.39%
## ntree OOB 1 2
## 10: 37.13% 74.90% 14.11%
## 20: 35.64% 81.56% 7.70%
## 30: 36.26% 89.15% 4.07%
## 40: 36.35% 91.66% 2.69%
## 50: 36.26% 92.05% 2.31%
## ntree OOB 1 2
## 10: 10.32% 12.29% 9.12%
## 20: 9.08% 11.47% 7.63%
## 30: 8.74% 10.92% 7.41%
## 40: 8.49% 10.81% 7.08%
## 50: 8.50% 10.57% 7.24%
## ntree OOB 1 2
## 10: 9.67% 12.00% 8.25%
## 20: 9.21% 11.90% 7.58%
## 30: 8.92% 11.35% 7.43%
## 40: 8.72% 10.81% 7.46%
## 50: 8.64% 11.00% 7.20%
## ntree OOB 1 2
## 10: 36.36% 81.35% 9.04%
## 20: 36.70% 91.23% 3.50%
## 30: 37.26% 94.60% 2.36%
## 40: 37.05% 95.14% 1.69%
## 50: 37.07% 96.16% 1.10%
## ntree OOB 1 2
## 10: 9.94% 11.37% 9.07%
## 20: 8.76% 10.77% 7.53%
## 30: 8.22% 9.95% 7.17%
## 40: 8.30% 10.18% 7.15%
## 50: 7.99% 9.95% 6.79%
## ntree OOB 1 2
## 10: 9.54% 11.38% 8.41%
## 20: 8.65% 10.97% 7.24%
## 30: 8.64% 10.45% 7.53%
## 40: 8.68% 10.65% 7.48%
## 50: 8.53% 10.61% 7.27%
## ntree OOB 1 2
## 10: 34.32% 70.36% 12.41%
## 20: 34.61% 79.71% 7.17%
## 30: 36.04% 88.29% 4.24%
## 40: 35.98% 90.88% 2.57%
## 50: 36.07% 92.52% 1.72%
## ntree OOB 1 2
## 10: 9.63% 12.11% 8.12%
## 20: 8.64% 10.81% 7.32%
## 30: 8.30% 10.22% 7.12%
## 40: 8.27% 10.02% 7.20%
## 50: 8.15% 9.79% 7.15%
## ntree OOB 1 2
## 10: 9.71% 12.44% 8.06%
## 20: 9.33% 12.10% 7.65%
## 30: 9.02% 11.32% 7.62%
## 40: 9.05% 11.28% 7.70%
## 50: 8.77% 10.96% 7.43%
## ntree OOB 1 2
## 10: 38.40% 86.77% 8.87%
## 20: 37.06% 90.25% 4.67%
## 30: 37.31% 93.50% 3.12%
## 40: 37.00% 93.46% 2.64%
## 50: 37.14% 95.61% 1.55%
## ntree OOB 1 2
## 10: 9.68% 12.08% 8.22%
## 20: 8.61% 10.88% 7.22%
## 30: 8.38% 10.73% 6.96%
## 40: 8.40% 10.65% 7.03%
## 50: 8.06% 10.06% 6.84%
## ntree OOB 1 2
## 10: 9.28% 10.86% 8.31%
## 20: 9.02% 10.88% 7.89%
## 30: 8.61% 10.81% 7.27%
## 40: 8.49% 10.53% 7.24%
## 50: 8.41% 10.69% 7.03%
## ntree OOB 1 2
## 10: 37.42% 79.45% 11.93%
## 20: 37.24% 87.78% 6.50%
## 30: 36.27% 87.91% 4.84%
## 40: 36.46% 92.76% 2.19%
## 50: 37.10% 95.34% 1.64%
## ntree OOB 1 2
## 10: 10.41% 12.82% 8.96%
## 20: 8.53% 9.94% 7.67%
## 30: 8.44% 9.98% 7.51%
## 40: 8.55% 10.10% 7.60%
## 50: 8.43% 9.98% 7.48%
## ntree OOB 1 2
## 10: 9.55% 11.74% 8.22%
## 20: 9.09% 11.00% 7.93%
## 30: 8.86% 11.04% 7.53%
## 40: 8.63% 11.12% 7.12%
## 50: 8.43% 10.76% 7.01%
## ntree OOB 1 2
## 10: 37.42% 82.92% 9.78%
## 20: 36.93% 91.19% 3.91%
## 30: 36.72% 91.90% 3.15%
## 40: 36.31% 92.13% 2.34%
## 50: 36.85% 94.68% 1.67%
## ntree OOB 1 2
## 10: 10.24% 12.92% 8.60%
## 20: 8.62% 10.61% 7.41%
## 30: 8.44% 10.45% 7.22%
## 40: 8.38% 10.53% 7.08%
## 50: 8.19% 10.69% 6.67%
## ntree OOB 1 2
## 10: 9.85% 12.10% 8.48%
## 20: 9.23% 11.87% 7.62%
## 30: 9.15% 11.86% 7.51%
## 40: 8.99% 11.47% 7.48%
## 50: 8.64% 10.73% 7.36%
## ntree OOB 1 2
## 10: 35.86% 77.48% 10.39%
## 20: 36.24% 87.12% 5.27%
## 30: 36.50% 91.35% 3.12%
## 40: 36.76% 93.46% 2.24%
## 50: 36.77% 93.70% 2.12%
## ntree OOB 1 2
## 10: 10.14% 12.19% 8.89%
## 20: 8.73% 10.69% 7.53%
## 30: 8.43% 10.06% 7.44%
## 40: 8.27% 9.83% 7.32%
## 50: 8.16% 10.02% 7.03%
## ntree OOB 1 2
## 10: 9.59% 11.70% 8.31%
## 20: 8.76% 10.49% 7.70%
## 30: 8.58% 10.18% 7.60%
## 40: 8.90% 10.73% 7.79%
## 50: 8.65% 10.49% 7.53%
## ntree OOB 1 2
## 10: 37.80% 91.01% 5.32%
## 20: 35.82% 87.35% 4.46%
## 30: 35.36% 88.68% 2.91%
## 40: 36.22% 93.11% 1.60%
## 50: 36.65% 95.26% 0.98%
## ntree OOB 1 2
## 10: 10.38% 12.45% 9.12%
## 20: 8.95% 11.47% 7.41%
## 30: 8.31% 10.30% 7.10%
## 40: 8.09% 9.98% 6.93%
## 50: 7.92% 9.79% 6.79%
## ntree OOB 1 2
## 10: 9.50% 11.53% 8.26%
## 20: 8.65% 10.77% 7.36%
## 30: 8.61% 10.92% 7.20%
## 40: 8.68% 11.04% 7.24%
## 50: 8.32% 10.77% 6.84%
## ntree OOB 1 2
## 10: 37.13% 83.60% 8.93%
## 20: 35.79% 85.59% 5.48%
## 30: 35.94% 89.78% 3.17%
## 40: 36.65% 94.44% 1.48%
## 50: 36.53% 94.48% 1.26%
## ntree OOB 1 2
## 10: 10.18% 12.79% 8.59%
## 20: 8.46% 11.08% 6.86%
## 30: 8.34% 11.12% 6.65%
## 40: 8.09% 10.14% 6.84%
## 50: 8.06% 9.95% 6.91%
## ntree OOB 1 2
## 10: 9.53% 11.50% 8.33%
## 20: 9.04% 11.20% 7.72%
## 30: 8.55% 10.57% 7.31%
## 40: 8.43% 10.96% 6.89%
## 50: 8.25% 10.69% 6.77%
## ntree OOB 1 2
## 10: 37.44% 80.27% 11.25%
## 20: 34.30% 78.75% 7.24%
## 30: 35.23% 86.93% 3.76%
## 40: 34.58% 86.81% 2.79%
## 50: 35.41% 89.98% 2.19%
## ntree OOB 1 2
## 10: 9.63% 11.74% 8.36%
## 20: 8.58% 10.65% 7.32%
## 30: 8.15% 10.29% 6.84%
## 40: 8.12% 10.02% 6.96%
## 50: 8.09% 10.14% 6.84%
## ntree OOB 1 2
## 10: 10.06% 12.41% 8.64%
## 20: 9.41% 12.72% 7.39%
## 30: 9.17% 12.13% 7.36%
## 40: 8.99% 11.86% 7.24%
## 50: 8.75% 11.23% 7.24%
## ntree OOB 1 2
## 10: 36.94% 77.92% 11.93%
## 20: 36.98% 90.21% 4.58%
## 30: 36.68% 93.15% 2.31%
## 40: 36.61% 94.64% 1.29%
## 50: 36.77% 95.34% 1.12%
## ntree OOB 1 2
## 10: 10.68% 13.17% 9.16%
## 20: 9.35% 11.75% 7.89%
## 30: 8.59% 10.77% 7.27%
## 40: 8.76% 11.24% 7.24%
## 50: 8.55% 10.69% 7.24%
## ntree OOB 1 2
## 10: 9.97% 12.19% 8.63%
## 20: 9.56% 11.71% 8.25%
## 30: 9.10% 11.04% 7.91%
## 40: 9.13% 11.28% 7.82%
## 50: 9.19% 11.39% 7.84%
## ntree OOB 1 2
## 10: 33.42% 64.79% 14.39%
## 20: 33.98% 75.69% 8.60%
## 30: 35.28% 86.45% 4.15%
## 40: 35.09% 86.88% 3.57%
## 50: 35.55% 90.25% 2.26%
## ntree OOB 1 2
## 10: 10.21% 12.15% 9.03%
## 20: 9.17% 11.59% 7.70%
## 30: 8.53% 10.92% 7.08%
## 40: 8.52% 10.61% 7.24%
## 50: 8.52% 10.49% 7.31%
## ntree OOB 1 2
## 10: 9.50% 10.99% 8.60%
## 20: 8.89% 10.81% 7.72%
## 30: 8.78% 11.00% 7.43%
## 40: 8.77% 11.00% 7.41%
## 50: 8.67% 11.00% 7.24%
## ntree OOB 1 2
## 10: 36.75% 83.80% 8.10%
## 20: 37.25% 93.19% 3.19%
## 30: 37.44% 95.62% 2.03%
## 40: 37.46% 97.26% 1.05%
## 50: 36.72% 94.60% 1.48%
## ntree OOB 1 2
## 10: 9.99% 11.95% 8.79%
## 20: 8.81% 10.68% 7.67%
## 30: 8.47% 10.49% 7.24%
## 40: 8.25% 10.29% 7.01%
## 50: 8.25% 10.14% 7.10%
## ntree OOB 1 2
## 10: 10.11% 12.63% 8.58%
## 20: 9.14% 11.47% 7.72%
## 30: 8.81% 11.04% 7.46%
## 40: 8.87% 11.23% 7.43%
## 50: 8.86% 11.27% 7.39%
## ntree OOB 1 2
## 10: 38.24% 85.38% 9.61%
## 20: 37.40% 93.54% 3.22%
## 30: 37.13% 94.95% 1.93%
## 40: 37.07% 95.46% 1.53%
## 50: 37.44% 96.83% 1.29%
## ntree OOB 1 2
## 10: 9.89% 12.34% 8.40%
## 20: 8.37% 10.18% 7.27%
## 30: 8.37% 10.88% 6.84%
## 40: 7.96% 10.10% 6.65%
## 50: 7.84% 9.67% 6.72%
## ntree OOB 1 2
## 10: 9.46% 12.17% 7.80%
## 20: 9.07% 12.02% 7.27%
## 30: 8.65% 11.47% 6.94%
## 40: 8.31% 10.81% 6.79%
## 50: 8.58% 11.12% 7.03%
## ntree OOB 1 2
## 10: 35.83% 71.30% 14.23%
## 20: 35.51% 81.52% 7.51%
## 30: 35.24% 84.26% 5.41%
## 40: 35.68% 89.12% 3.17%
## 50: 36.17% 90.68% 3.00%
## ntree OOB 1 2
## 10: 9.86% 12.18% 8.45%
## 20: 8.52% 10.53% 7.29%
## 30: 8.46% 10.02% 7.51%
## 40: 8.34% 10.06% 7.29%
## 50: 8.19% 9.98% 7.10%
## ntree OOB 1 2
## 10: 9.23% 12.10% 7.48%
## 20: 8.70% 11.28% 7.12%
## 30: 8.74% 11.04% 7.34%
## 40: 8.52% 10.69% 7.20%
## 50: 8.38% 10.69% 6.98%
## ntree OOB 1 2
## 10: 36.63% 79.50% 10.54%
## 20: 35.93% 83.51% 6.98%
## 30: 35.33% 86.53% 4.17%
## 40: 35.97% 90.02% 3.07%
## 50: 36.44% 93.19% 1.91%
## ntree OOB 1 2
## 10: 10.32% 12.51% 8.98%
## 20: 8.40% 10.22% 7.29%
## 30: 8.06% 9.79% 7.01%
## 40: 8.03% 9.71% 7.01%
## 50: 7.92% 9.71% 6.84%
## ntree OOB 1 2
## 10: 9.44% 11.23% 8.36%
## 20: 9.13% 11.59% 7.62%
## 30: 8.83% 11.28% 7.34%
## 40: 8.77% 11.24% 7.27%
## 50: 8.59% 11.12% 7.05%
## ntree OOB 1 2
## 10: 9.87% 12.09% 8.52%
## 20: 8.39% 10.18% 7.29%
## 30: 8.19% 10.25% 6.93%
## 40: 8.36% 10.68% 6.95%
## 50: 8.29% 10.61% 6.88%
# Salary-only forest. With a single predictor the only valid mtry is 1.
# Without an explicit grid, randomForest warns "invalid mtry: reset to within
# valid range" on every fit, so mtry is pinned to 1 here. The forests
# themselves are unchanged, because the reset already forced mtry to 1.
DTfit2 <- train(
  brand ~ salary,
  data = train,
  method = "parRF",
  trControl = fitControldt,
  tuneGrid = data.frame(mtry = 1),
  ntree = 50,
  do.trace = 10
)
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 36.82% 45.98% 31.24%
## 20: 36.09% 45.81% 30.17%
## 30: 35.79% 46.48% 29.29%
## 40: 35.91% 46.28% 29.60%
## 50: 35.91% 46.48% 29.48%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.81% 44.32% 30.63%
## 20: 35.74% 46.59% 29.14%
## 30: 35.32% 46.14% 28.73%
## 40: 35.49% 46.54% 28.76%
## 50: 35.66% 46.50% 29.07%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.64% 46.73% 28.92%
## 20: 35.44% 46.63% 28.62%
## 30: 35.39% 46.63% 28.55%
## 40: 35.79% 46.99% 28.98%
## 50: 35.76% 47.34% 28.72%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.45% 45.10% 29.56%
## 20: 35.88% 46.46% 29.43%
## 30: 35.43% 45.87% 29.07%
## 40: 35.75% 46.54% 29.19%
## 50: 35.89% 46.61% 29.35%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.48% 45.67% 29.26%
## 20: 35.69% 46.67% 29.00%
## 30: 35.61% 46.95% 28.71%
## 40: 35.92% 47.34% 28.97%
## 50: 35.91% 47.34% 28.95%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.36% 45.17% 29.39%
## 20: 35.49% 45.58% 29.35%
## 30: 35.33% 45.73% 29.00%
## 40: 35.57% 46.44% 28.95%
## 50: 35.64% 46.83% 28.83%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(106880.478399, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 36.06% 46.36% 29.79%
## 20: 35.50% 46.36% 28.88%
## 30: 35.53% 46.01% 29.15%
## 40: 35.39% 45.97% 28.96%
## 50: 35.51% 46.12% 29.05%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 34.99% 43.87% 29.58%
## 20: 35.14% 45.81% 28.64%
## 30: 35.30% 46.75% 28.33%
## 40: 35.42% 46.44% 28.71%
## 50: 35.24% 46.40% 28.45%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.79% 45.29% 30.04%
## 20: 35.78% 45.69% 29.74%
## 30: 35.45% 45.77% 29.16%
## 40: 35.39% 46.20% 28.81%
## 50: 35.49% 46.16% 29.00%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.36% 44.42% 29.84%
## 20: 35.28% 45.69% 28.95%
## 30: 35.09% 46.08% 28.40%
## 40: 35.54% 46.36% 28.95%
## 50: 35.71% 46.79% 28.97%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(106880.478399, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.90% 45.91% 29.82%
## 20: 34.73% 45.08% 28.43%
## 30: 34.82% 45.14% 28.54%
## 40: 34.65% 45.07% 28.31%
## 50: 35.03% 45.58% 28.62%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.66% 45.77% 29.51%
## 20: 35.95% 46.75% 29.37%
## 30: 35.79% 46.83% 29.08%
## 40: 35.90% 46.63% 29.36%
## 50: 36.01% 47.02% 29.31%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.38% 45.21% 29.40%
## 20: 35.58% 46.36% 29.02%
## 30: 35.64% 46.59% 28.97%
## 40: 35.61% 46.91% 28.73%
## 50: 35.64% 47.30% 28.54%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.04% 44.58% 29.22%
## 20: 35.49% 46.59% 28.73%
## 30: 35.79% 46.75% 29.12%
## 40: 35.77% 47.10% 28.88%
## 50: 35.42% 46.63% 28.59%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.78% 45.99% 29.54%
## 20: 35.94% 47.12% 29.14%
## 30: 36.03% 47.44% 29.09%
## 40: 36.29% 47.71% 29.33%
## 50: 36.30% 47.75% 29.33%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.39% 45.68% 29.10%
## 20: 35.66% 46.57% 29.02%
## 30: 35.57% 47.02% 28.59%
## 40: 35.92% 47.18% 29.07%
## 50: 35.95% 47.45% 28.95%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.30% 45.52% 29.07%
## 20: 35.37% 46.91% 28.35%
## 30: 35.72% 46.79% 28.98%
## 40: 35.45% 46.59% 28.67%
## 50: 35.48% 46.79% 28.60%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.57% 45.52% 29.52%
## 20: 35.53% 45.75% 29.31%
## 30: 35.38% 46.07% 28.88%
## 40: 35.34% 46.22% 28.71%
## 50: 35.29% 46.34% 28.57%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.59% 45.42% 29.62%
## 20: 35.26% 44.97% 29.36%
## 30: 35.64% 45.65% 29.55%
## 40: 35.57% 45.81% 29.34%
## 50: 35.76% 46.59% 29.17%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.41% 45.07% 29.54%
## 20: 35.56% 45.69% 29.39%
## 30: 35.68% 46.32% 29.21%
## 40: 36.20% 47.02% 29.62%
## 50: 36.22% 46.83% 29.76%
## Warning in randomForest.default(ntree = 50, do.trace = 10, x =
## structure(c(119806.544798, : invalid mtry: reset to within valid range
## ntree OOB 1 2
## 10: 35.23% 44.97% 29.29%
## 20: 35.08% 46.34% 28.22%
## 30: 35.13% 46.48% 28.22%
## 40: 35.56% 46.93% 28.63%
## 50: 35.69% 47.11% 28.74%
# Forest on salary and age: 50 trees per fit, progress traced every 10 trees
DTfit3 <- train(
  brand ~ salary + age,
  data = train,
  method = "parRF",
  trControl = fitControldt,
  ntree = 50,
  do.trace = 10
)
## note: only 1 unique complexity parameters in default grid. Truncating the grid to 1 .
##
## ntree OOB 1 2
## 10: 10.04% 12.72% 8.41%
## 20: 9.91% 13.04% 8.01%
## 30: 9.61% 12.37% 7.93%
## 40: 9.76% 12.37% 8.17%
## 50: 9.61% 12.49% 7.86%
## ntree OOB 1 2
## 10: 10.12% 12.77% 8.51%
## 20: 10.07% 13.16% 8.20%
## 30: 9.89% 13.00% 8.01%
## 40: 9.89% 13.04% 7.98%
## 50: 9.94% 12.96% 8.10%
## ntree OOB 1 2
## 10: 9.84% 11.99% 8.54%
## 20: 9.61% 12.33% 7.96%
## 30: 9.49% 12.06% 7.93%
## 40: 9.55% 12.18% 7.96%
## 50: 9.67% 12.69% 7.84%
## ntree OOB 1 2
## 10: 10.06% 12.56% 8.53%
## 20: 9.50% 12.26% 7.82%
## 30: 9.69% 12.76% 7.82%
## 40: 9.58% 12.33% 7.91%
## 50: 9.61% 12.37% 7.93%
## ntree OOB 1 2
## 10: 10.10% 13.10% 8.27%
## 20: 9.79% 13.16% 7.75%
## 30: 9.78% 12.80% 7.94%
## 40: 9.59% 13.00% 7.51%
## 50: 9.60% 12.92% 7.58%
## ntree OOB 1 2
## 10: 9.75% 12.21% 8.25%
## 20: 9.57% 12.64% 7.70%
## 30: 9.39% 12.56% 7.46%
## 40: 9.54% 12.76% 7.58%
## 50: 9.42% 12.60% 7.48%
## ntree OOB 1 2
## 10: 9.75% 12.16% 8.29%
## 20: 9.60% 12.02% 8.13%
## 30: 9.54% 12.33% 7.84%
## 40: 9.44% 12.22% 7.75%
## 50: 9.42% 12.14% 7.77%
## ntree OOB 1 2
## 10: 9.43% 12.28% 7.70%
## 20: 9.39% 12.41% 7.55%
## 30: 9.29% 12.29% 7.46%
## 40: 9.27% 12.49% 7.31%
## 50: 9.36% 12.53% 7.43%
## ntree OOB 1 2
## 10: 9.53% 11.92% 8.07%
## 20: 9.42% 12.17% 7.75%
## 30: 9.33% 12.17% 7.60%
## 40: 9.33% 12.02% 7.70%
## 50: 9.35% 11.82% 7.84%
## ntree OOB 1 2
## 10: 9.94% 12.71% 8.25%
## 20: 9.61% 12.49% 7.86%
## 30: 9.45% 12.26% 7.74%
## 40: 9.41% 12.18% 7.72%
## 50: 9.47% 12.18% 7.82%
## ntree OOB 1 2
## 10: 9.50% 11.61% 8.21%
## 20: 9.09% 11.47% 7.65%
## 30: 9.26% 12.06% 7.55%
## 40: 9.30% 11.98% 7.67%
## 50: 9.33% 12.06% 7.67%
## ntree OOB 1 2
## 10: 9.73% 11.70% 8.52%
## 20: 9.68% 12.15% 8.17%
## 30: 9.52% 12.14% 7.93%
## 40: 9.44% 12.33% 7.67%
## 50: 9.38% 12.02% 7.77%
## ntree OOB 1 2
## 10: 9.17% 11.53% 7.73%
## 20: 9.39% 12.69% 7.39%
## 30: 9.50% 12.57% 7.63%
## 40: 9.44% 12.26% 7.72%
## 50: 9.50% 12.61% 7.60%
## ntree OOB 1 2
## 10: 10.21% 12.70% 8.70%
## 20: 9.32% 11.82% 7.79%
## 30: 9.24% 11.79% 7.70%
## 40: 9.08% 11.86% 7.39%
## 50: 9.30% 12.22% 7.53%
## ntree OOB 1 2
## 10: 10.16% 13.36% 8.22%
## 20: 9.70% 12.84% 7.79%
## 30: 9.51% 12.45% 7.72%
## 40: 9.57% 12.49% 7.79%
## 50: 9.48% 12.65% 7.55%
## ntree OOB 1 2
## 10: 10.24% 12.78% 8.69%
## 20: 9.85% 12.96% 7.96%
## 30: 9.82% 12.73% 8.05%
## 40: 9.87% 12.69% 8.15%
## 50: 9.89% 12.96% 8.03%
## ntree OOB 1 2
## 10: 9.71% 12.12% 8.24%
## 20: 9.57% 12.22% 7.96%
## 30: 9.53% 12.22% 7.89%
## 40: 9.44% 12.14% 7.79%
## 50: 9.57% 12.57% 7.75%
## ntree OOB 1 2
## 10: 10.00% 12.40% 8.54%
## 20: 9.85% 12.88% 8.01%
## 30: 9.60% 12.60% 7.77%
## 40: 9.48% 12.72% 7.51%
## 50: 9.47% 12.68% 7.51%
## ntree OOB 1 2
## 10: 9.91% 12.54% 8.31%
## 20: 9.57% 12.37% 7.86%
## 30: 9.46% 12.17% 7.82%
## 40: 9.54% 12.49% 7.74%
## 50: 9.77% 12.52% 8.10%
## ntree OOB 1 2
## 10: 10.07% 12.94% 8.32%
## 20: 9.42% 12.14% 7.77%
## 30: 9.66% 12.22% 8.10%
## 40: 9.47% 12.29% 7.74%
## 50: 9.54% 12.37% 7.82%
## ntree OOB 1 2
## 10: 9.63% 12.11% 8.12%
## 20: 9.67% 12.68% 7.83%
## 30: 9.48% 12.30% 7.76%
## 40: 9.43% 12.30% 7.68%
## 50: 9.36% 12.19% 7.63%
# Predictors actually used by each forest (factors appear as dummy columns)
predictors(DTfit1)
## [1] "salary" "age" "elevel.L" "elevel.Q" "elevel.C" "elevel^4"
## [7] "car2" "car3" "car4" "car5" "car6" "car7"
## [13] "car8" "car9" "car10" "car11" "car12" "car13"
## [19] "car14" "car15" "car16" "car17" "car18" "car19"
## [25] "car20" "zipcode1" "zipcode2" "zipcode3" "zipcode4" "zipcode5"
## [31] "zipcode6" "zipcode7" "zipcode8" "credit"
predictors(DTfit2)
## [1] "salary"
predictors(DTfit3)
## [1] "salary" "age"
# Predict brand for the held-out test rows with each forest
testPredDTfit1 <- predict(object = DTfit1, newdata = test)
testPredDTfit2 <- predict(object = DTfit2, newdata = test)
testPredDTfit3 <- predict(object = DTfit3, newdata = test)
# Performance measurement on the test set: accuracy and kappa
postResample(pred = testPredDTfit1, obs = test$brand)
## Accuracy Kappa
## 0.9211685 0.8333156
postResample(pred = testPredDTfit2, obs = test$brand)
## Accuracy Kappa
## 0.6394558 0.2303512
postResample(pred = testPredDTfit3, obs = test$brand)
## Accuracy Kappa
## 0.9115646 0.8126225
# Conditional inference tree of brand on salary and age, limited to depth 3
# and fitted on the full Survey data (not only the training rows), then drawn
tree_controls <- ctree_control(maxdepth = 3)
ct <- ctree(brand ~ salary + age, data = Survey, controls = tree_controls)
plot(ct)