# Assignment 29: random forest classification of high vs. low sales
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
# Load the company sales data.
# NOTE(review): this is an absolute path on the original author's machine;
# point it at your own copy of Company_Data.csv before running.
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\11 Random Forests\\Assignment\\Company_Data.csv")
colnames(mydata)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"

# Derive the binary target: 1 when Sales is above the cut-off, 0 otherwise.
# The cut-off is named so the threshold is visible in one place.
sales_cutoff <- 7.490
Sales_Result <- ifelse(mydata$Sales > sales_cutoff, 1, 0)
mydata[, "Sales_Result"] <- Sales_Result

# Categorical columns (and the target) must be factors so that randomForest
# treats them as categories and fits a classifier rather than a regression.
factor_cols <- c("ShelveLoc", "Urban", "US", "Sales_Result")
mydata[factor_cols] <- lapply(mydata[factor_cols], as.factor)

# Split by class so the training set is balanced.
sales_high <- mydata[mydata$Sales_Result == "1", ]
sales_low <- mydata[mydata$Sales_Result == "0", ]

# The first `n_train_per_class` rows of each class go to training and every
# remaining row goes to testing. The test end rows were previously hard-coded
# (199 and 201); head()/tail() give the same split for data with those class
# counts without dropping rows or creating all-NA rows if the counts differ.
# NOTE(review): the split is positional, not random -- confirm the file has
# no meaningful row ordering.
n_train_per_class <- 150
data_train <- rbind(
  head(sales_high, n_train_per_class),
  head(sales_low, n_train_per_class)
)
data_test <- rbind(
  tail(sales_high, -n_train_per_class),
  tail(sales_low, -n_train_per_class)
)
# Building a random forest model on training data
#
# `Sales_Result` is a direct threshold on `Sales`, so `Sales` must not be
# offered as a predictor: with it in the model the forest only re-learns the
# cut-off and reports near-perfect accuracy that says nothing about the other
# variables. The column is therefore removed from the data used for fitting.
# predict() selects the model's own variables, so the unmodified
# data_train / data_test frames can still be passed as `newdata`.
set.seed(123) # bootstrap samples and split candidates are drawn at random
train_features <- data_train[, setdiff(names(data_train), "Sales"), drop = FALSE]
fit.forest <- randomForest(
  Sales_Result ~ .,
  data = train_features,
  na.action = na.roughfix,
  importance = TRUE
)

# Prediction of train data (computed once and reused below)
pred_train <- predict(fit.forest, data_train)

# Training accuracy. This is a resubstitution estimate on the rows the forest
# was grown from, so it is optimistic; rely on the test-set figures below.
mean(data_train$Sales_Result == pred_train)

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin

# Confusion Matrix
# caret expects the predictions as `data` and the true labels as `reference`;
# swapping them exchanges sensitivity/specificity with the predictive values.
confusionMatrix(data = pred_train, reference = data_train$Sales_Result)

# Predicting test data
pred_test <- predict(fit.forest, newdata = data_test)

# Test accuracy
mean(pred_test == data_test$Sales_Result)

# Confusion Matrix
confusionMatrix(data = pred_test, reference = data_test$Sales_Result)

# NOTE: the console output previously recorded here (accuracy 1 on train,
# 0.99 on test) came from a model that used `Sales` as a predictor and is no
# longer valid. Re-run the script to regenerate the figures.
# Visualization
# Plot the forest's error rates against the number of trees, then label each
# curve with the matching column name of `err.rate`.
plot(fit.forest, lwd = 2)

# Derive the colour indices from the labels instead of hard-coding 1:4, so the
# legend always has exactly one entry per plotted error series.
err_labels <- colnames(fit.forest$err.rate)
err_colours <- seq_along(err_labels)
legend("topright", err_labels, col = err_colours, cex = 0.8, fill = err_colours)
