Random Forest is one of the most popular machine learning methods for prediction. It is an excellent tool for making predictions, but the flexibility that makes it accurate also makes it hard to interpret.

library(caret)

## Loading required package: lattice

library(randomForest)

## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.

library(ggplot2)  # qplot() below comes from ggplot2

Read in the data, treating "?" as missing, and keep only the complete cases:

car <- read.csv("Car.txt", header=FALSE, sep=",", na.strings="?")
colnames(car) <- c("symboling", "normalized_losses", "make", "fuel_type", "aspiration", "num_of_doors",
                   "body_style", "drive_wheels", "engine_location", "wheel_base", "length", "width", "height",
                   "curb_weight", "engine_type", "num_cylinders", "engine_size", "fuel_system", "bore",
                   "stroke", "compression_ration", "horsepower", "peak_rpm", "city_mpg", "highway_mpg",
                   "price")
car <- car[complete.cases(car),]
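
Before modeling, a quick sanity check is worthwhile: how many complete rows survived, and how imbalanced are the two fuel types? A minimal sketch (output not shown):

# rows/columns left after dropping incomplete cases, and the class balance
dim(car)
table(car$fuel_type)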
 
Split the data 70/30, stratified on the outcome (a set.seed() call beforehand would make the split reproducible):

inTrain <- createDataPartition(y=car$fuel_type, p=0.7, list=FALSE)
training <- car[inTrain,]
testing <- car[-inTrain,]
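
Because createDataPartition samples within each class, the diesel/gas ratio should be roughly the same in both pieces; a quick check (a sketch, output omitted):

# class proportions should match between the two splits
prop.table(table(training$fuel_type))
prop.table(table(testing$fuel_type))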
Fit the random forest with caret, which tunes mtry by bootstrap resampling; prox=TRUE asks randomForest to also compute the proximity matrix:

modFit3 <- train(fuel_type ~ ., data=training, method="rf", prox=TRUE)
modFit3
## Random Forest 
## 
## 112 samples
##  25 predictor
##   2 classes: 'diesel', 'gas' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## 
## Summary of sample sizes: 112, 112, 112, 112, 112, 112, ... 
## 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa      Accuracy SD  Kappa SD 
##    2    0.9465533  0.6163742  0.02913985   0.2121647
##   33    1.0000000  1.0000000  0.00000000   0.0000000
##   65    1.0000000  1.0000000  0.00000000   0.0000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 33.
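
Perfect resampled accuracy at mtry = 33 usually means one or two predictors nearly determine the class (diesel engines, for example, have distinctive compression ratios and fuel systems). Even though the fitted forest is a black box, caret's varImp() gives a rough ranking of which variables drive the separation (a sketch; output not shown):

# rank the predictors by random-forest importance
varImp(modFit3)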
# predict on the held-out test set
pred <- predict(modFit3, testing)
testing$predRight <- pred == testing$fuel_type
table(pred, testing$fuel_type)
##         
## pred     diesel gas
##   diesel      4   0
##   gas         0  43
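
caret's confusionMatrix() produces the same cross-tabulation plus accuracy, kappa, sensitivity, and specificity in one call (a sketch; output omitted):

# fuller summary of test-set performance
confusionMatrix(pred, testing$fuel_type)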
# plot prediction success on the test set
qplot(fuel_type, price, colour=predRight, data=testing,
      xlab="Fuel Type", ylab="Price", main="Prediction Success")
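
Newer releases of ggplot2 deprecate qplot(); an equivalent call with the ggplot() interface looks like this (a sketch, same variables):

# same plot via the ggplot() interface
ggplot(testing, aes(x=fuel_type, y=price, colour=predRight)) +
  geom_point() +
  labs(x="Fuel Type", y="Price", title="Prediction Success")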