Dataset source

https://www.kaggle.com/miksaas/abalone-eda-regression-pca-classification

Dataset details

There are seven predictor variables included in the dataset. The first three constitute the shell measurements: Length, Diameter and Height.

The other four variables are related to the weight of abalone: Whole.weight, Shucked.weight, Viscera.weight and Shell.weight.

The response variable, that is, the dependent variable in this analysis, is Rings.

Import dataset

Import the csv file from Google drive.

# Google Drive file id taken from the shareable link.
id <- "18dk-yeiB7jFTRv1KR3bTzgiya3dwnKCc"
# Build the direct-download URL for that file id and read the CSV into `df`.
download_url <- sprintf("https://docs.google.com/uc?id=%s&export=download", id)
df <- read.csv(download_url)

Data Cleaning/Preprocessing

Load the required libraries.

defaultW <- getOption("warn") 
options(warn = -1) 
library(plyr)
library(dplyr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(corrplot)
library(caret)
options(warn = defaultW)

Inspect the dataset.

# Object class and underlying storage type of the imported data.
class(df)
## [1] "data.frame"
typeof(df)
## [1] "list"
# Dimensions as a pair, then the column and row counts on their own.
dim(df)
## [1] 4187    9
ncol(df)
## [1] 9
nrow(df)
## [1] 4187
# Column names as read from the CSV header.
names(df)
## [1] "Sex"            "Length"         "Diameter"       "Height"        
## [5] "Whole.weight"   "Shucked.weight" "Viscera.weight" "Shell.weight"  
## [9] "Rings"
# Compact structural overview: one line per column with its type and first values.
str(df)
## 'data.frame':    4187 obs. of  9 variables:
##  $ Sex           : chr  "M" "M" "F" "M" ...
##  $ Length        : num  0.455 0.35 0.53 0.44 0.33 0.425 0.53 0.545 0.475 0.55 ...
##  $ Diameter      : num  0.365 0.265 0.42 0.365 0.255 0.3 0.415 0.425 0.37 0.44 ...
##  $ Height        : num  0.095 0.09 0.135 0.125 0.08 0.095 0.15 0.125 0.125 0.15 ...
##  $ Whole.weight  : num  0.514 0.226 0.677 0.516 0.205 ...
##  $ Shucked.weight: num  0.2245 0.0995 0.2565 0.2155 0.0895 ...
##  $ Viscera.weight: num  0.101 0.0485 0.1415 0.114 0.0395 ...
##  $ Shell.weight  : num  0.15 0.07 0.21 0.155 0.055 0.12 0.33 0.26 0.165 0.32 ...
##  $ Rings         : int  15 7 9 10 7 8 20 16 9 19 ...
# Same overview via glimpse(): row/column counts, then each column's type and leading values.
glimpse(df)
## Rows: 4,187
## Columns: 9
## $ Sex            <chr> "M", "M", "F", "M", "I", "I", "F", "F", "M", "F", "F", ~
## $ Length         <dbl> 0.455, 0.350, 0.530, 0.440, 0.330, 0.425, 0.530, 0.545,~
## $ Diameter       <dbl> 0.365, 0.265, 0.420, 0.365, 0.255, 0.300, 0.415, 0.425,~
## $ Height         <dbl> 0.095, 0.090, 0.135, 0.125, 0.080, 0.095, 0.150, 0.125,~
## $ Whole.weight   <dbl> 0.5140, 0.2255, 0.6770, 0.5160, 0.2050, 0.3515, 0.7775,~
## $ Shucked.weight <dbl> 0.2245, 0.0995, 0.2565, 0.2155, 0.0895, 0.1410, 0.2370,~
## $ Viscera.weight <dbl> 0.1010, 0.0485, 0.1415, 0.1140, 0.0395, 0.0775, 0.1415,~
## $ Shell.weight   <dbl> 0.150, 0.070, 0.210, 0.155, 0.055, 0.120, 0.330, 0.260,~
## $ Rings          <int> 15, 7, 9, 10, 7, 8, 20, 16, 9, 19, 14, 10, 11, 10, 10, ~
# Per-column summary statistics; the "NA's" entries in the output flag the
# columns that contain missing values.
summary(df)
##      Sex                Length         Diameter          Height      
##  Length:4187        Min.   :0.075   Min.   :0.0550   Min.   :0.0000  
##  Class :character   1st Qu.:0.450   1st Qu.:0.3500   1st Qu.:0.1150  
##  Mode  :character   Median :0.545   Median :0.4250   Median :0.1400  
##                     Mean   :0.524   Mean   :0.4078   Mean   :0.1395  
##                     3rd Qu.:0.615   3rd Qu.:0.4800   3rd Qu.:0.1650  
##                     Max.   :0.815   Max.   :0.6500   Max.   :1.1300  
##                     NA's   :1       NA's   :1        NA's   :1       
##   Whole.weight    Shucked.weight   Viscera.weight    Shell.weight   
##  Min.   :0.0020   Min.   :0.0010   Min.   :0.0005   Min.   :0.0015  
##  1st Qu.:0.4421   1st Qu.:0.1862   1st Qu.:0.0935   1st Qu.:0.1300  
##  Median :0.7995   Median :0.3355   Median :0.1710   Median :0.2335  
##  Mean   :0.8286   Mean   :0.3593   Mean   :0.1805   Mean   :0.2388  
##  3rd Qu.:1.1530   3rd Qu.:0.5020   3rd Qu.:0.2527   3rd Qu.:0.3289  
##  Max.   :2.8255   Max.   :1.4880   Max.   :0.7600   Max.   :1.0050  
##  NA's   :1                                          NA's   :1       
##      Rings       
##  Min.   : 1.000  
##  1st Qu.: 8.000  
##  Median : 9.000  
##  Mean   : 9.932  
##  3rd Qu.:11.000  
##  Max.   :29.000  
## 
# First five rows of the raw data.
head(df, 5)
##   Sex Length Diameter Height Whole.weight Shucked.weight Viscera.weight
## 1   M  0.455    0.365  0.095       0.5140         0.2245         0.1010
## 2   M  0.350    0.265  0.090       0.2255         0.0995         0.0485
## 3   F  0.530    0.420  0.135       0.6770         0.2565         0.1415
## 4   M  0.440    0.365  0.125       0.5160         0.2155         0.1140
## 5   I  0.330    0.255  0.080       0.2050         0.0895         0.0395
##   Shell.weight Rings
## 1        0.150    15
## 2        0.070     7
## 3        0.210     9
## 4        0.155    10
## 5        0.055     7
# Last five rows of the raw data.
tail(df, 5)
##      Sex Length Diameter Height Whole.weight Shucked.weight Viscera.weight
## 4183   F  0.565    0.450  0.165       0.8870         0.3700         0.2390
## 4184   M  0.590    0.440  0.135       0.9660         0.4390         0.2145
## 4185   M  0.600    0.475  0.205       1.1760         0.5255         0.2875
## 4186   F  0.625    0.485  0.150       1.0945         0.5310         0.2610
## 4187   M  0.710    0.555  0.195       1.9485         0.9455         0.3765
##      Shell.weight Rings
## 4183       0.2490    11
## 4184       0.2605    10
## 4185       0.3080     9
## 4186       0.2960    10
## 4187       0.4950    12

Check for duplicate rows and remove them; then check the number of duplicate rows again.

# Rows that exactly repeat an earlier row.
df[duplicated(df), ]
##      Sex Length Diameter Height Whole.weight Shucked.weight Viscera.weight
## 4025   M  0.660    0.485  0.155       1.2275         0.6100         0.2740
## 4105   M  0.635    0.500  0.180       1.2915         0.5940         0.2695
## 4123   F  0.570    0.450  0.150       0.9645         0.5310         0.1890
## 4135   I  0.540    0.415  0.135       0.7090         0.3195         0.1740
## 4168   M  0.475    0.360  0.140       0.5135         0.2410         0.1045
##      Shell.weight Rings
## 4025        0.300     8
## 4105        0.370     9
## 4123        0.209     9
## 4135        0.185     9
## 4168        0.155     8
# Keep only the first occurrence of every row, then confirm no repeats remain.
df <- unique(df)
sum(duplicated(df))
## [1] 0

Check for rows containing NA or missing values.

# Show every row that has at least one missing value.
df[!complete.cases(df),]
##      Sex Length Diameter Height Whole.weight Shucked.weight Viscera.weight
## 3893   F  0.500     0.40  0.150       0.8085         0.2730         0.1120
## 3923   M  0.375     0.28     NA       0.2225         0.0875         0.0430
## 3951   F  0.530       NA  0.165       0.7720         0.2855         0.1975
## 3980   I     NA     0.35  0.135       0.4940         0.1925         0.0945
## 4001   I  0.315     0.23  0.000           NA         0.0575         0.0285
##      Shell.weight Rings
## 3893           NA    13
## 3923       0.0800    10
## 3951       0.2300    12
## 3980       0.1405     7
## 4001       0.3640     6
# Locate each missing cell as a (row, column-index) pair.
which(is.na(df), arr.ind=TRUE)
##       row col
## 3980 3980   2
## 3951 3951   3
## 3923 3923   4
## 4001 4001   5
## 3893 3893   8

The 2nd, 3rd, 4th, 5th and 8th columns (all of “numeric” type) contain missing values; we therefore impute each of them with its column mean.

# Mean-impute the five numeric columns that contain missing values.
# The columns are addressed by name (they sit at positions 2, 3, 4, 5 and 8 of
# the raw data) so the step does not silently hit the wrong column if the
# column order ever changes.
cols_with_na <- c("Length", "Diameter", "Height", "Whole.weight", "Shell.weight")
for (col_name in cols_with_na) {
  is_missing <- is.na(df[[col_name]])
  # The mean is computed from the observed values only.
  df[[col_name]][is_missing] <- mean(df[[col_name]], na.rm = TRUE)
}

Check whether any missing values remain.

# Number of rows that still contain a missing value.
sum(!complete.cases(df))
## [1] 0

The minimum value of the “Height” column is 0.0; we inspect the rows with “Height” = 0 and count them.

# Flag the rows whose recorded height is exactly zero, list them, then count them.
zero_height <- df$Height == 0
df[zero_height, ]
##      Sex Length Diameter Height Whole.weight Shucked.weight Viscera.weight
## 1258   I  0.430     0.34      0    0.4280000         0.2065         0.0860
## 4001   I  0.315     0.23      0    0.8284987         0.0575         0.0285
## 4002   I  0.315     0.23      0    0.1340000         0.0575         0.0285
##      Shell.weight Rings
## 1258       0.1150     8
## 4001       0.3640     6
## 4002       0.3505     6
nrow(df[zero_height, ])
## [1] 3

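The next cleaning steps are: trim whitespace from the character columns, drop the rows whose “Height” is not positive, and drop the rows whose whole weight does not exceed the sum of the shucked, viscera and shell weights.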

df <- df %>%
  # Strip leading/trailing whitespace from every character column
  # (across() + where() replaces the superseded mutate_if()).
  mutate(across(where(is.character), str_trim)) %>%
  # Whole weight minus the sum of its three recorded components.
  mutate(
    Weight.diff = Whole.weight - (Viscera.weight + Shucked.weight + Shell.weight)
  ) %>%
  # Keep rows with a positive height whose whole weight exceeds the component sum.
  subset(Height > 0 & Weight.diff > 0) %>%
  # The helper column is only needed for the filter above.
  select(-Weight.diff)

Rename the columns.

# Replace the column names positionally; the four weight columns lose their
# ".weight" suffix, the other names are unchanged.
colnames(df) <- c(
  "Sex", "Length", "Diameter", "Height",
  "Whole", "Shucked", "Viscera", "Shell", "Rings"
)

Convert “Sex” column into ‘factor’ type.

# Recode the one-letter sex codes into readable labels, stored as an ordered factor.
# NOTE(review): the level order makes Infant < Male < Female; confirm an ordered
# (rather than plain) factor is intended, since model formulas treat the two differently.
df$Sex <- factor(
  df$Sex,
  levels = c("I", "M", "F"),
  labels = c("Infant", "Male", "Female"),
  ordered = TRUE
)

Create two way contingency table.

# Cross-tabulate sex against ring count.
with(df, table(Sex = Sex, Rings = Rings))
##         Rings
## Sex        2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##   Infant   1  10  42  87 200 252 255 163  91  62  21  24  14   8   6   7   5
##   Male     0   2   6  11  22  76 170 269 289 218 115  88  56  51  30  25  18
##   Female   0   0   0   4  16  41 118 230 241 198 125  88  56  41  30  26  19
##         Rings
## Sex       19  20  21  22  23  24  25  26  27  29
##   Infant   2   2   1   0   0   0   0   0   0   0
##   Male    14  12   6   3   3   1   0   1   1   0
##   Female  15  12   7   3   6   1   1   0   1   1

Show the preprocessed dataset.

# First ten rows after cleaning, renaming and recoding.
head(df,10)
##       Sex Length Diameter Height  Whole Shucked Viscera Shell Rings
## 1    Male  0.455    0.365  0.095 0.5140  0.2245  0.1010 0.150    15
## 2    Male  0.350    0.265  0.090 0.2255  0.0995  0.0485 0.070     7
## 3  Female  0.530    0.420  0.135 0.6770  0.2565  0.1415 0.210     9
## 4    Male  0.440    0.365  0.125 0.5160  0.2155  0.1140 0.155    10
## 5  Infant  0.330    0.255  0.080 0.2050  0.0895  0.0395 0.055     7
## 6  Infant  0.425    0.300  0.095 0.3515  0.1410  0.0775 0.120     8
## 7  Female  0.530    0.415  0.150 0.7775  0.2370  0.1415 0.330    20
## 8  Female  0.545    0.425  0.125 0.7680  0.2940  0.1495 0.260    16
## 9    Male  0.475    0.370  0.125 0.5095  0.2165  0.1125 0.165     9
## 10 Female  0.550    0.440  0.150 0.8945  0.3145  0.1510 0.320    19

Exploratory Data Analysis

Visualize the distribution of “Sex”.

# Bar chart of the number of abalones per sex, one fill colour per level.
ggplot(df, aes(x = Sex, fill = Sex)) +
  geom_bar()

Create boxplots and violin plots of “Sex” versus “Whole”.

# Distribution of whole weight per sex, first as a box plot and then as a violin plot.
# Written with ggplot() + geom_*() because qplot() is deprecated in ggplot2 3.4.0;
# the aesthetics (x = Sex, y = Whole, fill = Sex) are the same as before.
ggplot(df, aes(x = Sex, y = Whole, fill = Sex)) +
  geom_boxplot()

ggplot(df, aes(x = Sex, y = Whole, fill = Sex)) +
  geom_violin()

Create a scatterplot of “Whole” versus “Shucked”, “Whole” versus “Viscera” and “Whole” versus “Shell” to examine how they relate to each other.

# Scatter plots of whole weight against each component weight, coloured by sex.
# Written with ggplot() + geom_point() because qplot() is deprecated in ggplot2 3.4.0.
ggplot(df, aes(x = Whole, y = Shucked, color = Sex)) +
  geom_point()

ggplot(df, aes(x = Whole, y = Viscera, color = Sex)) +
  geom_point()

ggplot(df, aes(x = Whole, y = Shell, color = Sex)) +
  geom_point()

It appears that “Whole” versus “Shucked” has the strongest positive correlation.

Create a correlation matrix.

# Pearson correlations between all columns except the Sex factor.
# Sex is excluded by name; the previous index vector also named a tenth
# column, which this nine-column data frame does not have.
corr <- cor(df[names(df) != "Sex"], method = "pearson", use = "complete.obs")
# Print the matrix rounded to two decimals.
round(corr, 2)
##          Length Diameter Height Whole Shucked Viscera Shell Rings
## Length     1.00     0.99   0.82  0.93    0.90    0.90  0.90  0.54
## Diameter   0.99     1.00   0.83  0.93    0.90    0.90  0.91  0.56
## Height     0.82     0.83   1.00  0.81    0.77    0.79  0.81  0.54
## Whole      0.93     0.93   0.81  1.00    0.97    0.97  0.96  0.53
## Shucked    0.90     0.90   0.77  0.97    1.00    0.93  0.88  0.41
## Viscera    0.90     0.90   0.79  0.97    0.93    1.00  0.91  0.49
## Shell      0.90     0.91   0.81  0.96    0.88    0.91  1.00  0.62
## Rings      0.54     0.56   0.54  0.53    0.41    0.49  0.62  1.00
# Lower-triangle correlation plot, variables ordered by hierarchical clustering,
# with the coefficients printed in each cell and no colour legend.
corrplot(
  corr,
  type = 'lower',
  order = 'hclust',
  tl.col = 'black',
  tl.srt = 45,
  tl.cex = 0.8,
  addCoef.col = 'black',
  number.cex = 0.8,
  col = COL2('RdYlBu'),
  cl.pos = 'n'
)

The correlation matrix shows that “Shell” and “Shucked” have the strongest and weakest correlations with “Rings”, respectively.

To examine the relationship between some of the predictor variables and the dependent variable, we plot “Shell” versus “Rings” and “Shucked” versus “Rings”.

# Rings against shell weight, coloured by sex, with a linear fit per sex.
ggplot(df, aes(x = Shell, y = Rings, color = Sex)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Same plot with shucked weight on the x axis.
ggplot(df, aes(x = Shucked, y = Rings, color = Sex)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

The relationship between “Shell” & “Rings” and “Shucked” & “Rings” appears similar for males and females but steeper for infants.

Draw histograms to show the distribution of each variable.

# Lay the histograms out on a 3 x 3 grid.
par(mfrow = c(3, 3))

# Each column is first copied into a variable of the same name so that hist()
# labels the panel with the bare column name.
Length <- df$Length
hist(Length, col = "blue")

Diameter <- df$Diameter
hist(Diameter, col = "blue")

Height <- df$Height
hist(Height, col = "blue")

Whole <- df$Whole
hist(Whole, col = "blue")

Shucked <- df$Shucked
hist(Shucked, col = "blue")

Viscera <- df$Viscera
hist(Viscera, col = "blue")

Shell <- df$Shell
hist(Shell, col = "blue")

Rings <- df$Rings
hist(Rings, col = "blue")

Classifying the abalones into Adult and Infant

Transform Female and Male to Adults

# Build the two-class data set on a copy, so the original `df` (and the levels
# of df$Sex) stay untouched for the later regression section.
df2 <- df
# Everything that is not "Infant" (i.e. Male or Female) becomes "Adult";
# the result is an ordered factor with levels Infant < Adult.
df2$Sex <- ordered(
  ifelse(df2$Sex == "Infant", "Infant", "Adult"),
  levels = c("Infant", "Adult")
)
# First five rows of `df`.
# NOTE(review): this prints the original `df`, not the recoded `df2` — confirm which was intended.
head(df,5)
##      Sex Length Diameter Height  Whole Shucked Viscera Shell Rings
## 1   Male  0.455    0.365  0.095 0.5140  0.2245  0.1010 0.150    15
## 2   Male  0.350    0.265  0.090 0.2255  0.0995  0.0485 0.070     7
## 3 Female  0.530    0.420  0.135 0.6770  0.2565  0.1415 0.210     9
## 4   Male  0.440    0.365  0.125 0.5160  0.2155  0.1140 0.155    10
## 5 Infant  0.330    0.255  0.080 0.2050  0.0895  0.0395 0.055     7

Split the dataset into training and testing data.

# Fix the RNG seed so the random 80/20 partition is the same on every run.
set.seed(888)
# Row indices of the training portion, sampled on the Sex outcome.
trainIndex <- createDataPartition(df2$Sex, p = 0.8, list = FALSE)
trainingset <- df2[trainIndex, ]
# Every row not selected for training forms the test set.
testingset <- df2[-trainIndex, ]

Model building

# Resampling scheme passed to every train() call below:
# 10-fold cross-validation, repeated ten times.
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

set.seed(888)
# Polynomial-kernel SVM fitted with a single fixed parameter combination;
# predictors are scaled and centred before fitting.
svm_grid <- data.frame(degree = 1, scale = 1, C = 1)
svm <- train(
  Sex ~ .,
  data = trainingset,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = fitControl,
  tuneGrid = svm_grid
)

# Naive Bayes classifier. Warnings are silenced for this call only, so the
# session-wide `warn` option keeps whatever value it had before.
nb <- suppressWarnings(
  train(Sex ~ ., data = trainingset, method = "nb", trControl = fitControl)
)

# k-nearest-neighbours classifier on centred and scaled predictors;
# tuneLength = 10 lets caret try ten candidate values of the tuning parameter.
knn <- train(
  Sex ~ .,
  data = trainingset,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = fitControl
)

# Class predictions of each fitted model on the held-out test set.
svm_pred <- predict(svm, newdata = testingset)

# Warnings from the naive Bayes prediction are silenced for this call only,
# leaving the session-wide `warn` option unchanged.
nb_pred <- suppressWarnings(predict(nb, newdata = testingset))

knn_pred <- predict(knn, newdata = testingset)

Models’ Performance Evaluation

# Support Vector Machines (SVM): predicted vs. actual class on the test set
svm_cm <- confusionMatrix(data = svm_pred, reference = testingset$Sex)
svm_cm
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Infant Adult
##     Infant    172    68
##     Adult      78   485
##                                           
##                Accuracy : 0.8182          
##                  95% CI : (0.7897, 0.8443)
##     No Information Rate : 0.6887          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5713          
##                                           
##  Mcnemar's Test P-Value : 0.4564          
##                                           
##             Sensitivity : 0.6880          
##             Specificity : 0.8770          
##          Pos Pred Value : 0.7167          
##          Neg Pred Value : 0.8615          
##              Prevalence : 0.3113          
##          Detection Rate : 0.2142          
##    Detection Prevalence : 0.2989          
##       Balanced Accuracy : 0.7825          
##                                           
##        'Positive' Class : Infant          
## 
# Naive Bayes (NB): predicted vs. actual class on the test set
nb_cm <- confusionMatrix(data = nb_pred, reference = testingset$Sex)
nb_cm
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Infant Adult
##     Infant    189   115
##     Adult      61   438
##                                          
##                Accuracy : 0.7808         
##                  95% CI : (0.7506, 0.809)
##     No Information Rate : 0.6887         
##     P-Value [Acc > NIR] : 3.742e-09      
##                                          
##                   Kappa : 0.5174         
##                                          
##  Mcnemar's Test P-Value : 6.469e-05      
##                                          
##             Sensitivity : 0.7560         
##             Specificity : 0.7920         
##          Pos Pred Value : 0.6217         
##          Neg Pred Value : 0.8778         
##              Prevalence : 0.3113         
##          Detection Rate : 0.2354         
##    Detection Prevalence : 0.3786         
##       Balanced Accuracy : 0.7740         
##                                          
##        'Positive' Class : Infant         
## 
# K-Nearest Neighbors (KNN): predicted vs. actual class on the test set
knn_cm <- confusionMatrix(data = knn_pred, reference = testingset$Sex)
knn_cm
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Infant Adult
##     Infant    168    54
##     Adult      82   499
##                                           
##                Accuracy : 0.8306          
##                  95% CI : (0.8029, 0.8559)
##     No Information Rate : 0.6887          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5925          
##                                           
##  Mcnemar's Test P-Value : 0.0206          
##                                           
##             Sensitivity : 0.6720          
##             Specificity : 0.9024          
##          Pos Pred Value : 0.7568          
##          Neg Pred Value : 0.8589          
##              Prevalence : 0.3113          
##          Detection Rate : 0.2092          
##    Detection Prevalence : 0.2765          
##       Balanced Accuracy : 0.7872          
##                                           
##        'Positive' Class : Infant          
## 

Predicting the number of rings using regression.

Split the dataset into training and testing data.

# Fix the RNG seed so the random 80/20 partition is the same on every run.
set.seed(888)
# Row indices of the training portion, sampled on the Rings outcome.
reg_trainIndex <- createDataPartition(df$Rings, p = 0.8, list = FALSE)
reg_trainingset <- df[reg_trainIndex, ]
# Every row not selected for training forms the regression test set.
reg_testingset <- df[-reg_trainIndex, ]

Model building

# Linear Regression
# Fit Rings on all other columns using the regression split created above
# (reg_trainingset). The earlier version fitted on `trainingset`, i.e. the
# classification split built from df2, leaving the regression split unused.
reg <- lm(Rings ~ ., data = reg_trainingset)

summary(reg)
## NOTE: the summary echoed below was produced by the earlier run that fitted
## on `trainingset`; re-run the document to refresh it.
## 
## Call:
## lm(formula = Rings ~ ., data = trainingset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3170 -1.2770 -0.2897  0.8974 13.3415 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.635437   0.316501  11.486  < 2e-16 ***
## Sex.L         0.604648   0.071566   8.449  < 2e-16 ***
## Length        0.007127   2.065793   0.003 0.997247    
## Diameter     10.664756   2.510270   4.248 2.21e-05 ***
## Height        8.664939   1.605086   5.398 7.21e-08 ***
## Whole        12.467377   0.973853  12.802  < 2e-16 ***
## Shucked     -23.965396   1.068335 -22.432  < 2e-16 ***
## Viscera     -12.898012   1.573127  -8.199 3.47e-16 ***
## Shell         4.775017   1.442872   3.309 0.000945 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.156 on 3208 degrees of freedom
## Multiple R-squared:  0.5442, Adjusted R-squared:  0.5431 
## F-statistic: 478.8 on 8 and 3208 DF,  p-value: < 2.2e-16
## A coefficient whose p-value (Pr(>|t|)) is 0.05 or below is statistically significant at the 5% level.

# Predict the ring count for the held-out regression test set. Warnings are
# silenced for this call only, so the session-wide `warn` option is unchanged.
reg_pred <- suppressWarnings(predict(reg, newdata = reg_testingset))

# Side-by-side table of actual and predicted ring counts; the row names are
# taken from the names of the prediction vector.
modelEval <- data.frame(Actual = reg_testingset$Rings, Predicted = reg_pred)
# NOTE: the listing that follows in this document predates the fix above.
modelEval
##      Actual Predicted
## 1        15  9.223590
## 8        16 11.524882
## 9         9  9.595922
## 18       10  9.174553
## 22       10  7.677932
## 25       10 10.376858
## 29       15 11.510046
## 36        8  8.297595
## 38        8  8.786167
## 49        6  6.513450
## 53       10  9.090057
## 54       10  9.209412
## 57        8  9.166942
## 60        7  9.947487
## 68       13 16.427000
## 83       16 11.488588
## 95       15 14.119465
## 96       14 14.981136
## 102      15 11.359502
## 103      15 13.012310
## 108      10 10.836992
## 121       9  9.538523
## 122       7  7.449538
## 131      17 13.070207
## 132       9  9.521951
## 145      10  9.980281
## 151      15 11.109395
## 162      13 11.175338
## 171      14 14.953033
## 197      11 10.070273
## 199      15 10.340038
## 209      13 13.049208
## 212       7  7.709851
## 217       8  9.703264
## 218       7  8.080191
## 223       9 10.318032
## 224      11  9.988706
## 226       9 10.061920
## 231      13 11.570083
## 234       7  6.223383
## 240       5  4.839978
## 251       7  7.189738
## 268       8  8.275496
## 270      13  9.861073
## 275      13 16.178374
## 310      10 10.121075
## 316       8  8.821566
## 317      14 11.769909
## 321       5  6.013337
## 323      11  8.743105
## 326       7  7.426143
## 328      12  9.842427
## 340      15 11.466808
## 341      14  9.274773
## 345       8 11.226421
## 346      13  9.596221
## 347       9 11.860927
## 348       6  6.799371
## 350      14 10.196933
## 353      12  9.879464
## 356      20 17.264503
## 360      16 12.779972
## 361      18 12.616327
## 362      12 11.812180
## 363      20 12.560862
## 365      12 11.538214
## 370      17 15.425835
## 371      16 12.433903
## 374      14 13.750677
## 375      13 14.692797
## 379      15 13.019429
## 384      12  9.984901
## 388      10  9.449450
## 391      10  7.606902
## 403       7  9.093718
## 407       8  9.096289
## 410       8 10.022200
## 419      16 13.629637
## 430      18 13.991177
## 431      18 12.449394
## 432      20 11.877977
## 433      18 13.013946
## 434      22 12.725380
## 446       9 12.334568
## 450      18 10.892053
## 457      15 12.864869
## 465       6  6.062526
## 469      17 18.233594
## 476      17 12.437380
## 477       9  9.762137
## 480      16 14.000299
## 482      17 10.941299
## 483      15 10.492685
## 493      11 12.392300
## 498      19 11.579579
## 501      12 11.244180
## 511      10 12.243305
## 512      10  9.192998
## 515       5  6.847652
## 526       4  5.761960
## 531      20 10.470477
## 532      13  9.347411
## 533      12  9.020661
## 534       9  8.683416
## 536      11  9.217283
## 542      11  9.656488
## 553      12 10.235528
## 555       9  7.698248
## 567      12  8.595462
## 571      16  8.236584
## 573      20 13.445499
## 577      10  9.492314
## 579      11  9.653105
## 592       9  8.234497
## 596      13 12.105627
## 599      12 12.855557
## 610      12  8.289421
## 621      10  7.856903
## 626      12 11.236420
## 634       9  9.360286
## 636       8  8.339780
## 637       7  7.812151
## 638       6  7.645785
## 643      19 12.385791
## 645       9  9.079824
## 648       9 12.967746
## 659      18 12.508734
## 661      18 14.423212
## 663      10  9.332746
## 666      10  8.650209
## 671      14 11.129101
## 676      22 13.782648
## 679      23 12.532192
## 681       7  7.484644
## 685      10 10.520467
## 689      11 10.507332
## 698       5  7.039745
## 706      10  9.643745
## 709      10  9.385128
## 714       8  8.128798
## 716       7  5.936877
## 719       4  4.917899
## 729      13 11.627797
## 731      11 12.996328
## 735      18 12.786506
## 748       9  9.791808
## 756      13 10.488705
## 757      20 15.118600
## 759      13 10.876089
## 761      15 13.872918
## 766      11 13.269926
## 768       9  8.319825
## 770      17 11.790307
## 779      15 10.324109
## 781      13 11.439710
## 787       9 10.998414
## 794      12 12.256313
## 795      12 14.143261
## 798       9  9.085883
## 800      11  9.786058
## 802      10  9.409625
## 810      12 11.468149
## 820       7  7.202957
## 822       6  7.500107
## 828       6  7.757792
## 836       8  7.877052
## 846       9  7.532859
## 855      10  9.525659
## 856       9 10.540984
## 858      12 11.859431
## 864      10  9.256391
## 868      12 11.836979
## 870       9 11.155032
## 878      10  9.113291
## 879       9  8.736255
## 891      11 11.278590
## 892      17 19.035826
## 900       5  6.363213
## 902       6  6.576655
## 914       6  7.298306
## 919       7  7.404654
## 921       6  7.302975
## 923       6  7.361108
## 925       6  7.767420
## 932       6  8.326828
## 933       7  8.589847
## 935       8  8.165416
## 938       7  8.401289
## 940       7  7.473257
## 941       7  8.086136
## 945       6  8.534994
## 953       7  8.180196
## 967       7  8.841533
## 968       7  8.343742
## 973       8  9.008373
## 974      11  9.453951
## 991       9  9.555916
## 999       8  9.114923
## 1000     11 12.022564
## 1006      9  9.389210
## 1018     11 10.563165
## 1024     10 14.625577
## 1028      8  9.982006
## 1033     11 14.610407
## 1035      9  9.064153
## 1037     11 11.774570
## 1039     10 14.770668
## 1045      8  9.865408
## 1047     10 12.249248
## 1052     11 14.252623
## 1057      4  5.297156
## 1060      5  5.866927
## 1061      7  5.832157
## 1064      5  6.224496
## 1070      6  6.850445
## 1071      6  6.885217
## 1076      6  8.003074
## 1079      6  7.573267
## 1082      6  7.127829
## 1089      6  7.427891
## 1094      7  7.943769
## 1096      6  7.995414
## 1097      7  7.405792
## 1100      7  8.643493
## 1109      8  7.500047
## 1122      9  8.059945
## 1123      9  8.271962
## 1128      8  9.563072
## 1139      9  9.446923
## 1144      9  9.225388
## 1145      8 11.174331
## 1149      8  8.060661
## 1157     10  8.667266
## 1160     10  9.930003
## 1163      9  6.778639
## 1166      9  9.572862
## 1173      8  9.075614
## 1176     10 10.216107
## 1180     11  9.548731
## 1184      8 10.996876
## 1186      9 11.466467
## 1188     10 10.386481
## 1189      9 12.200522
## 1190     12 10.271022
## 1195      9  9.310347
## 1196      9  9.671879
## 1198     12  8.664938
## 1205     11 10.869376
## 1210     11  6.484117
## 1211      6  8.710570
## 1212      4  5.976825
## 1226      5  6.411244
## 1227      6  6.869729
## 1231      7  7.095778
## 1234      6  6.859005
## 1237      6  7.250462
## 1240      7  7.110950
## 1243      9  7.271621
## 1245      7  6.913370
## 1266      7  7.686141
## 1269     11  8.287026
## 1275      8  8.378449
## 1284      9  9.074166
## 1294     10  9.381171
## 1295      9  8.791559
## 1296      9  7.397672
## 1306     10  9.188087
## 1314      9  9.684639
## 1316      9  9.086904
## 1325     11  8.683521
## 1328     11  9.277145
## 1331      9 10.851157
## 1337     10  9.636571
## 1338     10 10.059861
## 1344     10  8.609012
## 1352     12 12.040811
## 1354     11 11.483544
## 1357     10 10.296846
## 1359     11  8.645615
## 1362      9 11.736860
## 1374     10 10.727736
## 1377     10 12.089597
## 1384     12  9.916592
## 1390     10 10.852984
## 1394     10 12.008207
## 1405     10 12.502040
## 1408     10 10.371677
## 1412     11 11.536667
## 1414     13 10.347441
## 1419     11 11.044051
## 1422     13 11.316592
## 1423     12 11.639170
## 1427      9  9.257734
## 1439      7  7.454898
## 1446      6  7.075331
## 1454      6  7.911712
## 1457      7  7.967202
## 1463      7  8.970118
## 1468      7  8.788077
## 1469      8  9.216421
## 1471      9  8.492600
## 1473      9  9.020874
## 1478      8  8.864674
## 1492      9 10.508354
## 1497      9 10.906901
## 1498     10  9.656010
## 1506      8 12.078320
## 1513      9  8.803452
## 1516     11  9.033340
## 1532      5  5.469368
## 1537      5  6.605106
## 1545      6  7.017255
## 1551      8  7.051556
## 1553      7  7.132359
## 1568      8  7.994677
## 1571      7  8.663149
## 1576      7  7.921427
## 1580      8  9.164819
## 1590      9  8.519389
## 1601      7  8.747070
## 1602      9  9.571251
## 1612     10 10.652273
## 1624      9  9.761098
## 1625      9  9.898643
## 1636      9  9.667431
## 1644      8  9.290839
## 1647      9  9.935001
## 1654     10  9.530048
## 1661      9  9.838516
## 1668     11 10.085075
## 1677     10 10.064051
## 1679     12 10.707070
## 1682     11 12.049732
## 1689     11 11.126516
## 1692     12  7.914042
## 1697     11 10.912940
## 1698     10  8.771921
## 1699     11 10.262011
## 1700     13  8.685974
## 1711     10 11.308976
## 1719     11 11.744004
## 1720     11 11.342426
## 1722     10 11.451217
## 1724     12 12.177370
## 1728     10 13.264592
## 1737     13 13.220212
## 1739     11 11.924275
## 1741     10 11.737496
## 1750     11 10.021644
## 1752     10 10.463173
## 1753     11 11.831212
## 1774     10  7.726905
## 1775      7  7.830865
## 1776      8  9.112412
## 1777      9  9.345219
## 1781      9  9.363799
## 1785      8  9.476269
## 1792     10  9.118225
## 1797     10 10.371937
## 1798      8  9.482692
## 1799     11 10.135501
## 1801      9 10.601593
## 1803     11  9.978891
## 1804     10 10.427471
## 1806      9 10.393085
## 1807     11 11.201764
## 1813     10 16.493417
## 1819     10  9.891895
## 1822      9  8.683018
## 1827      7  6.205639
## 1832      6  6.984943
## 1837      6  7.435286
## 1845      7  8.666431
## 1847      7  9.294387
## 1848      8  8.992431
## 1855      8  9.899408
## 1860     10 10.069446
## 1863      8  9.511547
## 1865      9  8.789829
## 1866      7  7.160385
## 1867     10  8.105116
## 1869      8  9.196244
## 1874      9  8.929869
## 1883      8  9.991433
## 1887      9 10.286156
## 1889     11 11.532912
## 1894     10  9.747761
## 1916     11  9.925973
## 1917     11 11.459960
## 1923      8 10.684939
## 1924     10 10.345719
## 1930     10 11.687908
## 1932      9 12.255260
## 1936     12 11.846491
## 1938      9 11.754649
## 1939     10 11.598670
## 1943      9  9.360533
## 1947     12 11.352943
## 1952     10 14.152005
## 1957     11 10.182262
## 1967     10 13.074159
## 1969     12 11.119115
## 1975     10 11.703750
## 1978     13 16.385667
## 1984     11 12.551375
## 1992      6  6.292105
## 1997      7  6.294643
## 2002      6  6.804190
## 2025     11 10.588909
## 2026     10 10.861140
## 2027      9 10.706549
## 2042      8  6.781747
## 2056      9  8.018473
## 2059      8 10.142421
## 2061      9  9.594725
## 2062      8  9.624778
## 2063      7  8.610177
## 2087     10  8.439335
## 2089     12 14.369715
## 2092      8  8.983303
## 2094     11 10.641516
## 2095      7  7.745337
## 2096      7  8.182131
## 2101     10  7.584714
## 2102     19 10.593931
## 2122      9  8.888461
## 2134      9  9.154423
## 2140     14 14.061573
## 2143      9  8.258382
## 2152     18 13.408911
## 2153      8  8.680441
## 2158     17 17.194049
## 2159     14 12.111868
## 2160     12 11.638625
## 2162     17 20.460637
## 2165      5  8.525636
## 2166      9 11.915356
## 2173      5  5.256732
## 2178     14 16.294606
## 2179     15 13.494140
## 2186      9  8.971607
## 2188     14 12.108716
## 2194      6  5.676912
## 2195     13  8.331627
## 2205      7  9.204712
## 2214     17 16.273201
## 2216     10 10.567001
## 2227      7  7.687826
## 2228      7  6.232106
## 2234     17 12.952616
## 2235     14 12.662330
## 2246      9 10.672918
## 2260     13 10.158435
## 2265     14 12.781653
## 2267     14 10.748068
## 2270     12 10.478428
## 2271     18 11.011274
## 2274     13 13.282517
## 2275     15 17.010787
## 2285      9  7.424731
## 2289      6  7.252896
## 2290     10 10.696406
## 2296     10 10.946467
## 2301     10  9.960606
## 2308     16 10.817888
## 2310     13 10.092185
## 2317     13 11.890325
## 2320     15 10.204886
## 2326     10  9.267022
## 2335     23 16.960712
## 2338     11 10.432662
## 2353     18 13.672106
## 2363     18 14.689427
## 2367     19 11.651651
## 2371      8  7.868372
## 2382      5  5.562014
## 2390     11  9.661488
## 2394      7  6.519931
## 2409     15 11.051281
## 2418     11  7.358199
## 2419     18  9.834656
## 2441     13  9.585168
## 2445      9  9.302458
## 2455      5  5.956171
## 2456      6  5.738276
## 2457      5  5.776479
## 2466     11 10.293772
## 2480     14 12.923775
## 2487     13 12.358794
## 2489     14 11.486310
## 2494      8  7.657203
## 2496     12 11.659498
## 2518      7  8.258835
## 2520      7  9.201047
## 2522      9  9.499224
## 2523     11 11.174943
## 2524      9  9.739275
## 2525     11 11.283486
## 2537     12 14.421743
## 2548      5  5.766462
## 2549      4  5.998115
## 2555      7  7.276423
## 2556      6  7.186696
## 2557      6  6.997199
## 2558      7  7.554676
## 2560      7  7.769673
## 2564      7  7.850244
## 2569      7  7.734797
## 2570      6  7.814960
## 2571      7  7.624579
## 2575      8  8.792749
## 2576      7  9.072227
## 2585      9 10.127650
## 2589      8  9.831923
## 2590      7 10.326949
## 2599      9 10.891222
## 2611      9  9.776627
## 2624     10 14.263878
## 2640      6  7.758670
## 2643      7  8.272257
## 2649      9  8.754987
## 2655      9  8.568853
## 2660      8  8.354643
## 2667      8  9.691892
## 2677     10 10.884506
## 2681     10 10.602308
## 2690      9 11.175791
## 2692      9 10.327090
## 2705     10  9.731108
## 2710     12 11.284225
## 2717      6  6.426300
## 2728      7  8.043763
## 2732      8  7.941070
## 2742      8  7.237358
## 2743      8  7.895881
## 2754      8  8.663649
## 2765      9  8.531230
## 2770     11 10.594377
## 2795     10 10.132635
## 2802     10  9.473948
## 2803      9 11.217387
## 2809      9 10.523886
## 2812      9  6.325169
## 2815      4  5.915208
## 2816      7  6.197617
## 2833      9  9.344475
## 2834      9  9.176475
## 2847     10  9.599828
## 2849      9 10.420289
## 2856     11 12.040669
## 2858     11  8.879460
## 2873      8  7.255515
## 2876      7  8.510941
## 2877      7  7.006839
## 2884      8  8.370469
## 2893     10 10.410300
## 2896     10  8.613680
## 2905      9  8.583979
## 2906      8  9.046058
## 2909      9  9.570726
## 2913      9  9.575278
## 2914      9 11.543247
## 2916     10  9.046743
## 2919      9  8.522698
## 2920     10 11.217070
## 2929     11  9.769818
## 2934     10 10.560358
## 2947     11 11.141030
## 2951     11 10.746247
## 2954     11 11.144961
## 2955     12 12.387925
## 2972     11  8.916799
## 2974     12 13.242343
## 2975     12 10.588972
## 2980      8  8.879314
## 2995      9 11.785380
## 2997     10 10.809674
## 3001      9 10.698420
## 3005     11 10.376602
## 3009     12 18.958591
## 3013      7  7.591085
## 3017      7  8.259178
## 3022      9  8.895505
## 3029      7  8.789159
## 3050      9 10.305162
## 3054     11 11.579916
## 3056     11 12.349783
## 3057     11 12.091430
## 3061     11 12.770913
## 3067      9 10.905143
## 3068     11 11.065766
## 3073     10  9.609398
## 3074     10 13.386831
## 3079     11 13.229777
## 3101     10  8.312971
## 3103      8 10.525343
## 3105     10 12.544075
## 3107      5  6.060319
## 3120     10  9.996388
## 3127     11 14.074199
## 3129     10 10.726157
## 3137     11 11.001217
## 3148     13 11.874115
## 3149     12 16.621737
## 3153     11  8.600467
## 3157     10 13.522481
## 3159      9  9.932075
## 3161      7  6.003874
## 3173     11 11.039602
## 3185     13 13.668541
## 3191      5  5.089845
## 3194     20 13.534392
## 3196     18 15.070055
## 3200      9 10.169387
## 3202      6  8.503686
## 3216     11 11.677206
## 3218     10  9.473075
## 3220     16 15.173706
## 3224      8 12.225864
## 3225     16 16.294223
## 3260     18 12.100599
## 3263     14 11.835484
## 3264     17 10.494854
## 3275     18 11.239218
## 3281     24 16.944385
## 3285     11 11.026934
## 3294     13  9.968383
## 3296     14 10.575948
## 3300     16 13.117602
## 3310      9  8.415122
## 3311     13 12.256608
## 3314     11  8.674903
## 3317     16 10.505878
## 3319      4  5.801795
## 3321     16 11.560972
## 3323     15  9.364891
## 3325     12  7.060753
## 3327     12 10.484173
## 3328     16 10.780678
## 3332     11  9.335354
## 3337     11  9.509217
## 3341     17  9.354545
## 3347     14 11.355378
## 3360     20 13.398773
## 3364     10  9.160746
## 3368     17 13.979060
## 3369     13 11.530885
## 3374     19 12.994885
## 3377      6  6.720338
## 3379      8  7.826498
## 3388     18 12.258606
## 3390     16 12.389554
## 3391     10  9.586183
## 3394     10 12.276362
## 3402     18 12.313864
## 3409      7  7.470499
## 3421     10 13.241197
## 3423      9  9.909126
## 3437      6  7.338226
## 3440      8  8.072962
## 3443      8  7.946318
## 3448      8  9.268215
## 3450      7  8.623563
## 3451      7  9.895601
## 3458     10 11.500519
## 3468     10 12.579447
## 3470     10 14.812639
## 3482      6  7.081881
## 3488      8  7.999124
## 3495      9  9.183388
## 3502     12 11.141903
## 3508     11 10.503986
## 3509      9 10.361077
## 3510     10 10.040229
## 3515     10 10.108628
## 3517     11 12.084856
## 3532      7  6.899117
## 3536      9  6.981068
## 3539      7  8.791757
## 3545      9  8.234035
## 3549      7  8.985363
## 3551      8  9.212352
## 3552      9  8.573706
## 3554      9  8.954673
## 3562      9  9.189945
## 3564      8 10.580123
## 3567      9 11.123949
## 3569     10  9.298418
## 3571      9  9.737657
## 3572      9 10.751609
## 3575      9 10.687814
## 3576      8 10.776580
## 3579     10 10.703540
## 3581     10 10.662204
## 3584      9 11.161329
## 3588     10  9.950954
## 3590     11 11.511593
## 3594      8 13.471050
## 3600     12  9.531532
## 3603      5  6.991752
## 3608      8  8.578927
## 3610      9  8.278574
## 3619     11 11.073319
## 3620     12 11.078057
## 3627     10 13.032492
## 3643      8  7.939319
## 3646      9  8.161229
## 3655      9  9.803614
## 3656      9  9.613091
## 3659     10  9.992170
## 3666      9  7.649807
## 3670      9 10.329041
## 3672     10  9.666241
## 3673     10  8.970916
## 3677     10 10.737649
## 3682     11 10.292242
## 3683     11  8.889262
## 3700     11 12.147432
## 3710     10 10.800813
## 3713     10 13.044719
## 3714     11  5.602486
## 3721      7  7.880806
## 3727      8  9.360802
## 3730      9  8.527402
## 3732      9 10.155298
## 3740      9  9.255070
## 3741     11  9.194634
## 3747      7  7.297071
## 3752      7  7.622408
## 3754      8  8.405440
## 3758     10  8.640575
## 3763      8  8.893725
## 3764     11  9.616334
## 3767     10 11.321860
## 3768     10 11.882005
## 3777     13 10.164144
## 3780     10 10.538527
## 3788     12 13.316325
## 3789      8 11.087961
## 3792     11 11.945851
## 3795     10 11.384639
## 3803      6  6.546248
## 3810      8  9.943292
## 3811      8  9.076140
## 3826     11 12.635452
## 3828     11 18.276410
## 3834     11 10.309915
## 3840      8  8.879594
## 3848      9  9.173670
## 3858     16 15.780711
## 3860      9 10.980630
## 3866     19 12.177083
## 3872     12  9.129708
## 3887      7  9.625416
## 3889     12  9.275766
## 3898     14 15.485531
## 3902     15  9.177116
## 3904      4  4.672602
## 3906     16 13.508525
## 3907      6  8.167598
## 3912     13 10.064449
## 3913     10  7.541120
## 3914     15  9.407773
## 3917     11  9.391731
## 3924     10  8.379368
## 3937      5  5.934926
## 3947     20 13.784769
## 3948      6  5.747332
## 3961     10  9.805682
## 3964     11 12.502724
## 3969      6  6.273202
## 3976      8  8.042724
## 3986      8 10.459417
## 3993      8 11.130625
## 3999      4  5.138119
## 4010     10  9.645200
## 4018      8  9.506157
## 4021     10  7.350731
## 4027     11 11.140564
## 4034      6  6.828775
## 4037      7  7.521978
## 4039      8  7.572003
## 4044     10  8.769200
## 4046     11  9.353512
## 4047     12 10.276833
## 4058     10 11.156631
## 4061     10 11.612528
## 4062      9 11.098543
## 4068     10 11.214523
## 4089      9  8.898034
## 4097     11 11.347037
## 4110     11 12.970208
## 4128      4  6.173584
## 4131      8  8.855758
## 4132      8  8.820237
## 4137     10  9.154696
## 4139     10  9.909075
## 4152     13 11.708512
## 4158     11 13.327851
## 4161      6  6.376446
## 4169      8  8.258059
## 4175      7  7.277003
## 4183     11 10.593591

Model’s Performance Evaluation

# Mean squared error between the Actual and Predicted columns of modelEval,
# and its square root (RMSE); both values are printed.
mse <- with(modelEval, mean((Actual - Predicted)^2))
rmse <- sqrt(mse)
print(mse); print(rmse)
## [1] 5.518349
## [1] 2.349117