# Split the genres string into one 0/1 indicator column per genre.
# Each pattern is matched as a substring of `genres`, so a movie tagged with
# several genres gets a 1 in every matching column.
genre.split <- movie %>%
  select(genres, imdb_score) %>%
  mutate(
    Action = as.numeric(grepl("Action", genres)),
    Adventure = as.numeric(grepl("Adventure", genres)),
    Animation = as.numeric(grepl("Animation", genres)),
    Biography = as.numeric(grepl("Biography", genres)),
    Comedy = as.numeric(grepl("Comedy", genres)),
    Crime = as.numeric(grepl("Crime", genres)),
    Documentary = as.numeric(grepl("Documentary", genres)),
    Drama = as.numeric(grepl("Drama", genres)),
    Family = as.numeric(grepl("Family", genres)),
    Fantasy = as.numeric(grepl("Fantasy", genres)),
    `Film-Noir` = as.numeric(grepl("Film-Noir", genres)),
    History = as.numeric(grepl("History", genres)),
    Horror = as.numeric(grepl("Horror", genres)),
    Musical = as.numeric(grepl("Musical", genres)),
    Mystery = as.numeric(grepl("Mystery", genres)),
    News = as.numeric(grepl("News", genres)),
    Romance = as.numeric(grepl("Romance", genres)),
    `Sci-Fi` = as.numeric(grepl("Sci-Fi", genres)),
    Short = as.numeric(grepl("Short", genres)),
    Sport = as.numeric(grepl("Sport", genres)),
    War = as.numeric(grepl("War", genres)),
    Western = as.numeric(grepl("Western", genres))
  )

# Genre-wise mean IMDB score, drawn as horizontal bars sorted by mean score.
# The indicator columns are stacked into long form, only rows where the movie
# belongs to the genre are kept, and the score is averaged per genre.
genre.split %>%
  tidyr::gather(Genre_Type, Binary, Action:Western) %>%
  filter(Binary == 1) %>%
  select(-c(Binary, genres)) %>%
  group_by(Genre_Type) %>%
  summarise(Mean_Score = mean(imdb_score)) %>%
  # Sorting the rows does not order a discrete axis (a character column is
  # plotted alphabetically), so the ordering is done with reorder() instead.
  ggplot(aes(x = reorder(Genre_Type, Mean_Score), y = Mean_Score, fill = Genre_Type)) +
  geom_bar(stat = "identity", color = "black") +
  xlab("Genre_Type") + # keep the axis title the plain column name
  coord_flip()

# Removing Genres
movie <- movie %>% select(-genres)
# Missing-value summary per variable via aggr(). Logical flags are spelled
# out as TRUE/FALSE because T and F are ordinary, reassignable variables.
missing.values <- aggr(
  movie,
  sortVars = TRUE, prop = TRUE, sortCombs = TRUE,
  cex.lab = 1.5, cex.axis = .6, cex.numbers = 5,
  combined = FALSE, gap = -.2
)

## 
##  Variables sorted by number of missings: 
##                   Variable       Count
##                      gross 0.174869948
##                     budget 0.097438976
##               aspect_ratio 0.065426170
##                 title_year 0.021408563
##    director_facebook_likes 0.020608243
##     num_critic_for_reviews 0.009803922
##     actor_3_facebook_likes 0.004601841
##       num_user_for_reviews 0.004201681
##                   duration 0.003001200
##       facenumber_in_poster 0.002601040
##     actor_2_facebook_likes 0.002601040
##     actor_1_facebook_likes 0.001400560
##                      color 0.000000000
##              director_name 0.000000000
##               actor_2_name 0.000000000
##               actor_1_name 0.000000000
##                movie_title 0.000000000
##            num_voted_users 0.000000000
##  cast_total_facebook_likes 0.000000000
##               actor_3_name 0.000000000
##              plot_keywords 0.000000000
##            movie_imdb_link 0.000000000
##                   language 0.000000000
##                    country 0.000000000
##             content_rating 0.000000000
##                 imdb_score 0.000000000
##       movie_facebook_likes 0.000000000

Imputation with the column mean has been done for some of the predictors, such as social media likes for actors and directors, and the zeros in the predictor columns have been converted to NAs. The data now contains 3857 observations with 26 variables. We remove the observations that have no values, as we don’t have any information about them. Further cleaning of the content ratings column is needed to decrease the number of categories: the M and GP categories are clubbed into the PG category, and X becomes part of the NC-17 category. Categories like Approved, Not Rated, Unrated or Passed are clubbed into the R category. These are the final cleaning steps. Adding the Profit column as the difference between Gross Income and Budget: Profit = Gross – Budget. Removing the color column, as most of the movies (~96%+) are in color and less than 4% are black & white. Removing the language column, as most of the movies (~95%+) are in English and less than 5% are in other languages. We clean the country column as well by making 3 specific categories, i.e. USA, UK and other regions. Most of the movies are produced in the USA (79%, 3025 observations), then the UK (8%, 316 observations), and finally the Others category with 465 observations. The final, cleaned dataset has 3806 observations with 26 predictors.

# Profit (gross minus budget) and percentage return on investment
movie <- mutate(
  movie,
  profit = gross - budget,
  return_on_investment_perc = 100 * (profit / budget)
)

# Drop the color and language columns in a single step
movie <- subset(movie, select = -c(color, language))

# Collapse country into 3 categories: USA, UK and Others.
# NOTE(review): the levels<- assignment assumes movie$country is a factor.
# "Others" must be registered as a level before it can be assigned.
levels(movie$country) <- append(levels(movie$country), "Others")
movie$country[!(movie$country == "USA" | movie$country == "UK")] <- "Others"
# Re-create the factor so the now-unused original country levels are dropped
movie$country <- factor(movie$country)
## Distribution of the IMDB score, with the overall mean marked in blue
ggplot(movie, aes(x = imdb_score)) +
  geom_density(fill = "red", alpha = 0.6) +
  geom_vline(xintercept = mean(movie$imdb_score), color = "blue") +
  coord_cartesian(xlim = c(0, 10))

# Titles and profit of the 20 most profitable movies.
# top_n() is called without a weight column, so it ranks by the last column
# of the selected data, which is profit.
profit.movie <- movie %>%
  filter(!is.na(profit)) %>%
  select(movie_title, profit) %>%
  arrange(desc(profit)) %>%
  top_n(n = 20)
## Selecting by profit
# Horizontal bar chart of the top movies, ordered by profit in millions
p1 <- ggplot(
  profit.movie,
  aes(
    x = reorder(movie_title, profit / 1e6),
    y = profit / 1e6,
    fill = factor(movie_title)
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw() +
  ggtitle("Top Profitable Movies") +
  xlab("Movie Name") +
  ylab("Profit in Million $")

p1

# Budget against profit (both in millions) for the 20 most profitable movies,
# with a smoothed trend line and non-overlapping title labels
movie %>%
  # filter(title_year %in% c(2000:2016)) %>%
  arrange(desc(profit)) %>%
  top_n(n = 20, wt = profit) %>%
  ggplot(aes(x = budget / 1e6, y = profit / 1e6)) +
  geom_point(size = 3) +
  geom_smooth(size = 2) +
  geom_text_repel(aes(label = movie_title)) +
  xlab("Budget in Million $") +
  ylab("Profit in Million $") +
  ggtitle("Top 20 Profitable Movies") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Budget against percentage return on investment for the 20 most profitable
# movies. profit and return_on_investment_perc are already columns of movie
# (added by the earlier profit mutate() in this script), so they are reused
# here instead of being recomputed with the same formulas.
movie %>%
  arrange(desc(profit)) %>%
  top_n(20, profit) %>%
  ggplot(aes(x = budget / 1000000, y = return_on_investment_perc)) +
  geom_point(size = 3) +
  geom_smooth(size = 2) +
  geom_text_repel(aes(label = movie_title), size = 3) +
  xlab("Budget in Million $") +
  ylab("Percentage Return on Investment")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# IMDB score against number of voters, coloured by content rating, with one
# linear trend line per rating; rendered interactively through ggplotly()
p <- ggplot(
  movie,
  aes(x = imdb_score, y = num_voted_users, group = content_rating)
) +
  geom_point(aes(color = content_rating), size = 0.7) +
  geom_smooth(aes(color = content_rating), se = FALSE, method = lm) +
  scale_color_brewer(palette = "Dark2") +
  labs(x = "IMDB Score", y = "Number of Voters", color = "Rating\n")
ggplotly(p)
# Commercial success vs. critical acclaim for the 20 most profitable movies:
# IMDB score against gross (millions), point size = profit, colour = rating.
movie %>%
  top_n(20, profit) %>%
  ggplot(aes(x = imdb_score, y = gross / 10^6, size = profit / 10^6, color = content_rating)) +
  geom_point() +
  # The reference lines are constants, so they are passed as plain arguments;
  # wrapped in aes() they are evaluated against the plot data and one
  # overlapping line is drawn for every row.
  geom_hline(yintercept = 550) +
  geom_vline(xintercept = 7.75) +
  geom_text_repel(aes(label = movie_title), size = 4) +
  xlab("IMDB Score") +
  ylab("Gross Money Earned(in million dollars)") +
  ggtitle("Commercial Success Vs Critical Acclaim") +
  # Label for the upper-right quadrant delimited by the two reference lines
  annotate("text", x = 8.5, y = 700, label = "High IMDB Score & High Gross", size = 5) +
  theme(plot.title = element_text(hjust = 0.5))

# Time series of the mean IMDB score per release year
imdb.ts <- movie %>%
  select(title_year, imdb_score) %>%
  group_by(title_year) %>%
  summarise(IMDB_Rating = mean(imdb_score))

# Yearly means as points joined by a line, plus a red smoothed trend
plot.ts1 <- ggplot(imdb.ts, aes(x = title_year, y = IMDB_Rating)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  geom_smooth(col = "red") +
  xlab("Year of Release") +
  ylab("IMDB Rating")
ggplotly(plot.ts1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Time series of the mean percentage return on investment per release year
roi.ts <- movie %>%
  select(title_year, return_on_investment_perc) %>%
  group_by(title_year) %>%
  summarise(ROI = mean(return_on_investment_perc))

# Yearly means as points joined by a line, plus a green smoothed trend
plot.ts2 <- ggplot(roi.ts, aes(x = title_year, y = ROI)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  geom_smooth(col = "green") +
  xlab("Year of Release") +
  ylab("Return on Investment")
ggplotly(plot.ts2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Mean IMDB score per director, keeping the top 20 (ties included).
# top_n() has no weight column given, so it ranks by the last column,
# Average_IMDB_Rating.
director.imdb <- movie %>%
  select(director_name, imdb_score) %>%
  group_by(director_name) %>%
  summarise(Average_IMDB_Rating = mean(imdb_score)) %>%
  arrange(desc(Average_IMDB_Rating)) %>%
  top_n(n = 20)
## Selecting by Average_IMDB_Rating
# Convert to a plain data frame and expose director_name as "Director".
# Average_IMDB_Rating already has its final name, so it needs no rename.
director.df <- as.data.frame(director.imdb)
names(director.df)[names(director.df) == "director_name"] <- "Director"
# Formatted table with a light-green bar behind each average rating
director.table <- formattable(
  director.df,
  list(Average_IMDB_Rating = color_bar("lightgreen"))
)
director.table
Director Average_IMDB_Rating
Akira Kurosawa 8.700000
Charles Chaplin 8.600000
Tony Kaye 8.600000
Alfred Hitchcock 8.500000
Damien Chazelle 8.500000
Majid Majidi 8.500000
Ron Fricke 8.500000
Sergio Leone 8.433333
Christopher Nolan 8.425000
Asghar Farhadi 8.400000
Marius A. Markevicius 8.400000
Richard Marquand 8.400000
Billy Wilder 8.300000
Fritz Lang 8.300000
Lee Unkrich 8.300000
Lenny Abrahamson 8.300000
Pete Docter 8.233333
Hayao Miyazaki 8.225000
Elia Kazan 8.200000
George Roy Hill 8.200000
Joshua Oppenheimer 8.200000
Juan José Campanella 8.200000
Quentin Tarantino 8.200000
# IMDB score against number of user reviews, coloured by country, with one
# linear trend per country. ylim() limits the y scale to 0-1500.
imdb.user <- ggplot(
  movie,
  aes(x = imdb_score, y = num_user_for_reviews, colour = factor(country))
) +
  geom_point(aes(colour = factor(country)), size = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  ylim(0, 1500) +
  labs(x = "IMDB Score", y = "Number of User Reviews", color = "Country\n")

ggplotly(imdb.user)
## Warning: Removed 86 rows containing non-finite values (stat_smooth).
# Lead-actor Facebook likes against movie Facebook likes with a linear fit
# and its confidence band; both axes are limited by xlim()/ylim()
ggplot(movie, aes(x = actor_1_facebook_likes, y = movie_facebook_likes)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  xlim(0, 50000) +
  ylim(0, 200000)
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).

We want to find out whether we can include director and actor names in our prediction algorithm. There are 1709 unique directors and 3713 actors. Our model would become too complex if we included these names in our final algorithm, so we remove these columns. Earlier we added two additional columns, i.e. profit and return on investment. We will be removing these columns to avoid multicollinearity. Features like movie link and plot keywords will also be removed, as they are inessential for making predictions. We finally visualize the correlation between numeric variables to check for highly correlated variables. It can be observed that certain variables are highly correlated, e.g. for actor 1 Facebook likes and total cast Facebook likes the correlation is 0.95. Similarly, the number of users who voted is highly correlated with the number of user reviews. To keep the analysis simple, we remove the total cast Facebook likes and keep 2 features: actor 1 Facebook likes and other actors Facebook likes. This new column is the sum of actor 2 and actor 3 Facebook likes. We also create a new feature called critical review ratio, which is the ratio of the number of critic reviews to the number of user reviews. We finally remove all the unnecessary columns: total cast Facebook likes, actor 2 Facebook likes, actor 3 Facebook likes, number of critic reviews and number of user reviews. Finally, we make 4 categories based on the imdb_score variable, i.e. LOW, MEDIUM, HIGH, EXCELLENT. We also remove the imdb_score variable, as it has no meaning as a predictor once the categories have been derived from it. The final cleaned data set contains 3806 observations and 14 variables.

# Correlation matrix plot of the columns of movie, with coefficients printed
# to 3 decimals and a centred title
ggcorr(
  movie,
  label = TRUE,
  label_round = 3,
  label_size = 3,
  size = 2,
  hjust = .85
) +
  ggtitle("Correlation between continuous variables") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning in ggcorr(movie, label = TRUE, label_round = 3, label_size = 3, :
## data in column(s) 'country', 'content_rating' are not numeric and were
## ignored

# Add two derived features, then drop the columns they replace:
#   other_actor_facebook_likes = actor 2 likes + actor 3 likes
#   critic_total_ratio         = critic reviews / user reviews
movie <- movie %>%
  mutate(
    other_actor_facebook_likes = actor_2_facebook_likes + actor_3_facebook_likes,
    critic_total_ratio = num_critic_for_reviews / num_user_for_reviews
  ) %>%
  select(
    -cast_total_facebook_likes,
    -actor_2_facebook_likes,
    -actor_3_facebook_likes,
    -num_critic_for_reviews,
    -num_user_for_reviews
  )

# Bin imdb_score into four ordered categories. cut() uses right-closed
# intervals by default: (0,4] LOW, (4,7] MEDIUM, (7,9] HIGH, (9,10] EXCELLENT.
movie <- movie %>%
  mutate(
    Rating_Category = cut(
      imdb_score,
      breaks = c(0, 4, 7, 9, 10),
      labels = c("LOW", "MEDIUM", "HIGH", "EXCELLENT")
    )
  )

# Modelling data set: the raw score is dropped, the category stays as target
movie.final <- movie %>% select(-imdb_score)
## Splitting Data: 80% training / 20% test, partitioned on Rating_Category
set.seed(13469385)
training.samples <- createDataPartition(
  movie.final$Rating_Category,
  p = 0.8,
  list = FALSE
)
train.data <- movie.final[training.samples, ]
test.data <- movie.final[-training.samples, ]

## Multinomial Logistic Regression
# Fit the model on every remaining predictor.
# multinom() forwards extra arguments to nnet(), whose default iteration cap
# is maxit = 100. The fit was stopping at that cap without converging, so
# the cap is raised to let the optimizer finish.
model.multi <- nnet::multinom(Rating_Category ~ ., data = train.data, maxit = 1000)
## # weights:  76 (54 variable)
## initial  value 4222.652624 
## iter  10 value 3204.940875
## iter  20 value 2779.730790
## iter  30 value 2433.508868
## iter  40 value 1854.626103
## iter  50 value 1840.991293
## iter  60 value 1799.565188
## iter  70 value 1773.856065
## iter  80 value 1757.618743
## iter  90 value 1707.639100
## iter 100 value 1701.554271
## final  value 1701.554271 
## stopped after 100 iterations
# Coefficient table of the multinomial model: printed as-is, then as a
# formattable table
model.multi %>% tidy()
model.multi %>% tidy() %>% formattable()
y.level term estimate std.error statistic p.value
MEDIUM (Intercept) 0.9984410 1.369488e-11 -1.139293e+08 0.000000e+00
MEDIUM duration 1.0078225 1.496031e-09 5.208489e+06 0.000000e+00
MEDIUM director_facebook_likes 1.0004594 1.530147e-08 3.001488e+04 0.000000e+00
MEDIUM actor_1_facebook_likes 1.0000174 1.202104e-06 1.447013e+01 1.871226e-47
MEDIUM gross 1.0000000 3.870099e-09 1.326953e+00 1.845242e-01
MEDIUM num_voted_users 1.0000102 6.811821e-07 1.501308e+01 6.028664e-51
MEDIUM facenumber_in_poster 1.0499538 2.601767e-11 1.873580e+09 0.000000e+00
MEDIUM countryUSA 1.1217744 1.105256e-11 1.039684e+10 0.000000e+00
MEDIUM countryOthers 0.9358600 1.494668e-12 -4.435057e+10 0.000000e+00
MEDIUM content_ratingNC-17 0.9975295 3.586818e-14 -6.896173e+10 0.000000e+00
MEDIUM content_ratingPG 0.9479850 2.146214e-12 -2.488877e+10 0.000000e+00
MEDIUM content_ratingPG-13 1.1302067 5.016445e-12 2.439986e+10 0.000000e+00
MEDIUM content_ratingR 0.9593154 6.941376e-12 -5.983736e+09 0.000000e+00
MEDIUM budget 1.0000000 2.396464e-09 3.643075e-01 7.156284e-01
MEDIUM title_year 1.0007604 2.749907e-08 2.763964e+04 0.000000e+00
MEDIUM movie_facebook_likes 1.0000016 3.464553e-07 4.517580e+00 6.255028e-06
MEDIUM other_actor_facebook_likes 1.0000499 1.200430e-07 4.155244e+02 0.000000e+00
MEDIUM critic_total_ratio 1.0049864 1.427480e-11 3.484442e+08 0.000000e+00
HIGH (Intercept) 1.0015504 1.339715e-11 1.156340e+08 0.000000e+00
HIGH duration 1.0262821 1.468151e-09 1.767027e+07 0.000000e+00
HIGH director_facebook_likes 1.0005113 1.520479e-08 3.361597e+04 0.000000e+00
HIGH actor_1_facebook_likes 1.0000183 1.196216e-06 1.529562e+01 8.178162e-53
HIGH gross 1.0000000 3.924285e-09 -1.918451e+00 5.505385e-02
HIGH num_voted_users 1.0000227 5.540170e-07 4.094660e+01 0.000000e+00
HIGH facenumber_in_poster 0.9200265 2.548731e-11 -3.270367e+09 0.000000e+00
HIGH countryUSA 0.8865607 1.081852e-11 -1.112959e+10 0.000000e+00
HIGH countryOthers 1.0739334 1.439854e-12 4.953834e+10 0.000000e+00
HIGH content_ratingNC-17 1.0035034 3.528688e-14 9.910894e+10 0.000000e+00
HIGH content_ratingPG 1.0430275 2.051971e-12 2.053030e+10 0.000000e+00
HIGH content_ratingPG-13 0.8558075 4.921985e-12 -3.163556e+10 0.000000e+00
HIGH content_ratingR 1.0889115 6.663616e-12 1.278264e+10 0.000000e+00
HIGH budget 1.0000000 2.407617e-09 3.741992e-01 7.082561e-01
HIGH title_year 0.9990805 2.690119e-08 -3.419637e+04 0.000000e+00
HIGH movie_facebook_likes 1.0000146 3.400848e-07 4.283182e+01 0.000000e+00
HIGH other_actor_facebook_likes 1.0000306 1.192569e-07 2.569871e+02 0.000000e+00
HIGH critic_total_ratio 1.0587263 1.394240e-11 4.093025e+09 0.000000e+00
EXCELLENT (Intercept) 1.0000045 1.379445e-13 3.263422e+07 0.000000e+00
EXCELLENT duration 1.0623104 5.343790e-12 1.131148e+10 0.000000e+00
EXCELLENT director_facebook_likes 0.9985708 5.068228e-10 -2.821886e+06 0.000000e+00
EXCELLENT actor_1_facebook_likes 0.9998499 7.239411e-09 -2.073773e+04 0.000000e+00
EXCELLENT gross 1.0000000 9.612253e-09 -1.407733e+00 1.592103e-01
EXCELLENT num_voted_users 1.0000291 9.760376e-07 2.976378e+01 1.150413e-194
EXCELLENT facenumber_in_poster 0.9829743 6.256249e-13 -2.744817e+10 0.000000e+00
EXCELLENT countryUSA 1.0011202 2.463254e-13 4.545281e+09 0.000000e+00
EXCELLENT countryOthers 0.9995858 5.407209e-14 -7.661813e+09 0.000000e+00
EXCELLENT content_ratingNC-17 0.9999316 8.956466e-16 -7.634757e+10 0.000000e+00
EXCELLENT content_ratingPG 1.0003737 1.504262e-13 2.483547e+09 0.000000e+00
EXCELLENT content_ratingPG-13 1.0008016 1.300159e-13 6.162703e+09 0.000000e+00
EXCELLENT content_ratingR 0.9996138 6.319010e-13 -6.112820e+08 0.000000e+00
EXCELLENT budget 0.9999998 8.548225e-08 -1.786536e+00 7.401257e-02
EXCELLENT title_year 0.9951730 2.785500e-10 -1.737095e+07 0.000000e+00
EXCELLENT movie_facebook_likes 1.0000080 4.287546e-08 1.873724e+02 0.000000e+00
EXCELLENT other_actor_facebook_likes 1.0001301 4.355748e-09 2.987372e+04 0.000000e+00
EXCELLENT critic_total_ratio 0.9986556 3.465287e-13 -3.882211e+09 0.000000e+00
# Full model summary: coefficients, standard errors, deviance and AIC
model.multi %>% summary()
## Call:
## nnet::multinom(formula = Rating_Category ~ ., data = train.data)
## 
## Coefficients:
##             (Intercept)    duration director_facebook_likes
## MEDIUM    -1.560249e-03 0.007792058            0.0004592718
## HIGH       1.549165e-03 0.025942624            0.0005111236
## EXCELLENT  4.501712e-06 0.060446204           -0.0014301964
##           actor_1_facebook_likes         gross num_voted_users
## MEDIUM              1.739460e-05  5.135441e-09    1.022664e-05
## HIGH                1.829686e-05 -7.528549e-09    2.268511e-05
## EXCELLENT          -1.501289e-04 -1.353148e-08    2.905056e-05
##           facenumber_in_poster   countryUSA countryOthers
## MEDIUM              0.04874620  0.114911699 -0.0662893552
## HIGH               -0.08335285 -0.120405702  0.0713279718
## EXCELLENT          -0.01717226  0.001119618 -0.0004142902
##           content_ratingNC-17 content_ratingPG content_ratingPG-13
## MEDIUM          -2.473532e-03    -0.0534166416        0.1224005568
## HIGH             3.497245e-03     0.0421275878       -0.1557097853
## EXCELLENT       -6.838044e-05     0.0003735906        0.0008012495
##           content_ratingR        budget    title_year movie_facebook_likes
## MEDIUM      -0.0415353616  8.730499e-10  0.0007600643         1.565140e-06
## HIGH         0.0851785855  9.009284e-10 -0.0009199230         1.456645e-05
## EXCELLENT   -0.0003862697 -1.527171e-07 -0.0048386777         8.033679e-06
##           other_actor_facebook_likes critic_total_ratio
## MEDIUM                  4.988078e-05        0.004973972
## HIGH                    3.064750e-05        0.057066587
## EXCELLENT               1.301224e-04       -0.001345298
## 
## Std. Errors:
##            (Intercept)     duration director_facebook_likes
## MEDIUM    1.369488e-11 1.496031e-09            1.530147e-08
## HIGH      1.339715e-11 1.468151e-09            1.520479e-08
## EXCELLENT 1.379445e-13 5.343790e-12            5.068228e-10
##           actor_1_facebook_likes        gross num_voted_users
## MEDIUM              1.202104e-06 3.870099e-09    6.811821e-07
## HIGH                1.196216e-06 3.924285e-09    5.540170e-07
## EXCELLENT           7.239411e-09 9.612253e-09    9.760376e-07
##           facenumber_in_poster   countryUSA countryOthers
## MEDIUM            2.601767e-11 1.105256e-11  1.494668e-12
## HIGH              2.548731e-11 1.081852e-11  1.439854e-12
## EXCELLENT         6.256249e-13 2.463254e-13  5.407209e-14
##           content_ratingNC-17 content_ratingPG content_ratingPG-13
## MEDIUM           3.586818e-14     2.146214e-12        5.016445e-12
## HIGH             3.528688e-14     2.051971e-12        4.921985e-12
## EXCELLENT        8.956466e-16     1.504262e-13        1.300159e-13
##           content_ratingR       budget   title_year movie_facebook_likes
## MEDIUM       6.941376e-12 2.396464e-09 2.749907e-08         3.464553e-07
## HIGH         6.663616e-12 2.407617e-09 2.690119e-08         3.400848e-07
## EXCELLENT    6.319010e-13 8.548225e-08 2.785500e-10         4.287546e-08
##           other_actor_facebook_likes critic_total_ratio
## MEDIUM                  1.200430e-07       1.427480e-11
## HIGH                    1.192569e-07       1.394240e-11
## EXCELLENT               4.355748e-09       3.465287e-13
## 
## Residual Deviance: 3403.109 
## AIC: 3511.109
# Predict the rating category of every held-out row
predicted.classes <- predict(model.multi, test.data)
head(predicted.classes)
## [1] HIGH   HIGH   MEDIUM MEDIUM HIGH   MEDIUM
## Levels: LOW MEDIUM HIGH EXCELLENT
# Accuracy: share of test rows whose predicted category equals the actual one
mean(predicted.classes == test.data$Rating_Category)
## [1] 0.7460526
# Classification tree on all predictors, plus its complexity-parameter plot
rpart.fit <- rpart(
  formula = Rating_Category ~ .,
  data = train.data,
  method = "class"
)
plotcp(rpart.fit)

# Prune at cp = 0.01 and draw the pruned tree
rpart.fit.2 <- prune.rpart(rpart.fit, cp = 0.01)
rpart.plot(rpart.fit.2, extra = 104)

# Prediction on the held-out data with the pruned tree; rows of the table are
# the actual categories, columns the predicted ones
predict_unseen <- predict(rpart.fit.2, newdata = test.data, type = "class")
table_mat <- table(test.data$Rating_Category, predict_unseen)
table_mat
##            predict_unseen
##             LOW MEDIUM HIGH EXCELLENT
##   LOW         0     19    0         0
##   MEDIUM      0    481   30         0
##   HIGH        0    135   95         0
##   EXCELLENT   0      0    0         0
# Accuracy = correctly classified rows (the diagonal) over all rows
accuracy_Test <- sum(diag(table_mat)) / sum(table_mat)
print(paste("Accuracy for test", accuracy_Test))
## [1] "Accuracy for test 0.757894736842105"
# Hyperparameter tuning

# Classification accuracy of a fitted model on held-out data.
#
# Arguments:
#   fit   A fitted model whose predict() method accepts type = "class".
#   data  Data frame to score; must contain a Rating_Category column holding
#         the true classes. Defaults to the global test.data, so existing
#         one-argument calls behave exactly as before.
#
# Returns the proportion of rows on the diagonal of the actual-vs-predicted
# table. The diagonal pairs the i-th actual level with the i-th predicted
# level, so both factors are expected to share the same level order.
accuracy_tune <- function(fit, data = test.data) {
  predicted <- predict(fit, data, type = "class")
  confusion <- table(data$Rating_Category, predicted)
  sum(diag(confusion)) / sum(confusion)
}

# Tree-growing controls: minimum rows to attempt a split, minimum rows per
# leaf, maximum depth and the complexity parameter
control <- rpart.control(
  minsplit = 20,
  minbucket = round(20 / 3),
  maxdepth = 20,
  cp = 0.01
)
# Refit the tree with these controls and score it on the test data
tune_fit <- rpart(
  formula = Rating_Category ~ .,
  data = train.data,
  method = "class",
  control = control
)
accuracy_tune(tune_fit)
## [1] 0.7578947

Random Forest is a bootstrap aggregation algorithm that uses a random sample of predictors at each split. Aggregating a number of predictors gives a better prediction result compared to one good predictor. The caret package has been used for modeling purposes. Also, the Random Search algorithm randomly searches and chooses a hyperparameter combination for every iteration (the code below sets search = "grid"). We use the trainControl function to do a grid search with 10-fold cross-validation, and we train a Random Forest model to get the best result for accuracy. We initially get the best result for mtry=9. mtry: Number of predictors drawn to feed the algorithm at each split. By default, for classification, it is the square root of the number of predictors. We test the model for different mtry values from 1 to 10 and thereby extract the best value, which is 4 with an accuracy of 81.48%. maxnodes: It is the maximum number of terminal nodes for the model. We do a similar search to the one for mtry, for nodes between 5 and 30. The best value is 27 and the accuracy associated with it is 80.20%. ntree: It is the number of trees in the forest. The search is made for different tree values ranging from 250 to 2000. It was observed that the best number of trees was 600 with an accuracy of 77.24%. So the final model has mtry=4, maxnodes=27 and ntree=600. The prediction accuracy associated with this model is 78.42%. The variable importance plot from the algorithm clearly shows that important factors like the number of users who voted, the duration of the movie, budget and gross earnings have a huge impact on the IMDB score, and they would be helpful during prediction. (Note: these figures come from an earlier run; not every caret call below is seeded, so the printed outputs differ slightly, e.g. the mtry search shown selects mtry = 5.)

# Resampling scheme shared by all random-forest fits: 10-fold CV, grid search
trControl <- trainControl(
  method = "cv",
  number = 10,
  search = "grid"
)
# Baseline random forest using caret's default tuning grid
rf_default <- train(
  Rating_Category ~ .,
  data = train.data,
  method = "rf",
  metric = "Accuracy",
  trControl = trControl
)
print(rf_default)
## Random Forest 
## 
## 3046 samples
##   13 predictor
##    4 classes: 'LOW', 'MEDIUM', 'HIGH', 'EXCELLENT' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 2741, 2740, 2743, 2742, 2740, 2742, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.8023490  0.5085656
##    9    0.8095578  0.5407069
##   17    0.8079109  0.5429585
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 9.
# Best mtry: cross-validate every mtry from 1 to 10 with nodesize and ntree
# held fixed
tuneGrid <- expand.grid(.mtry = 1:10)
rf_mtry <- train(
  Rating_Category ~ .,
  data = train.data,
  method = "rf",
  metric = "Accuracy",
  tuneGrid = tuneGrid,
  trControl = trControl,
  importance = TRUE,
  nodesize = 14,
  ntree = 300
)
print(rf_mtry)
## Random Forest 
## 
## 3046 samples
##   13 predictor
##    4 classes: 'LOW', 'MEDIUM', 'HIGH', 'EXCELLENT' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 2742, 2742, 2741, 2742, 2742, 2741, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    1    0.7353748  0.2629126
##    2    0.8033236  0.5116329
##    3    0.8023432  0.5234097
##    4    0.8082610  0.5386487
##    5    0.8089200  0.5413153
##    6    0.8082621  0.5409011
##    7    0.8069560  0.5357070
##    8    0.8082653  0.5418166
##    9    0.8086007  0.5423119
##   10    0.8082642  0.5412493
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 5.
# Winning mtry from the search, and the best cross-validated accuracy
best_mtry <- rf_mtry$bestTune[["mtry"]]
best_mtry
## [1] 5
max(rf_mtry$results[["Accuracy"]])
## [1] 0.80892
# Best maxnodes: with mtry fixed at best_mtry, refit once per candidate value
# from 5 to 30 and collect the fits in a list keyed by the value.
store_maxnode <- list()
tuneGrid <- expand.grid(.mtry = best_mtry)
for (maxnodes in 5:30) {
  # Reseed before every fit
  set.seed(1234)
  rf_maxnode <- train(
    Rating_Category ~ .,
    data = train.data,
    method = "rf",
    metric = "Accuracy",
    tuneGrid = tuneGrid,
    trControl = trControl,
    importance = TRUE,
    nodesize = 14,
    maxnodes = maxnodes,
    ntree = 300
  )
  current_iteration <- toString(maxnodes)
  store_maxnode[[current_iteration]] <- rf_maxnode
}
# Resampled Accuracy and Kappa, one row per maxnodes candidate
results_mtry <- resamples(store_maxnode)
summary(results_mtry) # author's choice for later fits: maxnodes = 27
## 
## Call:
## summary.resamples(object = results_mtry)
## 
## Models: 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 
## Number of resamples: 10 
## 
## Accuracy 
##         Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## 5  0.7026144 0.7192177 0.7450658 0.7403208 0.7551391 0.7810458    0
## 6  0.7058824 0.7311475 0.7536777 0.7485196 0.7664474 0.7843137    0
## 7  0.7091503 0.7286885 0.7586281 0.7498409 0.7664474 0.7875817    0
## 8  0.7058824 0.7281448 0.7532895 0.7472082 0.7596365 0.7875817    0
## 9  0.7058824 0.7281448 0.7516447 0.7491765 0.7697368 0.7875817    0
## 10 0.7091503 0.7270492 0.7631579 0.7531195 0.7694807 0.7908497    0
## 11 0.7058824 0.7311475 0.7582237 0.7537774 0.7735844 0.7908497    0
## 12 0.7156863 0.7281448 0.7631579 0.7554200 0.7760435 0.7894737    0
## 13 0.7156863 0.7344262 0.7648026 0.7600167 0.7801472 0.7960526    0
## 14 0.7189542 0.7368852 0.7730263 0.7606789 0.7768658 0.7960526    0
## 15 0.7058824 0.7413245 0.7763158 0.7626633 0.7808663 0.7993421    0
## 16 0.7156863 0.7385246 0.7713816 0.7619914 0.7826062 0.7960526    0
## 17 0.7156863 0.7418033 0.7713816 0.7636362 0.7806856 0.8059211    0
## 18 0.7156863 0.7368852 0.7680921 0.7632889 0.7809615 0.8071895    0
## 19 0.7091503 0.7393443 0.7746711 0.7652744 0.7875243 0.8092105    0
## 20 0.7124183 0.7475410 0.7746711 0.7665805 0.7891690 0.7993421    0
## 21 0.7222222 0.7563713 0.7766879 0.7718405 0.7914899 0.8092105    0
## 22 0.7222222 0.7506309 0.7717591 0.7705204 0.7899914 0.8125000    0
## 23 0.7189542 0.7553238 0.7812500 0.7744678 0.7916280 0.8071895    0
## 24 0.7287582 0.7577991 0.7796053 0.7764383 0.7945805 0.8092105    0
## 25 0.7293729 0.7571937 0.7799612 0.7767510 0.7998355 0.8157895    0
## 26 0.7320261 0.7596527 0.7816113 0.7767510 0.7965541 0.8092105    0
## 27 0.7352941 0.7645708 0.7849008 0.7787236 0.7986869 0.8092105    0
## 28 0.7320261 0.7699811 0.7832506 0.7807146 0.7978672 0.8157895    0
## 29 0.7287582 0.7621198 0.7881795 0.7816896 0.8011540 0.8104575    0
## 30 0.7352941 0.7693394 0.7861842 0.7816950 0.7949094 0.8157895    0
## 
## Kappa 
##         Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## 5  0.1802077 0.2395541 0.3319733 0.3098643 0.3560071 0.4356730    0
## 6  0.2354033 0.2799368 0.3610244 0.3403055 0.3919994 0.4492350    0
## 7  0.2480327 0.2731295 0.3715076 0.3431226 0.3938396 0.4508104    0
## 8  0.2311525 0.2665171 0.3363481 0.3271542 0.3753836 0.4508104    0
## 9  0.2323987 0.2688723 0.3349554 0.3351776 0.4005483 0.4508104    0
## 10 0.2426013 0.2617303 0.3750822 0.3482622 0.4066973 0.4642740    0
## 11 0.2372036 0.2692705 0.3630729 0.3512614 0.4194697 0.4609414    0
## 12 0.2311525 0.2712999 0.3785130 0.3558920 0.4254852 0.4609026    0
## 13 0.2649308 0.2845807 0.3820428 0.3698738 0.4345814 0.4809286    0
## 14 0.2665143 0.2890367 0.4099851 0.3722158 0.4228121 0.4776777    0
## 15 0.2372036 0.3088525 0.4173513 0.3785229 0.4393319 0.4877065    0
## 16 0.2556410 0.2922122 0.4035591 0.3754485 0.4412804 0.4776777    0
## 17 0.2603151 0.3057167 0.4029129 0.3803115 0.4345221 0.5013900    0
## 18 0.2529802 0.2899722 0.3968196 0.3812106 0.4369915 0.5165877    0
## 19 0.2573018 0.2973529 0.4139625 0.3882589 0.4612215 0.5144171    0
## 20 0.2641591 0.3226613 0.4139404 0.3927483 0.4606313 0.4908851    0
## 21 0.2862827 0.3558620 0.4210458 0.4100069 0.4706846 0.5174207    0
## 22 0.2862827 0.3379182 0.4110018 0.4065226 0.4673653 0.5272033    0
## 23 0.2845414 0.3602681 0.4375770 0.4181305 0.4699175 0.5165877    0
## 24 0.3116140 0.3691943 0.4295551 0.4247407 0.4756337 0.5203873    0
## 25 0.2863011 0.3566719 0.4286821 0.4238983 0.4984569 0.5311614    0
## 26 0.2927293 0.3629339 0.4295085 0.4230919 0.4886792 0.5174207    0
## 27 0.3037084 0.3791174 0.4398835 0.4294915 0.4888694 0.5203873    0
## 28 0.3136011 0.4084167 0.4380518 0.4365007 0.4863983 0.5369257    0
## 29 0.3073713 0.3787151 0.4532956 0.4391339 0.4955392 0.5233389    0
## 30 0.3240612 0.4077138 0.4482410 0.4399782 0.4867494 0.5369257    0
# Best ntree: with mtry and maxnodes fixed, refit once per candidate forest
# size and collect the fits in a list keyed by the ntree value.
store_maxtrees <- list()
for (ntree in c(250, 500, 1000, 2000)) {
  # Reseed before every fit, as the maxnodes search does, so that each
  # candidate is evaluated on the same cross-validation folds and the
  # resamples() comparison below is like-for-like and reproducible.
  set.seed(1234)
  rf_maxtrees <- train(Rating_Category~.,
                       data = train.data,
                       method = "rf",
                       metric = "Accuracy",
                       tuneGrid = tuneGrid,
                       trControl = trControl,
                       importance = TRUE,
                       nodesize = 14,
                       maxnodes = 27,
                       ntree = ntree)
  key <- toString(ntree)
  store_maxtrees[[key]] <- rf_maxtrees
}
results_tree <- resamples(store_maxtrees)
summary(results_tree) # pick the ntree with the highest mean Accuracy
## 
## Call:
## summary.resamples(object = results_tree)
## 
## Models: 250, 500, 1000, 2000 
## Number of resamples: 10 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## 250  0.7508197 0.7598684 0.7651855 0.7767537 0.7959016 0.8157895    0
## 500  0.7344262 0.7639344 0.7741401 0.7731548 0.7827156 0.8065574    0
## 1000 0.7606557 0.7684307 0.7828924 0.7790693 0.7855398 0.7960526    0
## 2000 0.7475410 0.7598361 0.7816113 0.7774342 0.7883466 0.8125000    0
## 
## Kappa 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## 250  0.3596436 0.3704339 0.3982079 0.4258527 0.4827396 0.5340614    0
## 500  0.3079444 0.3887484 0.4078011 0.4126284 0.4356615 0.5128719    0
## 1000 0.3743150 0.4050328 0.4424358 0.4310009 0.4490075 0.4743858    0
## 2000 0.3379285 0.3749235 0.4340855 0.4243862 0.4514213 0.5317769    0
# Final random forest: same nodesize / maxnodes as the ntree search above,
# then evaluated on test.data.
# NOTE(review): ntree = 600 is not one of the values (250, 500, 1000, 2000)
# compared in the ntree search -- confirm this choice is intentional.
fit_rf <- train(
  Rating_Category ~ .,
  data = train.data,
  method = "rf",
  metric = "Accuracy",
  trControl = trControl,
  tuneGrid = tuneGrid,
  ntree = 600,
  nodesize = 14,
  maxnodes = 27,
  importance = TRUE
)

# Predict classes for test.data and cross-tabulate them against the observed
# Rating_Category.
prediction.rf <- predict(fit_rf, newdata = test.data)
confusionMatrix(data = prediction.rf, reference = test.data$Rating_Category)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  LOW MEDIUM HIGH EXCELLENT
##   LOW         0      0    0         0
##   MEDIUM     19    489  132         0
##   HIGH        0     22   98         0
##   EXCELLENT   0      0    0         0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7724          
##                  95% CI : (0.7409, 0.8017)
##     No Information Rate : 0.6724          
##     P-Value [Acc > NIR] : 8.827e-10       
##                                           
##                   Kappa : 0.4103          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: LOW Class: MEDIUM Class: HIGH Class: EXCELLENT
## Sensitivity               0.000        0.9569      0.4261               NA
## Specificity               1.000        0.3936      0.9585                1
## Pos Pred Value              NaN        0.7641      0.8167               NA
## Neg Pred Value            0.975        0.8167      0.7938               NA
## Prevalence                0.025        0.6724      0.3026                0
## Detection Rate            0.000        0.6434      0.1289                0
## Detection Prevalence      0.000        0.8421      0.1579                0
## Balanced Accuracy         0.500        0.6753      0.6923               NA
# NOTE(review): the original note here read "78.42% Accuracy", but the
# confusion-matrix output above reports Accuracy : 0.7724 -- refresh this
# figure after re-running.

# Per-class variable importance of the caret random-forest fit.
varImp(fit_rf)
## rf variable importance
## 
##   variables are sorted by maximum importance across the classes
##                              LOW MEDIUM    HIGH EXCELLENT
## num_voted_users            1.977 71.206 100.000     8.638
## budget                     2.340 48.314  16.283     6.370
## director_facebook_likes    8.188 10.118  47.824     7.127
## duration                   6.034 37.376  41.940     4.546
## movie_facebook_likes       7.871 24.977  32.332     4.546
## countryUSA                 3.457  2.785  29.930     4.546
## gross                      0.000 26.483   8.599     4.546
## title_year                 4.106 25.447  16.724     4.546
## actor_1_facebook_likes     4.788 12.555  24.014     4.546
## content_ratingPG-13        7.070 23.401  17.143     6.370
## critic_total_ratio         6.379 12.308  19.963     7.710
## other_actor_facebook_likes 4.546 16.947  12.379     4.546
## countryOthers              2.722  4.532  14.943     4.546
## content_ratingR            2.771 12.310  14.708     4.546
## facenumber_in_poster       4.764  8.649  12.408     4.546
## content_ratingPG           4.546  6.456   8.614     4.546
## content_ratingNC-17        4.546  5.603   2.722     4.546
# Stand-alone random forest (mtry fixed at 4) fitted on the training data;
# it is used below only to rank the predictors.
rf <- randomForest(Rating_Category ~ ., data = train.data, mtry = 4)

# One row per predictor: its name and its MeanDecreaseGini importance,
# rounded to two decimals.
importance <- importance(rf)
varImportance <- data.frame(
  Variables = rownames(importance),
  Importance = round(importance[, "MeanDecreaseGini"], digits = 2)
)

# Attach a "#1", "#2", ... label, with "#1" being the most important
# predictor (ties share a rank).
rankImportance <- mutate(
  varImportance,
  Rank = paste0("#", dense_rank(desc(Importance)))
)

# Horizontal bar chart of importance, bars ordered by importance, with the
# rank label drawn in red near the base of each bar.
ggplot(
  rankImportance,
  aes(x = reorder(Variables, Importance), y = Importance, fill = Importance)
) +
  geom_col() +
  geom_text(
    aes(x = Variables, y = 0.5, label = Rank),
    colour = "red", size = 4, hjust = 0, vjust = 0.55
  ) +
  labs(x = "Variables") +
  coord_flip() +
  theme_few()

Another model, Gradient Boosting, is fit to the training dataset. Here, the sample selection is made more intelligently than in other algorithms. It is a slow-learning algorithm in which trees are grown sequentially, and each decision tree is fitted to the residuals rather than to the final outcome. We use the same cross-validation technique as for Random Forests, via trainControl, and finally tune the hyperparameters. There are three parameters to be tuned here: the number of trees, the number of splits, and the learning rate. From the final confusion matrix, we conclude that the Specificity is highest for the LOW class (0.976), while the Sensitivity is highest for the MEDIUM class (0.811). The overall model has an accuracy of 77.5%, which is a bit lower than the Random Forest model and certainly higher than the Multinomial Logistic model.

# Gradient boosting through caret with 10-fold cross-validation. `repeats` is
# not set, so trainControl()'s default number of repeats applies; no tuneGrid
# is passed, so caret's default tuning grid for "gbm" is used.
tc <- trainControl(method = "repeatedcv", number = 10)
# verbose = FALSE is forwarded to gbm and silences the per-iteration
# TrainDeviance log that is otherwise printed for every resample fit.
gbm.model <- train(Rating_Category ~ .,
                   data = train.data,
                   method = "gbm",
                   trControl = tc,
                   verbose = FALSE)
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2711
##      2        1.2327             nan     0.1000    0.2138
##      3        1.1030             nan     0.1000    0.1585
##      4        1.0076             nan     0.1000    0.1161
##      5        0.9332             nan     0.1000    0.0872
##      6        0.8773             nan     0.1000    0.0718
##      7        0.8309             nan     0.1000    0.0566
##      8        0.7937             nan     0.1000    0.0481
##      9        0.7622             nan     0.1000    0.0397
##     10        0.7351             nan     0.1000    0.0308
##     20        0.6018             nan     0.1000    0.0079
##     40        0.5253             nan     0.1000    0.0023
##     60        0.4944             nan     0.1000    0.0005
##     80        0.4737             nan     0.1000   -0.0008
##    100        0.4604             nan     0.1000   -0.0011
##    120        0.4506             nan     0.1000   -0.0015
##    140        0.4420             nan     0.1000   -0.0013
##    150        0.4386             nan     0.1000   -0.0014
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3054
##      2        1.2033             nan     0.1000    0.2127
##      3        1.0716             nan     0.1000    0.1518
##      4        0.9760             nan     0.1000    0.1198
##      5        0.9007             nan     0.1000    0.0926
##      6        0.8411             nan     0.1000    0.0730
##      7        0.7941             nan     0.1000    0.0620
##      8        0.7532             nan     0.1000    0.0509
##      9        0.7198             nan     0.1000    0.0427
##     10        0.6916             nan     0.1000    0.0353
##     20        0.5484             nan     0.1000    0.0071
##     40        0.4661             nan     0.1000   -0.0009
##     60        0.4285             nan     0.1000   -0.0002
##     80        0.4070             nan     0.1000   -0.0014
##    100        0.3889             nan     0.1000   -0.0018
##    120        0.3741             nan     0.1000   -0.0018
##    140        0.3609             nan     0.1000   -0.0020
##    150        0.3544             nan     0.1000   -0.0027
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3150
##      2        1.1955             nan     0.1000    0.2242
##      3        1.0557             nan     0.1000    0.1594
##      4        0.9539             nan     0.1000    0.1228
##      5        0.8771             nan     0.1000    0.0951
##      6        0.8157             nan     0.1000    0.0770
##      7        0.7678             nan     0.1000    0.0616
##      8        0.7266             nan     0.1000    0.0484
##      9        0.6913             nan     0.1000    0.0443
##     10        0.6609             nan     0.1000    0.0269
##     20        0.5162             nan     0.1000    0.0066
##     40        0.4322             nan     0.1000    0.0009
##     60        0.3960             nan     0.1000   -0.0013
##     80        0.3675             nan     0.1000   -0.0013
##    100        0.3463             nan     0.1000   -0.0012
##    120        0.3257             nan     0.1000   -0.0027
##    140        0.3093             nan     0.1000   -0.0010
##    150        0.3015             nan     0.1000   -0.0029
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2737
##      2        1.2333             nan     0.1000    0.2124
##      3        1.1032             nan     0.1000    0.1574
##      4        1.0046             nan     0.1000    0.1124
##      5        0.9328             nan     0.1000    0.0873
##      6        0.8741             nan     0.1000    0.0722
##      7        0.8275             nan     0.1000    0.0547
##      8        0.7899             nan     0.1000    0.0458
##      9        0.7593             nan     0.1000    0.0393
##     10        0.7330             nan     0.1000    0.0311
##     20        0.6003             nan     0.1000    0.0091
##     40        0.5249             nan     0.1000    0.0016
##     60        0.4928             nan     0.1000   -0.0002
##     80        0.4746             nan     0.1000   -0.0013
##    100        0.4615             nan     0.1000   -0.0013
##    120        0.4523             nan     0.1000   -0.0013
##    140        0.4434             nan     0.1000   -0.0009
##    150        0.4404             nan     0.1000   -0.0011
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2873
##      2        1.2239             nan     0.1000    0.2198
##      3        1.0882             nan     0.1000    0.1640
##      4        0.9873             nan     0.1000    0.1214
##      5        0.9101             nan     0.1000    0.1015
##      6        0.8437             nan     0.1000    0.0715
##      7        0.7941             nan     0.1000    0.0592
##      8        0.7546             nan     0.1000    0.0511
##      9        0.7210             nan     0.1000    0.0433
##     10        0.6923             nan     0.1000    0.0388
##     20        0.5457             nan     0.1000    0.0066
##     40        0.4673             nan     0.1000   -0.0003
##     60        0.4358             nan     0.1000   -0.0013
##     80        0.4117             nan     0.1000   -0.0009
##    100        0.3954             nan     0.1000   -0.0013
##    120        0.3796             nan     0.1000   -0.0013
##    140        0.3666             nan     0.1000   -0.0008
##    150        0.3609             nan     0.1000   -0.0028
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3088
##      2        1.2035             nan     0.1000    0.2225
##      3        1.0667             nan     0.1000    0.1699
##      4        0.9619             nan     0.1000    0.1262
##      5        0.8830             nan     0.1000    0.0987
##      6        0.8188             nan     0.1000    0.0771
##      7        0.7690             nan     0.1000    0.0644
##      8        0.7264             nan     0.1000    0.0517
##      9        0.6918             nan     0.1000    0.0440
##     10        0.6613             nan     0.1000    0.0299
##     20        0.5164             nan     0.1000    0.0044
##     40        0.4326             nan     0.1000    0.0013
##     60        0.3937             nan     0.1000   -0.0020
##     80        0.3672             nan     0.1000   -0.0011
##    100        0.3446             nan     0.1000   -0.0028
##    120        0.3265             nan     0.1000   -0.0020
##    140        0.3097             nan     0.1000   -0.0042
##    150        0.3023             nan     0.1000   -0.0006
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2977
##      2        1.2103             nan     0.1000    0.2063
##      3        1.0870             nan     0.1000    0.1507
##      4        0.9946             nan     0.1000    0.1137
##      5        0.9240             nan     0.1000    0.0885
##      6        0.8694             nan     0.1000    0.0683
##      7        0.8251             nan     0.1000    0.0520
##      8        0.7900             nan     0.1000    0.0454
##      9        0.7598             nan     0.1000    0.0377
##     10        0.7336             nan     0.1000    0.0308
##     20        0.6028             nan     0.1000    0.0106
##     40        0.5252             nan     0.1000    0.0006
##     60        0.4960             nan     0.1000   -0.0002
##     80        0.4777             nan     0.1000   -0.0003
##    100        0.4639             nan     0.1000   -0.0021
##    120        0.4545             nan     0.1000   -0.0010
##    140        0.4454             nan     0.1000   -0.0003
##    150        0.4411             nan     0.1000   -0.0009
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2858
##      2        1.2265             nan     0.1000    0.2211
##      3        1.0944             nan     0.1000    0.1625
##      4        0.9940             nan     0.1000    0.1262
##      5        0.9144             nan     0.1000    0.0905
##      6        0.8531             nan     0.1000    0.0775
##      7        0.8030             nan     0.1000    0.0626
##      8        0.7605             nan     0.1000    0.0464
##      9        0.7264             nan     0.1000    0.0384
##     10        0.6981             nan     0.1000    0.0368
##     20        0.5482             nan     0.1000    0.0080
##     40        0.4682             nan     0.1000   -0.0008
##     60        0.4310             nan     0.1000   -0.0021
##     80        0.4093             nan     0.1000   -0.0022
##    100        0.3919             nan     0.1000   -0.0020
##    120        0.3763             nan     0.1000   -0.0005
##    140        0.3632             nan     0.1000   -0.0016
##    150        0.3566             nan     0.1000   -0.0015
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3163
##      2        1.1989             nan     0.1000    0.2157
##      3        1.0645             nan     0.1000    0.1611
##      4        0.9645             nan     0.1000    0.1275
##      5        0.8847             nan     0.1000    0.0996
##      6        0.8214             nan     0.1000    0.0810
##      7        0.7691             nan     0.1000    0.0625
##      8        0.7282             nan     0.1000    0.0487
##      9        0.6932             nan     0.1000    0.0426
##     10        0.6640             nan     0.1000    0.0319
##     20        0.5176             nan     0.1000    0.0109
##     40        0.4275             nan     0.1000   -0.0008
##     60        0.3894             nan     0.1000   -0.0005
##     80        0.3613             nan     0.1000   -0.0029
##    100        0.3384             nan     0.1000   -0.0006
##    120        0.3217             nan     0.1000   -0.0020
##    140        0.3058             nan     0.1000   -0.0010
##    150        0.2981             nan     0.1000   -0.0025
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3003
##      2        1.2085             nan     0.1000    0.2086
##      3        1.0858             nan     0.1000    0.1497
##      4        0.9921             nan     0.1000    0.1131
##      5        0.9203             nan     0.1000    0.0879
##      6        0.8649             nan     0.1000    0.0675
##      7        0.8205             nan     0.1000    0.0530
##      8        0.7844             nan     0.1000    0.0469
##      9        0.7546             nan     0.1000    0.0397
##     10        0.7294             nan     0.1000    0.0339
##     20        0.5997             nan     0.1000    0.0065
##     40        0.5236             nan     0.1000    0.0022
##     60        0.4933             nan     0.1000   -0.0020
##     80        0.4743             nan     0.1000   -0.0008
##    100        0.4602             nan     0.1000   -0.0025
##    120        0.4493             nan     0.1000   -0.0018
##    140        0.4402             nan     0.1000   -0.0019
##    150        0.4363             nan     0.1000   -0.0015
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3084
##      2        1.2031             nan     0.1000    0.2127
##      3        1.0763             nan     0.1000    0.1604
##      4        0.9775             nan     0.1000    0.1186
##      5        0.9020             nan     0.1000    0.0954
##      6        0.8419             nan     0.1000    0.0772
##      7        0.7918             nan     0.1000    0.0589
##      8        0.7520             nan     0.1000    0.0513
##      9        0.7176             nan     0.1000    0.0380
##     10        0.6905             nan     0.1000    0.0346
##     20        0.5506             nan     0.1000    0.0056
##     40        0.4695             nan     0.1000   -0.0010
##     60        0.4350             nan     0.1000   -0.0020
##     80        0.4136             nan     0.1000   -0.0012
##    100        0.3928             nan     0.1000   -0.0022
##    120        0.3760             nan     0.1000   -0.0010
##    140        0.3618             nan     0.1000   -0.0020
##    150        0.3557             nan     0.1000   -0.0019
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3124
##      2        1.1931             nan     0.1000    0.2109
##      3        1.0622             nan     0.1000    0.1598
##      4        0.9654             nan     0.1000    0.1196
##      5        0.8863             nan     0.1000    0.0993
##      6        0.8217             nan     0.1000    0.0739
##      7        0.7713             nan     0.1000    0.0604
##      8        0.7284             nan     0.1000    0.0478
##      9        0.6956             nan     0.1000    0.0405
##     10        0.6657             nan     0.1000    0.0367
##     20        0.5164             nan     0.1000    0.0058
##     40        0.4308             nan     0.1000    0.0001
##     60        0.3933             nan     0.1000   -0.0015
##     80        0.3685             nan     0.1000   -0.0021
##    100        0.3451             nan     0.1000   -0.0036
##    120        0.3259             nan     0.1000   -0.0004
##    140        0.3077             nan     0.1000   -0.0026
##    150        0.3010             nan     0.1000   -0.0010
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2983
##      2        1.2097             nan     0.1000    0.2083
##      3        1.0874             nan     0.1000    0.1500
##      4        0.9926             nan     0.1000    0.1158
##      5        0.9221             nan     0.1000    0.0858
##      6        0.8664             nan     0.1000    0.0689
##      7        0.8211             nan     0.1000    0.0552
##      8        0.7853             nan     0.1000    0.0413
##      9        0.7559             nan     0.1000    0.0393
##     10        0.7301             nan     0.1000    0.0310
##     20        0.6014             nan     0.1000    0.0074
##     40        0.5251             nan     0.1000    0.0017
##     60        0.4965             nan     0.1000   -0.0009
##     80        0.4759             nan     0.1000   -0.0012
##    100        0.4625             nan     0.1000   -0.0007
##    120        0.4531             nan     0.1000   -0.0007
##    140        0.4444             nan     0.1000   -0.0006
##    150        0.4406             nan     0.1000   -0.0006
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3079
##      2        1.2009             nan     0.1000    0.2098
##      3        1.0718             nan     0.1000    0.1537
##      4        0.9745             nan     0.1000    0.1210
##      5        0.8981             nan     0.1000    0.0912
##      6        0.8379             nan     0.1000    0.0703
##      7        0.7903             nan     0.1000    0.0566
##      8        0.7514             nan     0.1000    0.0450
##      9        0.7188             nan     0.1000    0.0434
##     10        0.6903             nan     0.1000    0.0361
##     20        0.5505             nan     0.1000    0.0089
##     40        0.4711             nan     0.1000    0.0013
##     60        0.4366             nan     0.1000   -0.0023
##     80        0.4141             nan     0.1000   -0.0019
##    100        0.3970             nan     0.1000   -0.0007
##    120        0.3820             nan     0.1000   -0.0026
##    140        0.3677             nan     0.1000   -0.0018
##    150        0.3618             nan     0.1000   -0.0020
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3091
##      2        1.1980             nan     0.1000    0.2173
##      3        1.0627             nan     0.1000    0.1669
##      4        0.9593             nan     0.1000    0.1285
##      5        0.8806             nan     0.1000    0.0962
##      6        0.8180             nan     0.1000    0.0744
##      7        0.7682             nan     0.1000    0.0601
##      8        0.7263             nan     0.1000    0.0496
##      9        0.6924             nan     0.1000    0.0375
##     10        0.6655             nan     0.1000    0.0347
##     20        0.5194             nan     0.1000    0.0060
##     40        0.4363             nan     0.1000   -0.0035
##     60        0.3950             nan     0.1000   -0.0008
##     80        0.3701             nan     0.1000   -0.0035
##    100        0.3468             nan     0.1000   -0.0027
##    120        0.3271             nan     0.1000   -0.0053
##    140        0.3118             nan     0.1000   -0.0017
##    150        0.3027             nan     0.1000   -0.0027
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2942
##      2        1.2111             nan     0.1000    0.2039
##      3        1.0874             nan     0.1000    0.1490
##      4        0.9942             nan     0.1000    0.1127
##      5        0.9222             nan     0.1000    0.0864
##      6        0.8667             nan     0.1000    0.0692
##      7        0.8227             nan     0.1000    0.0548
##      8        0.7870             nan     0.1000    0.0449
##      9        0.7571             nan     0.1000    0.0344
##     10        0.7321             nan     0.1000    0.0322
##     20        0.6049             nan     0.1000    0.0055
##     40        0.5294             nan     0.1000    0.0019
##     60        0.4977             nan     0.1000   -0.0000
##     80        0.4787             nan     0.1000   -0.0005
##    100        0.4643             nan     0.1000   -0.0008
##    120        0.4531             nan     0.1000   -0.0014
##    140        0.4456             nan     0.1000   -0.0019
##    150        0.4411             nan     0.1000   -0.0018
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3101
##      2        1.2023             nan     0.1000    0.2093
##      3        1.0725             nan     0.1000    0.1561
##      4        0.9756             nan     0.1000    0.1215
##      5        0.9017             nan     0.1000    0.0912
##      6        0.8433             nan     0.1000    0.0762
##      7        0.7947             nan     0.1000    0.0585
##      8        0.7549             nan     0.1000    0.0455
##      9        0.7211             nan     0.1000    0.0419
##     10        0.6935             nan     0.1000    0.0364
##     20        0.5513             nan     0.1000    0.0051
##     40        0.4700             nan     0.1000   -0.0012
##     60        0.4350             nan     0.1000   -0.0010
##     80        0.4136             nan     0.1000   -0.0007
##    100        0.3960             nan     0.1000   -0.0023
##    120        0.3783             nan     0.1000   -0.0015
##    140        0.3652             nan     0.1000   -0.0014
##    150        0.3591             nan     0.1000   -0.0043
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3125
##      2        1.1973             nan     0.1000    0.2171
##      3        1.0629             nan     0.1000    0.1635
##      4        0.9628             nan     0.1000    0.1216
##      5        0.8857             nan     0.1000    0.0992
##      6        0.8220             nan     0.1000    0.0783
##      7        0.7715             nan     0.1000    0.0589
##      8        0.7308             nan     0.1000    0.0532
##      9        0.6944             nan     0.1000    0.0393
##     10        0.6653             nan     0.1000    0.0316
##     20        0.5206             nan     0.1000    0.0033
##     40        0.4373             nan     0.1000   -0.0008
##     60        0.3955             nan     0.1000   -0.0014
##     80        0.3686             nan     0.1000   -0.0023
##    100        0.3487             nan     0.1000   -0.0016
##    120        0.3308             nan     0.1000   -0.0012
##    140        0.3144             nan     0.1000   -0.0028
##    150        0.3067             nan     0.1000   -0.0020
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2729
##      2        1.2300             nan     0.1000    0.2109
##      3        1.1020             nan     0.1000    0.1515
##      4        1.0076             nan     0.1000    0.1124
##      5        0.9338             nan     0.1000    0.0916
##      6        0.8756             nan     0.1000    0.0724
##      7        0.8295             nan     0.1000    0.0595
##      8        0.7916             nan     0.1000    0.0456
##      9        0.7608             nan     0.1000    0.0395
##     10        0.7352             nan     0.1000    0.0324
##     20        0.6006             nan     0.1000    0.0079
##     40        0.5220             nan     0.1000    0.0015
##     60        0.4892             nan     0.1000    0.0010
##     80        0.4715             nan     0.1000   -0.0007
##    100        0.4573             nan     0.1000   -0.0011
##    120        0.4479             nan     0.1000   -0.0021
##    140        0.4394             nan     0.1000   -0.0030
##    150        0.4350             nan     0.1000   -0.0018
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3108
##      2        1.2052             nan     0.1000    0.2155
##      3        1.0724             nan     0.1000    0.1594
##      4        0.9753             nan     0.1000    0.1195
##      5        0.9004             nan     0.1000    0.1000
##      6        0.8380             nan     0.1000    0.0736
##      7        0.7895             nan     0.1000    0.0563
##      8        0.7519             nan     0.1000    0.0476
##      9        0.7197             nan     0.1000    0.0399
##     10        0.6915             nan     0.1000    0.0366
##     20        0.5461             nan     0.1000    0.0095
##     40        0.4657             nan     0.1000   -0.0020
##     60        0.4345             nan     0.1000   -0.0001
##     80        0.4134             nan     0.1000   -0.0030
##    100        0.3949             nan     0.1000   -0.0018
##    120        0.3806             nan     0.1000   -0.0009
##    140        0.3684             nan     0.1000   -0.0039
##    150        0.3623             nan     0.1000   -0.0025
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3204
##      2        1.2000             nan     0.1000    0.2267
##      3        1.0640             nan     0.1000    0.1682
##      4        0.9626             nan     0.1000    0.1268
##      5        0.8811             nan     0.1000    0.0992
##      6        0.8180             nan     0.1000    0.0749
##      7        0.7682             nan     0.1000    0.0632
##      8        0.7272             nan     0.1000    0.0543
##      9        0.6906             nan     0.1000    0.0424
##     10        0.6618             nan     0.1000    0.0352
##     20        0.5152             nan     0.1000    0.0070
##     40        0.4311             nan     0.1000   -0.0033
##     60        0.3919             nan     0.1000   -0.0027
##     80        0.3643             nan     0.1000   -0.0025
##    100        0.3427             nan     0.1000   -0.0026
##    120        0.3249             nan     0.1000   -0.0013
##    140        0.3084             nan     0.1000   -0.0030
##    150        0.3012             nan     0.1000   -0.0012
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2988
##      2        1.2118             nan     0.1000    0.2044
##      3        1.0872             nan     0.1000    0.1463
##      4        0.9934             nan     0.1000    0.1147
##      5        0.9219             nan     0.1000    0.0881
##      6        0.8675             nan     0.1000    0.0649
##      7        0.8235             nan     0.1000    0.0557
##      8        0.7880             nan     0.1000    0.0466
##      9        0.7570             nan     0.1000    0.0365
##     10        0.7330             nan     0.1000    0.0325
##     20        0.6032             nan     0.1000    0.0096
##     40        0.5299             nan     0.1000    0.0007
##     60        0.4990             nan     0.1000    0.0001
##     80        0.4806             nan     0.1000   -0.0009
##    100        0.4671             nan     0.1000   -0.0008
##    120        0.4566             nan     0.1000   -0.0008
##    140        0.4464             nan     0.1000   -0.0010
##    150        0.4422             nan     0.1000   -0.0035
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3045
##      2        1.2031             nan     0.1000    0.2111
##      3        1.0744             nan     0.1000    0.1534
##      4        0.9774             nan     0.1000    0.1188
##      5        0.9029             nan     0.1000    0.0897
##      6        0.8452             nan     0.1000    0.0730
##      7        0.7950             nan     0.1000    0.0610
##      8        0.7550             nan     0.1000    0.0510
##      9        0.7211             nan     0.1000    0.0386
##     10        0.6947             nan     0.1000    0.0332
##     20        0.5537             nan     0.1000    0.0100
##     40        0.4715             nan     0.1000    0.0002
##     60        0.4379             nan     0.1000    0.0006
##     80        0.4139             nan     0.1000   -0.0018
##    100        0.3969             nan     0.1000   -0.0020
##    120        0.3803             nan     0.1000   -0.0034
##    140        0.3672             nan     0.1000   -0.0014
##    150        0.3607             nan     0.1000   -0.0013
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2869
##      2        1.2193             nan     0.1000    0.2226
##      3        1.0823             nan     0.1000    0.1709
##      4        0.9748             nan     0.1000    0.1338
##      5        0.8919             nan     0.1000    0.0970
##      6        0.8284             nan     0.1000    0.0766
##      7        0.7754             nan     0.1000    0.0662
##      8        0.7310             nan     0.1000    0.0462
##      9        0.6975             nan     0.1000    0.0455
##     10        0.6662             nan     0.1000    0.0324
##     20        0.5202             nan     0.1000    0.0068
##     40        0.4357             nan     0.1000   -0.0013
##     60        0.3961             nan     0.1000   -0.0003
##     80        0.3681             nan     0.1000   -0.0010
##    100        0.3455             nan     0.1000   -0.0027
##    120        0.3265             nan     0.1000   -0.0003
##    140        0.3112             nan     0.1000   -0.0020
##    150        0.3030             nan     0.1000   -0.0015
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2963
##      2        1.2093             nan     0.1000    0.2070
##      3        1.0858             nan     0.1000    0.1488
##      4        0.9943             nan     0.1000    0.1158
##      5        0.9223             nan     0.1000    0.0862
##      6        0.8672             nan     0.1000    0.0651
##      7        0.8247             nan     0.1000    0.0525
##      8        0.7888             nan     0.1000    0.0409
##      9        0.7599             nan     0.1000    0.0382
##     10        0.7344             nan     0.1000    0.0332
##     20        0.6010             nan     0.1000    0.0063
##     40        0.5248             nan     0.1000    0.0014
##     60        0.4932             nan     0.1000    0.0002
##     80        0.4743             nan     0.1000   -0.0014
##    100        0.4606             nan     0.1000   -0.0012
##    120        0.4506             nan     0.1000   -0.0011
##    140        0.4423             nan     0.1000   -0.0022
##    150        0.4382             nan     0.1000   -0.0033
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3039
##      2        1.2047             nan     0.1000    0.2136
##      3        1.0754             nan     0.1000    0.1632
##      4        0.9774             nan     0.1000    0.1159
##      5        0.9033             nan     0.1000    0.0975
##      6        0.8421             nan     0.1000    0.0719
##      7        0.7930             nan     0.1000    0.0599
##      8        0.7536             nan     0.1000    0.0512
##      9        0.7196             nan     0.1000    0.0388
##     10        0.6904             nan     0.1000    0.0346
##     20        0.5485             nan     0.1000    0.0077
##     40        0.4666             nan     0.1000   -0.0013
##     60        0.4338             nan     0.1000    0.0002
##     80        0.4110             nan     0.1000   -0.0030
##    100        0.3935             nan     0.1000   -0.0013
##    120        0.3782             nan     0.1000   -0.0010
##    140        0.3633             nan     0.1000   -0.0009
##    150        0.3570             nan     0.1000   -0.0012
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2896
##      2        1.2218             nan     0.1000    0.2355
##      3        1.0786             nan     0.1000    0.1637
##      4        0.9725             nan     0.1000    0.1241
##      5        0.8910             nan     0.1000    0.0969
##      6        0.8275             nan     0.1000    0.0752
##      7        0.7765             nan     0.1000    0.0611
##      8        0.7350             nan     0.1000    0.0546
##      9        0.6983             nan     0.1000    0.0440
##     10        0.6682             nan     0.1000    0.0390
##     20        0.5154             nan     0.1000    0.0061
##     40        0.4307             nan     0.1000   -0.0017
##     60        0.3897             nan     0.1000   -0.0016
##     80        0.3626             nan     0.1000   -0.0023
##    100        0.3394             nan     0.1000   -0.0012
##    120        0.3203             nan     0.1000   -0.0017
##    140        0.3031             nan     0.1000   -0.0009
##    150        0.2966             nan     0.1000   -0.0026
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2901
##      2        1.2097             nan     0.1000    0.2105
##      3        1.0812             nan     0.1000    0.1541
##      4        0.9890             nan     0.1000    0.1153
##      5        0.9170             nan     0.1000    0.0855
##      6        0.8617             nan     0.1000    0.0683
##      7        0.8157             nan     0.1000    0.0546
##      8        0.7797             nan     0.1000    0.0487
##      9        0.7489             nan     0.1000    0.0354
##     10        0.7246             nan     0.1000    0.0339
##     20        0.5937             nan     0.1000    0.0075
##     40        0.5187             nan     0.1000    0.0030
##     60        0.4868             nan     0.1000   -0.0003
##     80        0.4690             nan     0.1000    0.0006
##    100        0.4541             nan     0.1000    0.0002
##    120        0.4435             nan     0.1000   -0.0004
##    140        0.4340             nan     0.1000   -0.0010
##    150        0.4304             nan     0.1000   -0.0028
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.3066
##      2        1.2002             nan     0.1000    0.2206
##      3        1.0684             nan     0.1000    0.1576
##      4        0.9712             nan     0.1000    0.1197
##      5        0.8973             nan     0.1000    0.0939
##      6        0.8358             nan     0.1000    0.0768
##      7        0.7850             nan     0.1000    0.0613
##      8        0.7449             nan     0.1000    0.0504
##      9        0.7122             nan     0.1000    0.0417
##     10        0.6837             nan     0.1000    0.0300
##     20        0.5436             nan     0.1000    0.0100
##     40        0.4605             nan     0.1000    0.0016
##     60        0.4292             nan     0.1000   -0.0017
##     80        0.4067             nan     0.1000   -0.0019
##    100        0.3884             nan     0.1000   -0.0032
##    120        0.3743             nan     0.1000   -0.0014
##    140        0.3606             nan     0.1000   -0.0027
##    150        0.3546             nan     0.1000   -0.0009
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2937
##      2        1.2218             nan     0.1000    0.2347
##      3        1.0779             nan     0.1000    0.1697
##      4        0.9721             nan     0.1000    0.1281
##      5        0.8903             nan     0.1000    0.0951
##      6        0.8274             nan     0.1000    0.0785
##      7        0.7755             nan     0.1000    0.0682
##      8        0.7313             nan     0.1000    0.0525
##      9        0.6953             nan     0.1000    0.0433
##     10        0.6653             nan     0.1000    0.0354
##     20        0.5122             nan     0.1000    0.0032
##     40        0.4262             nan     0.1000    0.0007
##     60        0.3868             nan     0.1000   -0.0004
##     80        0.3585             nan     0.1000   -0.0018
##    100        0.3356             nan     0.1000   -0.0042
##    120        0.3179             nan     0.1000   -0.0023
##    140        0.3021             nan     0.1000   -0.0009
##    150        0.2938             nan     0.1000   -0.0028
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3863             nan     0.1000    0.2847
##      2        1.2282             nan     0.1000    0.2229
##      3        1.0940             nan     0.1000    0.1621
##      4        0.9907             nan     0.1000    0.1165
##      5        0.9152             nan     0.1000    0.0964
##      6        0.8526             nan     0.1000    0.0757
##      7        0.8027             nan     0.1000    0.0603
##      8        0.7630             nan     0.1000    0.0469
##      9        0.7304             nan     0.1000    0.0440
##     10        0.7015             nan     0.1000    0.0365
##     20        0.5522             nan     0.1000    0.0028
##     40        0.4704             nan     0.1000    0.0010
##     60        0.4380             nan     0.1000   -0.0011
##     80        0.4165             nan     0.1000   -0.0007
##    100        0.3981             nan     0.1000   -0.0011
##    120        0.3823             nan     0.1000   -0.0002
##    140        0.3695             nan     0.1000   -0.0004
##    150        0.3634             nan     0.1000   -0.0012
# Plot the fitted GBM model object.
# NOTE(review): presumably gbm.model is a caret `train` object, in which case
# this shows the resampling/tuning profile -- confirm against where it is fit.
plot(gbm.model)

# Predict on the held-out test set, then place the observed Rating_Category
# next to the prediction so the two can be compared row by row.
# The data.frame() arguments are left unnamed on purpose: the resulting column
# names (test.data.Rating_Category, pred.gbm) are what the printed table shows.
pred.gbm <- predict(gbm.model, newdata = test.data)
result <- data.frame(test.data$Rating_Category, pred.gbm)
print(result)
##     test.data.Rating_Category pred.gbm
## 1                      MEDIUM     HIGH
## 2                        HIGH     HIGH
## 3                      MEDIUM     HIGH
## 4                      MEDIUM   MEDIUM
## 5                      MEDIUM     HIGH
## 6                      MEDIUM   MEDIUM
## 7                      MEDIUM   MEDIUM
## 8                        HIGH     HIGH
## 9                      MEDIUM     HIGH
## 10                       HIGH     HIGH
## 11                     MEDIUM   MEDIUM
## 12                     MEDIUM   MEDIUM
## 13                     MEDIUM   MEDIUM
## 14                       HIGH   MEDIUM
## 15                       HIGH     HIGH
## 16                       HIGH     HIGH
## 17                     MEDIUM   MEDIUM
## 18                       HIGH     HIGH
## 19                     MEDIUM   MEDIUM
## 20                       HIGH     HIGH
## 21                     MEDIUM   MEDIUM
## 22                       HIGH   MEDIUM
## 23                     MEDIUM   MEDIUM
## 24                       HIGH     HIGH
## 25                     MEDIUM   MEDIUM
## 26                     MEDIUM     HIGH
## 27                     MEDIUM   MEDIUM
## 28                     MEDIUM   MEDIUM
## 29                     MEDIUM   MEDIUM
## 30                       HIGH     HIGH
## 31                       HIGH     HIGH
## 32                     MEDIUM     HIGH
## 33                       HIGH   MEDIUM
## 34                       HIGH   MEDIUM
## 35                     MEDIUM   MEDIUM
## 36                     MEDIUM   MEDIUM
## 37                     MEDIUM   MEDIUM
## 38                       HIGH     HIGH
## 39                     MEDIUM     HIGH
## 40                     MEDIUM   MEDIUM
## 41                     MEDIUM     HIGH
## 42                     MEDIUM     HIGH
## 43                     MEDIUM   MEDIUM
## 44                     MEDIUM   MEDIUM
## 45                     MEDIUM   MEDIUM
## 46                     MEDIUM   MEDIUM
## 47                     MEDIUM   MEDIUM
## 48                       HIGH   MEDIUM
## 49                     MEDIUM   MEDIUM
## 50                        LOW      LOW
## 51                       HIGH     HIGH
## 52                       HIGH     HIGH
## 53                     MEDIUM   MEDIUM
## 54                     MEDIUM   MEDIUM
## 55                     MEDIUM   MEDIUM
## 56                     MEDIUM   MEDIUM
## 57                     MEDIUM   MEDIUM
## 58                     MEDIUM   MEDIUM
## 59                     MEDIUM   MEDIUM
## 60                     MEDIUM   MEDIUM
## 61                     MEDIUM   MEDIUM
## 62                     MEDIUM   MEDIUM
## 63                       HIGH     HIGH
## 64                       HIGH     HIGH
## 65                     MEDIUM   MEDIUM
## 66                     MEDIUM     HIGH
## 67                     MEDIUM   MEDIUM
## 68                     MEDIUM   MEDIUM
## 69                     MEDIUM   MEDIUM
## 70                     MEDIUM   MEDIUM
## 71                       HIGH     HIGH
## 72                     MEDIUM   MEDIUM
## 73                     MEDIUM   MEDIUM
## 74                       HIGH     HIGH
## 75                       HIGH     HIGH
## 76                     MEDIUM   MEDIUM
## 77                     MEDIUM   MEDIUM
## 78                       HIGH   MEDIUM
## 79                     MEDIUM     HIGH
## 80                     MEDIUM   MEDIUM
## 81                     MEDIUM   MEDIUM
## 82                       HIGH   MEDIUM
## 83                     MEDIUM   MEDIUM
## 84                     MEDIUM   MEDIUM
## 85                     MEDIUM   MEDIUM
## 86                     MEDIUM   MEDIUM
## 87                     MEDIUM   MEDIUM
## 88                     MEDIUM   MEDIUM
## 89                     MEDIUM   MEDIUM
## 90                     MEDIUM   MEDIUM
## 91                        LOW   MEDIUM
## 92                     MEDIUM   MEDIUM
## 93                     MEDIUM   MEDIUM
## 94                     MEDIUM   MEDIUM
## 95                       HIGH     HIGH
## 96                     MEDIUM   MEDIUM
## 97                       HIGH     HIGH
## 98                     MEDIUM   MEDIUM
## 99                     MEDIUM   MEDIUM
## 100                      HIGH     HIGH
## 101                    MEDIUM   MEDIUM
## 102                    MEDIUM   MEDIUM
## 103                    MEDIUM   MEDIUM
## 104                    MEDIUM   MEDIUM
## 105                      HIGH     HIGH
## 106                      HIGH   MEDIUM
## 107                    MEDIUM   MEDIUM
## 108                      HIGH     HIGH
## 109                    MEDIUM     HIGH
## 110                    MEDIUM     HIGH
## 111                    MEDIUM   MEDIUM
## 112                    MEDIUM   MEDIUM
## 113                    MEDIUM   MEDIUM
## 114                      HIGH   MEDIUM
## 115                    MEDIUM   MEDIUM
## 116                      HIGH   MEDIUM
## 117                    MEDIUM   MEDIUM
## 118                    MEDIUM   MEDIUM
## 119                    MEDIUM   MEDIUM
## 120                    MEDIUM   MEDIUM
## 121                    MEDIUM   MEDIUM
## 122                    MEDIUM   MEDIUM
## 123                    MEDIUM   MEDIUM
## 124                    MEDIUM   MEDIUM
## 125                    MEDIUM   MEDIUM
## 126                    MEDIUM   MEDIUM
## 127                    MEDIUM   MEDIUM
## 128                    MEDIUM   MEDIUM
## 129                    MEDIUM     HIGH
## 130                    MEDIUM   MEDIUM
## 131                    MEDIUM   MEDIUM
## 132                    MEDIUM   MEDIUM
## 133                    MEDIUM   MEDIUM
## 134                      HIGH     HIGH
## 135                    MEDIUM   MEDIUM
## 136                    MEDIUM   MEDIUM
## 137                    MEDIUM   MEDIUM
## 138                    MEDIUM   MEDIUM
## 139                    MEDIUM   MEDIUM
## 140                      HIGH     HIGH
## 141                    MEDIUM   MEDIUM
## 142                    MEDIUM   MEDIUM
## 143                    MEDIUM   MEDIUM
## 144                    MEDIUM   MEDIUM
## 145                    MEDIUM   MEDIUM
## 146                    MEDIUM   MEDIUM
## 147                    MEDIUM   MEDIUM
## 148                    MEDIUM   MEDIUM
## 149                    MEDIUM   MEDIUM
## 150                    MEDIUM   MEDIUM
## 151                    MEDIUM   MEDIUM
## 152                    MEDIUM   MEDIUM
## 153                    MEDIUM     HIGH
## 154                    MEDIUM   MEDIUM
## 155                    MEDIUM   MEDIUM
## 156                    MEDIUM   MEDIUM
## 157                    MEDIUM   MEDIUM
## 158                      HIGH     HIGH
## 159                    MEDIUM   MEDIUM
## 160                    MEDIUM   MEDIUM
## 161                    MEDIUM   MEDIUM
## 162                    MEDIUM   MEDIUM
## 163                    MEDIUM   MEDIUM
## 164                    MEDIUM   MEDIUM
## 165                      HIGH     HIGH
## 166                    MEDIUM   MEDIUM
## 167                    MEDIUM   MEDIUM
## 168                    MEDIUM   MEDIUM
## 169                    MEDIUM   MEDIUM
## 170                    MEDIUM   MEDIUM
## 171                    MEDIUM     HIGH
## 172                    MEDIUM   MEDIUM
## 173                      HIGH     HIGH
## 174                    MEDIUM   MEDIUM
## 175                    MEDIUM   MEDIUM
## 176                    MEDIUM   MEDIUM
## 177                    MEDIUM   MEDIUM
## 178                    MEDIUM   MEDIUM
## 179                    MEDIUM   MEDIUM
## 180                    MEDIUM   MEDIUM
## 181                      HIGH     HIGH
## 182                      HIGH     HIGH
## 183                    MEDIUM   MEDIUM
## 184                    MEDIUM   MEDIUM
## 185                    MEDIUM   MEDIUM
## 186                    MEDIUM   MEDIUM
## 187                    MEDIUM   MEDIUM
## 188                    MEDIUM   MEDIUM
## 189                      HIGH     HIGH
## 190                    MEDIUM   MEDIUM
## 191                    MEDIUM   MEDIUM
## 192                    MEDIUM   MEDIUM
## 193                    MEDIUM   MEDIUM
## 194                    MEDIUM   MEDIUM
## 195                    MEDIUM   MEDIUM
## 196                      HIGH   MEDIUM
## 197                    MEDIUM   MEDIUM
## 198                      HIGH     HIGH
## 199                    MEDIUM   MEDIUM
## 200                    MEDIUM   MEDIUM
## 201                    MEDIUM   MEDIUM
## 202                    MEDIUM   MEDIUM
## 203                      HIGH     HIGH
## 204                    MEDIUM   MEDIUM
## 205                    MEDIUM   MEDIUM
## 206                      HIGH   MEDIUM
## 207                    MEDIUM   MEDIUM
## 208                    MEDIUM   MEDIUM
## 209                    MEDIUM   MEDIUM
## 210                      HIGH     HIGH
## 211                    MEDIUM   MEDIUM
## 212                    MEDIUM   MEDIUM
## 213                      HIGH   MEDIUM
## 214                    MEDIUM   MEDIUM
## 215                    MEDIUM   MEDIUM
## 216                    MEDIUM   MEDIUM
## 217                    MEDIUM   MEDIUM
## 218                    MEDIUM   MEDIUM
## 219                    MEDIUM   MEDIUM
## 220                      HIGH   MEDIUM
## 221                    MEDIUM   MEDIUM
## 222                    MEDIUM   MEDIUM
## 223                    MEDIUM   MEDIUM
## 224                    MEDIUM   MEDIUM
## 225                    MEDIUM   MEDIUM
## 226                    MEDIUM   MEDIUM
## 227                      HIGH     HIGH
## 228                    MEDIUM   MEDIUM
## 229                    MEDIUM   MEDIUM
## 230                    MEDIUM   MEDIUM
## 231                    MEDIUM   MEDIUM
## 232                    MEDIUM     HIGH
## 233                    MEDIUM   MEDIUM
## 234                    MEDIUM   MEDIUM
## 235                    MEDIUM   MEDIUM
## 236                    MEDIUM   MEDIUM
## 237                    MEDIUM   MEDIUM
## 238                    MEDIUM   MEDIUM
## 239                    MEDIUM   MEDIUM
## 240                    MEDIUM   MEDIUM
## 241                    MEDIUM   MEDIUM
## 242                    MEDIUM   MEDIUM
## 243                    MEDIUM   MEDIUM
## 244                    MEDIUM   MEDIUM
## 245                    MEDIUM   MEDIUM
## 246                    MEDIUM   MEDIUM
## 247                    MEDIUM     HIGH
## 248                    MEDIUM   MEDIUM
## 249                    MEDIUM   MEDIUM
## 250                    MEDIUM   MEDIUM
## 251                    MEDIUM   MEDIUM
## 252                    MEDIUM   MEDIUM
## 253                    MEDIUM   MEDIUM
## 254                      HIGH   MEDIUM
## 255                    MEDIUM   MEDIUM
## 256                    MEDIUM   MEDIUM
## 257                    MEDIUM   MEDIUM
## 258                       LOW   MEDIUM
## 259                    MEDIUM   MEDIUM
## 260                    MEDIUM   MEDIUM
## 261                    MEDIUM   MEDIUM
## 262                      HIGH   MEDIUM
## 263                      HIGH   MEDIUM
## 264                    MEDIUM   MEDIUM
## 265                      HIGH     HIGH
## 266                    MEDIUM   MEDIUM
## 267                    MEDIUM   MEDIUM
## 268                      HIGH     HIGH
## 269                    MEDIUM   MEDIUM
## 270                      HIGH     HIGH
## 271                      HIGH     HIGH
## 272                    MEDIUM   MEDIUM
## 273                    MEDIUM   MEDIUM
## 274                    MEDIUM   MEDIUM
## 275                      HIGH     HIGH
## 276                    MEDIUM   MEDIUM
## 277                      HIGH   MEDIUM
## 278                    MEDIUM   MEDIUM
## 279                    MEDIUM   MEDIUM
## 280                    MEDIUM   MEDIUM
## 281                    MEDIUM   MEDIUM
## 282                      HIGH     HIGH
## 283                    MEDIUM   MEDIUM
## 284                    MEDIUM     HIGH
## 285                      HIGH   MEDIUM
## 286                    MEDIUM   MEDIUM
## 287                      HIGH     HIGH
## 288                    MEDIUM   MEDIUM
## 289                    MEDIUM   MEDIUM
## 290                    MEDIUM   MEDIUM
## 291                    MEDIUM     HIGH
## 292                    MEDIUM   MEDIUM
## 293                      HIGH   MEDIUM
## 294                    MEDIUM   MEDIUM
## 295                    MEDIUM   MEDIUM
## 296                    MEDIUM   MEDIUM
## 297                    MEDIUM   MEDIUM
## 298                    MEDIUM   MEDIUM
## 299                    MEDIUM   MEDIUM
## 300                      HIGH   MEDIUM
## 301                    MEDIUM   MEDIUM
## 302                      HIGH     HIGH
## 303                    MEDIUM   MEDIUM
## 304                      HIGH   MEDIUM
## 305                    MEDIUM   MEDIUM
## 306                    MEDIUM   MEDIUM
## 307                    MEDIUM   MEDIUM
## 308                    MEDIUM   MEDIUM
## 309                    MEDIUM   MEDIUM
## 310                    MEDIUM   MEDIUM
## 311                    MEDIUM   MEDIUM
## 312                    MEDIUM   MEDIUM
## 313                    MEDIUM   MEDIUM
## 314                      HIGH     HIGH
## 315                    MEDIUM   MEDIUM
## 316                      HIGH     HIGH
## 317                      HIGH   MEDIUM
## 318                    MEDIUM   MEDIUM
## 319                      HIGH   MEDIUM
## 320                      HIGH     HIGH
## 321                      HIGH     HIGH
## 322                      HIGH   MEDIUM
## 323                      HIGH   MEDIUM
## 324                    MEDIUM   MEDIUM
## 325                    MEDIUM   MEDIUM
## 326                    MEDIUM   MEDIUM
## 327                    MEDIUM   MEDIUM
## 328                    MEDIUM   MEDIUM
## 329                    MEDIUM   MEDIUM
## 330                      HIGH   MEDIUM
## 331                    MEDIUM   MEDIUM
## 332                       LOW   MEDIUM
## 333                    MEDIUM   MEDIUM
## 334                    MEDIUM   MEDIUM
## 335                      HIGH     HIGH
## 336                    MEDIUM   MEDIUM
## 337                    MEDIUM   MEDIUM
## 338                    MEDIUM   MEDIUM
## 339                      HIGH     HIGH
## 340                      HIGH   MEDIUM
## 341                    MEDIUM   MEDIUM
## 342                      HIGH     HIGH
## 343                    MEDIUM   MEDIUM
## 344                    MEDIUM   MEDIUM
## 345                      HIGH   MEDIUM
## 346                      HIGH   MEDIUM
## 347                      HIGH     HIGH
## 348                    MEDIUM   MEDIUM
## 349                    MEDIUM   MEDIUM
## 350                    MEDIUM   MEDIUM
## 351                    MEDIUM   MEDIUM
## 352                    MEDIUM   MEDIUM
## 353                    MEDIUM   MEDIUM
## 354                      HIGH   MEDIUM
## 355                    MEDIUM   MEDIUM
## 356                      HIGH     HIGH
## 357                      HIGH     HIGH
## 358                      HIGH     HIGH
## 359                      HIGH     HIGH
## 360                    MEDIUM   MEDIUM
## 361                      HIGH   MEDIUM
## 362                      HIGH     HIGH
## 363                    MEDIUM   MEDIUM
## 364                    MEDIUM   MEDIUM
## 365                    MEDIUM   MEDIUM
## 366                    MEDIUM   MEDIUM
## 367                    MEDIUM   MEDIUM
## 368                    MEDIUM   MEDIUM
## 369                    MEDIUM   MEDIUM
## 370                    MEDIUM   MEDIUM
## 371                    MEDIUM   MEDIUM
## 372                    MEDIUM   MEDIUM
## 373                    MEDIUM   MEDIUM
## 374                      HIGH   MEDIUM
## 375                    MEDIUM   MEDIUM
## 376                    MEDIUM   MEDIUM
## 377                    MEDIUM   MEDIUM
## 378                    MEDIUM   MEDIUM
## 379                      HIGH   MEDIUM
## 380                    MEDIUM      LOW
## 381                       LOW   MEDIUM
## 382                      HIGH     HIGH
## 383                    MEDIUM   MEDIUM
## 384                    MEDIUM   MEDIUM
## 385                      HIGH   MEDIUM
## 386                    MEDIUM   MEDIUM
## 387                    MEDIUM   MEDIUM
## 388                      HIGH     HIGH
## 389                      HIGH   MEDIUM
## 390                    MEDIUM   MEDIUM
## 391                    MEDIUM      LOW
## 392                    MEDIUM   MEDIUM
## 393                    MEDIUM   MEDIUM
## 394                    MEDIUM   MEDIUM
## 395                    MEDIUM   MEDIUM
## 396                      HIGH     HIGH
## 397                    MEDIUM   MEDIUM
## 398                    MEDIUM   MEDIUM
## 399                      HIGH     HIGH
## 400                      HIGH   MEDIUM
## 401                    MEDIUM   MEDIUM
## 402                    MEDIUM   MEDIUM
## 403                      HIGH   MEDIUM
## 404                    MEDIUM   MEDIUM
## 405                    MEDIUM   MEDIUM
## 406                      HIGH     HIGH
## 407                      HIGH     HIGH
## 408                    MEDIUM   MEDIUM
## 409                      HIGH   MEDIUM
## 410                    MEDIUM   MEDIUM
## 411                    MEDIUM   MEDIUM
## 412                    MEDIUM   MEDIUM
## 413                       LOW   MEDIUM
## 414                      HIGH   MEDIUM
## 415                    MEDIUM   MEDIUM
## 416                    MEDIUM   MEDIUM
## 417                    MEDIUM     HIGH
## 418                    MEDIUM   MEDIUM
## 419                    MEDIUM   MEDIUM
## 420                    MEDIUM   MEDIUM
## 421                    MEDIUM   MEDIUM
## 422                      HIGH   MEDIUM
## 423                    MEDIUM   MEDIUM
## 424                    MEDIUM   MEDIUM
## 425                    MEDIUM   MEDIUM
## 426                      HIGH     HIGH
## 427                      HIGH     HIGH
## 428                      HIGH     HIGH
## 429                       LOW   MEDIUM
## 430                    MEDIUM   MEDIUM
## 431                    MEDIUM   MEDIUM
## 432                    MEDIUM   MEDIUM
## 433                    MEDIUM   MEDIUM
## 434                       LOW      LOW
## 435                    MEDIUM   MEDIUM
## 436                    MEDIUM   MEDIUM
## 437                      HIGH     HIGH
## 438                    MEDIUM   MEDIUM
## 439                    MEDIUM   MEDIUM
## 440                    MEDIUM   MEDIUM
## 441                      HIGH     HIGH
## 442                      HIGH     HIGH
## 443                    MEDIUM   MEDIUM
## 444                    MEDIUM   MEDIUM
## 445                      HIGH     HIGH
## 446                    MEDIUM   MEDIUM
## 447                    MEDIUM   MEDIUM
## 448                      HIGH   MEDIUM
## 449                       LOW   MEDIUM
## 450                    MEDIUM   MEDIUM
## 451                    MEDIUM   MEDIUM
## 452                      HIGH     HIGH
## 453                      HIGH     HIGH
## 454                    MEDIUM   MEDIUM
## 455                    MEDIUM   MEDIUM
## 456                    MEDIUM   MEDIUM
## 457                    MEDIUM   MEDIUM
## 458                    MEDIUM   MEDIUM
## 459                      HIGH     HIGH
## 460                      HIGH     HIGH
## 461                      HIGH   MEDIUM
## 462                    MEDIUM   MEDIUM
## 463                    MEDIUM   MEDIUM
## 464                      HIGH   MEDIUM
## 465                    MEDIUM   MEDIUM
## 466                    MEDIUM   MEDIUM
## 467                      HIGH     HIGH
## 468                    MEDIUM   MEDIUM
## 469                      HIGH     HIGH
## 470                    MEDIUM     HIGH
## 471                      HIGH   MEDIUM
## 472                      HIGH     HIGH
## 473                      HIGH   MEDIUM
## 474                    MEDIUM   MEDIUM
## 475                    MEDIUM   MEDIUM
## 476                    MEDIUM   MEDIUM
## 477                      HIGH     HIGH
## 478                      HIGH   MEDIUM
## 479                    MEDIUM   MEDIUM
## 480                    MEDIUM   MEDIUM
## 481                    MEDIUM   MEDIUM
## 482                    MEDIUM   MEDIUM
## 483                    MEDIUM   MEDIUM
## 484                      HIGH     HIGH
## 485                    MEDIUM   MEDIUM
## 486                    MEDIUM   MEDIUM
## 487                    MEDIUM   MEDIUM
## 488                    MEDIUM   MEDIUM
## 489                    MEDIUM   MEDIUM
## 490                    MEDIUM   MEDIUM
## 491                      HIGH   MEDIUM
## 492                      HIGH     HIGH
## 493                      HIGH     HIGH
## 494                    MEDIUM   MEDIUM
## 495                      HIGH   MEDIUM
## 496                    MEDIUM     HIGH
## 497                    MEDIUM   MEDIUM
## 498                    MEDIUM   MEDIUM
## 499                      HIGH     HIGH
## 500                      HIGH   MEDIUM
## 501                    MEDIUM   MEDIUM
## 502                    MEDIUM   MEDIUM
## 503                    MEDIUM   MEDIUM
## 504                       LOW   MEDIUM
## 505                    MEDIUM   MEDIUM
## 506                    MEDIUM   MEDIUM
## 507                      HIGH   MEDIUM
## 508                    MEDIUM   MEDIUM
## 509                      HIGH   MEDIUM
## 510                       LOW   MEDIUM
## 511                    MEDIUM   MEDIUM
## 512                    MEDIUM   MEDIUM
## 513                      HIGH     HIGH
## 514                    MEDIUM   MEDIUM
## 515                    MEDIUM   MEDIUM
## 516                    MEDIUM   MEDIUM
## 517                    MEDIUM   MEDIUM
## 518                      HIGH     HIGH
## 519                    MEDIUM   MEDIUM
## 520                    MEDIUM   MEDIUM
## 521                    MEDIUM   MEDIUM
## 522                      HIGH     HIGH
## 523                      HIGH     HIGH
## 524                    MEDIUM   MEDIUM
## 525                    MEDIUM     HIGH
## 526                      HIGH     HIGH
## 527                    MEDIUM   MEDIUM
## 528                    MEDIUM   MEDIUM
## 529                    MEDIUM   MEDIUM
## 530                    MEDIUM   MEDIUM
## 531                      HIGH     HIGH
## 532                      HIGH     HIGH
## 533                    MEDIUM   MEDIUM
## 534                      HIGH     HIGH
## 535                    MEDIUM   MEDIUM
## 536                    MEDIUM   MEDIUM
## 537                    MEDIUM   MEDIUM
## 538                    MEDIUM   MEDIUM
## 539                    MEDIUM   MEDIUM
## 540                    MEDIUM   MEDIUM
## 541                    MEDIUM   MEDIUM
## 542                    MEDIUM   MEDIUM
## 543                    MEDIUM   MEDIUM
## 544                      HIGH     HIGH
## 545                    MEDIUM   MEDIUM
## 546                      HIGH   MEDIUM
## 547                      HIGH     HIGH
## 548                    MEDIUM   MEDIUM
## 549                    MEDIUM   MEDIUM
## 550                    MEDIUM   MEDIUM
## 551                    MEDIUM   MEDIUM
## 552                      HIGH     HIGH
## 553                    MEDIUM   MEDIUM
## 554                    MEDIUM   MEDIUM
## 555                    MEDIUM   MEDIUM
## 556                    MEDIUM   MEDIUM
## 557                    MEDIUM   MEDIUM
## 558                      HIGH   MEDIUM
## 559                      HIGH   MEDIUM
## 560                      HIGH     HIGH
## 561                    MEDIUM   MEDIUM
## 562                    MEDIUM   MEDIUM
## 563                      HIGH     HIGH
## 564                    MEDIUM   MEDIUM
## 565                      HIGH   MEDIUM
## 566                    MEDIUM   MEDIUM
## 567                       LOW   MEDIUM
## 568                      HIGH   MEDIUM
## 569                    MEDIUM   MEDIUM
## 570                    MEDIUM   MEDIUM
## 571                      HIGH   MEDIUM
## 572                    MEDIUM   MEDIUM
## 573                      HIGH   MEDIUM
## 574                    MEDIUM   MEDIUM
## 575                       LOW   MEDIUM
## 576                    MEDIUM   MEDIUM
## 577                    MEDIUM   MEDIUM
## 578                      HIGH     HIGH
## 579                    MEDIUM   MEDIUM
## 580                      HIGH     HIGH
## 581                    MEDIUM   MEDIUM
## 582                      HIGH     HIGH
## 583                    MEDIUM     HIGH
## 584                    MEDIUM   MEDIUM
## 585                    MEDIUM   MEDIUM
## 586                    MEDIUM   MEDIUM
## 587                    MEDIUM   MEDIUM
## 588                    MEDIUM   MEDIUM
## 589                    MEDIUM   MEDIUM
## 590                      HIGH   MEDIUM
## 591                      HIGH     HIGH
## 592                    MEDIUM   MEDIUM
## 593                      HIGH     HIGH
## 594                    MEDIUM   MEDIUM
## 595                      HIGH     HIGH
## 596                    MEDIUM   MEDIUM
## 597                      HIGH   MEDIUM
## 598                    MEDIUM   MEDIUM
## 599                      HIGH   MEDIUM
## 600                      HIGH   MEDIUM
## 601                    MEDIUM   MEDIUM
## 602                    MEDIUM   MEDIUM
## 603                      HIGH   MEDIUM
## 604                    MEDIUM   MEDIUM
## 605                       LOW   MEDIUM
## 606                    MEDIUM     HIGH
## 607                    MEDIUM   MEDIUM
## 608                      HIGH     HIGH
## 609                    MEDIUM   MEDIUM
## 610                    MEDIUM   MEDIUM
## 611                      HIGH     HIGH
## 612                       LOW   MEDIUM
## 613                    MEDIUM   MEDIUM
## 614                    MEDIUM   MEDIUM
## 615                      HIGH     HIGH
## 616                    MEDIUM   MEDIUM
## 617                       LOW   MEDIUM
## 618                      HIGH     HIGH
## 619                      HIGH     HIGH
## 620                      HIGH     HIGH
## 621                      HIGH     HIGH
## 622                    MEDIUM     HIGH
## 623                    MEDIUM   MEDIUM
## 624                    MEDIUM   MEDIUM
## 625                    MEDIUM     HIGH
## 626                      HIGH   MEDIUM
## 627                    MEDIUM   MEDIUM
## 628                      HIGH   MEDIUM
## 629                    MEDIUM   MEDIUM
## 630                    MEDIUM   MEDIUM
## 631                      HIGH     HIGH
## 632                    MEDIUM     HIGH
## 633                      HIGH     HIGH
## 634                    MEDIUM   MEDIUM
## 635                    MEDIUM   MEDIUM
## 636                    MEDIUM   MEDIUM
## 637                      HIGH   MEDIUM
## 638                    MEDIUM   MEDIUM
## 639                    MEDIUM   MEDIUM
## 640                    MEDIUM   MEDIUM
## 641                    MEDIUM   MEDIUM
## 642                      HIGH   MEDIUM
## 643                    MEDIUM     HIGH
## 644                      HIGH     HIGH
## 645                      HIGH   MEDIUM
## 646                    MEDIUM   MEDIUM
## 647                    MEDIUM   MEDIUM
## 648                      HIGH     HIGH
## 649                    MEDIUM   MEDIUM
## 650                    MEDIUM   MEDIUM
## 651                      HIGH     HIGH
## 652                    MEDIUM   MEDIUM
## 653                    MEDIUM   MEDIUM
## 654                    MEDIUM   MEDIUM
## 655                    MEDIUM   MEDIUM
## 656                      HIGH     HIGH
## 657                    MEDIUM   MEDIUM
## 658                    MEDIUM     HIGH
## 659                    MEDIUM   MEDIUM
## 660                    MEDIUM   MEDIUM
## 661                      HIGH     HIGH
## 662                      HIGH     HIGH
## 663                    MEDIUM   MEDIUM
## 664                      HIGH   MEDIUM
## 665                      HIGH     HIGH
## 666                    MEDIUM   MEDIUM
## 667                      HIGH     HIGH
## 668                      HIGH     HIGH
## 669                    MEDIUM   MEDIUM
## 670                    MEDIUM   MEDIUM
## 671                    MEDIUM   MEDIUM
## 672                       LOW   MEDIUM
## 673                    MEDIUM   MEDIUM
## 674                    MEDIUM   MEDIUM
## 675                    MEDIUM   MEDIUM
## 676                    MEDIUM   MEDIUM
## 677                      HIGH     HIGH
## 678                    MEDIUM   MEDIUM
## 679                      HIGH     HIGH
## 680                    MEDIUM   MEDIUM
## 681                    MEDIUM     HIGH
## 682                      HIGH   MEDIUM
## 683                      HIGH     HIGH
## 684                    MEDIUM   MEDIUM
## 685                      HIGH   MEDIUM
## 686                      HIGH     HIGH
## 687                    MEDIUM   MEDIUM
## 688                      HIGH   MEDIUM
## 689                      HIGH     HIGH
## 690                    MEDIUM   MEDIUM
## 691                      HIGH     HIGH
## 692                      HIGH   MEDIUM
## 693                    MEDIUM   MEDIUM
## 694                      HIGH   MEDIUM
## 695                      HIGH     HIGH
## 696                    MEDIUM   MEDIUM
## 697                    MEDIUM   MEDIUM
## 698                      HIGH   MEDIUM
## 699                      HIGH   MEDIUM
## 700                    MEDIUM   MEDIUM
## 701                    MEDIUM   MEDIUM
## 702                      HIGH     HIGH
## 703                    MEDIUM   MEDIUM
## 704                      HIGH     HIGH
## 705                       LOW   MEDIUM
## 706                    MEDIUM   MEDIUM
## 707                    MEDIUM   MEDIUM
## 708                      HIGH     HIGH
## 709                      HIGH     HIGH
## 710                    MEDIUM     HIGH
## 711                    MEDIUM   MEDIUM
## 712                    MEDIUM   MEDIUM
## 713                      HIGH     HIGH
## 714                    MEDIUM   MEDIUM
## 715                    MEDIUM   MEDIUM
## 716                    MEDIUM   MEDIUM
## 717                      HIGH   MEDIUM
## 718                      HIGH     HIGH
## 719                    MEDIUM   MEDIUM
## 720                    MEDIUM   MEDIUM
## 721                    MEDIUM   MEDIUM
## 722                    MEDIUM   MEDIUM
## 723                      HIGH   MEDIUM
## 724                      HIGH   MEDIUM
## 725                    MEDIUM   MEDIUM
## 726                       LOW   MEDIUM
## 727                      HIGH   MEDIUM
## 728                    MEDIUM     HIGH
## 729                    MEDIUM   MEDIUM
## 730                    MEDIUM   MEDIUM
## 731                      HIGH   MEDIUM
## 732                    MEDIUM   MEDIUM
## 733                    MEDIUM   MEDIUM
## 734                      HIGH     HIGH
## 735                    MEDIUM   MEDIUM
## 736                      HIGH     HIGH
## 737                      HIGH   MEDIUM
## 738                    MEDIUM   MEDIUM
## 739                      HIGH     HIGH
## 740                    MEDIUM   MEDIUM
## 741                    MEDIUM   MEDIUM
## 742                      HIGH     HIGH
## 743                      HIGH   MEDIUM
## 744                      HIGH     HIGH
## 745                      HIGH     HIGH
## 746                      HIGH   MEDIUM
## 747                      HIGH   MEDIUM
## 748                    MEDIUM   MEDIUM
## 749                    MEDIUM   MEDIUM
## 750                      HIGH   MEDIUM
## 751                    MEDIUM   MEDIUM
## 752                      HIGH   MEDIUM
## 753                      HIGH   MEDIUM
## 754                      HIGH     HIGH
## 755                      HIGH   MEDIUM
## 756                    MEDIUM   MEDIUM
## 757                      HIGH   MEDIUM
## 758                    MEDIUM   MEDIUM
## 759                      HIGH     HIGH
## 760                    MEDIUM     HIGH
# Evaluate the GBM predictions against the held-out test labels.
# caret::confusionMatrix() takes the predicted classes as `data` and the true
# classes as `reference`. Passing them the other way round transposes the
# table and swaps sensitivity with positive predictive value (and specificity
# with negative predictive value), and bases prevalence / the no-information
# rate on the predictions instead of the true labels.
# NOTE(review): any previously printed confusion-matrix output that follows
# was produced with the arguments swapped; re-run this chunk to refresh it.
cm <- confusionMatrix(
  data = as.factor(pred.gbm),
  reference = test.data$Rating_Category
)
print(cm)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  LOW MEDIUM HIGH EXCELLENT
##   LOW         2     17    0         0
##   MEDIUM      2    474   35         0
##   HIGH        0     91  139         0
##   EXCELLENT   0      0    0         0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8092          
##                  95% CI : (0.7794, 0.8366)
##     No Information Rate : 0.7658          
##     P-Value [Acc > NIR] : 0.002252        
##                                           
##                   Kappa : 0.541           
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: LOW Class: MEDIUM Class: HIGH Class: EXCELLENT
## Sensitivity            0.500000        0.8144      0.7989               NA
## Specificity            0.977513        0.7921      0.8447                1
## Pos Pred Value         0.105263        0.9276      0.6043               NA
## Neg Pred Value         0.997301        0.5663      0.9340               NA
## Prevalence             0.005263        0.7658      0.2289                0
## Detection Rate         0.002632        0.6237      0.1829                0
## Detection Prevalence   0.025000        0.6724      0.3026                0
## Balanced Accuracy      0.738757        0.8033      0.8218               NA