# Splitting the genres into one indicator column per genre
genre.split <- movie %>%
  select(genres, imdb_score) %>%
  mutate(Action = ifelse(grepl("Action", genres), 1, 0),
         Adventure = ifelse(grepl("Adventure", genres), 1, 0),
         Animation = ifelse(grepl("Animation", genres), 1, 0),
         Biography = ifelse(grepl("Biography", genres), 1, 0),
         Comedy = ifelse(grepl("Comedy", genres), 1, 0),
         Crime = ifelse(grepl("Crime", genres), 1, 0),
         Documentary = ifelse(grepl("Documentary", genres), 1, 0),
         Drama = ifelse(grepl("Drama", genres), 1, 0),
         Family = ifelse(grepl("Family", genres), 1, 0),
         Fantasy = ifelse(grepl("Fantasy", genres), 1, 0),
         `Film-Noir` = ifelse(grepl("Film-Noir", genres), 1, 0),
         History = ifelse(grepl("History", genres), 1, 0),
         Horror = ifelse(grepl("Horror", genres), 1, 0),
         Musical = ifelse(grepl("Musical", genres), 1, 0),
         Mystery = ifelse(grepl("Mystery", genres), 1, 0),
         News = ifelse(grepl("News", genres), 1, 0),
         Romance = ifelse(grepl("Romance", genres), 1, 0),
         `Sci-Fi` = ifelse(grepl("Sci-Fi", genres), 1, 0),
         Short = ifelse(grepl("Short", genres), 1, 0),
         Sport = ifelse(grepl("Sport", genres), 1, 0),
         War = ifelse(grepl("War", genres), 1, 0),
         Western = ifelse(grepl("Western", genres), 1, 0))
# Genre-wise mean IMDB score
genre.split %>%
  tidyr::gather(Genre_Type, Binary, Action:Western) %>%
  filter(Binary == 1) %>%
  select(-c(Binary, genres)) %>%
  group_by(Genre_Type) %>%
  summarise(Mean_Score = mean(imdb_score)) %>%
  arrange(Mean_Score) %>%
  ggplot(aes(x = Genre_Type, y = Mean_Score, fill = Genre_Type)) +
  geom_bar(stat = "identity", color = "black") +
  coord_flip()
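Writing 22 near-identical grepl() calls works, but the same summary can be built more compactly. A sketch, assuming tidyr is available and that genres is pipe-delimited (as in the Kaggle IMDB 5000 data):
# Sketch: one row per (movie, genre) pair, then summarise per genre
movie %>%
  select(genres, imdb_score) %>%
  tidyr::separate_rows(genres, sep = "\\|") %>%
  group_by(genres) %>%
  summarise(Mean_Score = mean(imdb_score)) %>%
  arrange(Mean_Score)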
#Removing Genres
movie <- movie%>%select(-genres)
missing.values <- aggr(movie, sortVars = T, prop = T, sortCombs = T, cex.lab = 1.5, cex.axis = .6, cex.numbers = 5, combined = F, gap = -.2)
##
## Variables sorted by number of missings:
## Variable Count
## gross 0.174869948
## budget 0.097438976
## aspect_ratio 0.065426170
## title_year 0.021408563
## director_facebook_likes 0.020608243
## num_critic_for_reviews 0.009803922
## actor_3_facebook_likes 0.004601841
## num_user_for_reviews 0.004201681
## duration 0.003001200
## facenumber_in_poster 0.002601040
## actor_2_facebook_likes 0.002601040
## actor_1_facebook_likes 0.001400560
## color 0.000000000
## director_name 0.000000000
## actor_2_name 0.000000000
## actor_1_name 0.000000000
## movie_title 0.000000000
## num_voted_users 0.000000000
## cast_total_facebook_likes 0.000000000
## actor_3_name 0.000000000
## plot_keywords 0.000000000
## movie_imdb_link 0.000000000
## language 0.000000000
## country 0.000000000
## content_rating 0.000000000
## imdb_score 0.000000000
## movie_facebook_likes 0.000000000
Imputation with the column mean has been done for some of the predictors, such as the Facebook likes for actors and directors, after first converting zeros in those columns to NAs (sketched below). The data now contains 3857 observations of 26 variables. We remove the observations that have no values at all, since we have no information about them. The content-rating column is also cleaned to reduce the number of categories: M and GP are folded into PG, X into NC-17, and Approved, Not Rated, Unrated and Passed into R. The final cleaning steps are:

- Adding a profit column as the difference between gross income and budget: Profit = Gross - Budget
- Removing the color column, since most movies (~96%) are in color and fewer than 4% are black & white
- Removing the language column, since most movies (~95%) are in English and fewer than 5% are in other languages
- Collapsing the country column into three categories: USA (79%, 3025 observations), UK (8%, 316 observations) and Others (465 observations)

The final, cleaned dataset has 3806 observations of 26 variables.
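The zero-to-NA conversion, mean imputation and rating recode themselves are not shown in this section; a minimal sketch, assuming the Kaggle IMDB 5000 column names and dplyr >= 1.0:
# Sketch only: replace zero likes with NA, then impute the column mean
impute_mean <- function(x) {
  x[!is.na(x) & x == 0] <- NA       # treat zero likes as missing
  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
}
movie <- movie %>%
  mutate(across(c(director_facebook_likes, actor_1_facebook_likes,
                  actor_2_facebook_likes, actor_3_facebook_likes),
                impute_mean))
# Collapse the rarer content ratings into the four categories described above
movie$content_rating <- as.character(movie$content_rating)
movie$content_rating[movie$content_rating %in% c("M", "GP")] <- "PG"
movie$content_rating[movie$content_rating == "X"] <- "NC-17"
movie$content_rating[movie$content_rating %in% c("Approved", "Not Rated",
                                                 "Unrated", "Passed")] <- "R"
movie$content_rating <- factor(movie$content_rating)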
# Adding profit and return-on-investment columns
movie <- movie %>%
  mutate(profit = gross - budget,
         return_on_investment_perc = (profit / budget) * 100)
# Removing the color and language columns
movie <- subset(movie, select = -c(color, language))
#Cleaning the Country column into 3 categories
levels(movie$country) <- c(levels(movie$country), "Others")
movie$country[(movie$country != 'USA')&(movie$country != 'UK')] <- 'Others'
movie$country <- factor(movie$country)
##Distribution of IMDB Score Variable
ggplot(movie, aes(x=imdb_score)) +
geom_density(fill="red",alpha = 0.6)+coord_cartesian(xlim = c(0, 10))+
geom_vline(xintercept = mean(movie$imdb_score), color="blue")
profit.movie <-movie%>%
select(movie_title,profit)%>%
filter(!is.na(profit))%>%
arrange(desc(profit))%>%
top_n(20)
## Selecting by profit
p1 <- ggplot(profit.movie, aes(x=reorder(movie_title,profit/1000000), profit/1000000,fill=factor(movie_title))) +
geom_bar(stat = "identity") +
ggtitle("Top Profitable Movies")+coord_flip()+xlab("Movie Name")+ylab("Profit in Million $")+theme_bw()
p1
movie %>%
# filter(title_year %in% c(2000:2016)) %>%
arrange(desc(profit)) %>%
top_n(20, profit) %>%
ggplot(aes(x=budget/1000000, y=profit/1000000)) +
geom_point(size=3) +
geom_smooth(size=2) +
geom_text_repel(aes(label=movie_title)) +
labs(x = "Budget in Million $", y = "Profit in Million $", title = "Top 20 Profitable Movies") +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
movie %>%
mutate(profit = gross - budget,
return_on_investment_perc = (profit/budget)*100) %>%
arrange(desc(profit)) %>%
top_n(20, profit) %>%
ggplot(aes(x=budget/1000000, y = return_on_investment_perc)) +
geom_point(size = 3) +
geom_smooth(size = 2) +
geom_text_repel(aes(label = movie_title), size = 3) +
xlab("Budget in Million $") +
ylab("Percentage Return on Investment")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p<-ggplot(movie, aes(x=imdb_score, y=num_voted_users, group=content_rating))+
geom_point(aes(color=content_rating),size=0.7)+
scale_color_brewer(palette="Dark2")+geom_smooth(aes(color=content_rating),se = FALSE, method = lm)+
xlab("IMDB Score")+ylab("Number of Voters")+labs(color = "Rating\n")
ggplotly(p)
movie %>%
top_n(20, profit) %>%
ggplot(aes(x = imdb_score, y = gross/10^6, size = profit/10^6, color = content_rating)) +
geom_point() +
geom_hline(aes(yintercept = 550)) +
geom_vline(aes(xintercept = 7.75)) +
geom_text_repel(aes(label = movie_title), size = 4) +
xlab("IMDB Score") +
ylab("Gross Money Earned(in million dollars)") +
ggtitle("Commercial Success Vs Critical Acclaim") +
annotate("text", x = 8.5, y = 700, label = "High IMDB Score & High Gross",size=5) +
theme(plot.title = element_text(hjust = 0.5))
#Time Series for IMDB Score
imdb.ts<-movie%>%
select(title_year,imdb_score,country)%>%
group_by(title_year)%>%
summarise(IMDB_Rating=mean(imdb_score))
plot.ts1<-ggplot(data=imdb.ts,aes(x=title_year,y=IMDB_Rating))+geom_point(size=3)+geom_line(size=1)+
geom_smooth(col="red")+xlab("Year of Release")+ylab("IMDB Rating")
ggplotly(plot.ts1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Time Series for Return on Investment
roi.ts<-movie%>%
select(title_year,return_on_investment_perc)%>%
group_by(title_year)%>%
summarise(ROI=mean(return_on_investment_perc))
plot.ts2<-ggplot(data=roi.ts,aes(x=title_year,y=ROI))+geom_point(size=3)+geom_line(size=1)+
geom_smooth(col="green")+xlab("Year of Release")+ylab("Return on Investment")
ggplotly(plot.ts2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
director.imdb<-movie%>%
select(director_name,imdb_score)%>%
group_by(director_name)%>%
summarise(Average_IMDB_Rating=mean(imdb_score))%>%
arrange(desc(Average_IMDB_Rating))%>%
top_n(20)
## Selecting by Average_IMDB_Rating
director.df<-as.data.frame(director.imdb)
names(director.df)[names(director.df) == "director_name"] <- "Director"
director.table <- formattable(director.df,list(Average_IMDB_Rating=color_bar("lightgreen")))
director.table
| Director | Average_IMDB_Rating |
|---|---|
| Akira Kurosawa | 8.700000 |
| Charles Chaplin | 8.600000 |
| Tony Kaye | 8.600000 |
| Alfred Hitchcock | 8.500000 |
| Damien Chazelle | 8.500000 |
| Majid Majidi | 8.500000 |
| Ron Fricke | 8.500000 |
| Sergio Leone | 8.433333 |
| Christopher Nolan | 8.425000 |
| Asghar Farhadi | 8.400000 |
| Marius A. Markevicius | 8.400000 |
| Richard Marquand | 8.400000 |
| Billy Wilder | 8.300000 |
| Fritz Lang | 8.300000 |
| Lee Unkrich | 8.300000 |
| Lenny Abrahamson | 8.300000 |
| Pete Docter | 8.233333 |
| Hayao Miyazaki | 8.225000 |
| Elia Kazan | 8.200000 |
| George Roy Hill | 8.200000 |
| Joshua Oppenheimer | 8.200000 |
| Juan José Campanella | 8.200000 |
| Quentin Tarantino | 8.200000 |
imdb.user<-ggplot(data=movie,aes(x=imdb_score,y=num_user_for_reviews,colour=factor(country)))+
geom_point(aes(colour= factor(country)),size=0.7)+
geom_smooth(se = FALSE, method = "lm")+xlab("IMDB Score")+ylab("Number of User Reviews")+
ylim(0,1500)+labs(color = "Country\n")
ggplotly(imdb.user)
## Warning: Removed 86 rows containing non-finite values (stat_smooth).
ggplot(data=movie,aes(x=actor_1_facebook_likes,y=movie_facebook_likes))+
geom_point()+
geom_smooth(se = TRUE, method = "lm")+xlim(0,50000)+ylim(0,200000)
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).
We want to find out whether we can include director and actor names in our prediction algorithm. There are 1709 unique directors and 3713 unique actors; including these names would make the model far too complex, so we remove those columns (a sketch follows below). Earlier we had added two additional columns, profit and return on investment; we remove these as well to avoid multicollinearity. Features like the movie link and plot keywords are also removed, as they are inessential for making predictions. We then visualize the correlation between the numeric variables to check for highly correlated pairs. Certain variables are indeed highly correlated: actor 1 Facebook likes and total cast Facebook likes have a correlation of 0.95, and the number of users who voted is highly correlated with the number of user reviews. To keep the analysis simple, we drop total cast Facebook likes and keep two features: actor 1 Facebook likes and other actors' Facebook likes, where the latter is the sum of actor 2 and actor 3 Facebook likes. We also create a new feature, the critic review ratio, defined as the number of critic reviews divided by the number of user reviews. We then drop the now-redundant columns: total cast Facebook likes, actor 2 and actor 3 Facebook likes, and the critic and user review counts. Finally, we bin imdb_score into 4 categories (LOW, MEDIUM, HIGH, EXCELLENT) and drop imdb_score itself, since the categorical rating is now the response. The final cleaned dataset contains 3806 observations and 14 variables.
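The name, link and keyword drops are not shown in this section; a minimal sketch, assuming the standard column names (movie_title is also assumed dropped, since it does not appear in the model):
# Sketch: drop identifier-like and redundant columns before modeling
movie <- movie %>%
  select(-c(director_name, actor_1_name, actor_2_name, actor_3_name,
            movie_title, movie_imdb_link, plot_keywords,
            profit, return_on_investment_perc))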
#Visualizing Correlation Plots
ggcorr(movie, label = TRUE, label_round = 3, label_size = 3, size = 2, hjust = .85) +
ggtitle("Correlation between continuous variables") +
theme(plot.title = element_text(hjust = 0.5))
## Warning in ggcorr(movie, label = TRUE, label_round = 3, label_size = 3, :
## data in column(s) 'country', 'content_rating' are not numeric and were
## ignored
#Adding new columns and deleting unnecessary columns
movie<-movie%>%
mutate(other_actor_facebook_likes=actor_2_facebook_likes + actor_3_facebook_likes,
critic_total_ratio=num_critic_for_reviews/num_user_for_reviews)%>%
select (-c(cast_total_facebook_likes, actor_2_facebook_likes, actor_3_facebook_likes,
num_critic_for_reviews, num_user_for_reviews))
#Creating Score Categories
movie <- movie %>% mutate(Rating_Category = cut(imdb_score, c(0, 4, 7, 9, 10),
labels = c("LOW", "MEDIUM", "HIGH", "EXCELLENT")))
movie.final<-movie%>%select(-imdb_score)
##Splitting Data
set.seed(13469385)
training.samples <- movie.final$Rating_Category%>%
createDataPartition(p = 0.8, list = FALSE)
train.data <- movie.final[training.samples, ]
test.data <- movie.final[-training.samples, ]
##Multinomial Logistic Regression
# Fit the model
model.multi <- nnet::multinom(Rating_Category ~., data = train.data)
## # weights: 76 (54 variable)
## initial value 4222.652624
## iter 10 value 3204.940875
## iter 20 value 2779.730790
## iter 30 value 2433.508868
## iter 40 value 1854.626103
## iter 50 value 1840.991293
## iter 60 value 1799.565188
## iter 70 value 1773.856065
## iter 80 value 1757.618743
## iter 90 value 1707.639100
## iter 100 value 1701.554271
## final value 1701.554271
## stopped after 100 iterations
formattable(tidy(model.multi))
| y.level | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|
| MEDIUM | (Intercept) | 0.9984410 | 1.369488e-11 | -1.139293e+08 | 0.000000e+00 |
| MEDIUM | duration | 1.0078225 | 1.496031e-09 | 5.208489e+06 | 0.000000e+00 |
| MEDIUM | director_facebook_likes | 1.0004594 | 1.530147e-08 | 3.001488e+04 | 0.000000e+00 |
| MEDIUM | actor_1_facebook_likes | 1.0000174 | 1.202104e-06 | 1.447013e+01 | 1.871226e-47 |
| MEDIUM | gross | 1.0000000 | 3.870099e-09 | 1.326953e+00 | 1.845242e-01 |
| MEDIUM | num_voted_users | 1.0000102 | 6.811821e-07 | 1.501308e+01 | 6.028664e-51 |
| MEDIUM | facenumber_in_poster | 1.0499538 | 2.601767e-11 | 1.873580e+09 | 0.000000e+00 |
| MEDIUM | countryUSA | 1.1217744 | 1.105256e-11 | 1.039684e+10 | 0.000000e+00 |
| MEDIUM | countryOthers | 0.9358600 | 1.494668e-12 | -4.435057e+10 | 0.000000e+00 |
| MEDIUM | content_ratingNC-17 | 0.9975295 | 3.586818e-14 | -6.896173e+10 | 0.000000e+00 |
| MEDIUM | content_ratingPG | 0.9479850 | 2.146214e-12 | -2.488877e+10 | 0.000000e+00 |
| MEDIUM | content_ratingPG-13 | 1.1302067 | 5.016445e-12 | 2.439986e+10 | 0.000000e+00 |
| MEDIUM | content_ratingR | 0.9593154 | 6.941376e-12 | -5.983736e+09 | 0.000000e+00 |
| MEDIUM | budget | 1.0000000 | 2.396464e-09 | 3.643075e-01 | 7.156284e-01 |
| MEDIUM | title_year | 1.0007604 | 2.749907e-08 | 2.763964e+04 | 0.000000e+00 |
| MEDIUM | movie_facebook_likes | 1.0000016 | 3.464553e-07 | 4.517580e+00 | 6.255028e-06 |
| MEDIUM | other_actor_facebook_likes | 1.0000499 | 1.200430e-07 | 4.155244e+02 | 0.000000e+00 |
| MEDIUM | critic_total_ratio | 1.0049864 | 1.427480e-11 | 3.484442e+08 | 0.000000e+00 |
| HIGH | (Intercept) | 1.0015504 | 1.339715e-11 | 1.156340e+08 | 0.000000e+00 |
| HIGH | duration | 1.0262821 | 1.468151e-09 | 1.767027e+07 | 0.000000e+00 |
| HIGH | director_facebook_likes | 1.0005113 | 1.520479e-08 | 3.361597e+04 | 0.000000e+00 |
| HIGH | actor_1_facebook_likes | 1.0000183 | 1.196216e-06 | 1.529562e+01 | 8.178162e-53 |
| HIGH | gross | 1.0000000 | 3.924285e-09 | -1.918451e+00 | 5.505385e-02 |
| HIGH | num_voted_users | 1.0000227 | 5.540170e-07 | 4.094660e+01 | 0.000000e+00 |
| HIGH | facenumber_in_poster | 0.9200265 | 2.548731e-11 | -3.270367e+09 | 0.000000e+00 |
| HIGH | countryUSA | 0.8865607 | 1.081852e-11 | -1.112959e+10 | 0.000000e+00 |
| HIGH | countryOthers | 1.0739334 | 1.439854e-12 | 4.953834e+10 | 0.000000e+00 |
| HIGH | content_ratingNC-17 | 1.0035034 | 3.528688e-14 | 9.910894e+10 | 0.000000e+00 |
| HIGH | content_ratingPG | 1.0430275 | 2.051971e-12 | 2.053030e+10 | 0.000000e+00 |
| HIGH | content_ratingPG-13 | 0.8558075 | 4.921985e-12 | -3.163556e+10 | 0.000000e+00 |
| HIGH | content_ratingR | 1.0889115 | 6.663616e-12 | 1.278264e+10 | 0.000000e+00 |
| HIGH | budget | 1.0000000 | 2.407617e-09 | 3.741992e-01 | 7.082561e-01 |
| HIGH | title_year | 0.9990805 | 2.690119e-08 | -3.419637e+04 | 0.000000e+00 |
| HIGH | movie_facebook_likes | 1.0000146 | 3.400848e-07 | 4.283182e+01 | 0.000000e+00 |
| HIGH | other_actor_facebook_likes | 1.0000306 | 1.192569e-07 | 2.569871e+02 | 0.000000e+00 |
| HIGH | critic_total_ratio | 1.0587263 | 1.394240e-11 | 4.093025e+09 | 0.000000e+00 |
| EXCELLENT | (Intercept) | 1.0000045 | 1.379445e-13 | 3.263422e+07 | 0.000000e+00 |
| EXCELLENT | duration | 1.0623104 | 5.343790e-12 | 1.131148e+10 | 0.000000e+00 |
| EXCELLENT | director_facebook_likes | 0.9985708 | 5.068228e-10 | -2.821886e+06 | 0.000000e+00 |
| EXCELLENT | actor_1_facebook_likes | 0.9998499 | 7.239411e-09 | -2.073773e+04 | 0.000000e+00 |
| EXCELLENT | gross | 1.0000000 | 9.612253e-09 | -1.407733e+00 | 1.592103e-01 |
| EXCELLENT | num_voted_users | 1.0000291 | 9.760376e-07 | 2.976378e+01 | 1.150413e-194 |
| EXCELLENT | facenumber_in_poster | 0.9829743 | 6.256249e-13 | -2.744817e+10 | 0.000000e+00 |
| EXCELLENT | countryUSA | 1.0011202 | 2.463254e-13 | 4.545281e+09 | 0.000000e+00 |
| EXCELLENT | countryOthers | 0.9995858 | 5.407209e-14 | -7.661813e+09 | 0.000000e+00 |
| EXCELLENT | content_ratingNC-17 | 0.9999316 | 8.956466e-16 | -7.634757e+10 | 0.000000e+00 |
| EXCELLENT | content_ratingPG | 1.0003737 | 1.504262e-13 | 2.483547e+09 | 0.000000e+00 |
| EXCELLENT | content_ratingPG-13 | 1.0008016 | 1.300159e-13 | 6.162703e+09 | 0.000000e+00 |
| EXCELLENT | content_ratingR | 0.9996138 | 6.319010e-13 | -6.112820e+08 | 0.000000e+00 |
| EXCELLENT | budget | 0.9999998 | 8.548225e-08 | -1.786536e+00 | 7.401257e-02 |
| EXCELLENT | title_year | 0.9951730 | 2.785500e-10 | -1.737095e+07 | 0.000000e+00 |
| EXCELLENT | movie_facebook_likes | 1.0000080 | 4.287546e-08 | 1.873724e+02 | 0.000000e+00 |
| EXCELLENT | other_actor_facebook_likes | 1.0001301 | 4.355748e-09 | 2.987372e+04 | 0.000000e+00 |
| EXCELLENT | critic_total_ratio | 0.9986556 | 3.465287e-13 | -3.882211e+09 | 0.000000e+00 |
summary(model.multi)
## Call:
## nnet::multinom(formula = Rating_Category ~ ., data = train.data)
##
## Coefficients:
## (Intercept) duration director_facebook_likes
## MEDIUM -1.560249e-03 0.007792058 0.0004592718
## HIGH 1.549165e-03 0.025942624 0.0005111236
## EXCELLENT 4.501712e-06 0.060446204 -0.0014301964
## actor_1_facebook_likes gross num_voted_users
## MEDIUM 1.739460e-05 5.135441e-09 1.022664e-05
## HIGH 1.829686e-05 -7.528549e-09 2.268511e-05
## EXCELLENT -1.501289e-04 -1.353148e-08 2.905056e-05
## facenumber_in_poster countryUSA countryOthers
## MEDIUM 0.04874620 0.114911699 -0.0662893552
## HIGH -0.08335285 -0.120405702 0.0713279718
## EXCELLENT -0.01717226 0.001119618 -0.0004142902
## content_ratingNC-17 content_ratingPG content_ratingPG-13
## MEDIUM -2.473532e-03 -0.0534166416 0.1224005568
## HIGH 3.497245e-03 0.0421275878 -0.1557097853
## EXCELLENT -6.838044e-05 0.0003735906 0.0008012495
## content_ratingR budget title_year movie_facebook_likes
## MEDIUM -0.0415353616 8.730499e-10 0.0007600643 1.565140e-06
## HIGH 0.0851785855 9.009284e-10 -0.0009199230 1.456645e-05
## EXCELLENT -0.0003862697 -1.527171e-07 -0.0048386777 8.033679e-06
## other_actor_facebook_likes critic_total_ratio
## MEDIUM 4.988078e-05 0.004973972
## HIGH 3.064750e-05 0.057066587
## EXCELLENT 1.301224e-04 -0.001345298
##
## Std. Errors:
## (Intercept) duration director_facebook_likes
## MEDIUM 1.369488e-11 1.496031e-09 1.530147e-08
## HIGH 1.339715e-11 1.468151e-09 1.520479e-08
## EXCELLENT 1.379445e-13 5.343790e-12 5.068228e-10
## actor_1_facebook_likes gross num_voted_users
## MEDIUM 1.202104e-06 3.870099e-09 6.811821e-07
## HIGH 1.196216e-06 3.924285e-09 5.540170e-07
## EXCELLENT 7.239411e-09 9.612253e-09 9.760376e-07
## facenumber_in_poster countryUSA countryOthers
## MEDIUM 2.601767e-11 1.105256e-11 1.494668e-12
## HIGH 2.548731e-11 1.081852e-11 1.439854e-12
## EXCELLENT 6.256249e-13 2.463254e-13 5.407209e-14
## content_ratingNC-17 content_ratingPG content_ratingPG-13
## MEDIUM 3.586818e-14 2.146214e-12 5.016445e-12
## HIGH 3.528688e-14 2.051971e-12 4.921985e-12
## EXCELLENT 8.956466e-16 1.504262e-13 1.300159e-13
## content_ratingR budget title_year movie_facebook_likes
## MEDIUM 6.941376e-12 2.396464e-09 2.749907e-08 3.464553e-07
## HIGH 6.663616e-12 2.407617e-09 2.690119e-08 3.400848e-07
## EXCELLENT 6.319010e-13 8.548225e-08 2.785500e-10 4.287546e-08
## other_actor_facebook_likes critic_total_ratio
## MEDIUM 1.200430e-07 1.427480e-11
## HIGH 1.192569e-07 1.394240e-11
## EXCELLENT 4.355748e-09 3.465287e-13
##
## Residual Deviance: 3403.109
## AIC: 3511.109
# Make predictions
predicted.classes <- model.multi %>% predict(test.data)
head(predicted.classes)
## [1] HIGH HIGH MEDIUM MEDIUM HIGH MEDIUM
## Levels: LOW MEDIUM HIGH EXCELLENT
# Model accuracy
mean(predicted.classes == test.data$Rating_Category)
## [1] 0.7460526
rpart.fit <- rpart(Rating_Category~., data = train.data, method = 'class')
plotcp(rpart.fit)
rpart.fit.2<-prune.rpart(rpart.fit,cp=0.01)
rpart.plot(rpart.fit.2, extra = 104)
#Prediction
predict_unseen <-predict(rpart.fit.2, test.data, type = 'class')
table_mat <- table(test.data$Rating_Category, predict_unseen)
table_mat
## predict_unseen
## LOW MEDIUM HIGH EXCELLENT
## LOW 0 19 0 0
## MEDIUM 0 481 30 0
## HIGH 0 135 95 0
## EXCELLENT 0 0 0 0
accuracy_Test <- sum(diag(table_mat)) / sum(table_mat)
print(paste('Accuracy for test', accuracy_Test))
## [1] "Accuracy for test 0.757894736842105"
# Hyperparameter tuning
accuracy_tune <- function(fit) {
predict_unseen <- predict(fit, test.data, type = 'class')
table_mat <- table(test.data$Rating_Category, predict_unseen)
accuracy_Test <- sum(diag(table_mat)) / sum(table_mat)
accuracy_Test
}
control <- rpart.control(minsplit = 20,
minbucket = round(20 / 3),
maxdepth = 20,
cp = 0.01)
tune_fit <- rpart(Rating_Category~., data = train.data, method = 'class', control = control)
accuracy_tune(tune_fit)
## [1] 0.7578947
Random Forest is a bootstrap-aggregation algorithm that draws a random sample of predictors at each split; aggregating many trees gives better predictions than any single tree. The caret package is used for modeling. caret also supports random search, which samples a hyperparameter combination at each iteration (a sketch follows below); here we use trainControl to run a grid search with 10-fold cross-validation, selecting on accuracy. The default grid initially picks mtry = 9. Three hyperparameters are then tuned:

- mtry: the number of predictors drawn at each split. For classification, the randomForest default is the square root of the number of predictors. Testing mtry from 1 to 10, the best value is 5, with a cross-validated accuracy of about 80.9%.
- maxnodes: the maximum number of terminal nodes per tree. A similar search over values from 5 to 30 shows accuracy rising with the node limit; we take maxnodes = 27, with a mean cross-validated accuracy of about 77.9%.
- ntree: the number of trees in the forest. The search covers 250, 500, 1000 and 2000 trees; the resampling results are close (mean accuracy roughly 77.3-77.9%), and the final model is fit with ntree = 600.

The final model therefore has mtry = 5, maxnodes = 27 and ntree = 600, and achieves a test-set accuracy of about 77.2% (see the confusion matrix below). The variable-importance plot shows that the number of users who voted, the movie's duration, its budget and its gross earnings have the largest influence on the predicted rating category.
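For contrast with the grid search used below, a random-search sketch (hypothetical; not run in this report):
# Sketch: caret's random search samples mtry values instead of a fixed grid
trControl.rand <- trainControl(method = "cv", number = 10, search = "random")
rf_random <- train(Rating_Category ~ ., data = train.data, method = "rf",
                   metric = "Accuracy", tuneLength = 10,
                   trControl = trControl.rand)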
trControl <- trainControl(method = "cv",number = 10,search = "grid")
rf_default <- train(Rating_Category~.,data = train.data,method = "rf",metric = "Accuracy",
trControl = trControl)
print(rf_default)
## Random Forest
##
## 3046 samples
## 13 predictor
## 4 classes: 'LOW', 'MEDIUM', 'HIGH', 'EXCELLENT'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 2741, 2740, 2743, 2742, 2740, 2742, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8023490 0.5085656
## 9 0.8095578 0.5407069
## 17 0.8079109 0.5429585
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 9.
#Best mtry
tuneGrid <- expand.grid(.mtry = c(1: 10))
rf_mtry <- train(Rating_Category~.,
data = train.data,
method = "rf",
metric = "Accuracy",
tuneGrid = tuneGrid,
trControl = trControl,
importance = TRUE,
nodesize = 14,
ntree = 300)
print(rf_mtry)
## Random Forest
##
## 3046 samples
## 13 predictor
## 4 classes: 'LOW', 'MEDIUM', 'HIGH', 'EXCELLENT'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 2742, 2742, 2741, 2742, 2742, 2741, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 1 0.7353748 0.2629126
## 2 0.8033236 0.5116329
## 3 0.8023432 0.5234097
## 4 0.8082610 0.5386487
## 5 0.8089200 0.5413153
## 6 0.8082621 0.5409011
## 7 0.8069560 0.5357070
## 8 0.8082653 0.5418166
## 9 0.8086007 0.5423119
## 10 0.8082642 0.5412493
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 5.
best_mtry <- rf_mtry$bestTune$mtry
best_mtry
## [1] 5
max(rf_mtry$results$Accuracy)
## [1] 0.80892
#Best max nodes
store_maxnode <- list()
tuneGrid <- expand.grid(.mtry = best_mtry)
for (maxnodes in c(5: 30)) {
set.seed(1234)
rf_maxnode <- train(Rating_Category~.,
data = train.data,
method = "rf",
metric = "Accuracy",
tuneGrid = tuneGrid,
trControl = trControl,
importance = TRUE,
nodesize = 14,
maxnodes = maxnodes,
ntree = 300)
current_iteration <- toString(maxnodes)
store_maxnode[[current_iteration]] <- rf_maxnode
}
results_maxnode <- resamples(store_maxnode)
summary(results_maxnode) # Best maxnodes = 27
##
## Call:
## summary.resamples(object = results_maxnode)
##
## Models: 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
## Number of resamples: 10
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5 0.7026144 0.7192177 0.7450658 0.7403208 0.7551391 0.7810458 0
## 6 0.7058824 0.7311475 0.7536777 0.7485196 0.7664474 0.7843137 0
## 7 0.7091503 0.7286885 0.7586281 0.7498409 0.7664474 0.7875817 0
## 8 0.7058824 0.7281448 0.7532895 0.7472082 0.7596365 0.7875817 0
## 9 0.7058824 0.7281448 0.7516447 0.7491765 0.7697368 0.7875817 0
## 10 0.7091503 0.7270492 0.7631579 0.7531195 0.7694807 0.7908497 0
## 11 0.7058824 0.7311475 0.7582237 0.7537774 0.7735844 0.7908497 0
## 12 0.7156863 0.7281448 0.7631579 0.7554200 0.7760435 0.7894737 0
## 13 0.7156863 0.7344262 0.7648026 0.7600167 0.7801472 0.7960526 0
## 14 0.7189542 0.7368852 0.7730263 0.7606789 0.7768658 0.7960526 0
## 15 0.7058824 0.7413245 0.7763158 0.7626633 0.7808663 0.7993421 0
## 16 0.7156863 0.7385246 0.7713816 0.7619914 0.7826062 0.7960526 0
## 17 0.7156863 0.7418033 0.7713816 0.7636362 0.7806856 0.8059211 0
## 18 0.7156863 0.7368852 0.7680921 0.7632889 0.7809615 0.8071895 0
## 19 0.7091503 0.7393443 0.7746711 0.7652744 0.7875243 0.8092105 0
## 20 0.7124183 0.7475410 0.7746711 0.7665805 0.7891690 0.7993421 0
## 21 0.7222222 0.7563713 0.7766879 0.7718405 0.7914899 0.8092105 0
## 22 0.7222222 0.7506309 0.7717591 0.7705204 0.7899914 0.8125000 0
## 23 0.7189542 0.7553238 0.7812500 0.7744678 0.7916280 0.8071895 0
## 24 0.7287582 0.7577991 0.7796053 0.7764383 0.7945805 0.8092105 0
## 25 0.7293729 0.7571937 0.7799612 0.7767510 0.7998355 0.8157895 0
## 26 0.7320261 0.7596527 0.7816113 0.7767510 0.7965541 0.8092105 0
## 27 0.7352941 0.7645708 0.7849008 0.7787236 0.7986869 0.8092105 0
## 28 0.7320261 0.7699811 0.7832506 0.7807146 0.7978672 0.8157895 0
## 29 0.7287582 0.7621198 0.7881795 0.7816896 0.8011540 0.8104575 0
## 30 0.7352941 0.7693394 0.7861842 0.7816950 0.7949094 0.8157895 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5 0.1802077 0.2395541 0.3319733 0.3098643 0.3560071 0.4356730 0
## 6 0.2354033 0.2799368 0.3610244 0.3403055 0.3919994 0.4492350 0
## 7 0.2480327 0.2731295 0.3715076 0.3431226 0.3938396 0.4508104 0
## 8 0.2311525 0.2665171 0.3363481 0.3271542 0.3753836 0.4508104 0
## 9 0.2323987 0.2688723 0.3349554 0.3351776 0.4005483 0.4508104 0
## 10 0.2426013 0.2617303 0.3750822 0.3482622 0.4066973 0.4642740 0
## 11 0.2372036 0.2692705 0.3630729 0.3512614 0.4194697 0.4609414 0
## 12 0.2311525 0.2712999 0.3785130 0.3558920 0.4254852 0.4609026 0
## 13 0.2649308 0.2845807 0.3820428 0.3698738 0.4345814 0.4809286 0
## 14 0.2665143 0.2890367 0.4099851 0.3722158 0.4228121 0.4776777 0
## 15 0.2372036 0.3088525 0.4173513 0.3785229 0.4393319 0.4877065 0
## 16 0.2556410 0.2922122 0.4035591 0.3754485 0.4412804 0.4776777 0
## 17 0.2603151 0.3057167 0.4029129 0.3803115 0.4345221 0.5013900 0
## 18 0.2529802 0.2899722 0.3968196 0.3812106 0.4369915 0.5165877 0
## 19 0.2573018 0.2973529 0.4139625 0.3882589 0.4612215 0.5144171 0
## 20 0.2641591 0.3226613 0.4139404 0.3927483 0.4606313 0.4908851 0
## 21 0.2862827 0.3558620 0.4210458 0.4100069 0.4706846 0.5174207 0
## 22 0.2862827 0.3379182 0.4110018 0.4065226 0.4673653 0.5272033 0
## 23 0.2845414 0.3602681 0.4375770 0.4181305 0.4699175 0.5165877 0
## 24 0.3116140 0.3691943 0.4295551 0.4247407 0.4756337 0.5203873 0
## 25 0.2863011 0.3566719 0.4286821 0.4238983 0.4984569 0.5311614 0
## 26 0.2927293 0.3629339 0.4295085 0.4230919 0.4886792 0.5174207 0
## 27 0.3037084 0.3791174 0.4398835 0.4294915 0.4888694 0.5203873 0
## 28 0.3136011 0.4084167 0.4380518 0.4365007 0.4863983 0.5369257 0
## 29 0.3073713 0.3787151 0.4532956 0.4391339 0.4955392 0.5233389 0
## 30 0.3240612 0.4077138 0.4482410 0.4399782 0.4867494 0.5369257 0
#Best ntrees
store_maxtrees <- list()
for (ntree in c(250, 500, 1000, 2000)) {
rf_maxtrees <- train(Rating_Category~.,
data = train.data,
method = "rf",
metric = "Accuracy",
tuneGrid = tuneGrid,
trControl = trControl,
importance = TRUE,
nodesize = 14,
maxnodes = 27,
ntree = ntree)
key <- toString(ntree)
store_maxtrees[[key]] <- rf_maxtrees
}
results_tree <- resamples(store_maxtrees)
summary(results_tree) # accuracies are comparable across ntree values; ntree = 600 is used below
##
## Call:
## summary.resamples(object = results_tree)
##
## Models: 250, 500, 1000, 2000
## Number of resamples: 10
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 250 0.7508197 0.7598684 0.7651855 0.7767537 0.7959016 0.8157895 0
## 500 0.7344262 0.7639344 0.7741401 0.7731548 0.7827156 0.8065574 0
## 1000 0.7606557 0.7684307 0.7828924 0.7790693 0.7855398 0.7960526 0
## 2000 0.7475410 0.7598361 0.7816113 0.7774342 0.7883466 0.8125000 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 250 0.3596436 0.3704339 0.3982079 0.4258527 0.4827396 0.5340614 0
## 500 0.3079444 0.3887484 0.4078011 0.4126284 0.4356615 0.5128719 0
## 1000 0.3743150 0.4050328 0.4424358 0.4310009 0.4490075 0.4743858 0
## 2000 0.3379285 0.3749235 0.4340855 0.4243862 0.4514213 0.5317769 0
#Best Model
fit_rf <- train(Rating_Category~.,
train.data,
method = "rf",
metric = "Accuracy",
tuneGrid = tuneGrid,
trControl = trControl,
importance = TRUE,
nodesize = 14,
ntree = 600,
maxnodes = 27)
prediction.rf <-predict(fit_rf, test.data)
confusionMatrix(prediction.rf, test.data$Rating_Category)
## Confusion Matrix and Statistics
##
## Reference
## Prediction LOW MEDIUM HIGH EXCELLENT
## LOW 0 0 0 0
## MEDIUM 19 489 132 0
## HIGH 0 22 98 0
## EXCELLENT 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7724
## 95% CI : (0.7409, 0.8017)
## No Information Rate : 0.6724
## P-Value [Acc > NIR] : 8.827e-10
##
## Kappa : 0.4103
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: LOW Class: MEDIUM Class: HIGH Class: EXCELLENT
## Sensitivity 0.000 0.9569 0.4261 NA
## Specificity 1.000 0.3936 0.9585 1
## Pos Pred Value NaN 0.7641 0.8167 NA
## Neg Pred Value 0.975 0.8167 0.7938 NA
## Prevalence 0.025 0.6724 0.3026 0
## Detection Rate 0.000 0.6434 0.1289 0
## Detection Prevalence 0.000 0.8421 0.1579 0
## Balanced Accuracy 0.500 0.6753 0.6923 NA
# Test-set accuracy: ~77.2% (from the confusion matrix above)
varImp(fit_rf)
## rf variable importance
##
## variables are sorted by maximum importance across the classes
## LOW MEDIUM HIGH EXCELLENT
## num_voted_users 1.977 71.206 100.000 8.638
## budget 2.340 48.314 16.283 6.370
## director_facebook_likes 8.188 10.118 47.824 7.127
## duration 6.034 37.376 41.940 4.546
## movie_facebook_likes 7.871 24.977 32.332 4.546
## countryUSA 3.457 2.785 29.930 4.546
## gross 0.000 26.483 8.599 4.546
## title_year 4.106 25.447 16.724 4.546
## actor_1_facebook_likes 4.788 12.555 24.014 4.546
## content_ratingPG-13 7.070 23.401 17.143 6.370
## critic_total_ratio 6.379 12.308 19.963 7.710
## other_actor_facebook_likes 4.546 16.947 12.379 4.546
## countryOthers 2.722 4.532 14.943 4.546
## content_ratingR 2.771 12.310 14.708 4.546
## facenumber_in_poster 4.764 8.649 12.408 4.546
## content_ratingPG 4.546 6.456 8.614 4.546
## content_ratingNC-17 4.546 5.603 2.722 4.546
rf <- randomForest(Rating_Category ~ ., data = train.data, mtry = best_mtry)
# Get importance
importance <- importance(rf)
varImportance <- data.frame(Variables = row.names(importance),
Importance = round(importance[ ,'MeanDecreaseGini'],2))
# Create a rank variable based on importance
rankImportance <- varImportance %>%
mutate(Rank = paste0('#',dense_rank(desc(Importance))))
# Use ggplot2 to visualize the relative importance of variables
ggplot(rankImportance, aes(x = reorder(Variables, Importance),
y = Importance, fill = Importance)) +
geom_bar(stat='identity') +
geom_text(aes(x = Variables, y = 0.5, label = Rank),
hjust=0, vjust=0.55, size = 4, colour = 'red') +
labs(x = 'Variables') +
coord_flip() +
theme_few()
Another model, a Gradient Boosting Machine (GBM), is fit to the training data. Boosting is a slow-learning algorithm: trees are grown sequentially, and each new tree is fitted to the residuals of the current ensemble rather than to the outcome directly. We use the same cross-validation setup as for the Random Forest via trainControl, and three hyperparameters are tuned: the number of trees, the interaction depth (number of splits per tree) and the learning rate (shrinkage); a grid sketch follows below. From the final confusion matrix (reproducible with the snippet at the end of this section), specificity is highest for the LOW class at 0.976, while sensitivity is highest for the MEDIUM class at 0.811. The overall accuracy is about 77.5%, comparable to the Random Forest and clearly higher than the multinomial logistic model.
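caret's gbm method exposes these parameters as n.trees, interaction.depth and shrinkage (plus n.minobsinnode); a hypothetical grid for illustration only, since the run below uses caret's default grid:
# Sketch: an explicit tuning grid for the three GBM hyperparameters
gbmGrid <- expand.grid(n.trees = c(50, 100, 150),
                       interaction.depth = c(1, 2, 3),
                       shrinkage = 0.1,
                       n.minobsinnode = 10)
# ...would be passed to train() via tuneGrid = gbmGrid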
tc <- trainControl(method = "repeatedcv", number = 10)
# verbose = FALSE is passed through to gbm to suppress the per-iteration log
gbm.model <- train(Rating_Category ~ ., data = train.data, method = "gbm",
                   trControl = tc, verbose = FALSE)
plot(gbm.model)
pred.gbm <- predict(gbm.model, test.data)
result <- data.frame(test.data$Rating_Category, pred.gbm)
head(result)  # the full 368-row comparison is truncated here
## test.data.Rating_Category pred.gbm
## 1 MEDIUM HIGH
## 2 HIGH HIGH
## 3 MEDIUM HIGH
## 4 MEDIUM MEDIUM
## 5 MEDIUM HIGH
## 6 MEDIUM MEDIUM
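The class-wise sensitivity and specificity quoted above come from a confusion matrix that is not printed in this section; it can be reproduced with:
# Confusion matrix for the GBM predictions on the test set
confusionMatrix(pred.gbm, test.data$Rating_Category)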
## 369 MEDIUM MEDIUM
## 370 MEDIUM MEDIUM
## 371 MEDIUM MEDIUM
## 372 MEDIUM MEDIUM
## 373 MEDIUM MEDIUM
## 374 HIGH MEDIUM
## 375 MEDIUM MEDIUM
## 376 MEDIUM MEDIUM
## 377 MEDIUM MEDIUM
## 378 MEDIUM MEDIUM
## 379 HIGH MEDIUM
## 380 MEDIUM LOW
## 381 LOW MEDIUM
## 382 HIGH HIGH
## 383 MEDIUM MEDIUM
## 384 MEDIUM MEDIUM
## 385 HIGH MEDIUM
## 386 MEDIUM MEDIUM
## 387 MEDIUM MEDIUM
## 388 HIGH HIGH
## 389 HIGH MEDIUM
## 390 MEDIUM MEDIUM
## 391 MEDIUM LOW
## 392 MEDIUM MEDIUM
## 393 MEDIUM MEDIUM
## 394 MEDIUM MEDIUM
## 395 MEDIUM MEDIUM
## 396 HIGH HIGH
## 397 MEDIUM MEDIUM
## 398 MEDIUM MEDIUM
## 399 HIGH HIGH
## 400 HIGH MEDIUM
## 401 MEDIUM MEDIUM
## 402 MEDIUM MEDIUM
## 403 HIGH MEDIUM
## 404 MEDIUM MEDIUM
## 405 MEDIUM MEDIUM
## 406 HIGH HIGH
## 407 HIGH HIGH
## 408 MEDIUM MEDIUM
## 409 HIGH MEDIUM
## 410 MEDIUM MEDIUM
## 411 MEDIUM MEDIUM
## 412 MEDIUM MEDIUM
## 413 LOW MEDIUM
## 414 HIGH MEDIUM
## 415 MEDIUM MEDIUM
## 416 MEDIUM MEDIUM
## 417 MEDIUM HIGH
## 418 MEDIUM MEDIUM
## 419 MEDIUM MEDIUM
## 420 MEDIUM MEDIUM
## 421 MEDIUM MEDIUM
## 422 HIGH MEDIUM
## 423 MEDIUM MEDIUM
## 424 MEDIUM MEDIUM
## 425 MEDIUM MEDIUM
## 426 HIGH HIGH
## 427 HIGH HIGH
## 428 HIGH HIGH
## 429 LOW MEDIUM
## 430 MEDIUM MEDIUM
## 431 MEDIUM MEDIUM
## 432 MEDIUM MEDIUM
## 433 MEDIUM MEDIUM
## 434 LOW LOW
## 435 MEDIUM MEDIUM
## 436 MEDIUM MEDIUM
## 437 HIGH HIGH
## 438 MEDIUM MEDIUM
## 439 MEDIUM MEDIUM
## 440 MEDIUM MEDIUM
## 441 HIGH HIGH
## 442 HIGH HIGH
## 443 MEDIUM MEDIUM
## 444 MEDIUM MEDIUM
## 445 HIGH HIGH
## 446 MEDIUM MEDIUM
## 447 MEDIUM MEDIUM
## 448 HIGH MEDIUM
## 449 LOW MEDIUM
## 450 MEDIUM MEDIUM
## 451 MEDIUM MEDIUM
## 452 HIGH HIGH
## 453 HIGH HIGH
## 454 MEDIUM MEDIUM
## 455 MEDIUM MEDIUM
## 456 MEDIUM MEDIUM
## 457 MEDIUM MEDIUM
## 458 MEDIUM MEDIUM
## 459 HIGH HIGH
## 460 HIGH HIGH
## 461 HIGH MEDIUM
## 462 MEDIUM MEDIUM
## 463 MEDIUM MEDIUM
## 464 HIGH MEDIUM
## 465 MEDIUM MEDIUM
## 466 MEDIUM MEDIUM
## 467 HIGH HIGH
## 468 MEDIUM MEDIUM
## 469 HIGH HIGH
## 470 MEDIUM HIGH
## 471 HIGH MEDIUM
## 472 HIGH HIGH
## 473 HIGH MEDIUM
## 474 MEDIUM MEDIUM
## 475 MEDIUM MEDIUM
## 476 MEDIUM MEDIUM
## 477 HIGH HIGH
## 478 HIGH MEDIUM
## 479 MEDIUM MEDIUM
## 480 MEDIUM MEDIUM
## 481 MEDIUM MEDIUM
## 482 MEDIUM MEDIUM
## 483 MEDIUM MEDIUM
## 484 HIGH HIGH
## 485 MEDIUM MEDIUM
## 486 MEDIUM MEDIUM
## 487 MEDIUM MEDIUM
## 488 MEDIUM MEDIUM
## 489 MEDIUM MEDIUM
## 490 MEDIUM MEDIUM
## 491 HIGH MEDIUM
## 492 HIGH HIGH
## 493 HIGH HIGH
## 494 MEDIUM MEDIUM
## 495 HIGH MEDIUM
## 496 MEDIUM HIGH
## 497 MEDIUM MEDIUM
## 498 MEDIUM MEDIUM
## 499 HIGH HIGH
## 500 HIGH MEDIUM
## 501 MEDIUM MEDIUM
## 502 MEDIUM MEDIUM
## 503 MEDIUM MEDIUM
## 504 LOW MEDIUM
## 505 MEDIUM MEDIUM
## 506 MEDIUM MEDIUM
## 507 HIGH MEDIUM
## 508 MEDIUM MEDIUM
## 509 HIGH MEDIUM
## 510 LOW MEDIUM
## 511 MEDIUM MEDIUM
## 512 MEDIUM MEDIUM
## 513 HIGH HIGH
## 514 MEDIUM MEDIUM
## 515 MEDIUM MEDIUM
## 516 MEDIUM MEDIUM
## 517 MEDIUM MEDIUM
## 518 HIGH HIGH
## 519 MEDIUM MEDIUM
## 520 MEDIUM MEDIUM
## 521 MEDIUM MEDIUM
## 522 HIGH HIGH
## 523 HIGH HIGH
## 524 MEDIUM MEDIUM
## 525 MEDIUM HIGH
## 526 HIGH HIGH
## 527 MEDIUM MEDIUM
## 528 MEDIUM MEDIUM
## 529 MEDIUM MEDIUM
## 530 MEDIUM MEDIUM
## 531 HIGH HIGH
## 532 HIGH HIGH
## 533 MEDIUM MEDIUM
## 534 HIGH HIGH
## 535 MEDIUM MEDIUM
## 536 MEDIUM MEDIUM
## 537 MEDIUM MEDIUM
## 538 MEDIUM MEDIUM
## 539 MEDIUM MEDIUM
## 540 MEDIUM MEDIUM
## 541 MEDIUM MEDIUM
## 542 MEDIUM MEDIUM
## 543 MEDIUM MEDIUM
## 544 HIGH HIGH
## 545 MEDIUM MEDIUM
## 546 HIGH MEDIUM
## 547 HIGH HIGH
## 548 MEDIUM MEDIUM
## 549 MEDIUM MEDIUM
## 550 MEDIUM MEDIUM
## 551 MEDIUM MEDIUM
## 552 HIGH HIGH
## 553 MEDIUM MEDIUM
## 554 MEDIUM MEDIUM
## 555 MEDIUM MEDIUM
## 556 MEDIUM MEDIUM
## 557 MEDIUM MEDIUM
## 558 HIGH MEDIUM
## 559 HIGH MEDIUM
## 560 HIGH HIGH
## 561 MEDIUM MEDIUM
## 562 MEDIUM MEDIUM
## 563 HIGH HIGH
## 564 MEDIUM MEDIUM
## 565 HIGH MEDIUM
## 566 MEDIUM MEDIUM
## 567 LOW MEDIUM
## 568 HIGH MEDIUM
## 569 MEDIUM MEDIUM
## 570 MEDIUM MEDIUM
## 571 HIGH MEDIUM
## 572 MEDIUM MEDIUM
## 573 HIGH MEDIUM
## 574 MEDIUM MEDIUM
## 575 LOW MEDIUM
## 576 MEDIUM MEDIUM
## 577 MEDIUM MEDIUM
## 578 HIGH HIGH
## 579 MEDIUM MEDIUM
## 580 HIGH HIGH
## 581 MEDIUM MEDIUM
## 582 HIGH HIGH
## 583 MEDIUM HIGH
## 584 MEDIUM MEDIUM
## 585 MEDIUM MEDIUM
## 586 MEDIUM MEDIUM
## 587 MEDIUM MEDIUM
## 588 MEDIUM MEDIUM
## 589 MEDIUM MEDIUM
## 590 HIGH MEDIUM
## 591 HIGH HIGH
## 592 MEDIUM MEDIUM
## 593 HIGH HIGH
## 594 MEDIUM MEDIUM
## 595 HIGH HIGH
## 596 MEDIUM MEDIUM
## 597 HIGH MEDIUM
## 598 MEDIUM MEDIUM
## 599 HIGH MEDIUM
## 600 HIGH MEDIUM
## 601 MEDIUM MEDIUM
## 602 MEDIUM MEDIUM
## 603 HIGH MEDIUM
## 604 MEDIUM MEDIUM
## 605 LOW MEDIUM
## 606 MEDIUM HIGH
## 607 MEDIUM MEDIUM
## 608 HIGH HIGH
## 609 MEDIUM MEDIUM
## 610 MEDIUM MEDIUM
## 611 HIGH HIGH
## 612 LOW MEDIUM
## 613 MEDIUM MEDIUM
## 614 MEDIUM MEDIUM
## 615 HIGH HIGH
## 616 MEDIUM MEDIUM
## 617 LOW MEDIUM
## 618 HIGH HIGH
## 619 HIGH HIGH
## 620 HIGH HIGH
## 621 HIGH HIGH
## 622 MEDIUM HIGH
## 623 MEDIUM MEDIUM
## 624 MEDIUM MEDIUM
## 625 MEDIUM HIGH
## 626 HIGH MEDIUM
## 627 MEDIUM MEDIUM
## 628 HIGH MEDIUM
## 629 MEDIUM MEDIUM
## 630 MEDIUM MEDIUM
## 631 HIGH HIGH
## 632 MEDIUM HIGH
## 633 HIGH HIGH
## 634 MEDIUM MEDIUM
## 635 MEDIUM MEDIUM
## 636 MEDIUM MEDIUM
## 637 HIGH MEDIUM
## 638 MEDIUM MEDIUM
## 639 MEDIUM MEDIUM
## 640 MEDIUM MEDIUM
## 641 MEDIUM MEDIUM
## 642 HIGH MEDIUM
## 643 MEDIUM HIGH
## 644 HIGH HIGH
## 645 HIGH MEDIUM
## 646 MEDIUM MEDIUM
## 647 MEDIUM MEDIUM
## 648 HIGH HIGH
## 649 MEDIUM MEDIUM
## 650 MEDIUM MEDIUM
## 651 HIGH HIGH
## 652 MEDIUM MEDIUM
## 653 MEDIUM MEDIUM
## 654 MEDIUM MEDIUM
## 655 MEDIUM MEDIUM
## 656 HIGH HIGH
## 657 MEDIUM MEDIUM
## 658 MEDIUM HIGH
## 659 MEDIUM MEDIUM
## 660 MEDIUM MEDIUM
## 661 HIGH HIGH
## 662 HIGH HIGH
## 663 MEDIUM MEDIUM
## 664 HIGH MEDIUM
## 665 HIGH HIGH
## 666 MEDIUM MEDIUM
## 667 HIGH HIGH
## 668 HIGH HIGH
## 669 MEDIUM MEDIUM
## 670 MEDIUM MEDIUM
## 671 MEDIUM MEDIUM
## 672 LOW MEDIUM
## 673 MEDIUM MEDIUM
## 674 MEDIUM MEDIUM
## 675 MEDIUM MEDIUM
## 676 MEDIUM MEDIUM
## 677 HIGH HIGH
## 678 MEDIUM MEDIUM
## 679 HIGH HIGH
## 680 MEDIUM MEDIUM
## 681 MEDIUM HIGH
## 682 HIGH MEDIUM
## 683 HIGH HIGH
## 684 MEDIUM MEDIUM
## 685 HIGH MEDIUM
## 686 HIGH HIGH
## 687 MEDIUM MEDIUM
## 688 HIGH MEDIUM
## 689 HIGH HIGH
## 690 MEDIUM MEDIUM
## 691 HIGH HIGH
## 692 HIGH MEDIUM
## 693 MEDIUM MEDIUM
## 694 HIGH MEDIUM
## 695 HIGH HIGH
## 696 MEDIUM MEDIUM
## 697 MEDIUM MEDIUM
## 698 HIGH MEDIUM
## 699 HIGH MEDIUM
## 700 MEDIUM MEDIUM
## 701 MEDIUM MEDIUM
## 702 HIGH HIGH
## 703 MEDIUM MEDIUM
## 704 HIGH HIGH
## 705 LOW MEDIUM
## 706 MEDIUM MEDIUM
## 707 MEDIUM MEDIUM
## 708 HIGH HIGH
## 709 HIGH HIGH
## 710 MEDIUM HIGH
## 711 MEDIUM MEDIUM
## 712 MEDIUM MEDIUM
## 713 HIGH HIGH
## 714 MEDIUM MEDIUM
## 715 MEDIUM MEDIUM
## 716 MEDIUM MEDIUM
## 717 HIGH MEDIUM
## 718 HIGH HIGH
## 719 MEDIUM MEDIUM
## 720 MEDIUM MEDIUM
## 721 MEDIUM MEDIUM
## 722 MEDIUM MEDIUM
## 723 HIGH MEDIUM
## 724 HIGH MEDIUM
## 725 MEDIUM MEDIUM
## 726 LOW MEDIUM
## 727 HIGH MEDIUM
## 728 MEDIUM HIGH
## 729 MEDIUM MEDIUM
## 730 MEDIUM MEDIUM
## 731 HIGH MEDIUM
## 732 MEDIUM MEDIUM
## 733 MEDIUM MEDIUM
## 734 HIGH HIGH
## 735 MEDIUM MEDIUM
## 736 HIGH HIGH
## 737 HIGH MEDIUM
## 738 MEDIUM MEDIUM
## 739 HIGH HIGH
## 740 MEDIUM MEDIUM
## 741 MEDIUM MEDIUM
## 742 HIGH HIGH
## 743 HIGH MEDIUM
## 744 HIGH HIGH
## 745 HIGH HIGH
## 746 HIGH MEDIUM
## 747 HIGH MEDIUM
## 748 MEDIUM MEDIUM
## 749 MEDIUM MEDIUM
## 750 HIGH MEDIUM
## 751 MEDIUM MEDIUM
## 752 HIGH MEDIUM
## 753 HIGH MEDIUM
## 754 HIGH HIGH
## 755 HIGH MEDIUM
## 756 MEDIUM MEDIUM
## 757 HIGH MEDIUM
## 758 MEDIUM MEDIUM
## 759 HIGH HIGH
## 760 MEDIUM HIGH
# Note: caret's signature is confusionMatrix(data, reference), where `data` is
# the vector of predicted classes and `reference` the true classes. The call
# below passes the actual ratings first, so in the printed matrix the
# "Prediction" rows are really the true labels and the "Reference" columns are
# the GBM predictions; the table is transposed relative to the usual convention.
cm <- confusionMatrix(test.data$Rating_Category, as.factor(pred.gbm))
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction LOW MEDIUM HIGH EXCELLENT
## LOW 2 17 0 0
## MEDIUM 2 474 35 0
## HIGH 0 91 139 0
## EXCELLENT 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.8092
## 95% CI : (0.7794, 0.8366)
## No Information Rate : 0.7658
## P-Value [Acc > NIR] : 0.002252
##
## Kappa : 0.541
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: LOW Class: MEDIUM Class: HIGH Class: EXCELLENT
## Sensitivity 0.500000 0.8144 0.7989 NA
## Specificity 0.977513 0.7921 0.8447 1
## Pos Pred Value 0.105263 0.9276 0.6043 NA
## Neg Pred Value 0.997301 0.5663 0.9340 NA
## Prevalence 0.005263 0.7658 0.2289 0
## Detection Rate 0.002632 0.6237 0.1829 0
## Detection Prevalence 0.025000 0.6724 0.3026 0
## Balanced Accuracy 0.738757 0.8033 0.8218 NA
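No test observations fall in the EXCELLENT class, which is why its sensitivity is reported as NA. For reference, here is a minimal sketch of the conventional caret call, assuming `pred.gbm` holds the GBM's predicted rating categories and `test.data$Rating_Category` the true ones (the intermediate names `pred.factor` and `cm.conv` are illustrative, not from the original analysis). caret expects the predictions as the first (`data`) argument, so this version reports the matrix in the usual rows-are-predictions orientation:

# Align the prediction factor with the levels of the true labels so that
# empty classes (e.g. EXCELLENT) still appear in the matrix.
pred.factor <- factor(pred.gbm, levels = levels(test.data$Rating_Category))
# Conventional argument order: data = predicted classes, reference = true classes.
cm.conv <- confusionMatrix(data = pred.factor,
                           reference = test.data$Rating_Category)
cm.conv$table                 # rows = predictions, columns = truth
cm.conv$overall["Accuracy"]   # overall accuracy, same as reported above

Overall accuracy and Kappa depend only on the diagonal and the marginal totals, so they are unchanged by swapping the two arguments; only the orientation of the table and the per-class sensitivity, specificity, and predictive values differ between the two calls.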