1 Set up, data import, data exploration, data partitioning, and inspection code

# 1A
library(rmarkdown)
library(psych)
library(rpart)
library(RWeka)
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Loading required package: lattice
library(rminer)
library(matrixStats)
library(knitr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr     1.1.1     âś” readr     2.1.4
## âś” forcats   1.0.0     âś” stringr   1.5.0
## âś” lubridate 1.9.2     âś” tibble    3.2.1
## âś” purrr     1.0.1     âś” tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– ggplot2::%+%()   masks psych::%+%()
## âś– ggplot2::alpha() masks psych::alpha()
## âś– dplyr::count()   masks matrixStats::count()
## âś– dplyr::filter()  masks stats::filter()
## âś– dplyr::lag()     masks stats::lag()
## âś– purrr::lift()    masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
mydir <- getwd()
setwd(mydir)

na_sales <- read.csv(file = "NA_sales_filtered.csv", stringsAsFactors = FALSE)

str(na_sales)
## 'data.frame':    6345 obs. of  9 variables:
##  $ Name        : chr  "Final Fantasy VII" "Final Fantasy X" "The Sims 3" "Final Fantasy VIII" ...
##  $ Platform    : chr  "PS" "PS2" "PC" "PS" ...
##  $ Genre       : chr  "Role-Playing" "Role-Playing" "Simulation" "Role-Playing" ...
##  $ Rating      : chr  "T" "T" "T" "T" ...
##  $ Critic_Score: int  92 92 86 90 96 88 92 96 93 87 ...
##  $ Critic_Count: int  20 53 75 24 15 29 97 105 57 22 ...
##  $ User_Score  : int  91 86 75 85 90 84 82 87 72 80 ...
##  $ User_Count  : int  1282 1056 886 644 367 135 3712 5187 2182 78 ...
##  $ NA_Sales    : num  3.01 2.91 0.99 2.28 3.27 2.93 2.77 3.27 0.08 3.11 ...
summary(na_sales)
##      Name             Platform            Genre              Rating         
##  Length:6345        Length:6345        Length:6345        Length:6345       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   Critic_Score    Critic_Count      User_Score      User_Count     
##  Min.   :13.00   Min.   :  3.00   Min.   : 5.00   Min.   :    4.0  
##  1st Qu.:61.00   1st Qu.: 14.00   1st Qu.:64.00   1st Qu.:   10.0  
##  Median :72.00   Median : 25.00   Median :74.00   Median :   24.0  
##  Mean   :69.82   Mean   : 28.97   Mean   :70.77   Mean   :  152.7  
##  3rd Qu.:80.00   3rd Qu.: 40.00   3rd Qu.:81.00   3rd Qu.:   77.0  
##  Max.   :98.00   Max.   :113.00   Max.   :95.00   Max.   :10665.0  
##     NA_Sales     
##  Min.   :0.0100  
##  1st Qu.:0.0800  
##  Median :0.1700  
##  Mean   :0.3542  
##  3rd Qu.:0.4100  
##  Max.   :3.4900
na_sales$Platform <- factor(na_sales$Platform)
na_sales$Genre <- factor(na_sales$Genre)
na_sales$Rating <- factor(na_sales$Rating)

# 1B
na_sales %>% select(where(is.numeric)) %>% pairs.panels()

# 1C
na_sales_sub <- na_sales[,-1]

na_base_model <- lm(NA_Sales ~ ., data = na_sales_sub)
summary(na_base_model)
## 
## Call:
## lm(formula = NA_Sales ~ ., data = na_sales_sub)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9381 -0.2315 -0.0818  0.0978  3.1769 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -4.045e-01  4.796e-02  -8.435  < 2e-16 ***
## PlatformDS         1.547e-01  4.088e-02   3.783 0.000156 ***
## PlatformGBA        1.705e-01  4.573e-02   3.729 0.000194 ***
## PlatformGC         8.918e-02  4.276e-02   2.085 0.037071 *  
## PlatformPC        -3.054e-01  4.554e-02  -6.706 2.17e-11 ***
## PlatformPS         4.075e-01  5.080e-02   8.020 1.25e-15 ***
## PlatformPS2        1.954e-01  3.851e-02   5.074 4.00e-07 ***
## PlatformPS3        9.718e-02  3.929e-02   2.473 0.013422 *  
## PlatformPS4       -7.379e-02  4.670e-02  -1.580 0.114165    
## PlatformPSP        4.007e-02  4.201e-02   0.954 0.340130    
## PlatformPSV       -1.176e-01  5.741e-02  -2.049 0.040509 *  
## PlatformWii        2.401e-01  4.070e-02   5.898 3.87e-09 ***
## PlatformWiiU       2.866e-02  5.892e-02   0.487 0.626616    
## PlatformX360       1.387e-01  3.906e-02   3.551 0.000386 ***
## PlatformXB         2.122e-02  4.060e-02   0.523 0.601157    
## PlatformXOne       1.621e-01  4.988e-02   3.251 0.001157 ** 
## GenreAdventure    -1.254e-01  3.082e-02  -4.068 4.81e-05 ***
## GenreFighting      3.030e-03  2.539e-02   0.119 0.905002    
## GenreMisc          8.375e-02  2.503e-02   3.346 0.000823 ***
## GenrePlatform     -3.135e-02  2.551e-02  -1.229 0.219107    
## GenrePuzzle       -1.315e-01  4.298e-02  -3.059 0.002230 ** 
## GenreRacing       -4.130e-02  2.282e-02  -1.810 0.070341 .  
## GenreRole-Playing -1.030e-01  2.038e-02  -5.052 4.50e-07 ***
## GenreShooter       2.908e-03  1.894e-02   0.154 0.877966    
## GenreSimulation    5.868e-02  2.951e-02   1.988 0.046825 *  
## GenreSports       -2.387e-02  2.134e-02  -1.118 0.263445    
## GenreStrategy     -1.726e-01  3.297e-02  -5.235 1.71e-07 ***
## RatingE10+        -5.782e-02  1.888e-02  -3.062 0.002205 ** 
## RatingM           -1.121e-01  2.107e-02  -5.322 1.06e-07 ***
## Ratingmissing     -2.244e-01  8.975e-02  -2.500 0.012451 *  
## RatingT           -9.328e-02  1.679e-02  -5.557 2.86e-08 ***
## Critic_Score       1.049e-02  5.435e-04  19.298  < 2e-16 ***
## Critic_Count       6.122e-03  3.687e-04  16.605  < 2e-16 ***
## User_Score        -2.824e-03  4.930e-04  -5.729 1.06e-08 ***
## User_Count         2.027e-04  1.172e-05  17.287  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.421 on 6310 degrees of freedom
## Multiple R-squared:  0.2787, Adjusted R-squared:  0.2749 
## F-statistic: 71.72 on 34 and 6310 DF,  p-value: < 2.2e-16
# 1D
inTrain <- createDataPartition(y=na_sales_sub$NA_Sales, p = 0.70, list=FALSE)
train_target <- na_sales_sub[inTrain,8]
test_target <- na_sales_sub[-inTrain,8]
train_input <- na_sales_sub[inTrain,-8]
test_input <- na_sales_sub[-inTrain,-8]

# 1E
summary(train_target)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.0800  0.1700  0.3529  0.4100  3.4800
summary(test_target)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.0800  0.1700  0.3574  0.4100  3.4900
summary(train_input)
##     Platform             Genre          Rating      Critic_Score  
##  PS2    : 814   Action      :1076   E      :1395   Min.   :17.00  
##  X360   : 577   Sports      : 639   E10+   : 626   1st Qu.:61.00  
##  PS3    : 515   Shooter     : 557   M      : 873   Median :72.00  
##  XB     : 396   Role-Playing: 445   missing:  17   Mean   :69.64  
##  Wii    : 338   Racing      : 386   T      :1531   3rd Qu.:80.00  
##  DS     : 312   Platform    : 277                  Max.   :98.00  
##  (Other):1490   (Other)     :1062                                 
##   Critic_Count      User_Score      User_Count     
##  Min.   :  3.00   Min.   : 5.00   Min.   :    4.0  
##  1st Qu.: 14.00   1st Qu.:64.00   1st Qu.:   10.0  
##  Median : 25.00   Median :74.00   Median :   24.0  
##  Mean   : 28.84   Mean   :70.71   Mean   :  145.4  
##  3rd Qu.: 40.00   3rd Qu.:81.00   3rd Qu.:   75.0  
##  Max.   :105.00   Max.   :95.00   Max.   :10179.0  
## 
summary(test_input)
##     Platform            Genre         Rating     Critic_Score  
##  PS2    :321   Action      :473   E      :573   Min.   :13.00  
##  X360   :271   Sports      :282   E10+   :251   1st Qu.:62.00  
##  PS3    :234   Shooter     :241   M      :422   Median :72.00  
##  XB     :182   Role-Playing:198   missing:  6   Mean   :70.24  
##  DS     :145   Racing      :151   T      :651   3rd Qu.:80.00  
##  Wii    :137   Misc        :115                 Max.   :97.00  
##  (Other):613   (Other)     :443                                
##   Critic_Count      User_Score      User_Count     
##  Min.   :  4.00   Min.   : 5.00   Min.   :    4.0  
##  1st Qu.: 15.00   1st Qu.:64.00   1st Qu.:   11.0  
##  Median : 25.00   Median :74.00   Median :   25.0  
##  Mean   : 29.25   Mean   :70.89   Mean   :  169.9  
##  3rd Qu.: 40.00   3rd Qu.:81.00   3rd Qu.:   82.0  
##  Max.   :113.00   Max.   :94.00   Max.   :10665.0  
## 

2 lm, rpart and M5P model training and testing

# 2A
na_base_train_model <- lm(train_target~., data = train_input)
na_rpart_model <- rpart(train_target ~ ., data = train_input)
na_m5p_model <- M5P(train_target ~ ., data = train_input)

# 2Bi
na_base_train_model
## 
## Call:
## lm(formula = train_target ~ ., data = train_input)
## 
## Coefficients:
##       (Intercept)         PlatformDS        PlatformGBA         PlatformGC  
##        -0.4051705          0.1763433          0.1989891          0.0932004  
##        PlatformPC         PlatformPS        PlatformPS2        PlatformPS3  
##        -0.2950359          0.3912538          0.1904385          0.1103856  
##       PlatformPS4        PlatformPSP        PlatformPSV        PlatformWii  
##        -0.1135394          0.0443379         -0.1320338          0.2591237  
##      PlatformWiiU       PlatformX360         PlatformXB       PlatformXOne  
##         0.0572650          0.1462881          0.0393762          0.1350980  
##    GenreAdventure      GenreFighting          GenreMisc      GenrePlatform  
##        -0.1261339         -0.0368658          0.0901604         -0.0106969  
##       GenrePuzzle        GenreRacing  GenreRole-Playing       GenreShooter  
##        -0.1343466         -0.0248400         -0.1140337         -0.0121486  
##   GenreSimulation        GenreSports      GenreStrategy         RatingE10+  
##         0.0521291         -0.0113363         -0.1995889         -0.0420970  
##           RatingM      Ratingmissing            RatingT       Critic_Score  
##        -0.1105039         -0.2083238         -0.0819382          0.0099229  
##      Critic_Count         User_Score         User_Count  
##         0.0063543         -0.0025419          0.0002284
summary(na_base_train_model)
## 
## Call:
## lm(formula = train_target ~ ., data = train_input)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.74551 -0.22916 -0.08086  0.09644  3.13400 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -4.052e-01  5.659e-02  -7.160 9.38e-13 ***
## PlatformDS         1.763e-01  4.833e-02   3.649 0.000267 ***
## PlatformGBA        1.990e-01  5.314e-02   3.745 0.000183 ***
## PlatformGC         9.320e-02  4.984e-02   1.870 0.061560 .  
## PlatformPC        -2.950e-01  5.375e-02  -5.489 4.27e-08 ***
## PlatformPS         3.913e-01  6.016e-02   6.504 8.70e-11 ***
## PlatformPS2        1.904e-01  4.514e-02   4.219 2.50e-05 ***
## PlatformPS3        1.104e-01  4.626e-02   2.386 0.017055 *  
## PlatformPS4       -1.135e-01  5.524e-02  -2.056 0.039888 *  
## PlatformPSP        4.434e-02  4.959e-02   0.894 0.371357    
## PlatformPSV       -1.320e-01  6.803e-02  -1.941 0.052357 .  
## PlatformWii        2.591e-01  4.786e-02   5.414 6.48e-08 ***
## PlatformWiiU       5.727e-02  6.863e-02   0.834 0.404131    
## PlatformX360       1.463e-01  4.607e-02   3.176 0.001506 ** 
## PlatformXB         3.938e-02  4.791e-02   0.822 0.411200    
## PlatformXOne       1.351e-01  6.016e-02   2.246 0.024765 *  
## GenreAdventure    -1.261e-01  3.666e-02  -3.440 0.000586 ***
## GenreFighting     -3.687e-02  3.014e-02  -1.223 0.221282    
## GenreMisc          9.016e-02  2.985e-02   3.020 0.002541 ** 
## GenrePlatform     -1.070e-02  3.007e-02  -0.356 0.722071    
## GenrePuzzle       -1.343e-01  4.939e-02  -2.720 0.006551 ** 
## GenreRacing       -2.484e-02  2.681e-02  -0.927 0.354214    
## GenreRole-Playing -1.140e-01  2.433e-02  -4.686 2.87e-06 ***
## GenreShooter      -1.215e-02  2.263e-02  -0.537 0.591470    
## GenreSimulation    5.213e-02  3.565e-02   1.462 0.143770    
## GenreSports       -1.134e-02  2.531e-02  -0.448 0.654257    
## GenreStrategy     -1.996e-01  3.956e-02  -5.046 4.70e-07 ***
## RatingE10+        -4.210e-02  2.238e-02  -1.881 0.060057 .  
## RatingM           -1.105e-01  2.510e-02  -4.403 1.09e-05 ***
## Ratingmissing     -2.083e-01  1.045e-01  -1.994 0.046258 *  
## RatingT           -8.194e-02  1.988e-02  -4.122 3.83e-05 ***
## Critic_Score       9.923e-03  6.440e-04  15.409  < 2e-16 ***
## Critic_Count       6.354e-03  4.445e-04  14.295  < 2e-16 ***
## User_Score        -2.542e-03  5.919e-04  -4.295 1.79e-05 ***
## User_Count         2.284e-04  1.512e-05  15.107  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4198 on 4407 degrees of freedom
## Multiple R-squared:  0.2814, Adjusted R-squared:  0.2759 
## F-statistic: 50.77 on 34 and 4407 DF,  p-value: < 2.2e-16
na_rpart_model
## n= 4442 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##   1) root 4442 1080.91600 0.3528636  
##     2) User_Count< 65.5 3226  334.03570 0.2431556  
##       4) User_Count< 26.5 2350  148.15720 0.1975404 *
##       5) User_Count>=26.5 876  167.87130 0.3655251  
##        10) Genre=Action,Adventure,Fighting,Puzzle,Role-Playing,Shooter,Strategy 567   61.47839 0.2725573 *
##        11) Genre=Misc,Platform,Racing,Simulation,Sports 309   92.49994 0.5361165 *
##     3) User_Count>=65.5 1216  605.04480 0.6439145  
##       6) Platform=3DS,PC,PS4,PSP,PSV 367   74.75077 0.3048501  
##        12) Critic_Count< 67.5 310   41.86170 0.2268387 *
##        13) Critic_Count>=67.5 57   20.74206 0.7291228 *
##       7) Platform=DS,GBA,GC,PS,PS2,PS3,Wii,WiiU,X360,XB,XOne 849  469.86350 0.7904829  
##        14) User_Count< 683.5 754  324.26010 0.6881698  
##          28) Critic_Score< 81.5 432  113.23560 0.5349769  
##            56) Platform=PS3,WiiU,X360,XB,XOne 324   61.61972 0.4326543 *
##            57) Platform=DS,GBA,GC,PS,PS2,Wii 108   38.04689 0.8419444 *
##          29) Critic_Score>=81.5 322  187.28470 0.8936957  
##            58) Genre=Action,Adventure,Fighting,Platform,Puzzle,Role-Playing,Shooter,Simulation,Strategy 233   93.72291 0.7518455 *
##            59) Genre=Misc,Racing,Sports 89   76.59962 1.2650560  
##             118) Platform=PS3,X360,XB,XOne 69   44.19688 1.0257970 *
##             119) Platform=GBA,PS,PS2,Wii 20   14.82569 2.0905000 *
##        15) User_Count>=683.5 95   75.06619 1.6025260 *
summary(na_rpart_model)
## Call:
## rpart(formula = train_target ~ ., data = train_input)
##   n= 4442 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.13121807      0 1.0000000 1.0005690 0.05226357
## 2 0.06058182      1 0.8687819 0.8970743 0.04497328
## 3 0.02196264      3 0.7476183 0.8220437 0.04274786
## 4 0.01665923      4 0.7256557 0.7925870 0.04086064
## 5 0.01597683      5 0.7089964 0.7797883 0.04026017
## 6 0.01285292      7 0.6770428 0.7577736 0.03940764
## 7 0.01255323      8 0.6641899 0.7447725 0.03871714
## 8 0.01123770      9 0.6516366 0.7469888 0.03886968
## 9 0.01000000     10 0.6403989 0.7441961 0.03864088
## 
## Variable importance
##   User_Count     Platform Critic_Count Critic_Score        Genre       Rating 
##           41           21           12           11            7            4 
##   User_Score 
##            4 
## 
## Node number 1: 4442 observations,    complexity param=0.1312181
##   mean=0.3528636, MSE=0.24334 
##   left son=2 (3226 obs) right son=3 (1216 obs)
##   Primary splits:
##       User_Count   < 65.5  to the left,  improve=0.13121810, (0 missing)
##       Critic_Score < 83.5  to the left,  improve=0.12906440, (0 missing)
##       Critic_Count < 65.5  to the left,  improve=0.09647330, (0 missing)
##       User_Score   < 74.5  to the left,  improve=0.02300946, (0 missing)
##       Platform     splits as  LLRLLRRRLLLRRRLR, improve=0.02270351, (0 missing)
##   Surrogate splits:
##       Critic_Count < 45.5  to the left,  agree=0.820, adj=0.344, (0 split)
##       Platform     splits as  LLLLRLLLRLRLRLLR, agree=0.779, adj=0.192, (0 split)
##       Critic_Score < 84.5  to the left,  agree=0.776, adj=0.181, (0 split)
##       Rating       splits as  LLRLL, agree=0.750, adj=0.086, (0 split)
## 
## Node number 2: 3226 observations,    complexity param=0.01665923
##   mean=0.2431556, MSE=0.1035448 
##   left son=4 (2350 obs) right son=5 (876 obs)
##   Primary splits:
##       User_Count   < 26.5  to the left,  improve=0.05390812, (0 missing)
##       Critic_Score < 78.5  to the left,  improve=0.04583342, (0 missing)
##       Genre        splits as  LLLRRLLLLRRL, improve=0.03062039, (0 missing)
##       Rating       splits as  RRLLL, improve=0.02613324, (0 missing)
##       Platform     splits as  LRRLLRRLLLLRLLLL, improve=0.01483623, (0 missing)
##   Surrogate splits:
##       Critic_Count < 38.5  to the left,  agree=0.762, adj=0.122, (0 split)
##       Platform     splits as  LLLLRLLLRLRLRLLR, agree=0.739, adj=0.040, (0 split)
##       Critic_Score < 20.5  to the right, agree=0.729, adj=0.003, (0 split)
## 
## Node number 3: 1216 observations,    complexity param=0.06058182
##   mean=0.6439145, MSE=0.4975697 
##   left son=6 (367 obs) right son=7 (849 obs)
##   Primary splits:
##       Platform     splits as  LRRRLRRRLLLRRRRR, improve=0.09987773, (0 missing)
##       Critic_Score < 83.5  to the left,  improve=0.09271161, (0 missing)
##       Critic_Count < 65.5  to the left,  improve=0.06159384, (0 missing)
##       User_Count   < 684.5 to the left,  improve=0.04815472, (0 missing)
##       User_Score   < 83.5  to the left,  improve=0.03752358, (0 missing)
##   Surrogate splits:
##       User_Count < 1018  to the right,    agree=0.735, adj=0.123, (0 split)
##       Genre      splits as  RRRRRRRRRLRL, agree=0.716, adj=0.060, (0 split)
##       User_Score < 34.5  to the left,     agree=0.701, adj=0.011, (0 split)
## 
## Node number 4: 2350 observations
##   mean=0.1975404, MSE=0.06304561 
## 
## Node number 5: 876 observations,    complexity param=0.01285292
##   mean=0.3655251, MSE=0.1916339 
##   left son=10 (567 obs) right son=11 (309 obs)
##   Primary splits:
##       Genre        splits as  LLLRRLRLLRRL, improve=0.08275941, (0 missing)
##       Platform     splits as  LLRRLRRLLLLRLLRL, improve=0.08114537, (0 missing)
##       Critic_Score < 78.5  to the left,  improve=0.05859242, (0 missing)
##       Rating       splits as  RLLLL, improve=0.05602756, (0 missing)
##       User_Score   < 75.5  to the left,  improve=0.02984551, (0 missing)
##   Surrogate splits:
##       Rating       splits as  RLLLL, agree=0.788, adj=0.398, (0 split)
##       Critic_Score < 87.5  to the left,  agree=0.663, adj=0.045, (0 split)
##       Platform     splits as  LLRRLLLLLLLLLLLL, agree=0.661, adj=0.039, (0 split)
##       User_Count   < 64.5  to the left,  agree=0.651, adj=0.010, (0 split)
##       Critic_Count < 5.5   to the right, agree=0.650, adj=0.006, (0 split)
## 
## Node number 6: 367 observations,    complexity param=0.0112377
##   mean=0.3048501, MSE=0.2036806 
##   left son=12 (310 obs) right son=13 (57 obs)
##   Primary splits:
##       Critic_Count < 67.5  to the left,  improve=0.16250010, (0 missing)
##       User_Count   < 778   to the left,  improve=0.08017802, (0 missing)
##       Platform     splits as  R---L---RRL-----, improve=0.06583444, (0 missing)
##       Critic_Score < 84.5  to the left,  improve=0.05200730, (0 missing)
##       User_Score   < 89.5  to the left,  improve=0.02422292, (0 missing)
##   Surrogate splits:
##       User_Count < 6078  to the left,  agree=0.856, adj=0.07, (0 split)
## 
## Node number 7: 849 observations,    complexity param=0.06058182
##   mean=0.7904829, MSE=0.5534317 
##   left son=14 (754 obs) right son=15 (95 obs)
##   Primary splits:
##       User_Count   < 683.5 to the left,  improve=0.15012280, (0 missing)
##       Critic_Score < 82.5  to the left,  improve=0.10728000, (0 missing)
##       Critic_Count < 65.5  to the left,  improve=0.04701995, (0 missing)
##       User_Score   < 73.5  to the left,  improve=0.04116350, (0 missing)
##       Platform     splits as  -LRR-RRL---RLLLL, improve=0.02959394, (0 missing)
##   Surrogate splits:
##       Critic_Score < 94.5  to the left,  agree=0.900, adj=0.105, (0 split)
##       Critic_Count < 79.5  to the left,  agree=0.899, adj=0.095, (0 split)
## 
## Node number 10: 567 observations
##   mean=0.2725573, MSE=0.1084275 
## 
## Node number 11: 309 observations
##   mean=0.5361165, MSE=0.2993526 
## 
## Node number 12: 310 observations
##   mean=0.2268387, MSE=0.1350377 
## 
## Node number 13: 57 observations
##   mean=0.7291228, MSE=0.3638957 
## 
## Node number 14: 754 observations,    complexity param=0.02196264
##   mean=0.6881698, MSE=0.4300531 
##   left son=28 (432 obs) right son=29 (322 obs)
##   Primary splits:
##       Critic_Score < 81.5  to the left,  improve=0.07321213, (0 missing)
##       Genre        splits as  LLLRLLRLLLRL, improve=0.06489677, (0 missing)
##       Platform     splits as  -LRL-RRL---RLLLL, improve=0.06009541, (0 missing)
##       Rating       splits as  RLLLL, improve=0.05884339, (0 missing)
##       User_Score   < 73.5  to the left,  improve=0.03560626, (0 missing)
##   Surrogate splits:
##       User_Score < 78.5  to the left,  agree=0.692, adj=0.280, (0 split)
##       Platform   splits as  -LRL-RRL---LLLRL, agree=0.634, adj=0.143, (0 split)
##       Genre      splits as  LLLLLRRLLLRR, agree=0.633, adj=0.140, (0 split)
##       Rating     splits as  RLLLL, agree=0.601, adj=0.065, (0 split)
##       User_Count < 295.5 to the left,  agree=0.589, adj=0.037, (0 split)
## 
## Node number 15: 95 observations
##   mean=1.602526, MSE=0.7901705 
## 
## Node number 28: 432 observations,    complexity param=0.01255323
##   mean=0.5349769, MSE=0.2621194 
##   left son=56 (324 obs) right son=57 (108 obs)
##   Primary splits:
##       Platform     splits as  -RRR-RRL---RLLLL, improve=0.11982970, (0 missing)
##       Rating       splits as  RRLLL, improve=0.07555892, (0 missing)
##       Genre        splits as  LLLRRRLLLLR-, improve=0.05110568, (0 missing)
##       Critic_Score < 70.5  to the left,  improve=0.04433744, (0 missing)
##       User_Score   < 64.5  to the left,  improve=0.03329483, (0 missing)
##   Surrogate splits:
##       User_Score < 84.5  to the left,     agree=0.799, adj=0.194, (0 split)
##       Rating     splits as  RLLLL,        agree=0.757, adj=0.028, (0 split)
##       Genre      splits as  LLLLRLLLLLL-, agree=0.752, adj=0.009, (0 split)
##       User_Count < 66.5  to the right,    agree=0.752, adj=0.009, (0 split)
## 
## Node number 29: 322 observations,    complexity param=0.01597683
##   mean=0.8936957, MSE=0.5816295 
##   left son=58 (233 obs) right son=59 (89 obs)
##   Primary splits:
##       Genre        splits as  LLLRLLRLLLRL, improve=0.09056891, (0 missing)
##       Rating       splits as  RLLLR, improve=0.08632519, (0 missing)
##       Platform     splits as  -LLL-RRL---RLRLL, improve=0.04023726, (0 missing)
##       Critic_Count < 8.5   to the left,  improve=0.02783516, (0 missing)
##       Critic_Score < 91.5  to the left,  improve=0.02766431, (0 missing)
##   Surrogate splits:
##       Rating     splits as  RLLLL,     agree=0.798, adj=0.270, (0 split)
##       User_Score < 65.5  to the right, agree=0.752, adj=0.101, (0 split)
## 
## Node number 56: 324 observations
##   mean=0.4326543, MSE=0.1901843 
## 
## Node number 57: 108 observations
##   mean=0.8419444, MSE=0.352286 
## 
## Node number 58: 233 observations
##   mean=0.7518455, MSE=0.4022442 
## 
## Node number 59: 89 observations,    complexity param=0.01597683
##   mean=1.265056, MSE=0.8606699 
##   left son=118 (69 obs) right son=119 (20 obs)
##   Primary splits:
##       Platform     splits as  --R--RRL---R-LLL, improve=0.22946650, (0 missing)
##       User_Score   < 83.5  to the left,  improve=0.10000440, (0 missing)
##       Critic_Score < 91.5  to the left,  improve=0.07534420, (0 missing)
##       Rating       splits as  LLL-R, improve=0.05574002, (0 missing)
##       Critic_Count < 42.5  to the right, improve=0.05029077, (0 missing)
##   Surrogate splits:
##       User_Score   < 82.5  to the left,  agree=0.921, adj=0.65, (0 split)
##       Critic_Score < 92.5  to the left,  agree=0.831, adj=0.25, (0 split)
##       Critic_Count < 19.5  to the right, agree=0.798, adj=0.10, (0 split)
## 
## Node number 118: 69 observations
##   mean=1.025797, MSE=0.6405345 
## 
## Node number 119: 20 observations
##   mean=2.0905, MSE=0.7412847
na_m5p_model
## M5 pruned model tree:
## (using smoothed linear models)
## 
## User_Count <= 38.5 : 
## |   User_Count <= 15.5 : LM1 (1651/38.221%)
## |   User_Count >  15.5 : 
## |   |   Critic_Score <= 66.5 : 
## |   |   |   Platform=XOne,PS3,WiiU,X360,PS <= 0.5 : LM2 (259/54.916%)
## |   |   |   Platform=XOne,PS3,WiiU,X360,PS >  0.5 : LM3 (144/32.408%)
## |   |   Critic_Score >  66.5 : 
## |   |   |   Rating=E10+,M,E <= 0.5 : LM4 (250/49.629%)
## |   |   |   Rating=E10+,M,E >  0.5 : 
## |   |   |   |   Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS <= 0.5 : LM5 (126/56.803%)
## |   |   |   |   Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS >  0.5 : 
## |   |   |   |   |   Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS <= 0.5 : 
## |   |   |   |   |   |   Critic_Count <= 34.5 : LM6 (54/58.767%)
## |   |   |   |   |   |   Critic_Count >  34.5 : 
## |   |   |   |   |   |   |   User_Count <= 30.5 : 
## |   |   |   |   |   |   |   |   Critic_Score <= 79.5 : 
## |   |   |   |   |   |   |   |   |   User_Score <= 71.5 : LM7 (2/36.489%)
## |   |   |   |   |   |   |   |   |   User_Score >  71.5 : LM8 (7/53.87%)
## |   |   |   |   |   |   |   |   Critic_Score >  79.5 : LM9 (4/1.379%)
## |   |   |   |   |   |   |   User_Count >  30.5 : LM10 (10/52.85%)
## |   |   |   |   |   Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS >  0.5 : LM11 (246/72.41%)
## User_Count >  38.5 : LM12 (1689/109.554%)
## 
## LM num: 1
## train_target = 
##  0.0015 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0009 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0012 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0357 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.1392 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.2002 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0017 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.057 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0962 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.2294 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.1247 * Platform=PS3,WiiU,X360,PS 
##  - 0.0009 * Platform=WiiU,X360,PS 
##  + 0.001 * Platform=X360,PS 
##  + 0.001 * Platform=PS 
##  + 0.0008 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.001 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0567 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0574 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0515 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.072 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0518 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.0248 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0003 * Genre=Sports,Misc 
##  + 0.0808 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0335 * Rating=E10+,M,E 
##  - 0.0332 * Rating=M,E 
##  + 0.0874 * Rating=E 
##  + 0.0032 * Critic_Score 
##  + 0.0001 * Critic_Count 
##  - 0.0012 * User_Score 
##  + 0.0087 * User_Count 
##  - 0.1514
## 
## LM num: 2
## train_target = 
##  0.0114 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.155 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0017 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.007 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0127 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0193 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0899 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.002 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0042 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0191 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0037 * Platform=WiiU,X360,PS 
##  + 0.006 * Platform=X360,PS 
##  + 0.0066 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.013 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0065 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0059 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.1539 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.1141 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.215 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.0003 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0903 * Genre=Platform,Sports,Misc 
##  + 0.0093 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0032 * Rating=E10+,M,E 
##  - 0.1124 * Rating=M,E 
##  + 0.1779 * Rating=E 
##  + 0.0045 * Critic_Score 
##  + 0.0031 * Critic_Count 
##  - 0 * User_Score 
##  + 0.0072 * User_Count 
##  - 0.4901
## 
## LM num: 3
## train_target = 
##  0.0114 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.008 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0017 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.007 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0127 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0222 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0017 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.002 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0065 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0261 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  + 0.0371 * Platform=WiiU,X360,PS 
##  + 0.006 * Platform=X360,PS 
##  + 0.0066 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.013 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0065 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0059 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0161 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0145 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0194 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.0003 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0842 * Genre=Platform,Sports,Misc 
##  + 0.0129 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0032 * Rating=E10+,M,E 
##  - 0.0122 * Rating=M,E 
##  + 0.0175 * Rating=E 
##  + 0.001 * Critic_Score 
##  + 0.0003 * Critic_Count 
##  - 0 * User_Score 
##  + 0.0007 * User_Count 
##  + 0.0369
## 
## LM num: 4
## train_target = 
##  0.8031 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.671 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.1264 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.024 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0328 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0049 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.1117 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0108 * Platform=WiiU,X360,PS 
##  + 0.0181 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2418 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.1991 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.1176 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0097 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0106 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.1633 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0088 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0073 * Rating=E10+,M,E 
##  - 0.0119 * Rating=M,E 
##  + 0.0079 * Rating=E 
##  + 0.0009 * Critic_Score 
##  + 0.0001 * Critic_Count 
##  - 0 * User_Score 
##  + 0.006 * User_Count 
##  - 0.3263
## 
## LM num: 5
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.03 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0196 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0432 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0301 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0216 * Platform=WiiU,X360,PS 
##  + 0.0417 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.203 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0607 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0282 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.088 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0154 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.0262 * Rating=M,E 
##  + 0.0201 * Rating=E 
##  + 0.0091 * Critic_Score 
##  + 0.0001 * Critic_Count 
##  - 0 * User_Score 
##  + 0.0003 * User_Count 
##  - 0.578
## 
## LM num: 6
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0514 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0133 * Platform=WiiU,X360,PS 
##  + 0.0485 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0523 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.172 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2709 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.2925 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0339 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  + 0.0441 * Rating=M,E 
##  + 0.0403 * Rating=E 
##  + 0.0011 * Critic_Score 
##  + 0.0017 * Critic_Count 
##  - 0.0058 * User_Score 
##  + 0.0003 * User_Count 
##  + 0.5797
## 
## LM num: 7
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0514 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0133 * Platform=WiiU,X360,PS 
##  + 0.0485 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.4746 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2486 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0461 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.0443 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0339 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.0444 * Rating=M,E 
##  + 0.0403 * Rating=E 
##  - 0.0348 * Critic_Score 
##  + 0.0226 * Critic_Count 
##  - 0.0412 * User_Score 
##  - 0.0187 * User_Count 
##  + 6.2868
## 
## LM num: 8
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0514 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0133 * Platform=WiiU,X360,PS 
##  + 0.0485 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.4746 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2486 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0461 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.0443 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0339 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.0444 * Rating=M,E 
##  + 0.0403 * Rating=E 
##  - 0.0348 * Critic_Score 
##  + 0.0223 * Critic_Count 
##  - 0.037 * User_Score 
##  - 0.0187 * User_Count 
##  + 5.9374
## 
## LM num: 9
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0514 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0133 * Platform=WiiU,X360,PS 
##  + 0.0485 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.4746 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2486 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0461 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.0443 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0339 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.0444 * Rating=M,E 
##  + 0.0403 * Rating=E 
##  - 0.0348 * Critic_Score 
##  + 0.0178 * Critic_Count 
##  - 0.0285 * User_Score 
##  - 0.0187 * User_Count 
##  + 5.4449
## 
## LM num: 10
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0514 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.0133 * Platform=WiiU,X360,PS 
##  + 0.0485 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.5201 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2486 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0461 * Genre=Action,Shooter,Platform,Sports,Misc 
##  + 0.0644 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.0339 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.0444 * Rating=M,E 
##  + 0.0403 * Rating=E 
##  - 0.0391 * Critic_Score 
##  + 0.0031 * Critic_Count 
##  - 0 * User_Score 
##  - 0.021 * User_Count 
##  + 4.1761
## 
## LM num: 11
## train_target = 
##  0.0199 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.012 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0006 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0176 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0082 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0173 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0315 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0021 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0014 * Platform=PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0009 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.1913 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0007 * Platform=PS3,WiiU,X360,PS 
##  - 0.1591 * Platform=WiiU,X360,PS 
##  + 0.3172 * Platform=X360,PS 
##  + 0.0044 * Platform=PS 
##  + 0.0006 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0082 * Genre=Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0006 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.0108 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0125 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.2731 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.1925 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.1503 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.1881 * Genre=Sports,Misc 
##  + 0.0007 * Genre=Misc 
##  + 0.0005 * Rating=T,E10+,M,E 
##  + 0.0051 * Rating=E10+,M,E 
##  - 0.1525 * Rating=M,E 
##  + 0.0218 * Rating=E 
##  + 0.0114 * Critic_Score 
##  + 0.0001 * Critic_Count 
##  - 0 * User_Score 
##  + 0.0003 * User_Count 
##  - 0.4138
## 
## LM num: 12
## train_target = 
##  -0.1671 * Platform=PC,PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.4506 * Platform=PSP,3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.1388 * Platform=3DS,XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.3057 * Platform=XB,GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.2885 * Platform=GC,PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.0005 * Platform=PS4,DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.2762 * Platform=DS,GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  + 0.1982 * Platform=GBA,PS2,Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.0712 * Platform=Wii,XOne,PS3,WiiU,X360,PS 
##  - 0.2731 * Platform=XOne,PS3,WiiU,X360,PS 
##  + 0.0516 * Platform=PS3,WiiU,X360,PS 
##  - 0.1282 * Platform=WiiU,X360,PS 
##  + 0.184 * Platform=X360,PS 
##  + 0.5268 * Platform=PS 
##  + 0.0021 * Genre=Strategy,Puzzle,Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.1692 * Genre=Fighting,Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  - 0.099 * Genre=Role-Playing,Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.1467 * Genre=Racing,Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0008 * Genre=Simulation,Action,Shooter,Platform,Sports,Misc 
##  + 0.0007 * Genre=Action,Shooter,Platform,Sports,Misc 
##  - 0.0005 * Genre=Shooter,Platform,Sports,Misc 
##  + 0.1004 * Genre=Sports,Misc 
##  + 0.2345 * Genre=Misc 
##  + 0.0009 * Rating=T,E10+,M,E 
##  - 0.0492 * Rating=M,E 
##  + 0.2386 * Rating=E 
##  + 0.0155 * Critic_Score 
##  + 0.005 * Critic_Count 
##  - 0.0035 * User_Score 
##  + 0.0002 * User_Count 
##  - 1.1173
## 
## Number of Rules : 12
summary(na_m5p_model)
## 
## === Summary ===
## 
## Correlation coefficient                  0.6327
## Mean absolute error                      0.2515
## Root mean squared error                  0.3851
## Relative absolute error                 78.9219 %
## Root relative squared error             78.0643 %
## Total Number of Instances             4442
# 2Bii
predictions_base_train <- predict(na_base_train_model, train_input)
predictions_rpart_train <- predict(na_rpart_model, train_input)
predictions_m5p_train <- predict(na_m5p_model, train_input)
predictions_base_test <- predict(na_base_train_model, test_input)
predictions_rpart_test <- predict(na_rpart_model, test_input)
predictions_m5p_test <- predict(na_m5p_model, test_input)

metrics_list <- c("R2","MAE","MAPE","RAE","RMSE","RMSPE","RRSE")
mmetric(train_target,predictions_base_train,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.2814308   0.2673936 253.8517353  83.9224673   0.4181586  60.7410080 
##        RRSE 
##  84.7684593
mmetric(train_target,predictions_rpart_train,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.3596011   0.2473792 244.2440044  77.6408500   0.3947590  55.9358231 
##        RRSE 
##  80.0249290
mmetric(train_target,predictions_m5p_train,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.4003114   0.2514609 243.4360809  78.9219312   0.3850871  62.7899087 
##        RRSE 
##  78.0642575
mmetric(test_target,predictions_base_test,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.2668016   0.2739431 254.5660538  84.1490441   0.4260057  65.7716444 
##        RRSE 
##  85.7431854
mmetric(test_target,predictions_rpart_test,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.3054067   0.2594715 247.2557484  79.7037078   0.4146019  57.9768030 
##        RRSE 
##  83.4479297
mmetric(test_target,predictions_m5p_test,metrics_list)
##          R2         MAE        MAPE         RAE        RMSE       RMSPE 
##   0.3633292   0.2625373 258.6857479  80.6454284   0.4012374  70.3244626 
##        RRSE 
##  80.7580054

3 Cross-validation of lm, rpart, and M5P NA_Sales prediction models

# 3A
cv_function <- function(df, target, nFolds, seedVal, prediction_method, metrics_list)
{
  # create folds
  set.seed(seedVal)
  folds = createFolds(df[,target],nFolds) 
  # perform cross validation
  cv_results <- lapply(folds, function(x)
  { 
    test_target <- df[x,target]
    test_input  <- df[x,-target]

    train_target <- df[-x,target]
    train_input <- df[-x,-target]

    prediction_model <- prediction_method(train_target~.,train_input) 
    pred<- predict(prediction_model,test_input)
    return(mmetric(test_target,pred,metrics_list))
  })
  # generate means and sds and show cv results, means and sds using kable
  cv_results_m <- as.matrix(as.data.frame(cv_results))
  cv_mean<- as.matrix(rowMeans(cv_results_m))
  cv_sd <- as.matrix(rowSds(cv_results_m))
  colnames(cv_mean) <- "Mean"
  colnames(cv_sd) <- "Sd"
  cv_all <- cbind(cv_results_m, cv_mean, cv_sd)
  kable(t(cv_all),digits=2)
}

# 3B
df <- na_sales_sub
target <- 8
nFolds <- 5
seedVal <- 500
metrics_list <- c("R2","MAE","MAPE","RAE","RMSE","RMSPE","RRSE")
assign("prediction_method", lm)
cv_function(df, target, 5, seedVal, prediction_method, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.25 0.27 274.32 86.10 0.41 68.01 86.81
Fold2 0.25 0.28 245.90 83.66 0.45 67.91 86.77
Fold3 0.28 0.28 244.29 82.07 0.44 52.90 85.10
Fold4 0.29 0.26 261.45 85.76 0.39 57.74 84.19
Fold5 0.30 0.27 252.86 84.33 0.41 61.41 83.94
Mean 0.27 0.27 255.76 84.39 0.42 61.59 85.36
Sd 0.02 0.01 12.39 1.64 0.02 6.55 1.37
assign("prediction_method", rpart)
cv_function(df, target, 5, seedVal, rpart, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.23 0.27 266.62 85.85 0.42 58.07 88.31
Fold2 0.20 0.28 240.68 83.40 0.47 51.64 90.43
Fold3 0.28 0.27 258.42 78.96 0.44 59.25 84.88
Fold4 0.29 0.25 274.07 82.98 0.40 66.91 85.09
Fold5 0.29 0.26 272.24 82.69 0.41 71.11 84.28
Mean 0.26 0.27 262.40 82.78 0.43 61.40 86.60
Sd 0.04 0.01 13.59 2.47 0.03 7.67 2.65
assign("prediction_method", M5P)
cv_function(df, target, 5, seedVal, M5P, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.29 0.25 182.16 79.30 0.43 43.81 89.63
Fold2 0.35 0.23 158.69 70.52 0.42 37.58 81.15
Fold3 0.41 0.24 179.14 69.51 0.40 41.69 76.94
Fold4 0.23 0.26 225.03 86.94 0.44 63.37 95.07
Fold5 0.38 0.25 231.17 77.91 0.38 62.87 78.56
Mean 0.33 0.25 195.24 76.84 0.41 49.86 84.27
Sd 0.07 0.01 31.41 7.13 0.02 12.31 7.77

4 Improve the models by adding a quadratic term of Critic_Score

# 4A
na_sales_sub$Critic_Score_Squared <- na_sales_sub$Critic_Score^2

set.seed(500)
inTrain <- createDataPartition(y=na_sales_sub$NA_Sales , p=0.70, list=FALSE)
train_input <- na_sales_sub[inTrain,c(1,2,3,4,5,6,7,9)]
test_input <- na_sales_sub[-inTrain,c(1,2,3,4,5,6,7,9)]

# 4B
na_improved_train_model <- lm(train_target~., data = train_input)
summary(na_improved_train_model)
## 
## Call:
## lm(formula = train_target ~ ., data = train_input)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.77537 -0.22648 -0.08395  0.07681  3.10747 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.547e-01  1.211e-01   6.232 5.05e-10 ***
## PlatformDS            4.650e-02  5.172e-02   0.899 0.368655    
## PlatformGBA           8.927e-02  5.840e-02   1.529 0.126411    
## PlatformGC           -5.705e-02  5.399e-02  -1.057 0.290759    
## PlatformPC           -2.221e-01  5.754e-02  -3.860 0.000115 ***
## PlatformPS            2.574e-01  6.410e-02   4.016 6.02e-05 ***
## PlatformPS2           1.396e-01  4.865e-02   2.869 0.004141 ** 
## PlatformPS3           5.423e-02  4.962e-02   1.093 0.274464    
## PlatformPS4          -1.536e-02  5.881e-02  -0.261 0.793985    
## PlatformPSP          -2.303e-02  5.327e-02  -0.432 0.665552    
## PlatformPSV          -1.120e-01  7.427e-02  -1.508 0.131574    
## PlatformWii           1.071e-01  5.152e-02   2.079 0.037705 *  
## PlatformWiiU         -2.037e-02  7.502e-02  -0.272 0.785960    
## PlatformX360         -1.039e-02  4.930e-02  -0.211 0.833009    
## PlatformXB           -1.152e-01  5.152e-02  -2.236 0.025430 *  
## PlatformXOne          1.247e-01  6.300e-02   1.980 0.047806 *  
## GenreAdventure       -9.093e-02  3.880e-02  -2.344 0.019144 *  
## GenreFighting        -2.897e-03  3.170e-02  -0.091 0.927180    
## GenreMisc             5.097e-02  3.192e-02   1.597 0.110426    
## GenrePlatform        -1.198e-03  3.241e-02  -0.037 0.970525    
## GenrePuzzle          -1.307e-01  5.338e-02  -2.448 0.014417 *  
## GenreRacing          -4.780e-02  2.836e-02  -1.686 0.091933 .  
## GenreRole-Playing    -8.989e-02  2.590e-02  -3.471 0.000524 ***
## GenreShooter          3.356e-02  2.354e-02   1.426 0.154056    
## GenreSimulation       3.180e-02  3.727e-02   0.853 0.393579    
## GenreSports          -4.887e-02  2.698e-02  -1.811 0.070202 .  
## GenreStrategy        -1.684e-01  4.062e-02  -4.146 3.44e-05 ***
## RatingE10+           -1.181e-01  2.382e-02  -4.960 7.33e-07 ***
## RatingM              -1.071e-01  2.643e-02  -4.052 5.17e-05 ***
## Ratingmissing        -3.027e-01  1.097e-01  -2.759 0.005813 ** 
## RatingT              -1.215e-01  2.104e-02  -5.773 8.33e-09 ***
## Critic_Score         -2.216e-02  3.586e-03  -6.181 6.96e-10 ***
## Critic_Count          5.601e-03  4.625e-04  12.111  < 2e-16 ***
## User_Score           -2.122e-03  6.394e-04  -3.319 0.000911 ***
## User_Count            8.857e-05  1.487e-05   5.958 2.76e-09 ***
## Critic_Score_Squared  2.363e-04  2.721e-05   8.683  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4421 on 4406 degrees of freedom
## Multiple R-squared:  0.2034, Adjusted R-squared:  0.1971 
## F-statistic: 32.14 on 35 and 4406 DF,  p-value: < 2.2e-16
# 4C
df <- na_sales_sub
target <- 8
nFolds <- 5
seedVal <- 500
metrics_list <- c("R2","MAE","MAPE","RAE","RMSE","RMSPE","RRSE")
assign("prediction_method", lm)
cv_function(df, target, 5, seedVal, prediction_method, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.27 0.27 270.59 84.93 0.41 70.72 85.47
Fold2 0.28 0.27 232.46 81.17 0.44 64.74 84.93
Fold3 0.29 0.28 244.83 81.34 0.44 56.64 84.19
Fold4 0.32 0.25 254.86 83.36 0.38 58.02 82.41
Fold5 0.33 0.26 249.79 82.50 0.40 63.29 82.07
Mean 0.30 0.26 250.51 82.66 0.41 62.68 83.81
Sd 0.03 0.01 13.97 1.55 0.02 5.64 1.51
assign("prediction_method", rpart)
cv_function(df, target, 5, seedVal, rpart, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.23 0.27 266.62 85.85 0.42 58.07 88.31
Fold2 0.20 0.28 240.68 83.40 0.47 51.64 90.43
Fold3 0.28 0.27 258.42 78.96 0.44 59.25 84.88
Fold4 0.29 0.25 274.07 82.98 0.40 66.91 85.09
Fold5 0.29 0.26 272.24 82.69 0.41 71.11 84.28
Mean 0.26 0.27 262.40 82.78 0.43 61.40 86.60
Sd 0.04 0.01 13.59 2.47 0.03 7.67 2.65
assign("prediction_method", M5P)
cv_function(df, target, 5, seedVal, M5P, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.39 0.22 172.61 71.60 0.37 39.95 78.43
Fold2 0.36 0.23 163.70 70.78 0.42 39.13 80.86
Fold3 0.41 0.24 181.04 69.65 0.40 42.29 77.01
Fold4 0.41 0.23 207.65 75.41 0.36 52.37 77.90
Fold5 0.38 0.25 230.07 77.85 0.38 62.68 78.55
Mean 0.39 0.23 191.01 73.06 0.39 47.28 78.55
Sd 0.02 0.01 27.32 3.44 0.02 10.10 1.43

5 Improve the models with the log term of User_Count

# 5A
na_log <- na_sales_sub[,-7]
na_log$log_User_Count <- log(na_sales_sub$User_Count)
summary(na_log)
##     Platform             Genre          Rating      Critic_Score  
##  PS2    :1135   Action      :1549   E      :1968   Min.   :13.00  
##  X360   : 848   Sports      : 921   E10+   : 877   1st Qu.:61.00  
##  PS3    : 749   Shooter     : 798   M      :1295   Median :72.00  
##  XB     : 578   Role-Playing: 643   missing:  23   Mean   :69.82  
##  Wii    : 475   Racing      : 537   T      :2182   3rd Qu.:80.00  
##  DS     : 457   Platform    : 386                  Max.   :98.00  
##  (Other):2103   (Other)     :1511                                 
##   Critic_Count      User_Score       NA_Sales      Critic_Score_Squared
##  Min.   :  3.00   Min.   : 5.00   Min.   :0.0100   Min.   : 169        
##  1st Qu.: 14.00   1st Qu.:64.00   1st Qu.:0.0800   1st Qu.:3721        
##  Median : 25.00   Median :74.00   Median :0.1700   Median :5184        
##  Mean   : 28.97   Mean   :70.77   Mean   :0.3542   Mean   :5069        
##  3rd Qu.: 40.00   3rd Qu.:81.00   3rd Qu.:0.4100   3rd Qu.:6400        
##  Max.   :113.00   Max.   :95.00   Max.   :3.4900   Max.   :9604        
##                                                                        
##  log_User_Count 
##  Min.   :1.386  
##  1st Qu.:2.303  
##  Median :3.178  
##  Mean   :3.483  
##  3rd Qu.:4.344  
##  Max.   :9.275  
## 
set.seed(500)
inTrain <- createDataPartition(y=na_log$NA_Sales , p=0.70, list=FALSE)
train_input <- na_log[inTrain,c(1,2,3,4,5,6,8,9)]
test_input <- na_log[-inTrain,c(1,2,3,4,5,6,8,9)]
summary(train_input)
##     Platform             Genre          Rating      Critic_Score  
##  PS2    : 810   Action      :1096   E      :1374   Min.   :13.00  
##  X360   : 600   Sports      : 637   E10+   : 604   1st Qu.:62.00  
##  PS3    : 535   Shooter     : 571   M      : 924   Median :72.00  
##  XB     : 395   Role-Playing: 435   missing:  17   Mean   :69.85  
##  Wii    : 325   Racing      : 389   T      :1523   3rd Qu.:80.00  
##  DS     : 318   Platform    : 262                  Max.   :98.00  
##  (Other):1459   (Other)     :1052                                 
##   Critic_Count     User_Score    Critic_Score_Squared log_User_Count 
##  Min.   :  3.0   Min.   : 5.00   Min.   : 169         Min.   :1.386  
##  1st Qu.: 15.0   1st Qu.:64.00   1st Qu.:3844         1st Qu.:2.303  
##  Median : 25.0   Median :74.00   Median :5184         Median :3.178  
##  Mean   : 29.1   Mean   :70.97   Mean   :5070         Mean   :3.489  
##  3rd Qu.: 40.0   3rd Qu.:81.00   3rd Qu.:6400         3rd Qu.:4.369  
##  Max.   :113.0   Max.   :95.00   Max.   :9604         Max.   :9.275  
## 
# 5B
na_log_train_model <- lm(train_target~ . -Critic_Score_Squared, data = train_input)
summary(na_log_train_model)
## 
## Call:
## lm(formula = train_target ~ . - Critic_Score_Squared, data = train_input)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33024 -0.21898 -0.06875  0.09556  3.04342 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.4120633  0.0590466  -6.979 3.43e-12 ***
## PlatformDS         0.1789952  0.0498665   3.589 0.000335 ***
## PlatformGBA        0.2729910  0.0563415   4.845 1.31e-06 ***
## PlatformGC         0.1183253  0.0521703   2.268 0.023374 *  
## PlatformPC        -0.4062672  0.0551223  -7.370 2.02e-13 ***
## PlatformPS         0.3539309  0.0612608   5.777 8.11e-09 ***
## PlatformPS2        0.2524820  0.0468386   5.390 7.39e-08 ***
## PlatformPS3        0.0711289  0.0475705   1.495 0.134926    
## PlatformPS4       -0.1040490  0.0563209  -1.847 0.064752 .  
## PlatformPSP        0.1059801  0.0513999   2.062 0.039278 *  
## PlatformPSV       -0.1649633  0.0712515  -2.315 0.020646 *  
## PlatformWii        0.1898869  0.0494596   3.839 0.000125 ***
## PlatformWiiU      -0.1157247  0.0720877  -1.605 0.108492    
## PlatformX360       0.0378887  0.0473008   0.801 0.423165    
## PlatformXB         0.1352410  0.0504048   2.683 0.007322 ** 
## PlatformXOne       0.0111461  0.0606669   0.184 0.854236    
## GenreAdventure    -0.0624571  0.0372134  -1.678 0.093350 .  
## GenreFighting      0.0125593  0.0303706   0.414 0.679234    
## GenreMisc          0.1210069  0.0306981   3.942 8.21e-05 ***
## GenrePlatform     -0.0318627  0.0311047  -1.024 0.305716    
## GenrePuzzle       -0.0326254  0.0513394  -0.635 0.525146    
## GenreRacing       -0.0368912  0.0271780  -1.357 0.174726    
## GenreRole-Playing -0.1406968  0.0249275  -5.644 1.76e-08 ***
## GenreShooter       0.0292796  0.0225704   1.297 0.194609    
## GenreSimulation    0.0847499  0.0358258   2.366 0.018044 *  
## GenreSports        0.0087382  0.0258852   0.338 0.735700    
## GenreStrategy     -0.1590616  0.0389114  -4.088 4.43e-05 ***
## RatingE10+        -0.1026264  0.0228517  -4.491 7.27e-06 ***
## RatingM           -0.2020155  0.0257632  -7.841 5.56e-15 ***
## Ratingmissing     -0.3439931  0.1051707  -3.271 0.001081 ** 
## RatingT           -0.1368702  0.0201749  -6.784 1.32e-11 ***
## Critic_Score       0.0039771  0.0007000   5.682 1.42e-08 ***
## Critic_Count       0.0010704  0.0004951   2.162 0.030667 *  
## User_Score        -0.0022050  0.0006072  -3.631 0.000285 ***
## log_User_Count     0.1790158  0.0077611  23.066  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4239 on 4407 degrees of freedom
## Multiple R-squared:  0.2675, Adjusted R-squared:  0.2619 
## F-statistic: 47.34 on 34 and 4407 DF,  p-value: < 2.2e-16
# 5C
na_log_rpart_model <- rpart(train_target ~ . -Critic_Score_Squared, data = train_input)
summary(na_log_rpart_model)
## Call:
## rpart(formula = train_target ~ . - Critic_Score_Squared, data = train_input)
##   n= 4442 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.11493923      0 1.0000000 1.0007575 0.05229677
## 2 0.03843600      1 0.8850608 0.9023290 0.04644490
## 3 0.02909071      2 0.8466248 0.8882029 0.04538178
## 4 0.01904853      3 0.8175341 0.8636132 0.04415619
## 5 0.01564045      4 0.7984855 0.8480221 0.04329364
## 6 0.01345197      5 0.7828451 0.8354701 0.04265537
## 7 0.01232494      7 0.7559411 0.8323624 0.04256881
## 8 0.01000000      8 0.7436162 0.8174449 0.04147502
## 
## Variable importance
## log_User_Count       Platform   Critic_Count   Critic_Score         Rating 
##             43             22             19              6              6 
##          Genre     User_Score 
##              3              2 
## 
## Node number 1: 4442 observations,    complexity param=0.1149392
##   mean=0.3528636, MSE=0.24334 
##   left son=2 (3199 obs) right son=3 (1243 obs)
##   Primary splits:
##       log_User_Count < 4.182021 to the left,  improve=0.11493920, (0 missing)
##       Critic_Score   < 78.5     to the left,  improve=0.08191422, (0 missing)
##       Critic_Count   < 57.5     to the left,  improve=0.06701817, (0 missing)
##       Platform       splits as  LLRLLRRRRLLRRRLR, improve=0.02449703, (0 missing)
##       Rating         splits as  RLRLL, improve=0.01273192, (0 missing)
##   Surrogate splits:
##       Critic_Count < 48.5     to the left,  agree=0.813, adj=0.331, (0 split)
##       Platform     splits as  LLLLRLLLRLRLRLLR, agree=0.777, adj=0.204, (0 split)
##       Critic_Score < 82.5     to the left,  agree=0.770, adj=0.178, (0 split)
##       Rating       splits as  LLRLL, agree=0.748, adj=0.100, (0 split)
##       User_Score   < 92.5     to the left,  agree=0.720, adj=0.001, (0 split)
## 
## Node number 2: 3199 observations,    complexity param=0.01904853
##   mean=0.2486152, MSE=0.1193864 
##   left son=4 (2278 obs) right son=5 (921 obs)
##   Primary splits:
##       log_User_Count < 3.238486 to the left,  improve=0.05391187, (0 missing)
##       Critic_Score   < 78.5     to the left,  improve=0.03807676, (0 missing)
##       Rating         splits as  RLLLL, improve=0.02438130, (0 missing)
##       Genre          splits as  LLLRRLLLLRRL, improve=0.02137412, (0 missing)
##       Platform       splits as  LRRLRRRRLLLRLLLR, improve=0.01899238, (0 missing)
##   Surrogate splits:
##       Critic_Count < 43.5     to the left,  agree=0.746, adj=0.117, (0 split)
##       Platform     splits as  LLLLRLLLRLRLLLLR, agree=0.721, adj=0.029, (0 split)
##       Critic_Score < 20.5     to the right, agree=0.713, adj=0.003, (0 split)
## 
## Node number 3: 1243 observations,    complexity param=0.038436
##   mean=0.6211585, MSE=0.4623969 
##   left son=6 (1069 obs) right son=7 (174 obs)
##   Primary splits:
##       Platform       splits as  LLRRLRRLLLLLLLLL, improve=0.07228433, (0 missing)
##       Critic_Score   < 83.5     to the left,  improve=0.03553169, (0 missing)
##       Rating         splits as  RLLLL, improve=0.03237752, (0 missing)
##       Critic_Count   < 65.5     to the left,  improve=0.03033925, (0 missing)
##       log_User_Count < 6.124681 to the left,  improve=0.02802138, (0 missing)
##   Surrogate splits:
##       User_Score   < 87.5     to the left,  agree=0.881, adj=0.149, (0 split)
##       Critic_Score < 95.5     to the left,  agree=0.862, adj=0.017, (0 split)
## 
## Node number 4: 2278 observations
##   mean=0.1976032, MSE=0.07761339 
## 
## Node number 5: 921 observations,    complexity param=0.01232494
##   mean=0.3747883, MSE=0.2003518 
##   left son=10 (667 obs) right son=11 (254 obs)
##   Primary splits:
##       Rating       splits as  RLLLL, improve=0.07219780, (0 missing)
##       Platform     splits as  LRRRLRRLLLLRLLRL, improve=0.06854985, (0 missing)
##       Genre        splits as  LLLRRLLLLLRL, improve=0.06658482, (0 missing)
##       Critic_Score < 80.5     to the left,  improve=0.05690846, (0 missing)
##       User_Score   < 75.5     to the left,  improve=0.02319136, (0 missing)
##   Surrogate splits:
##       Genre        splits as  LLLLRRRLLLRL, agree=0.835, adj=0.402, (0 split)
##       Platform     splits as  LRRRLLLLLLLLLLLL, agree=0.742, adj=0.063, (0 split)
##       Critic_Score < 88.5     to the left,  agree=0.734, adj=0.035, (0 split)
## 
## Node number 6: 1069 observations,    complexity param=0.02909071
##   mean=0.5473994, MSE=0.3795167 
##   left son=12 (727 obs) right son=13 (342 obs)
##   Primary splits:
##       Critic_Count   < 58.5     to the left,  improve=0.07750644, (0 missing)
##       log_User_Count < 5.652483 to the left,  improve=0.05517683, (0 missing)
##       Platform       splits as  RR--L--RRRLRRRRR, improve=0.05250353, (0 missing)
##       Critic_Score   < 81.5     to the left,  improve=0.03842473, (0 missing)
##       Rating         splits as  RLLLL, improve=0.03095676, (0 missing)
##   Surrogate splits:
##       Platform       splits as  LL--L--LLLLLLRLL, agree=0.693, adj=0.041, (0 split)
##       log_User_Count < 7.536078 to the left,  agree=0.687, adj=0.020, (0 split)
##       Genre          splits as  LLLLLRLLLLLL, agree=0.684, adj=0.012, (0 split)
## 
## Node number 7: 174 observations
##   mean=1.07431, MSE=0.7328153 
## 
## Node number 10: 667 observations
##   mean=0.3005697, MSE=0.1313118 
## 
## Node number 11: 254 observations
##   mean=0.569685, MSE=0.3291999 
## 
## Node number 12: 727 observations,    complexity param=0.01345197
##   mean=0.4297662, MSE=0.261144 
##   left son=24 (236 obs) right son=25 (491 obs)
##   Primary splits:
##       Platform     splits as  LR--L--RRLLRLRRR, improve=0.07360632, (0 missing)
##       Rating       splits as  RLLLL, improve=0.06271782, (0 missing)
##       Genre        splits as  LLLRLLRLRLRL, improve=0.04055762, (0 missing)
##       Critic_Score < 70.5     to the left,  improve=0.02985915, (0 missing)
##       Critic_Count < 31.5     to the left,  improve=0.02182532, (0 missing)
##   Surrogate splits:
##       log_User_Count < 6.804059 to the right, agree=0.735, adj=0.182, (0 split)
##       Genre          splits as  RRRRRRRRRLRL, agree=0.696, adj=0.064, (0 split)
##       Critic_Count   < 8.5      to the left,  agree=0.678, adj=0.008, (0 split)
##       User_Score     < 86.5     to the right, agree=0.677, adj=0.004, (0 split)
## 
## Node number 13: 342 observations,    complexity param=0.01564045
##   mean=0.7974561, MSE=0.539202 
##   left son=26 (159 obs) right son=27 (183 obs)
##   Primary splits:
##       log_User_Count < 5.654204 to the left,  improve=0.09167766, (0 missing)
##       Critic_Score   < 81.5     to the left,  improve=0.03829611, (0 missing)
##       Critic_Count   < 90.5     to the left,  improve=0.03556484, (0 missing)
##       Platform       splits as  LR--L--RRRLLRLLR, improve=0.02414832, (0 missing)
##       Genre          splits as  RRRRRRRLRRRL, improve=0.01910185, (0 missing)
##   Surrogate splits:
##       Platform     splits as  LL--R--RRLRLRLLR, agree=0.734, adj=0.428, (0 split)
##       Genre        splits as  RRLLRLLRRRLR, agree=0.646, adj=0.239, (0 split)
##       Critic_Count < 76.5     to the left,  agree=0.637, adj=0.220, (0 split)
##       Critic_Score < 82.5     to the left,  agree=0.611, adj=0.164, (0 split)
##       Rating       splits as  RLR-L, agree=0.570, adj=0.075, (0 split)
## 
## Node number 24: 236 observations
##   mean=0.2297881, MSE=0.07306648 
## 
## Node number 25: 491 observations,    complexity param=0.01345197
##   mean=0.5258859, MSE=0.3230829 
##   left son=50 (412 obs) right son=51 (79 obs)
##   Primary splits:
##       log_User_Count < 5.997694 to the left,  improve=0.09522966, (0 missing)
##       Critic_Score   < 70.5     to the left,  improve=0.07562631, (0 missing)
##       Rating         splits as  RLLLL, improve=0.06666388, (0 missing)
##       Genre          splits as  LLLRRLRLRRRL, improve=0.04676017, (0 missing)
##       Platform       splits as  -L-----LL--R-LLL, improve=0.01828659, (0 missing)
##   Surrogate splits:
##       Critic_Score < 93.5     to the left,  agree=0.849, adj=0.063, (0 split)
## 
## Node number 26: 159 observations
##   mean=0.5589308, MSE=0.2422888 
## 
## Node number 27: 183 observations
##   mean=1.004699, MSE=0.7047932 
## 
## Node number 50: 412 observations
##   mean=0.4490777, MSE=0.203572 
## 
## Node number 51: 79 observations
##   mean=0.9264557, MSE=0.7551317
na_log_m5p_model <- M5P(train_target ~ ., data = train_input[, -8])
summary(na_log_m5p_model)
## 
## === Summary ===
## 
## Correlation coefficient                  0.4693
## Mean absolute error                      0.2675
## Root mean squared error                  0.4356
## Relative absolute error                 83.9478 %
## Root relative squared error             88.3038 %
## Total Number of Instances             4442
df <- na_log
target <- 8
nFolds <- 5
seedVal <- 500
metrics_list <- c("R2","MAE","MAPE","RAE","RMSE","RMSPE","RRSE")
assign("prediction_method", lm)
cv_function(df, target, 5, seedVal, prediction_method, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.99 170.23 5.14 11.44 222.10 1.73 12.24
Fold2 0.98 172.29 7.21 11.60 242.69 4.14 13.35
Fold3 0.98 177.11 6.58 11.80 242.89 2.31 13.21
Fold4 0.98 170.64 6.50 11.40 235.81 2.87 12.93
Fold5 0.98 166.78 5.87 11.31 227.58 2.34 12.62
Mean 0.98 171.41 6.26 11.51 234.21 2.68 12.87
Sd 0.00 3.76 0.78 0.19 9.23 0.91 0.45
assign("prediction_method", rpart)
cv_function(df, target, 5, seedVal, rpart, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 0.97 238.36 6.25 16.01 289.60 1.31 15.96
Fold2 0.97 242.64 7.49 16.34 290.97 2.70 16.00
Fold3 0.97 244.27 7.06 16.27 293.17 1.62 15.94
Fold4 0.97 243.61 7.08 16.27 291.20 1.96 15.97
Fold5 0.97 242.75 6.64 16.46 294.13 1.59 16.31
Mean 0.97 242.33 6.90 16.27 291.81 1.83 16.04
Sd 0.00 2.31 0.47 0.16 1.82 0.53 0.15
assign("prediction_method", M5P)
cv_function(df, target, 5, seedVal, M5P, metrics_list)
R2 MAE MAPE RAE RMSE RMSPE RRSE
Fold1 1 1.59 0.07 0.11 3.46 0.07 0.19
Fold2 1 1.88 0.24 0.13 8.99 0.45 0.49
Fold3 1 1.50 0.07 0.10 3.44 0.07 0.19
Fold4 1 1.67 0.13 0.11 5.91 0.17 0.32
Fold5 1 1.82 0.10 0.12 4.40 0.09 0.24
Mean 1 1.69 0.12 0.11 5.24 0.17 0.29
Sd 0 0.16 0.07 0.01 2.33 0.16 0.13

6 Relection

The initial linear model provided a reasonable baseline for prediction so that I could compare the performance metrics of the models to determine their performances. M5P model had the lowest error measures, in other words, the highest accuracy out of all models before feature engineering because it takes into account non-linear relationships and interactions in the data. Two adjustments were then made to capture non-linear relationships: squared critic score and logged user count. The addition of the quadratic term and the log term significantly improved model performance across all three models, especially in linear regression. R-squared of linear regression models increased from 0.27 to 0.98 with the addition of the log term. The models with these added terms explain more variance, have lower prediction errors, and are more consistent. Among the models, M5P with logged user count stands out as the best-performing model, providing highly accurate and consistent predictions. Its MAE was 1.69 varying by 0.16 comparing to linear regression’s 171.41 varying by 3.76.