Prompt This dataset contains information on Nintendo Switch video games. It contains three predictor (independent) variables: the categorical variables of genre (not sorted in any specific order) and ESRB (Entertainment Software Rating Board) rating, and the continuous variable of critic rating (scale of 0-100; 100 being the highest). These variables will be used to see whether genre, ESRB rating, and critic rating predict the global sales (dependent variable) of video games on the Nintendo Switch.
Variables: sales - numeric; global units sold. critic - numeric; Metacritic rating (compilation of ratings from multiple news sites). genre - numeric; genre of game. Quantified as follows: party-1, fitness-2, simulation-3, puzzle-4, strategy-5, sports-6, racing-7, RPG-8, platformer-9, action-10, fighting-11. rating - numeric; ESRB rating of game (age rating). Quantified as follows: E-1, E10+-2, T-3, M-4.
Used to answer the following questions: 1) Do any of the independent variables (critic ratings, genre, and age ratings) correlate with (i.e., predict) video game global sales? 2) Does a model with all independent variables predict global sales better than models with fewer predictors?
# Load the Nintendo Switch games data into `dat`.
# NOTE(review): absolute, user-specific path -- the script only runs on a
# machine where the file exists at exactly this location.
dat <- read.csv(file = "/Users/coleohana/Desktop/DAP_Nintendo.csv")
Descriptive Statistics and Assumptions
# Descriptive statistics (with histograms) for the three predictors and
# the outcome.
# NOTE: genre and rating are numeric codes for categories, so their mean,
# skewness and kurtosis describe the coding rather than a measured quantity.
desc <- descriptives(
  data = dat,
  vars = c("genre", "rating", "critic", "sales"),
  sd = TRUE,
  range = TRUE,
  skew = TRUE,
  kurt = TRUE,
  hist = TRUE
)
desc
##
## DESCRIPTIVES
##
## Descriptives
## ──────────────────────────────────────────────────────────────────────────────
## genre rating critic sales
## ──────────────────────────────────────────────────────────────────────────────
## N 58 58 58 58
## Missing 0 0 0 0
## Mean 6.586207 1.724138 80.33621 6.940517
## Median 8.000000 1.000000 80.50000 2.900000
## Standard deviation 3.413258 0.8541561 8.355788 9.273877
## Range 10 3 41.00000 42.35000
## Minimum 1 1 56.00000 1.000000
## Maximum 11 4 97.00000 43.35000
## Skewness -0.4357040 0.7474220 -0.6082925 2.256105
## Std. error skewness 0.3137199 0.3137199 0.3137199 0.3137199
## Kurtosis -1.364371 -0.7125557 0.8135964 5.119000
## Std. error kurtosis 0.6181358 0.6181358 0.6181358 0.6181358
## ──────────────────────────────────────────────────────────────────────────────
# Scatterplots of global sales against each predictor, each overlaid with
# its simple least-squares regression line.
# abline() is issued as its own statement after plot(): when it is passed as
# the third positional argument of plot() it binds to `type`, and the line is
# only drawn as a side effect of lazy argument evaluation.
# In the lm() formula the outcome (sales) goes on the left of `~`, the
# predictor on the right.
plot(dat$genre, dat$sales)
abline(lm(sales ~ genre, data = dat))

plot(dat$critic, dat$sales)
abline(lm(sales ~ critic, data = dat))

plot(dat$rating, dat$sales)
abline(lm(sales ~ rating, data = dat))
Correlations
# Pearson correlation matrix with significance flags.
# The outcome (sales) is listed together with the predictors so that every
# predictor-outcome correlation shows up in the same table.
cortable <- corrMatrix(
  data = dat,
  vars = c("genre", "rating", "critic", "sales"),
  flag = TRUE
)
cortable
##
## CORRELATION MATRIX
##
## Correlation Matrix
## ──────────────────────────────────────────────────────────────────────────────
## genre rating critic sales
## ──────────────────────────────────────────────────────────────────────────────
## genre Pearson's r —
## p-value —
##
## rating Pearson's r 0.2490015 —
## p-value 0.0594481 —
##
## critic Pearson's r 0.3995709 0.1078600 —
## p-value 0.0018878 0.4202945 —
##
## sales Pearson's r 0.1009111 -0.2345690 0.4690385 —
## p-value 0.4510215 0.0763356 0.0002041 —
## ──────────────────────────────────────────────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001
Simple Regression
# Simple regression: sales predicted by genre alone, reporting the overall
# model test and the standardized estimate.
model1 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("genre"),
  blocks = list("genre"),
  stdEst = TRUE,
  modelTest = TRUE
)
model1
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ────────────────────────────────────────────────────────────────────────────
## Model R R² F df1 df2 p
## ────────────────────────────────────────────────────────────────────────────
## 1 0.1009111 0.01018304 0.5761170 1 56 0.4510215
## ────────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate
## ────────────────────────────────────────────────────────────────────────────────────
## Intercept 5.1347310 2.6747006 1.9197405 0.0599904
## genre 0.2741770 0.3612232 0.7590237 0.4510215 0.1009111
## ────────────────────────────────────────────────────────────────────────────────────
# Simple regression: sales predicted by critic rating alone, reporting the
# overall model test and the standardized estimate.
model2 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("critic"),
  blocks = list("critic"),
  stdEst = TRUE,
  modelTest = TRUE
)
model2
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ──────────────────────────────────────────────────────────────────────────
## Model R R² F df1 df2 p
## ──────────────────────────────────────────────────────────────────────────
## 1 0.4690385 0.2199971 15.79461 1 56 0.0002041
## ──────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ───────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate
## ───────────────────────────────────────────────────────────────────────────────────────
## Intercept -34.8804174 10.5787849 -3.297205 0.0016996
## critic 0.5205739 0.1309869 3.974243 0.0002041 0.4690385
## ───────────────────────────────────────────────────────────────────────────────────────
# Simple regression: sales predicted by ESRB rating alone, reporting the
# overall model test and the standardized estimate.
model3 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("rating"),
  blocks = list("rating"),
  stdEst = TRUE,
  modelTest = TRUE
)
model3
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ───────────────────────────────────────────────────────────────────────────
## Model R R² F df1 df2 p
## ───────────────────────────────────────────────────────────────────────────
## 1 0.2345690 0.05502262 3.260677 1 56 0.0763356
## ───────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ───────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate
## ───────────────────────────────────────────────────────────────────────────────────
## Intercept 11.331551 2.709154 4.182690 0.0001024
## rating -2.546799 1.410395 -1.805735 0.0763356 -0.2345690
## ───────────────────────────────────────────────────────────────────────────────────
Multiple Regression
# Multiple regression: sales predicted by genre and critic rating, entered
# together in a single block. Adjusted R-squared and confidence intervals
# for the standardized estimates are requested in addition to the model test.
model4 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("genre", "critic"),
  blocks = list(c("genre", "critic")),
  r2Adj = TRUE,
  modelTest = TRUE,
  stdEst = TRUE,
  ciStdEst = TRUE
)
model4
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ─────────────────────────────────────────────────────────────────────────────────────────
## Model R R² Adjusted R² F df1 df2 p
## ─────────────────────────────────────────────────────────────────────────────────────────
## 1 0.4784366 0.2289015 0.2008616 8.163409 2 55 0.0007862
## ─────────────────────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate Lower Upper
## ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Intercept -36.7057265 10.8577375 -3.3806055 0.0013367
## genre -0.2796836 0.3509437 -0.7969472 0.4289086 -0.1029378 -0.3617905 0.1559149
## critic 0.5662241 0.1433571 3.9497461 0.0002247 0.5101694 0.2513167 0.7690221
## ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
# Multiple regression: sales predicted by genre and ESRB rating, entered
# together in a single block, with adjusted R-squared and confidence
# intervals for the standardized estimates.
model5 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("genre", "rating"),
  blocks = list(c("genre", "rating")),
  r2Adj = TRUE,
  modelTest = TRUE,
  stdEst = TRUE,
  ciStdEst = TRUE
)
model5
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ──────────────────────────────────────────────────────────────────────────────────────────
## Model R R² Adjusted R² F df1 df2 p
## ──────────────────────────────────────────────────────────────────────────────────────────
## 1 0.2865013 0.08208298 0.04870418 2.459135 2 55 0.0948624
## ──────────────────────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate Lower Upper
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Intercept 9.0838180 3.2210181 2.820170 0.0066621
## genre 0.4614854 0.3624195 1.273346 0.2082508 0.1698501 -0.09746701 0.437167230
## rating -3.0059888 1.4482498 -2.075601 0.0426198 -0.2768619 -0.54417906 -0.009544813
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
# Multiple regression: sales predicted by critic rating and ESRB rating,
# entered together in a single block, with adjusted R-squared and confidence
# intervals for the standardized estimates.
model6 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("critic", "rating"),
  blocks = list(c("critic", "rating")),
  r2Adj = TRUE,
  modelTest = TRUE,
  stdEst = TRUE,
  ciStdEst = TRUE
)
model6
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ─────────────────────────────────────────────────────────────────────────────────────────
## Model R R² Adjusted R² F df1 df2 p
## ─────────────────────────────────────────────────────────────────────────────────────────
## 1 0.5497910 0.3022702 0.2768982 11.91354 2 55 0.0000503
## ─────────────────────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate Lower Upper
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Intercept -32.2542151 10.1484072 -3.178254 0.0024322
## critic 0.5551125 0.1257411 4.414726 0.0000477 0.5001578 0.2731134 0.72720221
## rating -3.1325213 1.2300633 -2.546634 0.0137058 -0.2885160 -0.5155604 -0.06147162
## ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
# Full model: sales predicted by genre, critic rating and ESRB rating,
# entered together in a single block, with adjusted R-squared and confidence
# intervals for the standardized estimates.
model7 <- linReg(
  data = dat,
  dep = "sales",
  covs = c("genre", "critic", "rating"),
  blocks = list(c("genre", "critic", "rating")),
  r2Adj = TRUE,
  modelTest = TRUE,
  stdEst = TRUE,
  ciStdEst = TRUE
)
model7
##
## LINEAR REGRESSION
##
## Model Fit Measures
## ─────────────────────────────────────────────────────────────────────────────────────────
## Model R R² Adjusted R² F df1 df2 p
## ─────────────────────────────────────────────────────────────────────────────────────────
## 1 0.5506277 0.3031909 0.2644793 7.832039 3 54 0.0001976
## ─────────────────────────────────────────────────────────────────────────────────────────
##
##
## MODEL SPECIFIC RESULTS
##
## MODEL 1
##
## Model Coefficients - sales
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Predictor Estimate SE t p Stand. Estimate Lower Upper
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Intercept -32.92117103 10.5353359 -3.1248335 0.0028611
## genre -0.09232161 0.3456218 -0.2671175 0.7903962 -0.03397905 -0.2890125 0.22105441
## critic 0.56933388 0.1375387 4.1394449 0.0001229 0.51297134 0.2645212 0.76142147
## rating -3.05566454 1.2735102 -2.3994034 0.0199021 -0.28143724 -0.5165989 -0.04627557
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Visualization
# Scatterplot of sales against the genre code with a fitted linear trend and
# its confidence band. The stored object carries no theme; theme_minimal()
# is applied only when the plot is printed.
plot1 <- ggplot(data = dat, mapping = aes(x = genre, y = sales)) +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, fullrange = TRUE) +
  labs(x = "genre", y = "sales", title = "Genre predicting sales")
plot1 + theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
# Scatterplot of sales against critic rating with a fitted linear trend and
# its confidence band. The stored object carries no theme; theme_minimal()
# is applied only when the plot is printed.
plot2 <- ggplot(data = dat, mapping = aes(x = critic, y = sales)) +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, fullrange = TRUE) +
  labs(x = "critic", y = "sales", title = "Critic ratings predicting sales")
plot2 + theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
# Scatterplot of sales against the ESRB rating code with a fitted linear
# trend and its confidence band. The stored object carries no theme;
# theme_minimal() is applied only when the plot is printed.
plot3 <- ggplot(data = dat, mapping = aes(x = rating, y = sales)) +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, fullrange = TRUE) +
  labs(
    x = "rating",
    y = "sales",
    title = "ESRB (age rating) predicting sales"
  )
plot3 + theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
Visualization
# Multiple regression of global sales on genre, age rating and critic rating,
# fitted with lm() and kept in `fit1` so the fitted model can be reused for
# the prediction plot.
fit1 <- lm(formula = sales ~ genre + rating + critic, data = dat)
summary(fit1)
##
## Call:
## lm(formula = sales ~ genre + rating + critic, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.505 -5.501 -2.488 3.637 27.594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -32.92117 10.53534 -3.125 0.002861 **
## genre -0.09232 0.34562 -0.267 0.790396
## rating -3.05566 1.27351 -2.399 0.019902 *
## critic 0.56933 0.13754 4.139 0.000123 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.954 on 54 degrees of freedom
## Multiple R-squared: 0.3032, Adjusted R-squared: 0.2645
## F-statistic: 7.832 on 3 and 54 DF, p-value: 0.0001976
# Predicted sales from fit1 across the three predictors, kept for plotting.
# NOTE(review): `full.data` and `pretty` are not accepted by every ggeffects
# release -- confirm the installed version honours them.
model_p <- ggpredict(
  fit1,
  terms = c("genre", "rating", "critic"),
  pretty = FALSE,
  full.data = TRUE
)
model_p
# Draw the predicted values as points with a straight-line fit through them.
# NOTE(review): the x aesthetic is a single column of model_p (presumably the
# first term, genre) although the axis label names all three predictors --
# confirm this is intended.
plot <- ggplot(data = model_p, mapping = aes(x = x, y = predicted)) +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE) +
  labs(
    x = "genre + rating + critic",
    y = "sales",
    title = "Plot of Model Predicting Global Sales"
  ) +
  geom_point() +
  theme_minimal()
plot
## `geom_smooth()` using formula 'y ~ x'