This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
It is tough to make good predictions. The numerous factors or variables, independent and dependent, involved in many sporting events contribute to the unpredictability. However, using carefully selected variables, it is still possible to make marketing promotions more accountable.
The goal of this case study is to analyze whether bobblehead promotions increase attendance at Dodgers home games. Using the fitted predictive model, we can predict the attendance for a game in the forthcoming season, both with and without a bobblehead promotion.
The motivation of this case study is to design a predictive model, and report any interesting findings to support critical business decision making.
Important tip: before performing the analysis, please make sure to set your working directory to the folder that contains DodgersData.csv.
Load the required libraries and the data
# Optional housekeeping, left disabled: clear the workspace and point R at a
# machine-specific working directory.
#rm(list=ls())# clear memory
#setwd("C:/Users/zxu3/Documents/R/regression")
library(lattice) # graphics package
library(ggplot2) # graphics package used for the plots below
# Base R alternative for creating the data frame if you import the data from
# your own drive:
#DodgersData <- read.csv("DodgersData.csv")
library(readr) # attached before the read_csv() call below
# A line that starts with # is a comment: R ignores it, so you can use it to
# take notes or add descriptions.
# Read the Dodgers data set into the R session. The relative path means the
# file is looked up in the current working directory.
DodgersData <- read_csv("DodgersData.csv")
## Rows: 81 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): month, day_of_week, opponent, skies, day_night, cap, shirt, firewor...
## dbl (3): day, attend, temp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Alternatively, you can read the data from my GitHub website.
# Show the structure of the data frame: each column's name, type, and first
# few values.
str(DodgersData)
## spc_tbl_ [81 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ month : chr [1:81] "APR" "APR" "APR" "APR" ...
## $ day : num [1:81] 10 11 12 13 14 15 23 24 25 27 ...
## $ attend : num [1:81] 56000 29729 28328 31601 46549 ...
## $ day_of_week: chr [1:81] "Tuesday" "Wednesday" "Thursday" "Friday" ...
## $ opponent : chr [1:81] "Pirates" "Pirates" "Pirates" "Padres" ...
## $ temp : num [1:81] 67 58 57 54 57 65 60 63 64 66 ...
## $ skies : chr [1:81] "Clear" "Cloudy" "Cloudy" "Cloudy" ...
## $ day_night : chr [1:81] "Day" "Night" "Night" "Night" ...
## $ cap : chr [1:81] "NO" "NO" "NO" "NO" ...
## $ shirt : chr [1:81] "NO" "NO" "NO" "NO" ...
## $ fireworks : chr [1:81] "NO" "NO" "NO" "YES" ...
## $ bobblehead : chr [1:81] "NO" "NO" "NO" "NO" ...
## - attr(*, "spec")=
## .. cols(
## .. month = col_character(),
## .. day = col_double(),
## .. attend = col_double(),
## .. day_of_week = col_character(),
## .. opponent = col_character(),
## .. temp = col_double(),
## .. skies = col_character(),
## .. day_night = col_character(),
## .. cap = col_character(),
## .. shirt = col_character(),
## .. fireworks = col_character(),
## .. bobblehead = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first six rows (the default for head()).
head(DodgersData, n = 6L)
## # A tibble: 6 × 12
## month day attend day_of_week opponent temp skies day_night cap shirt
## <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 APR 10 56000 Tuesday Pirates 67 Clear Day NO NO
## 2 APR 11 29729 Wednesday Pirates 58 Cloudy Night NO NO
## 3 APR 12 28328 Thursday Pirates 57 Cloudy Night NO NO
## 4 APR 13 31601 Friday Padres 54 Cloudy Night NO NO
## 5 APR 14 46549 Saturday Padres 57 Cloudy Night NO NO
## 6 APR 15 38359 Sunday Padres 65 Clear Day NO NO
## # ℹ 2 more variables: fireworks <chr>, bobblehead <chr>
# Evaluate the factor levels for day_of_week
# levels(DodgersData$day_of_week)
# Evaluate the factor levels for month
# NOTE(review): month is stored as a character column (see the str() output
# above), not a factor, so levels() returns NULL here. To list the months,
# convert first, e.g. levels(factor(DodgersData$month)), or use
# unique(DodgersData$month).
levels(DodgersData$month)
## NULL
# Preview the first ten rows of the data frame
head(DodgersData, n = 10L)
## # A tibble: 10 × 12
## month day attend day_of_week opponent temp skies day_night cap shirt
## <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 APR 10 56000 Tuesday Pirates 67 Clear Day NO NO
## 2 APR 11 29729 Wednesday Pirates 58 Cloudy Night NO NO
## 3 APR 12 28328 Thursday Pirates 57 Cloudy Night NO NO
## 4 APR 13 31601 Friday Padres 54 Cloudy Night NO NO
## 5 APR 14 46549 Saturday Padres 57 Cloudy Night NO NO
## 6 APR 15 38359 Sunday Padres 65 Clear Day NO NO
## 7 APR 23 26376 Monday Braves 60 Cloudy Night NO NO
## 8 APR 24 44014 Tuesday Braves 63 Cloudy Night NO NO
## 9 APR 25 26345 Wednesday Braves 64 Cloudy Night NO NO
## 10 APR 27 44807 Friday Nationals 66 Clear Night NO NO
## # ℹ 2 more variables: fireworks <chr>, bobblehead <chr>
# Strip plot of attendance (in thousands) by opponent / visiting team,
# with points colored by day vs. night game.
ggplot(
  data = DodgersData,
  mapping = aes(x = attend / 1000, y = opponent, color = day_night)
) +
  geom_point() +
  labs(
    title = "Dodgers Attendance By Opponent",
    x = "Attendance (Thousands)",
    y = "Opponent (Visiting Team)"
  ) +
  theme(
    plot.title = element_text(
      lineheight = 3,
      face = "bold",
      color = "black",
      size = 10
    )
  )
## What is the median value of attendance?
medianattend <- median(DodgersData[["attend"]])
print(medianattend)
## [1] 40284
# Count the games that had a bobblehead promotion.
promotions <- sum(DodgersData[["bobblehead"]] == "YES")
print(promotions)
## [1] 11
## In-class exercise: how many night games did the Dodgers have? Review your in-class notes, then write your code and answer below.
Number_of_nights <- sum(DodgersData[["day_night"]] == "Night")
print(Number_of_nights)
## [1] 66