Lucas Young
Load Relevant Libraries
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(ggpubr)
library(stringr)
library(reshape2)
library(hexbin)
library(GGally)
Set Global Image Options
knitr::opts_chunk$set(dpi = 200)
Set Colors
Hex_Dark_Red <- '#DE5246'
Hex_Google_Blue <- '#4285F4'
Hex_Google_Red <- '#EA4335'
Hex_Google_Yellow <- '#FBBC05'
Hex_Google_Green <- '#34A853'
Load 2018 Data
Golf_2018 <- read.csv('./2018 Golf.csv', nrows = 105)
Golf_2018$Wind <- as.integer(Golf_2018$Wind)
## Warning: NAs introduced by coercion
Golf_2018$Temp <- as.integer(substr(Golf_2018$Temp,0,2))
Golf_2018$Fee.Avoided <- as.numeric((substr(Golf_2018$Fee.Avoided,2,6)))
Load 2019 Data
Golf_2019 <- read.csv('./2019 Golf.csv', nrows = 143)
Golf_2019 <- Golf_2019[,1:23]
Golf_2019$Wind <- as.integer(Golf_2019$Wind)
## Warning: NAs introduced by coercion
Golf_2019$Temp <- as.integer(substr(Golf_2019$Temp,0,2))
Golf_2019$Fee.Avoided <- as.numeric((substr(Golf_2019$Fee.Avoided,2,6)))
Load 2020 Data
Golf_2020 <- read.csv('./2020 Golf.csv', nrows = 177)
Golf_2020 <- Golf_2020[,1:23]
Golf_2020$Wind <- as.integer(Golf_2020$Wind)
## Warning: NAs introduced by coercion
Golf_2020$Temp <- as.integer(substr(Golf_2020$Temp,0,2))
Golf_2020$Fee.Avoided <- as.numeric((substr(Golf_2020$Fee.Avoided,2,6)))
Merge Data
Merged_Golf <- bind_rows(Golf_2018, Golf_2019, Golf_2020)
Merged_Golf$DateTime <- paste(Merged_Golf$Date, Merged_Golf$Time)
Merged_Golf$DateTime <- parse_date_time(Merged_Golf$DateTime, '%d-%b-%y %H:%M %p')
Merged_Golf$Date <- as.Date(strptime(Merged_Golf$Date, format = '%d-%b-%y'))
Merged_Golf$Time <- parse_date_time(Merged_Golf$Time, '%H:%M %p')
Merged_Golf$Weekday <- as.factor(weekdays(Merged_Golf$Date))
Merged_Golf$Weekday <-
factor(
Merged_Golf$Weekday, levels = c(
'Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday'
),
ordered = TRUE
)
Merged_Golf$Year <- as.factor(year(Merged_Golf$Date))
Merged_Golf$WeekNumber <- week(Merged_Golf$Date)
Merged_Golf$DayNumber <- strftime(Merged_Golf$Date, format = '%j')
Merged_Golf_Fargo <-
Merged_Golf[
(Merged_Golf$Course == 'Rose Creek Front' |
Merged_Golf$Course == 'Rose Creek Back' |
Merged_Golf$Course == 'Edgewood Front' |
Merged_Golf$Course == 'Edgewood Back' |
Merged_Golf$Course == 'Osgood' |
Merged_Golf$Course == 'Prairiewood' |
Merged_Golf$Course == 'El Zagal') &
!is.na(Merged_Golf$To.Par),
]
Merged_Golf_Fargo$Course <-
factor(Merged_Golf_Fargo$Course, levels = c(
'Rose Creek Front',
'Rose Creek Back',
'Edgewood Front',
'Edgewood Back',
'Osgood',
'Prairiewood',
'El Zagal'
),
ordered = TRUE
)
Approximate Number of Playing Partners
PartnerCounter <- function(Partners){
PartnerCount <- 0
CommaCount <- str_count(Partners, ',')
AndCount <- str_count(Partners, 'and')
AndSymbolCount <- str_count(Partners, '&')
PartnerCount <- CommaCount + AndCount + AndSymbolCount + 1
if (is.na(Partners)){
PartnerCount <- 0
}
if (Partners == 'None'){
PartnerCount <- 0
}
return(PartnerCount)
}
Merged_Golf$PartnerCount <- sapply(Merged_Golf$Partners, PartnerCounter)
Histogram
Merged_Golf %>%
filter(!is.na(GIR)) %>%
ggplot(aes(x = GIR)) +
geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
xlab(paste('\n', 'GIRs (%)')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Histogram with set breaks / labels
Hole_Scores <-
c(
Merged_Golf$Hole.1,
Merged_Golf$Hole.2,
Merged_Golf$Hole.3,
Merged_Golf$Hole.4,
Merged_Golf$Hole.5,
Merged_Golf$Hole.6,
Merged_Golf$Hole.7,
Merged_Golf$Hole.8,
Merged_Golf$Hole.9
)
Hole_Scores <- Hole_Scores[!is.na(Hole_Scores)]
Hole_Scores <- as.data.frame(Hole_Scores)
colnames(Hole_Scores) <- 'Scores'
Hole_Scores %>%
ggplot(aes(x = Scores)) +
geom_histogram(binwidth = 1, fill = Hex_Dark_Red, color = 'black') +
scale_x_continuous(
labels = c('1', '2', '3', '4', '5', '6', '7', '8', '9'),
breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9)
) +
theme_bw() +
xlab(paste('\n', 'text')) +
ylab(paste('text', '\n')) +
ggtitle(paste('text', '\n')) +
xlab('Score') +
ylab('Count') +
ggtitle('Histogram of Individual Hole Scores') +
theme(plot.title = element_text(hjust = 0.5))
Faceted histogram
Merged_Golf %>%
filter(!is.na(To.Par)) %>%
ggplot(aes(x = To.Par)) +
geom_histogram(binwidth = 2, fill = Hex_Dark_Red, color = 'black') +
facet_grid(. ~ Year) +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Number of 9-Hole Rounds', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores Relative to Par', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Histogram of multiple variables
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = To.Par, fill = Year)) +
geom_histogram(binwidth = 1, alpha = 0.5, position = 'identity') +
scale_fill_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Histogram of multiple variables stacked
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = To.Par, fill = Year)) +
geom_histogram(binwidth = 1, alpha = 0.7) +
scale_fill_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Histogram with added stat lines
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)
Merged_Golf %>%
filter(!is.na(GIR)) %>%
ggplot(aes(x = GIR)) +
geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
xlab(paste('\n', 'GIRs (%)')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5)) +
geom_vline(aes(xintercept = GIR_Mean), color = Hex_Google_Blue, size = 1) +
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)
GIR_SD <- sqrt(var(Merged_Golf$GIR, na.rm = TRUE))
Merged_Golf %>%
filter(!is.na(GIR)) %>%
ggplot(aes(x = GIR)) +
geom_histogram(binwidth = 0.11, aes(y = ..density..),fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
xlab(paste('\n','GIRs (%)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5)) +
stat_function(fun = dnorm, args = list(mean = GIR_Mean, sd = GIR_SD), color = Hex_Google_Blue, size = 1.0) +
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = To.Par, color = Year)) +
geom_freqpoly(binwidth = 1) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Frequency Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Overlayed density plot
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = To.Par, fill = Year)) +
geom_density(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = To.Par, color = Year)) +
geom_density(alpha = 1) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Faceted density plot
Merged_Golf_Fargo[
(Merged_Golf_Fargo$Year == 2020) &
(Merged_Golf_Fargo$Course == 'Rose Creek Front' |
Merged_Golf_Fargo$Course == 'Rose Creek Back' |
Merged_Golf_Fargo$Course == 'Edgewood Front' |
Merged_Golf_Fargo$Course == 'Edgewood Back'),
] %>%
ggplot(aes(x = To.Par, fill = Course)) +
facet_grid(. ~ Course) +
geom_density(alpha = 0.7) +
xlim(0, 15) +
scale_fill_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of Par 5 Course Scores (2020)', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = 'none'
)
Density plot with distribution lines
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)
GIR_SD <- sqrt(var(Merged_Golf$GIR, na.rm = TRUE))
Merged_Golf %>%
filter(!is.na(GIR)) %>%
ggplot(aes(x = GIR)) +
geom_density(fill = Hex_Dark_Red) +
theme_bw() +
xlab(paste('\n','GIRs (%)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5)) +
stat_function(fun = dnorm, args = list(mean = GIR_Mean, sd = GIR_SD), color = Hex_Google_Blue, size = 1.0) +
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))
Merged_Golf_Fargo %>%
ggplot(aes(x = Course)) +
geom_bar(fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
size = 10,
hjust = 1
)
) +
xlab(paste('\n', 'Course')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Number of 9-Hole Rounds by Course', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf_Fargo %>%
ggplot(aes(x = Course)) +
geom_bar(fill = Hex_Dark_Red, color = 'black') +
facet_grid(. ~ Year) +
theme_bw() +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
size = 10,
hjust = 1,
color = 'black'
)
) +
xlab(paste('\n', 'Course')) +
ylab(paste('Count', '\n')) +
ggtitle(paste('Number of 9-Hole Rounds by Course', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Faceted histogram with correlation stats
Merged_Golf %>%
filter(!is.na(To.Par)) %>%
ggplot(aes(x = Date, y = To.Par)) +
geom_point(color = Hex_Dark_Red, size = 1.5) +
facet_grid(. ~ Year, scale = 'free_x') +
geom_smooth(method = 'lm', formula = y ~ x, se = TRUE, color = Hex_Google_Blue) +
stat_cor(
method = 'pearson',
color = 'black',
size = 4,
label.x.npc = 0.97,
label.y.npc = 0.90,
hjust = 1
) +
theme_bw() +
xlab(paste('\n', 'Month')) +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('9-Hole Scores Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf_Fargo %>%
filter(
!is.na(To.Par),
Year == 2020
) %>%
ggplot(aes(x = Date, y = To.Par)) +
geom_point(color = Hex_Dark_Red, size = 2) +
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = FALSE) +
theme_bw() +
xlab(paste('\n', 'Month')) +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('9-Hole Scores Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf %>%
filter(
!is.na(To.Par),
!is.na(WeekNumber)
) %>%
ggplot(aes(x = WeekNumber, y = To.Par, color = Year)) +
geom_point(size = 2) +
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = FALSE) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Week #', '\n')) +
ggtitle(paste('Score Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf %>%
filter(
!is.na(To.Par),
!is.na(WeekNumber)
) %>%
ggplot(aes(x = WeekNumber, y = To.Par, color = Year)) +
geom_point(size = 2, alpha = 0.3) +
#geom_line(size = 1, alpha = 0.3) +
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = TRUE) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) +
ylab(paste('Week #', '\n')) +
ggtitle(paste('Score Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Basic box plot
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(y = To.Par, fill = Year)) +
geom_boxplot(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Box plot with specified order
Merged_Golf$Weekday <-
factor(
Merged_Golf$Weekday,
levels = c(
'Monday',
'Tuesday',
'Wednesday',
'Thursday',
'Friday',
'Saturday',
'Sunday'
),
ordered = TRUE
)
Merged_Golf %>%
filter(
!is.na(To.Par),
!is.na(Weekday),
Year == 2020
) %>%
ggplot(aes(Weekday, To.Par, fill = Weekday)) +
geom_boxplot(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of Scores by Weekday (2020)', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = 'none'
)
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(width = 1.0, alpha = 0.5) +
geom_boxplot(width = 0.2, color = 'black', alpha = 1.0) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = 'none'
)
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_dotplot(binaxis = 'y', stackdir = 'center', binwidth = 0.35) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.x=element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Merged_Golf %>%
filter(!is.na(Total)) %>%
ggplot(aes(x = Year, y = To.Par)) +
geom_jitter(aes(color = Year), alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
mydata <- mtcars[, c(1,3,4,5,6,7)]
cormat <- round(cor(mydata),2)
melted_cormat <- melt(cormat)
melted_cormat %>%
ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
# Get lower triangle of the correlation matrix
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
PairsPlot <- Merged_Golf[,c('To.Par', 'FIR', 'GIR')]
ggpairs(PairsPlot, lower = list(continuous = wrap("smooth", alpha = 0.5, size = 1))) +
theme_bw()
Merged_Golf %>%
filter(
!is.na(To.Par),
!is.na(GIR),
Year != 2018
) %>%
ggplot(aes(x = GIR, y = To.Par)) +
geom_hex(binwidth = c(0.1,1)) +
xlim(0, 1) +
theme_bw() +
xlab(paste('\n', 'GIRs (%)')) +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Score vs GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
Merged_Golf %>%
filter(
!is.na(To.Par),
!is.na(GIR),
Year != 2018
) %>%
ggplot(aes(x = GIR, y = To.Par)) +
geom_hex(binwidth = c(0.1,1)) +
facet_grid(. ~ Year, scale = 'free_x') +
scale_fill_viridis_c() +
xlim(0, 1) +
theme_bw() +
xlab(paste('\n', 'GIRs (%)')) +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Score vs GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 1 rows containing missing values (geom_hex).
Merged_Golf %>%
filter(!is.na(GIR)) %>%
ggplot(aes(sample = GIR)) +
geom_qq(color = Hex_Dark_Red) +
stat_qq_line() +
theme_bw() +
xlab(paste('\n', 'Theoretical')) +
ylab(paste('Sample', '\n')) +
ggtitle(paste('GIR Q-Q Plot', '\n')) +
theme(plot.title = element_text(hjust = 0.5))