Load Relevant Libraries
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(ggpubr)
library(stringr)
library(reshape2)
library(hexbin)
library(GGally)
library(DT)
library(forcats)
Set Global Image Options
# Render the figures of every knitr chunk at 200 dpi.
knitr::opts_chunk$set(
  dpi = 200
)
Set Colors
# Hex colour constants reused by the plots in this document.
Hex_Google_Blue <- "#4285F4"
Hex_Google_Red <- "#EA4335"
Hex_Google_Yellow <- "#FBBC05"
Hex_Google_Green <- "#34A853"
Hex_Dark_Red <- "#DE5246"
Load 2018 Data
# Read up to 105 data rows of the 2018 score sheet, drop the free-text
# columns, switch the dotted column names to underscores, parse the date
# and weather columns, and add NA placeholders for FIR, GIR and
# Putts_Per_Hole.
Golf_2018 <-
  read.csv("./2018 Golf.csv", nrows = 105) %>%
  select(-c(Partners, Notes, Fee.Avoided)) %>%
  rename(
    Hole_1 = Hole.1,
    Hole_2 = Hole.2,
    Hole_3 = Hole.3,
    Hole_4 = Hole.4,
    Hole_5 = Hole.5,
    Hole_6 = Hole.6,
    Hole_7 = Hole.7,
    Hole_8 = Hole.8,
    Hole_9 = Hole.9,
    To_Par = To.Par
  ) %>%
  mutate(
    Date = dmy(Date),
    # Strip the unit suffix before converting to numeric.
    Wind = as.numeric(gsub("mph", "", Wind)),
    Temp = as.numeric(gsub("°F", "", Temp)),
    FIR = NA,
    GIR = NA,
    Putts_Per_Hole = NA
  )
# Interactive table of the 2018 rounds; the Wind and Temp cells get a
# background colour chosen by interval.
datatable(
  Golf_2018 %>%
    select(Date, Course, Tee, Total, To_Par, Wind, Temp) %>%
    mutate(
      # `Date` comes from dmy() and has no time of day, so only the
      # calendar date is formatted (a time suffix would always read
      # 00:00:00).
      Date = format(Date, "%d %b %Y"),
      # NOTE(review): `format = "e"` looks like a formatC() argument;
      # confirm whether format() honours it here.
      Wind = format(Wind, format = "e", digits = 2, nsmall = 2),
      # round(x, -1) rounds to the nearest ten before formatting.
      Temp = format(round(Temp, -1), nsmall = 1)
    ),
  rownames = FALSE,
  selection = "none",
  options = list(
    columnDefs = list(
      list(className = "dt-left", targets = c(0:2)),
      list(className = "dt-right", targets = c(3:6))
    )
  )
) %>%
  # Three colours for the intervals split at 10 and 20.
  formatStyle(
    "Wind",
    backgroundColor = styleInterval(
      c(10, 20),
      c("#5cdb5c", "#ffff00", "#ff0021")
    )
  ) %>%
  # Three colours for the intervals split at 75 and 85.
  formatStyle(
    "Temp",
    backgroundColor = styleInterval(
      c(75, 85),
      c("#5cdb5c", "#ffff00", "#ff0021")
    )
  )
Load 2019 Data
# Read up to 143 data rows of the 2019 score sheet, drop the free-text,
# Time and X, X.1 ... X.7 columns, switch the dotted column names to
# underscores, and parse the date and weather columns.
Golf_2019 <-
  read.csv("./2019 Golf.csv", nrows = 143) %>%
  select(
    -c(
      Partners, Notes, Fee.Avoided, Time,
      X, X.1, X.2, X.3, X.4, X.5, X.6, X.7
    )
  ) %>%
  rename(
    Hole_1 = Hole.1,
    Hole_2 = Hole.2,
    Hole_3 = Hole.3,
    Hole_4 = Hole.4,
    Hole_5 = Hole.5,
    Hole_6 = Hole.6,
    Hole_7 = Hole.7,
    Hole_8 = Hole.8,
    Hole_9 = Hole.9,
    To_Par = To.Par,
    Putts_Per_Hole = Putts.Per.Hole
  ) %>%
  mutate(
    Date = dmy(Date),
    # Strip the unit suffix before converting to numeric.
    Wind = as.numeric(gsub("mph", "", Wind)),
    Temp = as.numeric(gsub("°F", "", Temp))
  )
Load 2020 Data
# Read up to 177 data rows of the 2020 score sheet, drop the free-text
# and Time columns, switch the dotted column names to underscores, and
# parse the date and weather columns.
Golf_2020 <-
  read.csv("./2020 Golf.csv", nrows = 177) %>%
  select(-c(Partners, Notes, Fee.Avoided, Time)) %>%
  rename(
    Hole_1 = Hole.1,
    Hole_2 = Hole.2,
    Hole_3 = Hole.3,
    Hole_4 = Hole.4,
    Hole_5 = Hole.5,
    Hole_6 = Hole.6,
    Hole_7 = Hole.7,
    Hole_8 = Hole.8,
    Hole_9 = Hole.9,
    To_Par = To.Par,
    Putts_Per_Hole = Putts.Per.Hole
  ) %>%
  mutate(
    Date = dmy(Date),
    # Strip the unit suffix before converting to numeric.
    Wind = as.numeric(gsub("mph", "", Wind)),
    Temp = as.numeric(gsub("°F", "", Temp))
  )
Merge Data
# Stack the three seasons and derive the calendar fields used for
# grouping: week number, year (factor), course (factor) and weekday
# (factor with Monday-to-Sunday moved to the front of the levels).
Merged_Golf <-
  rbind(Golf_2018, Golf_2019, Golf_2020) %>%
  mutate(
    Week_Number = week(Date),
    Year = as.factor(year(Date)),
    Course = as.factor(Course),
    Weekday = fct_relevel(
      weekdays(Date),
      "Monday",
      "Tuesday",
      "Wednesday",
      "Thursday",
      "Friday",
      "Saturday",
      "Sunday"
    )
  )
# Subset of Merged_Golf: rounds with a recorded To_Par played on one of
# the seven listed courses, with those courses moved to the front of the
# Course factor levels in the listed order.
Merged_Golf_Fargo <-
  Merged_Golf %>%
  filter(!is.na(To_Par)) %>%
  filter(
    Course %in% c(
      "Rose Creek Front",
      "Rose Creek Back",
      "Edgewood Front",
      "Edgewood Back",
      "Osgood",
      "Prairiewood",
      "El Zagal"
    )
  ) %>%
  mutate(
    Course = fct_relevel(
      Course,
      "Rose Creek Front",
      "Rose Creek Back",
      "Edgewood Front",
      "Edgewood Back",
      "Osgood",
      "Prairiewood",
      "El Zagal"
    )
  )
Histogram
# Histogram of the GIR column (rows without a GIR value are excluded).
Merged_Golf %>%
  filter(!is.na(GIR)) %>%
  ggplot(mapping = aes(x = GIR)) +
  geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, colour = "black") +
  labs(
    x = paste("\n", "GIRs (%)"),
    y = paste("Count", "\n"),
    title = "Histogram of GIRs"
  ) +
  theme_bw()
Histogram with set breaks / labels
# Pool the scores from the nine hole columns (Hole_1 ... Hole_9, in that
# order) into one vector, discard missing values, and keep the result as
# a one-column data frame named `Scores`.
Hole_Scores <- unlist(
  Merged_Golf[paste0("Hole_", 1:9)],
  use.names = FALSE
)
Hole_Scores <- data.frame(Scores = Hole_Scores[!is.na(Hole_Scores)])

# One bar per score value, with an axis tick on each of the values 1-9.
# Only the final axis labels and title are set here; placeholder label
# calls would simply be overridden by them.
Hole_Scores %>%
  ggplot(aes(x = Scores)) +
  geom_histogram(binwidth = 1, fill = Hex_Dark_Red, color = "black") +
  scale_x_continuous(
    breaks = 1:9,
    labels = as.character(1:9)
  ) +
  theme_bw() +
  xlab(paste("\n", "Score")) +
  ylab(paste("Count", "\n")) +
  ggtitle("Histogram of Individual Hole Scores")
Faceted histogram
# To_Par histograms with a bin width of 2, one panel per Year.
Merged_Golf %>%
  filter(!is.na(To_Par)) %>%
  ggplot(mapping = aes(x = To_Par)) +
  geom_histogram(binwidth = 2, fill = Hex_Dark_Red, colour = "black") +
  facet_grid(cols = vars(Year)) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Number of 9-Hole Rounds", "\n"),
    title = "Histogram of 9-Hole Round Scores Relative to Par"
  ) +
  theme_bw()
Histogram of multiple variables
# Semi-transparent To_Par histograms per Year drawn on top of each other
# (position = "identity" disables stacking).
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = To_Par, fill = Year)) +
  geom_histogram(position = "identity", binwidth = 1, alpha = 0.5) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Count", "\n"),
    title = "Histogram of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw()
Histogram of multiple variables stacked
# To_Par histogram filled by Year, using geom_histogram's default
# position.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = To_Par, fill = Year)) +
  geom_histogram(alpha = 0.7, binwidth = 1) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Count", "\n"),
    title = "Histogram of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw()
Histogram with added stat lines
# Sample mean of GIR, ignoring missing values.
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)

# GIR histogram with a vertical line at the sample mean.
Merged_Golf %>%
  filter(!is.na(GIR)) %>%
  ggplot(aes(x = GIR)) +
  geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, color = "black") +
  theme_bw() +
  xlab(paste("\n", "GIRs (%)")) +
  ylab(paste("Count", "\n")) +
  ggtitle("Histogram of GIRs") +
  # The mean is a single constant, so it is set as a parameter (one line)
  # instead of being mapped through aes() (one line per data row). Line
  # thickness uses `linewidth`, the line aesthetic in ggplot2 >= 3.4.
  geom_vline(xintercept = GIR_Mean, color = Hex_Google_Blue, linewidth = 1) +
  labs(caption = paste("\n", "Blue line: Sample mean"))
# Sample mean and standard deviation of GIR, ignoring missing values.
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)
GIR_SD <- sd(Merged_Golf$GIR, na.rm = TRUE)

# Density-scaled GIR histogram overlaid with the normal density that has
# the sample mean and standard deviation.
Merged_Golf %>%
  filter(!is.na(GIR)) %>%
  ggplot(aes(x = GIR)) +
  # after_stat(density) puts the bars on the density scale so they are
  # comparable with dnorm().
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.11,
    fill = Hex_Dark_Red,
    color = "black"
  ) +
  theme_bw() +
  xlab(paste("\n", "GIRs (%)")) +
  ylab(paste("Density", "\n")) +
  ggtitle("Density of GIRs") +
  stat_function(
    fun = dnorm,
    args = list(mean = GIR_Mean, sd = GIR_SD),
    color = Hex_Google_Blue,
    linewidth = 1.0
  ) +
  # The blue curve is a fitted normal density, not a mean marker.
  labs(
    caption = paste("\n", "Blue line: Normal density (sample mean and SD)")
  ) +
  theme(plot.caption = element_text(hjust = 0))
# Frequency polygons of To_Par (bin width 1), one coloured line per Year.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = To_Par, colour = Year)) +
  geom_freqpoly(binwidth = 1) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Count", "\n"),
    title = "Frequency Plot of 9-Hole Round Scores"
  ) +
  scale_color_gdocs() +
  theme_bw() +
  theme(plot.caption = element_text(hjust = 0))
Overlaid density plot
# Kernel density of To_Par per Year, drawn as semi-transparent filled
# areas.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = To_Par, fill = Year)) +
  geom_density(alpha = 0.5) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Density", "\n"),
    title = "Density Plot of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw()

# Same densities, with Year mapped to the outline colour instead of the
# fill.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = To_Par, colour = Year)) +
  geom_density(alpha = 1) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Density", "\n"),
    title = "Density Plot of 9-Hole Round Scores"
  ) +
  scale_color_gdocs() +
  theme_bw()
Faceted density plot
# 2020 To_Par densities for the four Rose Creek / Edgewood nines, one
# panel per course, x axis limited to 0-15, legend hidden.
Merged_Golf_Fargo %>%
  filter(Year == 2020) %>%
  filter(
    Course %in% c(
      "Rose Creek Front",
      "Rose Creek Back",
      "Edgewood Front",
      "Edgewood Back"
    )
  ) %>%
  ggplot(mapping = aes(x = To_Par, fill = Course)) +
  facet_grid(cols = vars(Course)) +
  geom_density(alpha = 0.7) +
  lims(x = c(0, 15)) +
  labs(
    x = paste("\n", "Score (To Par)"),
    y = paste("Density", "\n"),
    title = "Density Plot of Par 5 Course Scores (2020)"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(legend.position = "none")
Density plot with distribution lines
# Sample mean and standard deviation of GIR, ignoring missing values.
GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)
GIR_SD <- sd(Merged_Golf$GIR, na.rm = TRUE)

# Kernel density of GIR overlaid with the normal density that has the
# sample mean and standard deviation.
Merged_Golf %>%
  filter(!is.na(GIR)) %>%
  ggplot(aes(x = GIR)) +
  geom_density(fill = Hex_Dark_Red) +
  theme_bw() +
  xlab(paste("\n", "GIRs (%)")) +
  ylab(paste("Density", "\n")) +
  ggtitle("Density of GIRs") +
  stat_function(
    fun = dnorm,
    args = list(mean = GIR_Mean, sd = GIR_SD),
    color = Hex_Google_Blue,
    linewidth = 1.0
  ) +
  # The blue curve is a fitted normal density, not a mean marker.
  labs(
    caption = paste("\n", "Blue line: Normal density (sample mean and SD)")
  ) +
  theme(plot.caption = element_text(hjust = 0))
# Bar chart counting the rows of Merged_Golf_Fargo per course; x labels
# are rotated 45 degrees.
Merged_Golf_Fargo %>%
  ggplot(mapping = aes(x = Course)) +
  geom_bar(fill = Hex_Dark_Red, colour = "black") +
  labs(
    x = paste("\n", "Course"),
    y = paste("Count", "\n"),
    title = "Number of 9-Hole Rounds by Course"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1, vjust = 1)
  )

# Same count, split into one panel per Year, with smaller black x labels.
Merged_Golf_Fargo %>%
  ggplot(mapping = aes(x = Course)) +
  geom_bar(fill = Hex_Dark_Red, colour = "black") +
  facet_grid(cols = vars(Year)) +
  labs(
    x = paste("\n", "Course"),
    y = paste("Count", "\n"),
    title = "Number of 9-Hole Rounds by Course"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      size = 8,
      angle = 45,
      hjust = 1,
      vjust = 1,
      colour = "black"
    )
  )
Faceted scatter plot with correlation stats
# To_Par against Date, one panel per Year (independent x axes), with a
# linear fit and the Pearson correlation printed in each panel.
Merged_Golf %>%
  filter(!is.na(To_Par)) %>%
  ggplot(aes(x = Date, y = To_Par)) +
  geom_point(color = Hex_Dark_Red, size = 1.5) +
  # The `scales` argument is spelled out in full.
  facet_grid(. ~ Year, scales = "free_x") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = TRUE,
    color = Hex_Google_Blue
  ) +
  stat_cor(
    method = "pearson",
    color = "black",
    size = 4,
    label.x.npc = 0.97,
    label.y.npc = 0.90,
    hjust = 1
  ) +
  theme_bw() +
  # Must come after theme_bw(): a complete theme replaces any theme()
  # settings added before it.
  theme(
    axis.text.x = element_text(
      size = 10,
      color = "black"
    )
  ) +
  xlab(paste("\n", "Month")) +
  ylab(paste("Score (To Par)", "\n")) +
  ggtitle("9-Hole Scores Over Time")
# 2020 rounds in Merged_Golf_Fargo: To_Par against Date with a
# fifth-degree polynomial trend line (no confidence band).
Merged_Golf_Fargo %>%
  filter(!is.na(To_Par), Year == 2020) %>%
  ggplot(mapping = aes(x = Date, y = To_Par)) +
  geom_point(colour = Hex_Dark_Red, size = 2) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 5), se = FALSE) +
  labs(
    x = paste("\n", "Month"),
    y = paste("Score (To Par)", "\n"),
    title = "9-Hole Scores Over Time (2020)"
  ) +
  theme_bw()
# To_Par against week of the year, coloured by Year, with a fifth-degree
# polynomial trend per Year. Week_Number is on x and To_Par on y, and the
# axis titles are labelled accordingly.
Merged_Golf %>%
  filter(
    !is.na(To_Par),
    !is.na(Week_Number)
  ) %>%
  ggplot(aes(x = Week_Number, y = To_Par, color = Year)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 5), se = FALSE) +
  scale_color_gdocs() +
  theme_bw() +
  xlab(paste("\n", "Week #")) +
  ylab(paste("Score (To Par)", "\n")) +
  ggtitle("Score Over Time")

# Same plot with fainter points and the confidence band of each trend
# shown.
Merged_Golf %>%
  filter(
    !is.na(To_Par),
    !is.na(Week_Number)
  ) %>%
  ggplot(aes(x = Week_Number, y = To_Par, color = Year)) +
  geom_point(size = 2, alpha = 0.3) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 5), se = TRUE) +
  scale_color_gdocs() +
  theme_bw() +
  xlab(paste("\n", "Week #")) +
  ylab(paste("Score (To Par)", "\n")) +
  ggtitle("Score Over Time")
Basic box plot
# Box plots of To_Par, one box per Year (distinguished by fill only; the
# x axis title, text and ticks are blanked).
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(y = To_Par, fill = Year)) +
  geom_boxplot(alpha = 0.5) +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
Box plot with specified order
# 2020 box plots of To_Par per Weekday, in the Weekday factor's level
# order; the x axis title, text and ticks are blanked.
Merged_Golf %>%
  filter(!is.na(To_Par), !is.na(Weekday), Year == 2020) %>%
  ggplot(mapping = aes(x = Weekday, y = To_Par, fill = Weekday)) +
  geom_boxplot(alpha = 0.5) +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of Scores by Weekday (2020)"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
# Violin plot of To_Par per Year with the x axis title, text and ticks
# blanked.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = Year, y = To_Par, fill = Year)) +
  geom_violin(alpha = 0.5) +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# Same violins with the x axis kept and the legend hidden instead.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = Year, y = To_Par, fill = Year)) +
  geom_violin(alpha = 0.5) +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(legend.position = "none")

# Violins with a narrow opaque box plot drawn on top of each one.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = Year, y = To_Par, fill = Year)) +
  geom_violin(width = 1.0, alpha = 0.5) +
  geom_boxplot(width = 0.2, colour = "black", alpha = 1.0) +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(legend.position = "none")
# Dot plot of To_Par per Year, binned along y and stacked outward from
# the centre; the x axis title, text and ticks are blanked.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(mapping = aes(x = Year, y = To_Par, fill = Year)) +
  geom_dotplot(binwidth = 0.35, binaxis = "y", stackdir = "center") +
  labs(
    y = paste("Score (To Par)", "\n"),
    title = "Comparison of 9-Hole Round Scores"
  ) +
  scale_fill_gdocs() +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )
# Jittered points of To_Par per Year, coloured by Year.
Merged_Golf %>%
  filter(!is.na(Total)) %>%
  ggplot(aes(x = Year, y = To_Par)) +
  geom_jitter(aes(color = Year), alpha = 0.5) +
  # Year is mapped to `color`, so the palette has to be applied through
  # the colour scale; a fill scale would have no effect on these points.
  scale_color_gdocs() +
  theme_bw() +
  ylab(paste("Score (To Par)", "\n")) +
  ggtitle("Comparison of 9-Hole Round Scores")
# Correlation matrix of mtcars columns 1 and 3-7, rounded to two
# decimals, melted to long form and drawn as a tile heat map.
mydata <- mtcars[, c(1, 3:7)]
cormat <- round(cor(mydata), digits = 2)
melted_cormat <- melt(cormat)
ggplot(melted_cormat, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()
# Keep the lower triangle (and diagonal) of a correlation matrix:
# every entry strictly above the diagonal is set to NA.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the upper triangle (and diagonal) of a correlation matrix:
# every entry strictly below the diagonal is set to NA.
get_upper_tri <- function(cormat) {
  replace(cormat, lower.tri(cormat), NA)
}
# Heat map of the upper triangle of `cormat`: the NA entries produced by
# get_upper_tri() are dropped when melting, so only one half is drawn.
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  # Diverging blue-white-red scale centred on 0 and fixed to [-1, 1];
  # the `limits` argument is spelled out in full.
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    limits = c(-1, 1),
    space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
  ) +
  coord_fixed()
# Pairs plot of To_Par, FIR and GIR; the lower panels use the "smooth"
# continuous panel with small semi-transparent points.
PairsPlot <- Merged_Golf[c("To_Par", "FIR", "GIR")]
ggpairs(
  PairsPlot,
  lower = list(
    continuous = wrap("smooth", alpha = 0.5, size = 1)
  )
) +
  theme_bw()
# Hexagonal-bin plot of To_Par against GIR for every year except 2018,
# with the x axis limited to 0-1 and the title centred.
Merged_Golf %>%
  filter(!is.na(To_Par), !is.na(GIR), Year != 2018) %>%
  ggplot(mapping = aes(x = GIR, y = To_Par)) +
  geom_hex(binwidth = c(0.1, 1)) +
  lims(x = c(0, 1)) +
  labs(
    x = paste("\n", "GIRs (%)"),
    y = paste("Score (To Par)", "\n"),
    title = paste("Score vs GIRs", "\n")
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
# Hexagonal-bin plot of To_Par against GIR for every year except 2018,
# one panel per Year, filled with the viridis colour scale.
Merged_Golf %>%
  filter(
    !is.na(To_Par),
    !is.na(GIR),
    Year != 2018
  ) %>%
  ggplot(aes(x = GIR, y = To_Par)) +
  geom_hex(binwidth = c(0.1, 1)) +
  # The `scales` argument is spelled out in full.
  facet_grid(. ~ Year, scales = "free_x") +
  scale_fill_viridis_c() +
  xlim(0, 1) +
  theme_bw() +
  xlab(paste("\n", "GIRs (%)")) +
  ylab(paste("Score (To Par)", "\n")) +
  ggtitle(paste("Score vs GIRs", "\n")) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 1 rows containing missing values (`geom_hex()`).
# Q-Q plot of GIR with a reference line; the title is centred.
Merged_Golf %>%
  filter(!is.na(GIR)) %>%
  ggplot(mapping = aes(sample = GIR)) +
  geom_qq(colour = Hex_Dark_Red) +
  stat_qq_line() +
  labs(
    x = paste("\n", "Theoretical"),
    y = paste("Sample", "\n"),
    title = paste("GIR Q-Q Plot", "\n")
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))