ggplot Templates


Lucas Young


Load Data / Packages


Load Relevant Libraries

library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(ggpubr)
library(stringr)
library(reshape2)
library(hexbin)
library(GGally)


Set Global Image Options

knitr::opts_chunk$set(dpi = 200)

Set Colors

Hex_Dark_Red <- '#DE5246'

Hex_Google_Blue <- '#4285F4'
Hex_Google_Red <- '#EA4335'
Hex_Google_Yellow <- '#FBBC05'
Hex_Google_Green <- '#34A853'


Load 2018 Data

Golf_2018 <- read.csv('./2018 Golf.csv', nrows = 105)

Golf_2018$Wind <- as.integer(Golf_2018$Wind)
## Warning: NAs introduced by coercion
Golf_2018$Temp <- as.integer(substr(Golf_2018$Temp,0,2))
Golf_2018$Fee.Avoided <- as.numeric((substr(Golf_2018$Fee.Avoided,2,6)))


Load 2019 Data

Golf_2019 <- read.csv('./2019 Golf.csv', nrows = 143)

Golf_2019 <- Golf_2019[,1:23]

Golf_2019$Wind <- as.integer(Golf_2019$Wind)
## Warning: NAs introduced by coercion
Golf_2019$Temp <- as.integer(substr(Golf_2019$Temp,0,2))
Golf_2019$Fee.Avoided <- as.numeric((substr(Golf_2019$Fee.Avoided,2,6)))


Load 2020 Data

Golf_2020 <- read.csv('./2020 Golf.csv', nrows = 177)

Golf_2020 <- Golf_2020[,1:23]

Golf_2020$Wind <- as.integer(Golf_2020$Wind)
## Warning: NAs introduced by coercion
Golf_2020$Temp <- as.integer(substr(Golf_2020$Temp,0,2))
Golf_2020$Fee.Avoided <- as.numeric((substr(Golf_2020$Fee.Avoided,2,6)))


Merge Data

Merged_Golf <- bind_rows(Golf_2018, Golf_2019, Golf_2020)

Merged_Golf$DateTime <- paste(Merged_Golf$Date, Merged_Golf$Time)
Merged_Golf$DateTime <- parse_date_time(Merged_Golf$DateTime, '%d-%b-%y %H:%M %p')

Merged_Golf$Date <- as.Date(strptime(Merged_Golf$Date, format = '%d-%b-%y'))

Merged_Golf$Time <- parse_date_time(Merged_Golf$Time, '%H:%M %p')

Merged_Golf$Weekday <- as.factor(weekdays(Merged_Golf$Date))

Merged_Golf$Weekday <- 
  factor(
    Merged_Golf$Weekday, levels = c(
      'Monday',
      'Tuesday',
      'Wednesday',
      'Thursday',
      'Friday',
      'Saturday',
      'Sunday'
    ), 
    ordered = TRUE
  )

Merged_Golf$Year <- as.factor(year(Merged_Golf$Date))

Merged_Golf$WeekNumber <- week(Merged_Golf$Date)

Merged_Golf$DayNumber <- strftime(Merged_Golf$Date, format = '%j')

Merged_Golf_Fargo <- 
  Merged_Golf[
    (Merged_Golf$Course == 'Rose Creek Front' |
     Merged_Golf$Course == 'Rose Creek Back' |
     Merged_Golf$Course == 'Edgewood Front' |
     Merged_Golf$Course == 'Edgewood Back' |
     Merged_Golf$Course == 'Osgood' |                               
     Merged_Golf$Course == 'Prairiewood' |
     Merged_Golf$Course == 'El Zagal') &
     !is.na(Merged_Golf$To.Par),
  ]

Merged_Golf_Fargo$Course <- 
  factor(Merged_Golf_Fargo$Course, levels = c(
    'Rose Creek Front',
    'Rose Creek Back',
    'Edgewood Front',
    'Edgewood Back',
    'Osgood',
    'Prairiewood',
    'El Zagal'
  ),
  ordered = TRUE
)


Approximate Number of Playing Partners

PartnerCounter <- function(Partners){
  
  PartnerCount <- 0
  
  CommaCount <- str_count(Partners, ',')
  
  AndCount <- str_count(Partners, 'and')
  
  AndSymbolCount <- str_count(Partners, '&')
  
  PartnerCount <- CommaCount + AndCount + AndSymbolCount + 1
  
  if (is.na(Partners)){
    PartnerCount <- 0
  }
  
  if (Partners == 'None'){
    PartnerCount <- 0
  }
  
  return(PartnerCount)
  
}

Merged_Golf$PartnerCount <- sapply(Merged_Golf$Partners, PartnerCounter)


Histograms

Histogram

Merged_Golf %>% 
filter(!is.na(GIR)) %>% 

ggplot(aes(x = GIR)) + 
geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, color = 'black') +
theme_bw() + 
xlab(paste('\n', 'GIRs (%)')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Histogram with set breaks / labels

Hole_Scores <- 
  c(
    Merged_Golf$Hole.1,
    Merged_Golf$Hole.2,
    Merged_Golf$Hole.3,
    Merged_Golf$Hole.4,
    Merged_Golf$Hole.5,
    Merged_Golf$Hole.6,
    Merged_Golf$Hole.7,
    Merged_Golf$Hole.8,
    Merged_Golf$Hole.9
  )

Hole_Scores <- Hole_Scores[!is.na(Hole_Scores)]

Hole_Scores <- as.data.frame(Hole_Scores)

colnames(Hole_Scores) <- 'Scores'

Hole_Scores %>% 
  
ggplot(aes(x = Scores)) + 
geom_histogram(binwidth = 1, fill = Hex_Dark_Red, color = 'black') + 
scale_x_continuous(
  labels = c('1', '2', '3', '4', '5', '6', '7', '8', '9'), 
  breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9)
) +
theme_bw() + 
xlab(paste('\n', 'text')) + 
ylab(paste('text', '\n')) +
ggtitle(paste('text', '\n')) +
xlab('Score') +
ylab('Count') +
ggtitle('Histogram of Individual Hole Scores') + 
theme(plot.title = element_text(hjust = 0.5))

Faceted histogram

Merged_Golf %>% 
filter(!is.na(To.Par)) %>% 
  
ggplot(aes(x = To.Par)) +
geom_histogram(binwidth = 2, fill = Hex_Dark_Red, color = 'black') + 
facet_grid(. ~ Year) +
theme_bw() + 
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Number of 9-Hole Rounds', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores Relative to Par', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Histogram of multiple variables

Merged_Golf %>% 
filter(!is.na(Total)) %>% 

ggplot(aes(x = To.Par, fill = Year)) +
geom_histogram(binwidth = 1, alpha = 0.5, position = 'identity') + 
scale_fill_gdocs() +
theme_bw() + 
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Histogram of multiple variables stacked

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = To.Par, fill = Year)) +
geom_histogram(binwidth = 1, alpha = 0.7) + 
scale_fill_gdocs() +
theme_bw() + 
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Histogram with added stat lines

GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)

Merged_Golf %>% 
filter(!is.na(GIR)) %>%

ggplot(aes(x = GIR)) + 
geom_histogram(binwidth = 0.11, fill = Hex_Dark_Red, color = 'black') +
theme_bw() + 
xlab(paste('\n', 'GIRs (%)')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Histogram of GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5)) + 
geom_vline(aes(xintercept = GIR_Mean), color = Hex_Google_Blue, size = 1) +
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))

GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)

GIR_SD <- sqrt(var(Merged_Golf$GIR, na.rm = TRUE))

Merged_Golf %>% 
filter(!is.na(GIR)) %>%

ggplot(aes(x = GIR)) + 
geom_histogram(binwidth = 0.11, aes(y = ..density..),fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
xlab(paste('\n','GIRs (%)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density of GIRs', '\n')) + 
theme(plot.title = element_text(hjust = 0.5)) +
stat_function(fun = dnorm, args = list(mean = GIR_Mean, sd = GIR_SD), color = Hex_Google_Blue, size = 1.0) +
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))


Freq-Poly

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = To.Par, color = Year)) +
geom_freqpoly(binwidth = 1) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Frequency Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))


Density Plots

Overlayed density plot

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = To.Par, fill = Year)) +
geom_density(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() + 
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = To.Par, color = Year)) +
geom_density(alpha = 1) +
scale_color_gdocs() +
theme_bw() + 
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Faceted density plot

Merged_Golf_Fargo[
  (Merged_Golf_Fargo$Year == 2020) &
  (Merged_Golf_Fargo$Course == 'Rose Creek Front' |
  Merged_Golf_Fargo$Course == 'Rose Creek Back' |
  Merged_Golf_Fargo$Course == 'Edgewood Front' |
  Merged_Golf_Fargo$Course == 'Edgewood Back'), 
] %>% 
  
ggplot(aes(x = To.Par, fill = Course)) +
facet_grid(. ~ Course) +
geom_density(alpha = 0.7) +
xlim(0, 15) +
scale_fill_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Density', '\n')) +
ggtitle(paste('Density Plot of Par 5 Course Scores (2020)', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  legend.position = 'none'
)

Density plot with distribution lines

GIR_Mean <- mean(Merged_Golf$GIR, na.rm = TRUE)

GIR_SD <- sqrt(var(Merged_Golf$GIR, na.rm = TRUE))

Merged_Golf %>% 
filter(!is.na(GIR)) %>%

ggplot(aes(x = GIR)) + 
geom_density(fill = Hex_Dark_Red) +
theme_bw() +
xlab(paste('\n','GIRs (%)')) +
ylab(paste('Density', '\n')) +
ggtitle(paste('Density of GIRs', '\n')) + 
theme(plot.title = element_text(hjust = 0.5)) +
stat_function(fun = dnorm, args = list(mean = GIR_Mean, sd = GIR_SD), color = Hex_Google_Blue, size = 1.0) + 
labs(caption = paste('\n', 'Blue line: Sample mean')) +
theme(plot.caption = element_text(hjust = 0))


Bar Graph

Merged_Golf_Fargo %>% 

ggplot(aes(x = Course)) +
geom_bar(fill = Hex_Dark_Red, color = 'black') +
theme_bw() +
theme(
  axis.text.x = element_text(
    angle = 45, 
    vjust = 1, 
    size = 10, 
    hjust = 1
    )
) + 
xlab(paste('\n', 'Course')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Number of 9-Hole Rounds by Course', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Merged_Golf_Fargo %>% 
  
ggplot(aes(x = Course)) +
geom_bar(fill = Hex_Dark_Red, color = 'black') +
facet_grid(. ~ Year) +
theme_bw() +
theme(
  axis.text.x = element_text(
    angle = 45, 
    vjust = 1, 
    size = 10, 
    hjust = 1, 
    color = 'black'
  )
) + 
xlab(paste('\n', 'Course')) + 
ylab(paste('Count', '\n')) +
ggtitle(paste('Number of 9-Hole Rounds by Course', '\n')) +
theme(plot.title = element_text(hjust = 0.5))


Scatter Plots

Faceted histogram with correlation stats

Merged_Golf %>% 
filter(!is.na(To.Par)) %>% 

ggplot(aes(x = Date, y = To.Par)) + 
geom_point(color = Hex_Dark_Red, size = 1.5) +
facet_grid(. ~ Year, scale = 'free_x') +
geom_smooth(method = 'lm', formula = y ~ x, se = TRUE, color = Hex_Google_Blue) +
stat_cor(
  method = 'pearson', 
  color = 'black',
  size = 4,
  label.x.npc = 0.97, 
  label.y.npc = 0.90, 
  hjust = 1
) +
theme_bw() + 
xlab(paste('\n', 'Month')) + 
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('9-Hole Scores Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Line / Time series

Merged_Golf_Fargo %>% 
filter(
  !is.na(To.Par), 
  Year == 2020
) %>% 

ggplot(aes(x = Date, y = To.Par)) + 
geom_point(color = Hex_Dark_Red, size = 2) +
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = FALSE) +
theme_bw() + 
xlab(paste('\n', 'Month')) + 
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('9-Hole Scores Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Multiple Line / Time series

Merged_Golf %>% 
filter(
  !is.na(To.Par),
  !is.na(WeekNumber)
) %>% 

ggplot(aes(x = WeekNumber, y = To.Par, color = Year)) + 
geom_point(size = 2) +
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = FALSE) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Week #', '\n')) +
ggtitle(paste('Score Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))

Merged_Golf %>% 
filter(
  !is.na(To.Par),
  !is.na(WeekNumber)
) %>% 

ggplot(aes(x = WeekNumber, y = To.Par, color = Year)) + 
geom_point(size = 2, alpha = 0.3) +
#geom_line(size = 1, alpha = 0.3) + 
stat_smooth(method = 'lm', formula = y ~ poly(x, 5), se = TRUE) +
scale_color_gdocs() +
theme_bw() +
xlab(paste('\n', 'Score (To Par)')) + 
ylab(paste('Week #', '\n')) +
ggtitle(paste('Score Over Time', '\n')) +
theme(plot.title = element_text(hjust = 0.5))


Box Plots

Basic box plot

Merged_Golf %>% 
filter(!is.na(Total)) %>% 

ggplot(aes(y = To.Par, fill = Year)) +
geom_boxplot(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  axis.title.x=element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

Box plot with specified order

Merged_Golf$Weekday <- 
  factor(
    Merged_Golf$Weekday, 
    levels = c(
      'Monday',
      'Tuesday',
      'Wednesday',
      'Thursday',
      'Friday',
      'Saturday',
      'Sunday'
  ), 
  ordered = TRUE
)

Merged_Golf %>% 
filter(
  !is.na(To.Par),
  !is.na(Weekday),
  Year == 2020
) %>% 

ggplot(aes(Weekday, To.Par, fill = Weekday)) +
geom_boxplot(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of Scores by Weekday (2020)', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  axis.title.x=element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)


Violin

Merged_Golf %>% 
filter(!is.na(Total)) %>% 

ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  axis.title.x=element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  legend.position = 'none'
)

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_violin(width = 1.0, alpha = 0.5) +
geom_boxplot(width = 0.2, color = 'black', alpha = 1.0) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  legend.position = 'none'
)


Dot Plot

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = Year, y = To.Par, fill = Year)) +
geom_dotplot(binaxis = 'y', stackdir = 'center', binwidth = 0.35) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(
  plot.title = element_text(hjust = 0.5),
  axis.title.x=element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)


Jitter Plot

Merged_Golf %>% 
filter(!is.na(Total)) %>%

ggplot(aes(x = Year, y = To.Par)) +
geom_jitter(aes(color = Year), alpha = 0.5) +
scale_fill_gdocs() +
theme_bw() +
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Comparison of 9-Hole Round Scores', '\n')) +
theme(plot.title = element_text(hjust = 0.5))


Correlation Matrix

mydata <- mtcars[, c(1,3,4,5,6,7)]
cormat <- round(cor(mydata),2)
melted_cormat <- melt(cormat)

melted_cormat %>% 
  
ggplot(aes(x=Var1, y=Var2, fill=value)) + 
geom_tile() 

# Get lower triangle of the correlation matrix
  get_lower_tri<-function(cormat){
    cormat[upper.tri(cormat)] <- NA
    return(cormat)
  }
  # Get upper triangle of the correlation matrix
  get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
  }

upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)

ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()

PairsPlot <- Merged_Golf[,c('To.Par', 'FIR', 'GIR')]

ggpairs(PairsPlot, lower = list(continuous = wrap("smooth", alpha = 0.5, size = 1))) +
        theme_bw() 


Heatmap

Merged_Golf %>% 
filter(
  !is.na(To.Par), 
  !is.na(GIR),
  Year != 2018
) %>% 

ggplot(aes(x = GIR, y = To.Par)) + 
 geom_hex(binwidth = c(0.1,1)) +
 xlim(0, 1) +
 theme_bw() +
 xlab(paste('\n', 'GIRs (%)')) + 
 ylab(paste('Score (To Par)', '\n')) +
 ggtitle(paste('Score vs GIRs', '\n')) +
 theme(plot.title = element_text(hjust = 0.5))

Merged_Golf %>% 
filter(
  !is.na(To.Par), 
  !is.na(GIR),
  Year != 2018
) %>%

ggplot(aes(x = GIR, y = To.Par)) + 
geom_hex(binwidth = c(0.1,1)) +
facet_grid(. ~ Year, scale = 'free_x') +
scale_fill_viridis_c() + 
xlim(0, 1) +
theme_bw() +
xlab(paste('\n', 'GIRs (%)')) + 
ylab(paste('Score (To Par)', '\n')) +
ggtitle(paste('Score vs GIRs', '\n')) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 1 rows containing missing values (geom_hex).


Q-Q Plot

Merged_Golf %>% 
filter(!is.na(GIR)) %>% 

ggplot(aes(sample = GIR)) + 
geom_qq(color = Hex_Dark_Red) +
stat_qq_line() +
theme_bw() + 
xlab(paste('\n', 'Theoretical')) + 
ylab(paste('Sample', '\n')) +
ggtitle(paste('GIR Q-Q Plot', '\n')) +
theme(plot.title = element_text(hjust = 0.5))