library(readr)
# Load the tournament-level CSV into `df`; the path is relative to the
# current working directory.
df <- read_csv(file = "ASA All PGA Raw Data - Tourn Level.csv")
## Rows: 36864 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Player_initial_last, player, tournament name, course, Finish
## dbl (28): tournament id, player id, hole_par, strokes, hole_DKP, hole_FDP, ...
## lgl (3): Unnamed: 2, Unnamed: 3, Unnamed: 4
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary (quantiles, means, NA counts) of the loaded data
summary(object = df)
## Player_initial_last tournament id player id hole_par
## Length:36864 Min. : 2230 Min. : 5 Min. : 70.0
## Class :character 1st Qu.: 2696 1st Qu.: 1170 1st Qu.:143.0
## Mode :character Median :401056503 Median : 3793 Median :280.0
## Mean :233180667 Mean : 79790 Mean :225.5
## 3rd Qu.:401219498 3rd Qu.: 6151 3rd Qu.:286.0
## Max. :401366873 Max. :4845309 Max. :292.0
##
## strokes hole_DKP hole_FDP hole_SDP
## Min. : 66.0 Min. : -2.50 Min. :-21.40 Min. :-11.00
## 1st Qu.:146.0 1st Qu.: 27.00 1st Qu.: 22.60 1st Qu.: 28.00
## Median :272.0 Median : 53.50 Median : 46.10 Median : 55.00
## Mean :224.1 Mean : 50.13 Mean : 44.38 Mean : 49.32
## 3rd Qu.:281.0 3rd Qu.: 69.00 3rd Qu.: 64.00 3rd Qu.: 69.00
## Max. :325.0 Max. :174.00 Max. :134.70 Max. :107.00
##
## streak_DKP streak_FDP streak_SDP n_rounds
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :1.000
## 1st Qu.: 0.000 1st Qu.: 0.800 1st Qu.: 0.000 1st Qu.:2.000
## Median : 0.000 Median : 6.400 Median : 0.000 Median :4.000
## Mean : 1.764 Mean : 7.687 Mean : 1.683 Mean :3.175
## 3rd Qu.: 3.000 3rd Qu.:12.400 3rd Qu.: 3.000 3rd Qu.:4.000
## Max. :23.000 Max. :43.600 Max. :22.000 Max. :4.000
##
## made_cut pos finish_DKP finish_FDP
## Min. :0.0000 Min. : 1.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.: 15.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median :1.0000 Median : 32.00 Median : 0.000 Median : 0.000
## Mean :0.6059 Mean : 34.17 Mean : 2.489 Mean : 2.134
## 3rd Qu.:1.0000 3rd Qu.: 51.00 3rd Qu.: 3.000 3rd Qu.: 2.000
## Max. :1.0000 Max. :999.00 Max. :30.000 Max. :30.000
## NA's :15547
## finish_SDP total_DKP total_FDP total_SDP
## Min. : 0.000 Min. : -2.50 Min. :-21.40 Min. :-11.00
## 1st Qu.: 0.000 1st Qu.: 27.50 1st Qu.: 24.70 1st Qu.: 28.00
## Median : 0.000 Median : 55.50 Median : 52.15 Median : 56.00
## Mean : 1.171 Mean : 54.38 Mean : 54.20 Mean : 52.18
## 3rd Qu.: 0.000 3rd Qu.: 75.00 3rd Qu.: 78.50 3rd Qu.: 72.00
## Max. :15.000 Max. :205.50 Max. :202.60 Max. :141.00
##
## player Unnamed: 2 Unnamed: 3 Unnamed: 4
## Length:36864 Mode:logical Mode:logical Mode:logical
## Class :character NA's:36864 NA's:36864 NA's:36864
## Mode :character
##
##
##
##
## tournament name course date purse
## Length:36864 Length:36864 Min. :2014-10-12 Min. : 3.00
## Class :character Class :character 1st Qu.:2017-01-15 1st Qu.: 6.40
## Mode :character Mode :character Median :2018-11-04 Median : 7.10
## Mean :2018-10-10 Mean : 7.53
## 3rd Qu.:2020-09-13 3rd Qu.: 8.70
## Max. :2022-06-05 Max. :20.00
##
## season no_cut Finish sg_putt
## Min. :2015 Min. :0.00000 Length:36864 Min. :-5.990
## 1st Qu.:2017 1st Qu.:0.00000 Class :character 1st Qu.:-0.770
## Median :2019 Median :0.00000 Mode :character Median :-0.040
## Mean :2019 Mean :0.06529 Mean :-0.121
## 3rd Qu.:2021 3rd Qu.:0.00000 3rd Qu.: 0.630
## Max. :2022 Max. :1.00000 Max. : 4.430
## NA's :7684
## sg_arg sg_app sg_ott sg_t2g
## Min. :-6.4300 Min. :-9.2500 Min. :-7.7400 Min. :-13.9500
## 1st Qu.:-0.4500 1st Qu.:-0.7400 1st Qu.:-0.4500 1st Qu.: -1.0800
## Median : 0.0000 Median : 0.0000 Median : 0.0500 Median : -0.0100
## Mean :-0.0407 Mean :-0.1018 Mean :-0.0459 Mean : -0.1883
## 3rd Qu.: 0.4200 3rd Qu.: 0.6400 3rd Qu.: 0.4800 3rd Qu.: 0.9200
## Max. : 3.1700 Max. : 4.6700 Max. : 2.7700 Max. : 6.3000
## NA's :7684 NA's :7684 NA's :7684 NA's :7684
## sg_total
## Min. :-13.6700
## 1st Qu.: -1.3700
## Median : -0.1600
## Mean : -0.3055
## 3rd Qu.: 1.0600
## Max. : 8.5200
## NA's :7683
# Mean of made_cut ----
# summary(df) already reports made_cut as numeric with values between 0 and 1,
# so this coercion is a defensive no-op; it only matters if the column is ever
# read in as text.
df$made_cut <- as.numeric(as.character(df$made_cut))
mean_made_cut <- mean(df$made_cut, na.rm = TRUE)
round(mean_made_cut, 2)
## [1] 0.61
mean_made_cut
## [1] 0.6059028
# Median of made_cut ----
median_made_cut <- median(df$made_cut, na.rm = TRUE)
# Print the median (not the mean) rounded to two decimals; summary(df) shows
# the median of made_cut is 1.
round(median_made_cut, 2)
## [1] 1
# Score relative to par for every row:
# negative = under par, positive = over par
df[["to_par"]] <- df[["strokes"]] - df[["hole_par"]]
# Correlation between the hole_par and strokes columns, computed only on rows
# where both values are present
with(df, cor(hole_par, strokes, use = "complete.obs"))
## [1] 0.9958413