library(readr)
# Read the tournament-level PGA results CSV into `df`.
df <- read_csv(file = "ASA All PGA Raw Data - Tourn Level.csv")
## Rows: 36864 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (5): Player_initial_last, player, tournament name, course, Finish
## dbl  (28): tournament id, player id, hole_par, strokes, hole_DKP, hole_FDP, ...
## lgl   (3): Unnamed: 2, Unnamed: 3, Unnamed: 4
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print per-column summary statistics for the loaded data.
summary(object = df)
##  Player_initial_last tournament id         player id          hole_par    
##  Length:36864        Min.   :     2230   Min.   :      5   Min.   : 70.0  
##  Class :character    1st Qu.:     2696   1st Qu.:   1170   1st Qu.:143.0  
##  Mode  :character    Median :401056503   Median :   3793   Median :280.0  
##                      Mean   :233180667   Mean   :  79790   Mean   :225.5  
##                      3rd Qu.:401219498   3rd Qu.:   6151   3rd Qu.:286.0  
##                      Max.   :401366873   Max.   :4845309   Max.   :292.0  
##                                                                           
##     strokes         hole_DKP         hole_FDP         hole_SDP     
##  Min.   : 66.0   Min.   : -2.50   Min.   :-21.40   Min.   :-11.00  
##  1st Qu.:146.0   1st Qu.: 27.00   1st Qu.: 22.60   1st Qu.: 28.00  
##  Median :272.0   Median : 53.50   Median : 46.10   Median : 55.00  
##  Mean   :224.1   Mean   : 50.13   Mean   : 44.38   Mean   : 49.32  
##  3rd Qu.:281.0   3rd Qu.: 69.00   3rd Qu.: 64.00   3rd Qu.: 69.00  
##  Max.   :325.0   Max.   :174.00   Max.   :134.70   Max.   :107.00  
##                                                                    
##    streak_DKP       streak_FDP       streak_SDP        n_rounds    
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :1.000  
##  1st Qu.: 0.000   1st Qu.: 0.800   1st Qu.: 0.000   1st Qu.:2.000  
##  Median : 0.000   Median : 6.400   Median : 0.000   Median :4.000  
##  Mean   : 1.764   Mean   : 7.687   Mean   : 1.683   Mean   :3.175  
##  3rd Qu.: 3.000   3rd Qu.:12.400   3rd Qu.: 3.000   3rd Qu.:4.000  
##  Max.   :23.000   Max.   :43.600   Max.   :22.000   Max.   :4.000  
##                                                                    
##     made_cut           pos           finish_DKP       finish_FDP    
##  Min.   :0.0000   Min.   :  1.00   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.: 15.00   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :1.0000   Median : 32.00   Median : 0.000   Median : 0.000  
##  Mean   :0.6059   Mean   : 34.17   Mean   : 2.489   Mean   : 2.134  
##  3rd Qu.:1.0000   3rd Qu.: 51.00   3rd Qu.: 3.000   3rd Qu.: 2.000  
##  Max.   :1.0000   Max.   :999.00   Max.   :30.000   Max.   :30.000  
##                   NA's   :15547                                     
##    finish_SDP       total_DKP        total_FDP        total_SDP     
##  Min.   : 0.000   Min.   : -2.50   Min.   :-21.40   Min.   :-11.00  
##  1st Qu.: 0.000   1st Qu.: 27.50   1st Qu.: 24.70   1st Qu.: 28.00  
##  Median : 0.000   Median : 55.50   Median : 52.15   Median : 56.00  
##  Mean   : 1.171   Mean   : 54.38   Mean   : 54.20   Mean   : 52.18  
##  3rd Qu.: 0.000   3rd Qu.: 75.00   3rd Qu.: 78.50   3rd Qu.: 72.00  
##  Max.   :15.000   Max.   :205.50   Max.   :202.60   Max.   :141.00  
##                                                                     
##     player          Unnamed: 2     Unnamed: 3     Unnamed: 4    
##  Length:36864       Mode:logical   Mode:logical   Mode:logical  
##  Class :character   NA's:36864     NA's:36864     NA's:36864    
##  Mode  :character                                               
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  tournament name       course               date                purse      
##  Length:36864       Length:36864       Min.   :2014-10-12   Min.   : 3.00  
##  Class :character   Class :character   1st Qu.:2017-01-15   1st Qu.: 6.40  
##  Mode  :character   Mode  :character   Median :2018-11-04   Median : 7.10  
##                                        Mean   :2018-10-10   Mean   : 7.53  
##                                        3rd Qu.:2020-09-13   3rd Qu.: 8.70  
##                                        Max.   :2022-06-05   Max.   :20.00  
##                                                                            
##      season         no_cut           Finish             sg_putt      
##  Min.   :2015   Min.   :0.00000   Length:36864       Min.   :-5.990  
##  1st Qu.:2017   1st Qu.:0.00000   Class :character   1st Qu.:-0.770  
##  Median :2019   Median :0.00000   Mode  :character   Median :-0.040  
##  Mean   :2019   Mean   :0.06529                      Mean   :-0.121  
##  3rd Qu.:2021   3rd Qu.:0.00000                      3rd Qu.: 0.630  
##  Max.   :2022   Max.   :1.00000                      Max.   : 4.430  
##                                                      NA's   :7684    
##      sg_arg            sg_app            sg_ott            sg_t2g        
##  Min.   :-6.4300   Min.   :-9.2500   Min.   :-7.7400   Min.   :-13.9500  
##  1st Qu.:-0.4500   1st Qu.:-0.7400   1st Qu.:-0.4500   1st Qu.: -1.0800  
##  Median : 0.0000   Median : 0.0000   Median : 0.0500   Median : -0.0100  
##  Mean   :-0.0407   Mean   :-0.1018   Mean   :-0.0459   Mean   : -0.1883  
##  3rd Qu.: 0.4200   3rd Qu.: 0.6400   3rd Qu.: 0.4800   3rd Qu.:  0.9200  
##  Max.   : 3.1700   Max.   : 4.6700   Max.   : 2.7700   Max.   :  6.3000  
##  NA's   :7684      NA's   :7684      NA's   :7684      NA's   :7684      
##     sg_total       
##  Min.   :-13.6700  
##  1st Qu.: -1.3700  
##  Median : -0.1600  
##  Mean   : -0.3055  
##  3rd Qu.:  1.0600  
##  Max.   :  8.5200  
##  NA's   :7683
# Average of cuts made ----
# Round-trip through character so the column is plain numeric before
# averaging; missing values are ignored by the mean.
df[["made_cut"]] <- as.numeric(as.character(df[["made_cut"]]))
mean_made_cut <- mean(df[["made_cut"]], na.rm = TRUE)
# Rounded to two decimals for reporting ...
round(mean_made_cut, digits = 2)
## [1] 0.61
# ... and at full precision.
mean_made_cut
## [1] 0.6059028
# Understanding median of Cuts Made
# Missing values are dropped before taking the median.
median_made_cut <- median(df$made_cut, na.rm = TRUE)
# Report the median itself (not the mean) so this section shows its own
# statistic.
round(median_made_cut, 2)
## [1] 1
# Understanding, in aggregate, which players finished over or under par.
# to_par = strokes - hole_par: negative = under par, positive = over par.
df[["to_par"]] <- with(df, strokes - hole_par)
# Correlation between the par column and strokes, using only rows where both
# values are present.
# NOTE(review): hole_par looks like the cumulative par over the rounds played
# rather than the par of a single hole -- confirm against the data dictionary.
with(df, cor(x = hole_par, y = strokes, use = "complete.obs"))
## [1] 0.9958413