Import Dataset
# Session setup: set the working directory so the relative path "cbb.csv" used
# by read_csv() below resolves, then load the packages used in the analysis.
# NOTE(review): this absolute path is specific to one machine; the script will
# fail elsewhere unless the directory exists. Consider a project-relative path.
setwd("/Users/allisontewksbury/Downloads")
library(formattable) # loading packages
library(readr)       # read_csv()
library(psych)       # describeBy()
library(tidyverse)   # as_tibble() and the dplyr/tibble toolchain
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the college basketball data; column types are guessed by readr.
college_bball <- read_csv(file = "cbb.csv")
## Rows: 3523 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): TEAM, CONF, POSTSEASON, SEED
## dbl (20): G, W, ADJOE, ADJDE, BARTHAG, EFG_O, EFG_D, TOR, TORD, ORB, DRB, FT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce to a tibble, then print the first ten rows as a quick sanity check.
college_bball <- as_tibble(college_bball)
head(college_bball, n = 10)
## # A tibble: 10 × 24
## TEAM CONF G W ADJOE ADJDE BARTHAG EFG_O EFG_D TOR TORD ORB
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Ca… ACC 40 33 123. 94.9 0.953 52.6 48.1 15.4 18.2 40.7
## 2 Wisconsin B10 40 36 129. 93.6 0.976 54.8 47.7 12.4 15.8 32.1
## 3 Michigan B10 40 33 114. 90.4 0.938 53.9 47.7 14 19.5 25.5
## 4 Texas Te… B12 38 31 115. 85.2 0.970 53.5 43 17.7 22.8 27.4
## 5 Gonzaga WCC 39 37 118. 86.3 0.973 56.6 41.1 16.2 17.1 30
## 6 Kentucky SEC 40 29 117. 96.2 0.906 49.9 46 18.1 16.1 42
## 7 Michigan B10 38 30 122. 93.7 0.952 54.6 48 14.6 18.7 32.5
## 8 Duke ACC 39 35 125. 90.6 0.976 56.6 46.5 16.3 18.6 35.8
## 9 Virginia ACC 38 35 123 89.9 0.974 55.2 44.7 14.7 17.5 30.4
## 10 North Ca… ACC 39 33 121 91.5 0.962 51.7 48.1 16.2 18.6 41.3
## # ℹ 12 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## # `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## # POSTSEASON <chr>, SEED <chr>, YEAR <dbl>
# dim() returns c(number of rows, number of columns).
# Each row is one team in one season (the preview shows the same team more than
# once and the data has a YEAR column), so the row count is a number of
# team-season records, not a number of distinct schools.
dimension <- dim(college_bball)
cat("There are", dimension[1], "team-season records in the dataset.")
## There are 3523 team-season records in the dataset.
# Descriptive statistics of season wins (W), one table per conference (CONF).
# Columns are referenced explicitly through college_bball$..., so the data
# frame is not attach()ed: attaching puts a copy of the columns on the search
# path that can silently mask, or be masked by, other objects.
# NOTE(review): CONF contains both "ind" and "Ind", which are reported as two
# separate groups -- confirm whether they should be merged before grouping.
describeBy(college_bball$W, group = college_bball$CONF)
##
## Descriptive statistics by group
## group: A10
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 142 16.99 6.04 16 17 7.41 2 28 26 0.01 -0.89 0.51
## ------------------------------------------------------------
## group: ACC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 147 19.67 6.83 20 19.63 7.41 4 35 31 0.03 -0.64 0.56
## ------------------------------------------------------------
## group: AE
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 92 14.08 6.91 13 13.86 7.41 2 29 27 0.25 -1.01 0.72
## ------------------------------------------------------------
## group: Amer
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 100 17.82 7.1 17 17.59 8.9 4 33 29 0.23 -0.75 0.71
## ------------------------------------------------------------
## group: ASun
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 95 14.44 5.74 14 14.23 5.93 3 29 26 0.36 -0.6 0.59
## ------------------------------------------------------------
## group: B10
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 136 19.91 5.95 20 19.85 6.67 6 36 30 0.12 -0.45 0.51
## ------------------------------------------------------------
## group: B12
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 100 20.55 6 21 20.69 5.93 2 34 32 -0.28 -0.06 0.6
## ------------------------------------------------------------
## group: BE
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 108 19.56 6.48 20 19.41 5.93 5 36 31 0.22 -0.15 0.62
## ------------------------------------------------------------
## group: BSky
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 114 14.1 5.92 13.5 13.99 5.93 1 28 27 0.17 -0.46 0.55
## ------------------------------------------------------------
## group: BSth
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 111 14.15 5.87 15 14.09 5.93 3 29 26 0.06 -0.66 0.56
## ------------------------------------------------------------
## group: BW
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 97 14.36 5.91 14 14.14 7.41 4 31 27 0.3 -0.65 0.6
## ------------------------------------------------------------
## group: CAA
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 103 15.25 6.15 14 15.01 5.93 4 31 27 0.35 -0.72 0.61
## ------------------------------------------------------------
## group: CUSA
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 137 16.57 6.54 17 16.4 5.93 4 35 31 0.23 -0.45 0.56
## ------------------------------------------------------------
## group: GWC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 5 10.4 2.07 11 10.4 1.48 7 12 5 -0.69 -1.41 0.93
## ------------------------------------------------------------
## group: Horz
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 102 14.94 6.12 15 14.98 7.41 3 28 25 -0.05 -0.95 0.61
## ------------------------------------------------------------
## group: ind
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 5 7.8 3.11 9 7.8 2.97 4 11 7 -0.21 -2.15 1.39
## ------------------------------------------------------------
## group: Ind
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1 17 NA 17 17 0 17 17 0 NA NA NA
## ------------------------------------------------------------
## group: Ivy
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 72 14.22 5.38 13 14.12 5.93 1 26 25 0.12 -0.73 0.63
## ------------------------------------------------------------
## group: MAAC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 109 14.87 5.32 14 14.66 5.93 6 28 22 0.35 -0.61 0.51
## ------------------------------------------------------------
## group: MAC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 120 16.3 5.93 16 16.31 5.93 3 32 29 0.01 -0.46 0.54
## ------------------------------------------------------------
## group: MEAC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 115 11.79 5.72 11 11.62 5.93 1 26 25 0.26 -0.69 0.53
## ------------------------------------------------------------
## group: MVC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 102 17.01 6.43 17 16.72 5.93 4 34 30 0.41 -0.24 0.64
## ------------------------------------------------------------
## group: MWC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 108 17.35 6.64 17.5 17.5 8.15 0 32 32 -0.17 -0.49 0.64
## ------------------------------------------------------------
## group: NEC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 101 13.28 5.07 14 13.32 5.93 3 24 21 -0.05 -0.75 0.5
## ------------------------------------------------------------
## group: OVC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 116 14.64 6.29 14 14.44 7.41 3 31 28 0.31 -0.69 0.58
## ------------------------------------------------------------
## group: P12
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 120 18.85 6.58 18.5 18.81 6.67 3 34 31 0.03 -0.19 0.6
## ------------------------------------------------------------
## group: Pat
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 98 14.16 5.15 14 14 5.93 4 27 23 0.31 -0.28 0.52
## ------------------------------------------------------------
## group: SB
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 117 15.92 5.24 16 15.77 5.93 4 28 24 0.18 -0.61 0.48
## ------------------------------------------------------------
## group: SC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 103 15.25 6.28 14 14.95 5.93 4 30 26 0.41 -0.66 0.62
## ------------------------------------------------------------
## group: SEC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 140 19.17 6.19 19 19.06 5.93 6 38 32 0.26 -0.08 0.52
## ------------------------------------------------------------
## group: Slnd
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 120 13.43 6.25 12.5 13.17 6.67 2 30 28 0.38 -0.65 0.57
## ------------------------------------------------------------
## group: Sum
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 89 14.72 6.09 15 14.58 5.93 2 30 28 0.24 -0.37 0.65
## ------------------------------------------------------------
## group: SWAC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 104 11.46 5.39 12 11.4 5.93 0 24 24 0.1 -0.52 0.53
## ------------------------------------------------------------
## group: WAC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 95 15.39 7.03 15 15.42 7.41 0 30 30 0.02 -0.85 0.72
## ------------------------------------------------------------
## group: WCC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 99 17.3 7.46 16 16.91 7.41 3 37 34 0.46 -0.58 0.75
# Keep only the Big East ("BE") rows, then summarise their win totals
# (min, quartiles, median, mean, max).
bigeast_conference <- dplyr::filter(college_bball, CONF == "BE")
summary(bigeast_conference[["W"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 15.00 20.00 19.56 23.25 36.00
# Sort the Big East rows ascending by wins, breaking ties alphabetically by
# team name; the last row of that ordering is the one reported as "most wins".
bigeast_rank <- bigeast_conference[with(bigeast_conference, order(W, TEAM)), ]
paste(
  "The team with the most wins in the Big East Conference is",
  tail(bigeast_rank$TEAM, 1),
  "in",
  tail(bigeast_rank$YEAR, 1)
)
## [1] "The team with the most wins in the Big East Conference is Villanova in 2018"
# Villanova's rows within the Big East subset, and its mean wins per season.
villanova <- dplyr::filter(bigeast_conference, TEAM == "Villanova")
avwins <- mean(villanova[["W"]])
paste("Villanova has on average", avwins, "wins per season")
## [1] "Villanova has on average 27.3 wins per season"
# Correlation (cor() default method) between games played and games won
# across Villanova's rows.
paste(
  "The correlation coefficient between games played vs won for Villanova is",
  with(villanova, cor(G, W))
)
## [1] "The correlation coefficient between games played vs won for Villanova is 0.785070963076589"