# Import Dataset ----

# Session setup: set the working directory and attach the packages used below.
# NOTE(review): setwd() points at a user-specific absolute path, so the script
# only runs unchanged on a machine that has this directory; the relative file
# name "cbb.csv" passed to read_csv() further down is resolved against it.
setwd("/Users/allisontewksbury/Downloads")
library(formattable) # loaded here, but not called in the visible part of the script
library(readr)       # read_csv()
library(psych)       # describeBy()
library(tidyverse)   # as_tibble() and the rest of the core tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the college basketball table from cbb.csv in the working directory.
college_bball <- read_csv(file = "cbb.csv")
## Rows: 3523 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): TEAM, CONF, POSTSEASON, SEED
## dbl (20): G, W, ADJOE, ADJDE, BARTHAG, EFG_O, EFG_D, TOR, TORD, ORB, DRB, FT...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce to a tibble, then print the first ten rows as a quick preview.
college_bball <- as_tibble(x = college_bball)
head(x = college_bball, n = 10)
## # A tibble: 10 × 24
##    TEAM      CONF      G     W ADJOE ADJDE BARTHAG EFG_O EFG_D   TOR  TORD   ORB
##    <chr>     <chr> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 North Ca… ACC      40    33  123.  94.9   0.953  52.6  48.1  15.4  18.2  40.7
##  2 Wisconsin B10      40    36  129.  93.6   0.976  54.8  47.7  12.4  15.8  32.1
##  3 Michigan  B10      40    33  114.  90.4   0.938  53.9  47.7  14    19.5  25.5
##  4 Texas Te… B12      38    31  115.  85.2   0.970  53.5  43    17.7  22.8  27.4
##  5 Gonzaga   WCC      39    37  118.  86.3   0.973  56.6  41.1  16.2  17.1  30  
##  6 Kentucky  SEC      40    29  117.  96.2   0.906  49.9  46    18.1  16.1  42  
##  7 Michigan  B10      38    30  122.  93.7   0.952  54.6  48    14.6  18.7  32.5
##  8 Duke      ACC      39    35  125.  90.6   0.976  56.6  46.5  16.3  18.6  35.8
##  9 Virginia  ACC      38    35  123   89.9   0.974  55.2  44.7  14.7  17.5  30.4
## 10 North Ca… ACC      39    33  121   91.5   0.962  51.7  48.1  16.2  18.6  41.3
## # ℹ 12 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## #   `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## #   POSTSEASON <chr>, SEED <chr>, YEAR <dbl>
# Report the size of the table.
# Each row is one team in one season: the table has a YEAR column and the same
# team (e.g. Michigan) appears in several rows. The row count is therefore a
# number of team-season records, not a number of schools, and the message says
# so. A count of distinct schools would be n_distinct(college_bball$TEAM).
dimension <- dim(college_bball) # c(number of rows, number of columns)
cat("There are", dimension[1], "team-season records.")
## There are 3523 team-season records.
# Descriptive statistics of wins (W), one block per conference (CONF).
# The earlier attach(college_bball) call was removed: every column in this
# script is reached explicitly through `college_bball$...` (or inside subset()),
# so attaching only added column names to the search path, where they can
# silently mask or be masked by other objects.
# NOTE(review): the grouped output lists both "ind" and "Ind" as separate
# conferences; this looks like inconsistent capitalisation of one label.
# Confirm against the data and normalise CONF before grouping if so.
describeBy(college_bball$W, group = college_bball$CONF)
## 
##  Descriptive statistics by group 
## group: A10
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 142 16.99 6.04     16      17 7.41   2  28    26 0.01    -0.89 0.51
## ------------------------------------------------------------ 
## group: ACC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 147 19.67 6.83     20   19.63 7.41   4  35    31 0.03    -0.64 0.56
## ------------------------------------------------------------ 
## group: AE
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 92 14.08 6.91     13   13.86 7.41   2  29    27 0.25    -1.01 0.72
## ------------------------------------------------------------ 
## group: Amer
##    vars   n  mean  sd median trimmed mad min max range skew kurtosis   se
## X1    1 100 17.82 7.1     17   17.59 8.9   4  33    29 0.23    -0.75 0.71
## ------------------------------------------------------------ 
## group: ASun
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 95 14.44 5.74     14   14.23 5.93   3  29    26 0.36     -0.6 0.59
## ------------------------------------------------------------ 
## group: B10
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 136 19.91 5.95     20   19.85 6.67   6  36    30 0.12    -0.45 0.51
## ------------------------------------------------------------ 
## group: B12
##    vars   n  mean sd median trimmed  mad min max range  skew kurtosis  se
## X1    1 100 20.55  6     21   20.69 5.93   2  34    32 -0.28    -0.06 0.6
## ------------------------------------------------------------ 
## group: BE
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 108 19.56 6.48     20   19.41 5.93   5  36    31 0.22    -0.15 0.62
## ------------------------------------------------------------ 
## group: BSky
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 114 14.1 5.92   13.5   13.99 5.93   1  28    27 0.17    -0.46 0.55
## ------------------------------------------------------------ 
## group: BSth
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 111 14.15 5.87     15   14.09 5.93   3  29    26 0.06    -0.66 0.56
## ------------------------------------------------------------ 
## group: BW
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis  se
## X1    1 97 14.36 5.91     14   14.14 7.41   4  31    27  0.3    -0.65 0.6
## ------------------------------------------------------------ 
## group: CAA
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 103 15.25 6.15     14   15.01 5.93   4  31    27 0.35    -0.72 0.61
## ------------------------------------------------------------ 
## group: CUSA
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 137 16.57 6.54     17    16.4 5.93   4  35    31 0.23    -0.45 0.56
## ------------------------------------------------------------ 
## group: GWC
##    vars n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 5 10.4 2.07     11    10.4 1.48   7  12     5 -0.69    -1.41 0.93
## ------------------------------------------------------------ 
## group: Horz
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 102 14.94 6.12     15   14.98 7.41   3  28    25 -0.05    -0.95 0.61
## ------------------------------------------------------------ 
## group: ind
##    vars n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 5  7.8 3.11      9     7.8 2.97   4  11     7 -0.21    -2.15 1.39
## ------------------------------------------------------------ 
## group: Ind
##    vars n mean sd median trimmed mad min max range skew kurtosis se
## X1    1 1   17 NA     17      17   0  17  17     0   NA       NA NA
## ------------------------------------------------------------ 
## group: Ivy
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 72 14.22 5.38     13   14.12 5.93   1  26    25 0.12    -0.73 0.63
## ------------------------------------------------------------ 
## group: MAAC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 109 14.87 5.32     14   14.66 5.93   6  28    22 0.35    -0.61 0.51
## ------------------------------------------------------------ 
## group: MAC
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 120 16.3 5.93     16   16.31 5.93   3  32    29 0.01    -0.46 0.54
## ------------------------------------------------------------ 
## group: MEAC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 115 11.79 5.72     11   11.62 5.93   1  26    25 0.26    -0.69 0.53
## ------------------------------------------------------------ 
## group: MVC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 102 17.01 6.43     17   16.72 5.93   4  34    30 0.41    -0.24 0.64
## ------------------------------------------------------------ 
## group: MWC
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 108 17.35 6.64   17.5    17.5 8.15   0  32    32 -0.17    -0.49 0.64
## ------------------------------------------------------------ 
## group: NEC
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis  se
## X1    1 101 13.28 5.07     14   13.32 5.93   3  24    21 -0.05    -0.75 0.5
## ------------------------------------------------------------ 
## group: OVC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 116 14.64 6.29     14   14.44 7.41   3  31    28 0.31    -0.69 0.58
## ------------------------------------------------------------ 
## group: P12
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis  se
## X1    1 120 18.85 6.58   18.5   18.81 6.67   3  34    31 0.03    -0.19 0.6
## ------------------------------------------------------------ 
## group: Pat
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 98 14.16 5.15     14      14 5.93   4  27    23 0.31    -0.28 0.52
## ------------------------------------------------------------ 
## group: SB
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 117 15.92 5.24     16   15.77 5.93   4  28    24 0.18    -0.61 0.48
## ------------------------------------------------------------ 
## group: SC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 103 15.25 6.28     14   14.95 5.93   4  30    26 0.41    -0.66 0.62
## ------------------------------------------------------------ 
## group: SEC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 140 19.17 6.19     19   19.06 5.93   6  38    32 0.26    -0.08 0.52
## ------------------------------------------------------------ 
## group: Slnd
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 120 13.43 6.25   12.5   13.17 6.67   2  30    28 0.38    -0.65 0.57
## ------------------------------------------------------------ 
## group: Sum
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 89 14.72 6.09     15   14.58 5.93   2  30    28 0.24    -0.37 0.65
## ------------------------------------------------------------ 
## group: SWAC
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 104 11.46 5.39     12    11.4 5.93   0  24    24  0.1    -0.52 0.53
## ------------------------------------------------------------ 
## group: WAC
##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 95 15.39 7.03     15   15.42 7.41   0  30    30 0.02    -0.85 0.72
## ------------------------------------------------------------ 
## group: WCC
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 99 17.3 7.46     16   16.91 7.41   3  37    34 0.46    -0.58 0.75
# Keep only the rows whose conference code is "BE" (Big East), then show the
# five-number summary and mean of their win totals.
bigeast_conference <- filter(college_bball, CONF == "BE")
summary(bigeast_conference[["W"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   15.00   20.00   19.56   23.25   36.00
# Sort the Big East rows by wins and then by team name, both ascending, so the
# row with the most wins ends up last; report that row's team and year.
bigeast_rank <- bigeast_conference[
  order(bigeast_conference$W, bigeast_conference$TEAM),
]
paste(
  "The team with the most wins in the Big East Conference is",
  tail(bigeast_rank$TEAM, 1),
  "in",
  tail(bigeast_rank$YEAR, 1)
)
## [1] "The team with the most wins in the Big East Conference is Villanova in 2018"
# Pull Villanova's rows out of the Big East subset and report the mean of
# their per-season win totals.
villanova <- filter(bigeast_conference, TEAM == "Villanova")
avwins <- mean(villanova[["W"]])
paste("Villanova has on average", avwins, "wins per season")
## [1] "Villanova has on average 27.3 wins per season"
# Correlation (cor() default method) between Villanova's games played (G) and
# games won (W) across its rows.
paste(
  "The correlation coefficient between games played vs won for Villanova is",
  with(villanova, cor(G, W))
)
## [1] "The correlation coefficient between games played vs won for Villanova is 0.785070963076589"