Import data

# Read the workbook into `data`, then print it for a quick inspection.
data <- read_excel(path = "myData.xlsx")
data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, `2` <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Chapter 15

Create a factor

Unordered factor levels

# Transform data: average PAKE within each distinct PAKE value.
# NOTE(review): the grouping column and the averaged column are both PAKE, so
# avg_PAKE simply repeats the group key -- confirm whether a different grouping
# (e.g. TEAM) was intended.
data_by_pake <- summarise(
    group_by(data, PAKE),
    avg_PAKE = mean(PAKE, na.rm = TRUE)
)

data_by_pake
## # A tibble: 75 × 2
##     PAKE avg_PAKE
##    <dbl>    <dbl>
##  1  -6.7     -6.7
##  2  -6.2     -6.2
##  3  -5.5     -5.5
##  4  -4.4     -4.4
##  5  -4.2     -4.2
##  6  -4.1     -4.1
##  7  -3.6     -3.6
##  8  -3.5     -3.5
##  9  -3.4     -3.4
## 10  -3.3     -3.3
## # ℹ 65 more rows
# Plot: PAKE for each team, with TEAM left in its default (unordered) order.
# Map the PAKE column itself to x: an assignment such as
# `x = avg_PAKE = mean(PAKE)` inside aes() evaluates to one overall mean,
# which would place every team at the same x position.
data %>%
    ggplot(aes(x = PAKE, y = TEAM)) +
    geom_point()

Modify factor order

Ordered factor levels

# Same scatter plot, but with the TEAM axis sorted by each team's PAKE value
# via fct_reorder(); the y-axis title is dropped and the x-axis is relabelled.
ggplot(
    data = data,
    mapping = aes(x = PAKE, y = fct_reorder(TEAM, PAKE))
) +
    geom_point() +
    labs(x = "Mean PAKE", y = NULL)

Modify factor levels

Show examples of three functions:

# Keep only the team name, its PAKE value, and its PAKE rank.
data_small <- select(data, TEAM, PAKE, PAKERANK)
# One-dimensional strip of PAKE values: keep only the PAKE column and draw
# every observation at a constant y position.
# A geom layer is required; ggplot() + aes() alone renders an empty panel.
data %>%
    transmute(PAKE) %>%
    ggplot(aes(x = PAKE, y = 1)) +
    geom_point()

Chapter 16

No need to do anything here.