Import your data

# Read the per-player, per-game-week statistics into a tibble, letting readr
# guess the column types.
# NOTE(review): this is an absolute, machine-specific path; it will not
# resolve on another computer.
data <- read_csv(
  file = "C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.csv"
)
## Rows: 81525 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): name, team, position
## dbl (21): game_year, game_week, rush_att, rush_yds, rush_avg, rush_tds, rush...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows and the column overview of the imported tibble.
print(data)
## # A tibble: 81,525 × 24
##    name            team  game_year game_week rush_att rush_yds rush_avg rush_tds
##    <chr>           <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 Duce Staley     PHI        2000         1       26      201      7.7        1
##  2 Lamar Smith     MIA        2000         1       27      145      5.4        1
##  3 Tiki Barber     NYG        2000         1       13      144     11.1        2
##  4 Stephen Davis   WAS        2000         1       23      133      5.8        1
##  5 Edgerrin James  IND        2000         1       28      124      4.4        1
##  6 Priest Holmes   BAL        2000         1       27      119      4.4        0
##  7 Curtis Martin   NYJ        2000         1       30      110      3.7        1
##  8 Robert Smith    MIN        2000         1       14      109      7.8        0
##  9 Tim Biakabutuka CAR        2000         1       15       88      5.9        0
## 10 Cade McNown     CHI        2000         1       10       87      8.7        1
## # ℹ 81,515 more rows
## # ℹ 16 more variables: rush_fumbles <dbl>, rec <dbl>, rec_yds <dbl>,
## #   rec_avg <dbl>, rec_tds <dbl>, rec_fumbles <dbl>, pass_att <dbl>,
## #   pass_yds <dbl>, pass_tds <dbl>, int <dbl>, sck <dbl>, pass_fumbles <dbl>,
## #   rate <dbl>, position <chr>, total_yards <dbl>, `total tds` <dbl>

Chapter 15

Create a factor

# Tabulate how many rows fall under each position.
count(data, position)
## # A tibble: 3 × 2
##   position     n
##   <chr>    <int>
## 1 QB        9178
## 2 RB       13486
## 3 WR/TE    58861
# Explicit level order for the position factor.
position_levels <- c("QB", "RB", "WR/TE")

# Copy of the data with position stored as a factor using those levels.
data_rev <- mutate(
  data,
  position = factor(position, levels = position_levels)
)

Modify factor order

Make two bar charts here: one before ordering the factor and another after.

# Mean of total_yards per position, computed from data_rev so that position
# is a factor (with the levels from position_levels) rather than a plain
# character column. Note that the summary column keeps the name total_yards
# even though it holds a mean.
data_summary <- data_rev %>%
  group_by(position) %>%
  summarise(
    total_yards = mean(total_yards, na.rm = TRUE)
  )
data_summary
## # A tibble: 3 × 2
##   position total_yards
##   <fct>          <dbl>
## 1 QB             230. 
## 2 RB              75.7
## 3 WR/TE           32.2
# Before ordering: bars appear in the existing order of position.
ggplot(data_summary, aes(total_yards, position)) +
  geom_col()

# After ordering: fct_reorder() sorts position by its mean total yards so
# the bars run from smallest to largest.
ggplot(data_summary, aes(total_yards, fct_reorder(position, total_yards))) +
  geom_col() +
  labs(y = "position")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename each position code to a readable label
# (new label = old level); the number of levels stays the same.
data %>%
  mutate(
    position = fct_recode(
      position,
      "Quarterback" = "QB",
      "Running Back" = "RB",
      "Wide Receiver, Tight End" = "WR/TE"
    )
  ) %>%
  count(position)
## # A tibble: 3 × 2
##   position                     n
##   <fct>                    <int>
## 1 Quarterback               9178
## 2 Running Back             13486
## 3 Wide Receiver, Tight End 58861
  • fct_collapse
# fct_collapse(): merge several old levels into one new level. Here RB and
# WR/TE are combined into a single non-quarterback group, which is what
# distinguishes this function from the one-to-one renaming of fct_recode().
data %>%
  mutate(
    position = fct_collapse(
      position,
      "Quarterback" = "QB",
      "Non-Quarterback" = c("RB", "WR/TE")
    )
  ) %>%
  count(position)
## # A tibble: 2 × 2
##   position            n
##   <fct>           <int>
## 1 Quarterback      9178
## 2 Non-Quarterback 72347
  • fct_lump
# Lump the least frequent positions into "Other". fct_lump() called without
# n or prop simply dispatches to fct_lump_lowfreq(), and the forcats
# documentation recommends calling the specific fct_lump_*() function
# directly in new code.
data %>%
  mutate(position = fct_lump_lowfreq(position)) %>%
  count(position)
## # A tibble: 2 × 2
##   position     n
##   <fct>    <int>
## 1 WR/TE    58861
## 2 Other    22664

Chapter 16

No need to do anything here.