# Read the player game-log CSV into a tibble; readr guesses the column types.
# NOTE(review): this is an absolute, machine-specific path -- consider a
# project-relative path so the script runs on other machines.
data <- read_csv(
  file = "C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.csv"
)
## Rows: 81525 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, team, position
## dbl (21): game_year, game_week, rush_att, rush_yds, rush_avg, rush_tds, rush...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the tibble to preview the first rows and the column types.
data
## # A tibble: 81,525 × 24
## name team game_year game_week rush_att rush_yds rush_avg rush_tds
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Duce Staley PHI 2000 1 26 201 7.7 1
## 2 Lamar Smith MIA 2000 1 27 145 5.4 1
## 3 Tiki Barber NYG 2000 1 13 144 11.1 2
## 4 Stephen Davis WAS 2000 1 23 133 5.8 1
## 5 Edgerrin James IND 2000 1 28 124 4.4 1
## 6 Priest Holmes BAL 2000 1 27 119 4.4 0
## 7 Curtis Martin NYJ 2000 1 30 110 3.7 1
## 8 Robert Smith MIN 2000 1 14 109 7.8 0
## 9 Tim Biakabutuka CAR 2000 1 15 88 5.9 0
## 10 Cade McNown CHI 2000 1 10 87 8.7 1
## # ℹ 81,515 more rows
## # ℹ 16 more variables: rush_fumbles <dbl>, rec <dbl>, rec_yds <dbl>,
## # rec_avg <dbl>, rec_tds <dbl>, rec_fumbles <dbl>, pass_att <dbl>,
## # pass_yds <dbl>, pass_tds <dbl>, int <dbl>, sck <dbl>, pass_fumbles <dbl>,
## # rate <dbl>, position <chr>, total_yards <dbl>, `total tds` <dbl>
# Tally the number of rows recorded for each position.
count(data, position)
## # A tibble: 3 × 2
## position n
## <chr> <int>
## 1 QB 9178
## 2 RB 13486
## 3 WR/TE 58861
# Explicit level order used when converting `position` to a factor.
position_levels <- c("QB", "RB", "WR/TE")

# Copy of `data` whose `position` column is a factor with those levels.
data_rev <- mutate(
  data,
  position = factor(position, levels = position_levels)
)
Make two bar charts here: one before ordering and another after ordering.
# Mean of `total_yards` within each position, ignoring missing values.
data_summary <- summarise(
  group_by(data, position),
  total_yards = mean(total_yards, na.rm = TRUE)
)

# Show the one-row-per-position summary.
data_summary
## # A tibble: 3 × 2
## position total_yards
## <chr> <dbl>
## 1 QB 230.
## 2 RB 75.7
## 3 WR/TE 32.2
# Bar chart before ordering: `position` is a character column, so the bars
# appear in the default (alphabetical) order along the x-axis.
ggplot(data_summary, aes(position, total_yards)) +
  geom_col()

# Bar chart after ordering: fct_reorder() sorts the positions by their mean
# total yards, so the bars run from the lowest average to the highest.
ggplot(data_summary, aes(fct_reorder(position, total_yards), total_yards)) +
  geom_col() +
  labs(x = "position")
Show examples of three functions:
# fct_recode(): rename each position code to a readable label
# (syntax is "new label" = "old level"), then count rows per label.
data %>%
  mutate(
    position = fct_recode(
      position,
      "Quarterback" = "QB",
      "Running Back" = "RB",
      "Wide Receiver, Tight End" = "WR/TE"
    )
  ) %>%
  count(position)
## # A tibble: 3 × 2
## position n
## <fct> <int>
## 1 Quarterback 9178
## 2 Running Back 13486
## 3 Wide Receiver, Tight End 58861
# fct_collapse(): map old levels onto new ones ("new label" = old level(s)).
# Each new label here takes exactly one old level, so the counts match the
# fct_recode() example; passing a vector of old levels would merge them.
data %>%
  mutate(
    position = fct_collapse(
      position,
      "Quarterback" = "QB",
      "Running Back" = "RB",
      "Wide Receiver, Tight End" = "WR/TE"
    )
  ) %>%
  count(position)
## # A tibble: 3 × 2
## position n
## <fct> <int>
## 1 Quarterback 9178
## 2 Running Back 13486
## 3 Wide Receiver, Tight End 58861
# fct_lump(): with no extra arguments the less frequent positions are merged
# into a single "Other" level; here QB and RB (9178 + 13486 = 22664 rows) are
# lumped together while WR/TE is kept.
data %>%
  mutate(position = position %>% fct_lump()) %>%
  count(position)
## # A tibble: 2 × 2
## position n
## <fct> <int>
## 1 WR/TE 58861
## 2 Other 22664
```
No need to do anything here.