Import your data

# excel file
data <- read_csv("C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.csv")
## Rows: 81525 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): name, team, position
## dbl (21): game_year, game_week, rush_att, rush_yds, rush_avg, rush_tds, rush...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data
## # A tibble: 81,525 × 24
##    name            team  game_year game_week rush_att rush_yds rush_avg rush_tds
##    <chr>           <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 Duce Staley     PHI        2000         1       26      201      7.7        1
##  2 Lamar Smith     MIA        2000         1       27      145      5.4        1
##  3 Tiki Barber     NYG        2000         1       13      144     11.1        2
##  4 Stephen Davis   WAS        2000         1       23      133      5.8        1
##  5 Edgerrin James  IND        2000         1       28      124      4.4        1
##  6 Priest Holmes   BAL        2000         1       27      119      4.4        0
##  7 Curtis Martin   NYJ        2000         1       30      110      3.7        1
##  8 Robert Smith    MIN        2000         1       14      109      7.8        0
##  9 Tim Biakabutuka CAR        2000         1       15       88      5.9        0
## 10 Cade McNown     CHI        2000         1       10       87      8.7        1
## # ℹ 81,515 more rows
## # ℹ 16 more variables: rush_fumbles <dbl>, rec <dbl>, rec_yds <dbl>,
## #   rec_avg <dbl>, rec_tds <dbl>, rec_fumbles <dbl>, pass_att <dbl>,
## #   pass_yds <dbl>, pass_tds <dbl>, int <dbl>, sck <dbl>, pass_fumbles <dbl>,
## #   rate <dbl>, position <chr>, total_yards <dbl>, `total tds` <dbl>
data_small <- data %>%
  filter(name %in% c("Tom Brady", "Frank Gore", "Larry Fitzgerald")) %>%
  select(name, total_yards, game_year)

Pivoting

long to wide form

data_long <- data %>%
  pivot_longer(
    cols = c(rush_yds, rec_yds, pass_yds),
    names_to = "yard_type",
    values_to = "yards"
  )

wide to long form

data_year <- data_small %>%
  group_by(name, game_year) %>%
  summarise(total_yards = sum(total_yards, na.rm = TRUE), .groups = "drop") 

data_year %>%
    
    pivot_wider(names_from = name, values_from = total_yards)
## # A tibble: 18 × 4
##    game_year `Frank Gore` `Larry Fitzgerald` `Tom Brady`
##        <dbl>        <dbl>              <dbl>       <dbl>
##  1      2005          618               1370        4160
##  2      2006         1995                913        3407
##  3      2007         1423               1238        4552
##  4      2008         1345               1301          76
##  5      2009         1394               1075        4256
##  6      2010         1305               1012        3731
##  7      2011         1316               1262        5006
##  8      2012         1359                785        4575
##  9      2013         1255                849        4242
## 10      2014         1068                755        4077
## 11      2015         1158               1160        4726
## 12      2016         1226                985        3336
## 13      2017         1095               1122        4415
## 14      2004           NA                757        3491
## 15      2000           NA                 NA           6
## 16      2001           NA                 NA        2709
## 17      2002           NA                 NA        3645
## 18      2003           NA                 NA        3456

Separating and Uniting

Separate a column

data_combined <- data_small %>%
  unite(col = "yards_year", total_yards, game_year, sep = "/") 

data_sep <- data_combined %>%
  separate(col = yards_year, into = c("total_yards", "game_year"), sep = "/")

Unite two columns

data_unite <- data_sep %>%
  unite(col = "yards_year", total_yards, game_year, sep = "/")

Missing Values

data_small %>%
  filter(name %in% c("Tom Brady", "Frank Gore", "Larry Fitzgerald")) %>%
  select(name, total_yards, game_year)
## # A tibble: 625 × 3
##    name      total_yards game_year
##    <chr>           <dbl>     <dbl>
##  1 Tom Brady          55      2001
##  2 Tom Brady         170      2001
##  3 Tom Brady          95      2001
##  4 Tom Brady         364      2001
##  5 Tom Brady         200      2001
##  6 Tom Brady         202      2001
##  7 Tom Brady         250      2001
##  8 Tom Brady         106      2001
##  9 Tom Brady         191      2001
## 10 Tom Brady         272      2001
## # ℹ 615 more rows
data_year <- data_small %>%
  group_by(name, game_year) %>%
  summarise(total_yards = sum(total_yards, na.rm = TRUE), .groups = "drop")

data_year_complete <- data_year %>%
  complete(name, game_year = full_seq(game_year, 1)) 

data_wide_complete <- data_year_complete %>%
  pivot_wider(names_from = name, values_from = total_yards)

data_wide_complete
## # A tibble: 18 × 4
##    game_year `Frank Gore` `Larry Fitzgerald` `Tom Brady`
##        <dbl>        <dbl>              <dbl>       <dbl>
##  1      2000           NA                 NA           6
##  2      2001           NA                 NA        2709
##  3      2002           NA                 NA        3645
##  4      2003           NA                 NA        3456
##  5      2004           NA                757        3491
##  6      2005          618               1370        4160
##  7      2006         1995                913        3407
##  8      2007         1423               1238        4552
##  9      2008         1345               1301          76
## 10      2009         1394               1075        4256
## 11      2010         1305               1012        3731
## 12      2011         1316               1262        5006
## 13      2012         1359                785        4575
## 14      2013         1255                849        4242
## 15      2014         1068                755        4077
## 16      2015         1158               1160        4726
## 17      2016         1226                985        3336
## 18      2017         1095               1122        4415