Import your data
# excel file
data <- read_csv("C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.csv")
## Rows: 81525 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, team, position
## dbl (21): game_year, game_week, rush_att, rush_yds, rush_avg, rush_tds, rush...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data
## # A tibble: 81,525 × 24
## name team game_year game_week rush_att rush_yds rush_avg rush_tds
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Duce Staley PHI 2000 1 26 201 7.7 1
## 2 Lamar Smith MIA 2000 1 27 145 5.4 1
## 3 Tiki Barber NYG 2000 1 13 144 11.1 2
## 4 Stephen Davis WAS 2000 1 23 133 5.8 1
## 5 Edgerrin James IND 2000 1 28 124 4.4 1
## 6 Priest Holmes BAL 2000 1 27 119 4.4 0
## 7 Curtis Martin NYJ 2000 1 30 110 3.7 1
## 8 Robert Smith MIN 2000 1 14 109 7.8 0
## 9 Tim Biakabutuka CAR 2000 1 15 88 5.9 0
## 10 Cade McNown CHI 2000 1 10 87 8.7 1
## # ℹ 81,515 more rows
## # ℹ 16 more variables: rush_fumbles <dbl>, rec <dbl>, rec_yds <dbl>,
## # rec_avg <dbl>, rec_tds <dbl>, rec_fumbles <dbl>, pass_att <dbl>,
## # pass_yds <dbl>, pass_tds <dbl>, int <dbl>, sck <dbl>, pass_fumbles <dbl>,
## # rate <dbl>, position <chr>, total_yards <dbl>, `total tds` <dbl>
data_small <- data %>%
filter(name %in% c("Tom Brady", "Frank Gore", "Larry Fitzgerald")) %>%
select(name, total_yards, game_year)
Separating and Uniting
Separate a column
data_combined <- data_small %>%
unite(col = "yards_year", total_yards, game_year, sep = "/")
data_sep <- data_combined %>%
separate(col = yards_year, into = c("total_yards", "game_year"), sep = "/")
Unite two columns
data_unite <- data_sep %>%
unite(col = "yards_year", total_yards, game_year, sep = "/")
Missing Values
data_small %>%
filter(name %in% c("Tom Brady", "Frank Gore", "Larry Fitzgerald")) %>%
select(name, total_yards, game_year)
## # A tibble: 625 × 3
## name total_yards game_year
## <chr> <dbl> <dbl>
## 1 Tom Brady 55 2001
## 2 Tom Brady 170 2001
## 3 Tom Brady 95 2001
## 4 Tom Brady 364 2001
## 5 Tom Brady 200 2001
## 6 Tom Brady 202 2001
## 7 Tom Brady 250 2001
## 8 Tom Brady 106 2001
## 9 Tom Brady 191 2001
## 10 Tom Brady 272 2001
## # ℹ 615 more rows
data_year <- data_small %>%
group_by(name, game_year) %>%
summarise(total_yards = sum(total_yards, na.rm = TRUE), .groups = "drop")
data_year_complete <- data_year %>%
complete(name, game_year = full_seq(game_year, 1))
data_wide_complete <- data_year_complete %>%
pivot_wider(names_from = name, values_from = total_yards)
data_wide_complete
## # A tibble: 18 × 4
## game_year `Frank Gore` `Larry Fitzgerald` `Tom Brady`
## <dbl> <dbl> <dbl> <dbl>
## 1 2000 NA NA 6
## 2 2001 NA NA 2709
## 3 2002 NA NA 3645
## 4 2003 NA NA 3456
## 5 2004 NA 757 3491
## 6 2005 618 1370 4160
## 7 2006 1995 913 3407
## 8 2007 1423 1238 4552
## 9 2008 1345 1301 76
## 10 2009 1394 1075 4256
## 11 2010 1305 1012 3731
## 12 2011 1316 1262 5006
## 13 2012 1359 785 4575
## 14 2013 1255 849 4242
## 15 2014 1068 755 4077
## 16 2015 1158 1160 4726
## 17 2016 1226 985 3336
## 18 2017 1095 1122 4415