# Import the CSV data set (comma-delimited, not an Excel workbook) as a tibble.
# NOTE(review): the path is absolute and machine-specific; confirm whether a
# project-relative path should be used instead.
data <- read_csv(
  file = "C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.csv"
)
## Rows: 81525 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, team, position
## dbl (21): game_year, game_week, rush_att, rush_yds, rush_avg, rush_tds, rush...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the tibble to inspect its dimensions, column types, and first rows.
data
## # A tibble: 81,525 × 24
## name team game_year game_week rush_att rush_yds rush_avg rush_tds
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Duce Staley PHI 2000 1 26 201 7.7 1
## 2 Lamar Smith MIA 2000 1 27 145 5.4 1
## 3 Tiki Barber NYG 2000 1 13 144 11.1 2
## 4 Stephen Davis WAS 2000 1 23 133 5.8 1
## 5 Edgerrin James IND 2000 1 28 124 4.4 1
## 6 Priest Holmes BAL 2000 1 27 119 4.4 0
## 7 Curtis Martin NYJ 2000 1 30 110 3.7 1
## 8 Robert Smith MIN 2000 1 14 109 7.8 0
## 9 Tim Biakabutuka CAR 2000 1 15 88 5.9 0
## 10 Cade McNown CHI 2000 1 10 87 8.7 1
## # ℹ 81,515 more rows
## # ℹ 16 more variables: rush_fumbles <dbl>, rec <dbl>, rec_yds <dbl>,
## # rec_avg <dbl>, rec_tds <dbl>, rec_fumbles <dbl>, pass_att <dbl>,
## # pass_yds <dbl>, pass_tds <dbl>, int <dbl>, sck <dbl>, pass_fumbles <dbl>,
## # rate <dbl>, position <chr>, total_yards <dbl>, `total tds` <dbl>
# Bar chart of the number of rows recorded for each position.
# The identical pipeline previously appeared twice in a row and rendered the
# same figure twice; it is kept once.
data %>%
  ggplot(aes(x = position)) +
  geom_bar()
# Histogram of total_yards over every row, using bins 0.5 wide.
# NOTE(review): confirm that a 0.5 bin width suits the scale of total_yards.
ggplot(data = data, mapping = aes(x = total_yards)) +
  geom_histogram(binwidth = 0.5)

# Same histogram, restricted to rows whose total_yards is below 2000.
data |>
  filter(total_yards < 2000) |>
  ggplot(mapping = aes(x = total_yards)) +
  geom_histogram(binwidth = 0.5)
# Frequency polygons of total_yards, one coloured line per position.
# No binwidth is given, so stat_bin() falls back to its default of 30 bins.
ggplot(data = data, mapping = aes(x = total_yards, color = position)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of total_yards for rows with more than 1000 total yards,
# using bins 0.01 wide.
# NOTE(review): confirm that the > 1000 cut-off and the 0.01 bin width are
# intended for this variable.
data |>
  # Keep only rows with total_yards above 1000
  filter(total_yards > 1000) |>
  # Draw the histogram
  ggplot(mapping = aes(x = total_yards)) +
  geom_histogram(binwidth = 0.01)
# Histogram of the `total tds` column (back-ticked because the name contains a
# space), using bins 0.25 wide.
ggplot(data = data, mapping = aes(x = `total tds`)) +
  geom_histogram(binwidth = 0.25)
# Histogram of total_yards with the default binning (30 bins).
ggplot(data = data, mapping = aes(x = total_yards)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Default-binned histogram of total_yards, zoomed to counts between 0 and 50.
# coord_cartesian() only changes the visible window; it does not drop data.
ggplot(data = data, mapping = aes(x = total_yards)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter plot of total_yards against game_year, keeping only values in the
# closed range [3, 20]; everything outside that range is replaced by NA and is
# therefore dropped by geom_point() with a warning.
data |>
  mutate(
    total_yards_rev = ifelse(
      total_yards >= 3 & total_yards <= 20,
      total_yards,
      NA
    )
  ) |>
  ggplot(mapping = aes(x = game_year, y = total_yards_rev)) +
  geom_point()
## Warning: Removed 59809 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Note: we didn't do this in the CA.
# Box plots of total_yards, one box per position.
ggplot(data = data, mapping = aes(x = position, y = total_yards)) +
  geom_boxplot()
# Tile plot of how often each (game_year, total_yards) pair occurs:
# count() adds the tally as column n, which drives the tile fill.
data |>
  count(game_year, total_yards) |>
  ggplot(mapping = aes(x = game_year, y = total_yards, fill = n)) +
  geom_tile()
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.5.2
# Hexagonal-bin plot of total_yards (x) against position (y).
ggplot(data = data, mapping = aes(x = total_yards, y = position)) +
  geom_hex()
# Scatter plot of game_week (y) against total_yards (x).
ggplot(data = data, mapping = aes(x = total_yards, y = game_week)) +
  geom_point()