# Read the workbook into `data`. The first column has no header in the file,
# so it is auto-named `...1` on import (see the "New names" message below).
data <- read_excel("../00_data/myData.xlsx")
## New names:
## • `` -> `...1`
# Print the tibble to inspect its dimensions and the type of each column.
data
## # A tibble: 4,810 × 24
## ...1 rank position hand player years total…¹ status yr_st…² season age
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 1 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 2 2 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 3 3 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 4 4 1 C Left Wayne G… 1979… 894 Retir… 1979 1979-… 19
## 5 5 1 C Left Wayne G… 1979… 894 Retir… 1979 1980-… 20
## 6 6 1 C Left Wayne G… 1979… 894 Retir… 1979 1981-… 21
## 7 7 1 C Left Wayne G… 1979… 894 Retir… 1979 1982-… 22
## 8 8 1 C Left Wayne G… 1979… 894 Retir… 1979 1983-… 23
## 9 9 1 C Left Wayne G… 1979… 894 Retir… 1979 1984-… 24
## 10 10 1 C Left Wayne G… 1979… 894 Retir… 1979 1985-… 25
## # … with 4,800 more rows, 13 more variables: team <chr>, league <chr>,
## # season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## # plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## # goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## # headshot <chr>, and abbreviated variable names ¹​total_goals, ²​yr_start
# Bar chart: number of rows observed at each value of age.
ggplot(data, aes(x = age)) +
  geom_bar()
# Histogram of age for rows with age below 41, using bins of width 0.5.
ggplot(filter(data, age < 41), aes(x = age)) +
  geom_histogram(binwidth = 0.5)
# Histogram of goals with the default binning (ggplot2 reports the bin count).
ggplot(data = data, mapping = aes(x = goals)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same goals histogram, zoomed to counts between 0 and 400 so short bars are
# visible. coord_cartesian() only changes the viewport; no rows are dropped.
ggplot(data = data, mapping = aes(x = goals)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 400))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Scatter plot of age against goals with unusual ages treated as missing.
# Ages of 41 and above (the complement of the `age < 41` filter used for the
# histogram above) are replaced with NA rather than dropping the whole row.
# The replacement must overwrite `age` itself, because `age` is the column
# mapped to the y axis; writing it to a separate column leaves the plot
# unchanged. geom_point() drops the NA rows and warns about how many it removed.
data %>%
  mutate(age = ifelse(age >= 41, NA, age)) %>%
  ggplot(aes(x = goals, y = age)) +
  geom_point()
# Horizontal boxplots: distribution of goals within each position.
ggplot(data, aes(x = goals, y = position)) +
  geom_boxplot()
# Tile plot of how often each position/hand combination occurs; the fill
# colour encodes the row count `n` produced by count().
ggplot(count(data, position, hand), aes(x = position, y = hand)) +
  geom_tile(aes(fill = n))
library(hexbin)
# Hexagonal-bin plot of assists against goals; each hexagon is shaded by the
# number of rows that fall inside it.
ggplot(data, aes(x = goals, y = assists)) +
  geom_hex()
I could not get this one to work: it says I have NA values in my y category, and when I tried to filter them out, the error did not change.