# Load the Olympics workbook and convert the age, weight and height columns
# to numeric in one across() call.
# NOTE(review): presumably these columns arrive as text because the sheet
# stores missing values as the string "NA" -- confirm against the data.
olympics <- read_excel("../02_module5/data/myData.xlsx") %>%
  mutate(across(c(age, weight, height), as.numeric))
In this analysis I explore the Olympics dataset, which contains information on Olympic athletes such as their age, weight, and sport, as well as which medals they won.
Variation is the tendency of a variable's values to differ from one observation to the next. In this dataset, athlete age and weight vary a lot depending on the sport they compete in.
## The bars look uneven, but the medals are awarded in roughly equal numbers.
# Bar chart of the number of rows per medal value, leaving out rows whose
# medal is the string "NA".
ggplot(
  data = filter(olympics, medal != "NA"),
  mapping = aes(x = medal)
) +
  geom_bar()
# Distribution of age across all rows, drawn with bins of width 1.
ggplot(data = olympics, mapping = aes(x = age)) +
  geom_histogram(binwidth = 1)
# Age histogram (bin width 1) restricted to rows with age below 50.
ggplot(
  data = filter(olympics, age < 50),
  mapping = aes(x = age)
) +
  geom_histogram(binwidth = 1)
# Frequency polygons of age, one coloured line per medal value, for rows
# whose medal is not the string "NA".
# binwidth is set explicitly: without it ggplot2 falls back to 30 bins and
# prints a "pick better value" message. A width of 1 matches the age
# histograms elsewhere in this file.
olympics %>%
  filter(medal != "NA") %>%
  ggplot(mapping = aes(x = age, color = medal)) +
  geom_freqpoly(binwidth = 1)
# Histogram of age (bin width 1) for rows with age under 50.
# NOTE(review): this repeats the earlier `filter(age < 50)` histogram in this
# file -- confirm whether both copies are needed.
under_50 <- olympics$age < 50
olympics %>%
  filter(age < 50) %>%
  ggplot(aes(age)) +
  geom_histogram(binwidth = 1)
# Age histogram with the y-axis limited to counts 0-50 so that sparsely
# populated bins become visible. coord_cartesian() only zooms the view; the
# bins are still computed from all rows.
# binwidth is set explicitly: without it ggplot2 falls back to 30 bins and
# prints a "pick better value" message. A width of 1 matches the other age
# histograms in this file.
olympics %>%
  ggplot(aes(x = age)) +
  geom_histogram(binwidth = 1) +
  coord_cartesian(ylim = c(0, 50))
# Scatter plot of weight against age. Ages below 10 or above 80 are replaced
# with NA (the rows are kept, only the age value is blanked).
olympics %>%
  mutate(age = ifelse(age >= 10 & age <= 80, age, NA)) %>%
  ggplot(mapping = aes(x = age, y = weight)) +
  geom_point()
# Box plot of age for each medal value. No medal filter is applied here, so
# every value present in the medal column gets its own box.
ggplot(data = olympics, mapping = aes(x = medal, y = age)) +
  geom_boxplot()
# Heat map of row counts for every sport/medal combination, excluding rows
# whose medal is the string "NA". count() supplies the `n` column used for
# the fill.
filter(olympics, medal != "NA") %>%
  count(sport, medal) %>%
  ggplot(mapping = aes(x = medal, y = sport, fill = n)) +
  geom_tile() +
  # Shrink the y-axis labels so the sport names fit.
  theme(axis.text.y = element_text(size = 4))
# Hexagonal-bin plot of weight against age.
ggplot(data = olympics, mapping = aes(x = age, y = weight)) +
  geom_hex()
# Box plots of weight, one per age band; cut_width() splits age into bands
# of width 5.
ggplot(data = olympics, mapping = aes(x = age, y = weight)) +
  geom_boxplot(mapping = aes(group = cut_width(age, 5)))