In this exploration, you are asked to
library("ggplot2")
#the above line loads a library that allows us to plot data
# Number of shoe sizes to draw for the sample.
N <- 32
# Shoe sizes that can be drawn: the whole sizes 6 through 14.
shoe_sizes <- 6:14
# Draw N sizes with replacement, so the same size may appear more than once.
without_outlier <- sample(x = shoe_sizes, size = N, replace = TRUE)
# One-column data frame (column `without_outlier`) for plotting.
df_without_outlier <- data.frame(without_outlier = without_outlier)
# The same sample with one extreme value appended at the end.
outlier <- 100
with_outlier <- append(without_outlier, outlier)
# One-column data frame (column `with_outlier`) for plotting.
df_with_outlier <- data.frame(with_outlier = with_outlier)
# Histogram of the sampled shoe sizes (one bin per size) with a blue dashed
# vertical line drawn at the sample mean.
ggplot(data = df_without_outlier, mapping = aes(x = without_outlier)) +
  geom_histogram(binwidth = 1, fill = "white", color = "black") +
  geom_vline(
    mapping = aes(xintercept = mean(without_outlier)),
    linetype = "dashed", color = "blue", size = 2
  ) +
  labs(
    title = "Shoe Sizes",
    subtitle = "Exploring Means",
    caption = "Math 32",
    x = "shoe sizes"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
In the code block below,
- eval parameter for the code block to TRUE
- ggplot line
- geom_vline layer that
# Histogram of the sample that includes the outlier, with two reference
# lines so the mean and the median can be compared on the same plot:
#   blue dashed line = mean, red dotted line = median.
ggplot(df_with_outlier, aes(x = with_outlier)) +
  geom_histogram(binwidth = 1, color = "black", fill = "white") +
  geom_vline(aes(xintercept = mean(with_outlier)),
             color = "blue", linetype = "dashed", size = 2) +
  # The red dotted layer marks the median; drawing the mean a second time
  # would only place a second line exactly on top of the blue one.
  geom_vline(aes(xintercept = median(with_outlier)),
             color = "red", linetype = "dotted", size = 2) +
  labs(title = "Shoe Sizes", subtitle = "Exploring Means",
       caption = "Humberto Flores", x = "shoe sizes") +
  theme_minimal() +
  theme(legend.position = "none")
# [assignment text] median without and with the outlier.
# Histogram of the sample without the outlier, with a red dotted vertical
# line drawn at the sample median.
ggplot(df_without_outlier, aes(x = without_outlier)) +
  geom_histogram(binwidth = 1, color = "black", fill = "white") +
  geom_vline(aes(xintercept = median(without_outlier)),
             color = "red", linetype = "dotted", size = 2) +
  labs(title = "Shoe Sizes", subtitle = "Exploring Medians",
       caption = "Math 32", x = "shoe sizes") +
  theme_minimal() +
  theme(legend.position = "none")
# Histogram of the sample that includes the outlier, with a red dotted
# vertical line drawn at the sample median.
ggplot(data = df_with_outlier, mapping = aes(x = with_outlier)) +
  geom_histogram(binwidth = 1, fill = "white", color = "black") +
  geom_vline(
    mapping = aes(xintercept = median(with_outlier)),
    linetype = "dotted", color = "red", size = 2
  ) +
  labs(
    title = "Shoe Sizes",
    subtitle = "Exploring Medians",
    caption = "Humberto Flores",
    x = "shoe sizes"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# How far the outlier moves the mean (with outlier minus without outlier).
# Plain subtraction is used because diff(a, b) does not compute b - a: its
# second argument is the lag, and on a length-1 input it returns numeric(0).
mean(with_outlier) - mean(without_outlier)
## (output depends on the random sample; re-knit to refresh)
# How far the outlier moves the median (with outlier minus without outlier).
median(with_outlier) - median(without_outlier)
## (output depends on the random sample; re-knit to refresh)
Knit your work as either an HTML or PDF document, and upload that document back into the CatCourses assignment.