## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
This is raw data, with no manipulation.
This is the same unmanipulated data, but plotted with a different binwidth.
This is the same data, except that every value above a fixed threshold (the outliers) has been grouped into a single bucket.
This is an illustration where we have adjusted the data for outliers. We have also modified the binwidth, which here is overly wide. Below, you will find a more readable example where the binwidth is calculated from the interquartile range and the population size.
# Base-graphics histogram of the spanCount column; breaks = "FD" asks hist()
# to pick the bins with the Freedman-Diaconis rule.
hist(hist_data$spanCount, breaks = "FD")
# Add spanCountNew: spanCount capped at 10, so every value above 10 falls
# into the same final bucket. The original spanCount column is left as is.
# The column is referenced by its bare name so mutate() resolves it through
# its data mask rather than reaching back into the outer hist_data object.
hist_data <- mutate(hist_data, spanCountNew = pmin(spanCount, 10))
# Freedman-Diaconis bin width, 2 * IQR / n^(1/3), computed from the column
# that is actually plotted (spanCountNew) so the bins fit the capped data.
# Missing values are left out of both the IQR and the count n, because
# IQR() stops with an error on NA unless na.rm = TRUE.
bw <- 2 * IQR(hist_data$spanCountNew, na.rm = TRUE) /
  sum(!is.na(hist_data$spanCountNew))^(1 / 3)

# Histogram of the capped values drawn with that bin width.
hist_data %>%
  ggplot(aes(spanCountNew)) +
  geom_histogram(binwidth = bw, colour = "black", fill = "blue")
# Base-graphics histogram of the spanCount column using hist()'s default
# break selection.
hist(hist_data$spanCount)