Import data
# Load the workbook from the sibling data directory into `data`.
data <- read_excel(path = "../00_data/my_data.xlsx")
Variation
Visualizing distributions
# Bar chart counting the number of rows for each value of `service`.
ggplot(data = data, mapping = aes(x = service)) +
  geom_bar()

Typical values
# Bar chart of row counts per `service`, with descriptive labels.
# The x axis shows `service`, so it is labelled "Service" (it was previously
# mislabelled "Severity Level", which contradicted both the mapping and title).
data %>%
  ggplot(aes(x = service)) +
  geom_bar() +
  labs(
    title = "Distribution of Service",
    x = "Service",
    y = "Count"
  )

Missing values
# Keep only the rows where `diagnosed` is present (drop NA entries).
# The result is printed, not assigned, so `data` itself is left unchanged.
filter(data, !is.na(diagnosed))
## # A tibble: 450 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Active Not Classifiable 122 2006
## 6 Army Guard Penetrating 33 2006
## 7 Army Guard Severe 26 2006
## 8 Army Guard Moderate 177 2006
## 9 Army Guard Mild 1332 2006
## 10 Army Guard Not Classifiable 29 2006
## # ℹ 440 more rows
A categorical and continuous variable
# Mean number diagnosed for each severity level, drawn as bars.
# `diagnosed` is imported as a character column (the tibble printout shows
# <chr>), so averaging it directly does not give a mean of the counts. Convert
# it to numeric first and drop rows that are missing or fail to parse.
# NOTE(review): as.numeric() will warn if any entries are non-numeric text —
# confirm that treating those as missing is the intended handling.
data %>%
  mutate(diagnosed = as.numeric(diagnosed)) %>%
  filter(!is.na(diagnosed)) %>%
  ggplot(aes(x = severity, y = diagnosed)) +
  stat_summary(fun = mean, geom = "bar")

Two categorical variables
# Count rows per `component`, with each bar's fill split by `severity`.
data %>%
  ggplot(mapping = aes(x = component, fill = severity)) +
  geom_bar()
