Import data
# Load the workbook into `data`; the path is relative to this document's folder.
data <- read_excel(path = "../00_data/my_data.xlsx")
Variation
Visualizing distributions
# Bar chart: number of rows recorded for each service.
ggplot(data, mapping = aes(x = service)) +
  geom_bar()

Typical values
# Bar chart of row counts per service, with a title and axis labels.
# The x axis maps `service`, so it is labelled "Service" (it was previously
# mislabelled as "Severity Level").
data %>%
  ggplot(aes(x = service)) +
  geom_bar() +
  labs(
    title = "Distribution of Service",
    x = "Service",
    y = "Count"
  )

Missing Values
# Keep only the rows where `diagnosed` is present; the result is printed,
# not stored, so `data` itself is left unchanged.
filter(data, !is.na(diagnosed))
## # A tibble: 450 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Active Not Classifiable 122 2006
## 6 Army Guard Penetrating 33 2006
## 7 Army Guard Severe 26 2006
## 8 Army Guard Moderate 177 2006
## 9 Army Guard Mild 1332 2006
## 10 Army Guard Not Classifiable 29 2006
## # ℹ 440 more rows
A categorical and continuous variable
# Boxplot of diagnosed counts for each severity level.
# `diagnosed` is stored as character (the tibble printout shows <chr>), so it
# must be converted to numeric first; otherwise ggplot treats y as a discrete
# axis and the boxplot statistics are meaningless.
# as.numeric() turns any non-numeric text into NA (with a warning); those rows
# are dropped so they do not trigger "removed rows" warnings in the plot.
data %>%
  mutate(diagnosed = as.numeric(diagnosed)) %>%
  filter(!is.na(diagnosed)) %>%
  ggplot(aes(x = severity, y = diagnosed)) +
  geom_boxplot() +
  labs(
    title = "Diagnosed Cases by Severity",
    x = "Severity",
    y = "Diagnosed"
  )

Two categorical variables
# Heatmap of how many rows fall into each component/severity combination;
# the tile fill encodes the count `n` produced by count().
ggplot(
  count(data, component, severity),
  aes(x = component, y = severity, fill = n)
) +
  geom_tile()

Two numeric variables
# Scatter plot of diagnosed counts against year.
# `diagnosed` is stored as character (the tibble printout shows <chr>), so it
# is converted to numeric to get a continuous y axis; left as text, every
# distinct value would become its own unordered category.
# Rows whose count is missing or not numeric become NA and are dropped.
data %>%
  mutate(diagnosed = as.numeric(diagnosed)) %>%
  filter(!is.na(diagnosed)) %>%
  ggplot(aes(x = year, y = diagnosed)) +
  geom_point() +
  labs(
    title = "Relationship Between Year and Diagnosed Cases",
    x = "Year",
    y = "Diagnosed"
  )
