Import data

# Load the workbook from the sibling data folder into `data`.
data <- read_excel(path = "../00_data/my_data.xlsx")

Variation

Visualizing distributions

# Bar chart: number of rows for each value of `service`.
ggplot(data, aes(x = service)) +
  geom_bar()

Typical values

# Bar chart of row counts per value of `service`, with a title and axis labels.
data %>%
  ggplot(aes(x = service)) +
  geom_bar() +
  labs(
    title = "Distribution of Service",
    # The x aesthetic is `service`, so the axis is labelled to match
    # (it previously read "Severity Level", which describes another column).
    x = "Service",
    y = "Count"
  )

Missing values

# Keep only the rows whose `diagnosed` value is not NA.
# The result is printed rather than assigned, so `data` itself is unchanged.
filter(data, !is.na(diagnosed))
## # A tibble: 450 × 5
##    service component severity         diagnosed  year
##    <chr>   <chr>     <chr>            <chr>     <dbl>
##  1 Army    Active    Penetrating      189        2006
##  2 Army    Active    Severe           102        2006
##  3 Army    Active    Moderate         709        2006
##  4 Army    Active    Mild             5896       2006
##  5 Army    Active    Not Classifiable 122        2006
##  6 Army    Guard     Penetrating      33         2006
##  7 Army    Guard     Severe           26         2006
##  8 Army    Guard     Moderate         177        2006
##  9 Army    Guard     Mild             1332       2006
## 10 Army    Guard     Not Classifiable 29         2006
## # ℹ 440 more rows

A categorical and continuous variable

# Mean of `diagnosed` for each severity level, drawn as bars.
# `diagnosed` is stored as character, so it is converted to numeric first;
# mean() cannot average character values. Entries that fail to convert become
# NA (with a coercion warning) and are dropped before plotting.
data %>%
  mutate(diagnosed = as.numeric(diagnosed)) %>%
  filter(!is.na(diagnosed)) %>%
  ggplot(aes(x = severity, y = diagnosed)) +
  stat_summary(fun = mean, geom = "bar")

Two categorical variables

# Stacked bar chart: row counts per `component`, with each bar split by
# `severity` through the fill colour.
data %>%
  ggplot(aes(x = component, fill = severity)) +
  geom_bar()