Import Data

data <- read_excel("../00_data/my_data.xlsx")

Apply the following dplyr verbs to your data

Filter rows

data %>%
  filter(severity != "Not Classifiable")
## # A tibble: 360 × 5
##    service component severity    diagnosed  year
##    <chr>   <chr>     <chr>       <chr>     <dbl>
##  1 Army    Active    Penetrating 189        2006
##  2 Army    Active    Severe      102        2006
##  3 Army    Active    Moderate    709        2006
##  4 Army    Active    Mild        5896       2006
##  5 Army    Guard     Penetrating 33         2006
##  6 Army    Guard     Severe      26         2006
##  7 Army    Guard     Moderate    177        2006
##  8 Army    Guard     Mild        1332       2006
##  9 Army    Reserve   Penetrating 12         2006
## 10 Army    Reserve   Severe      11         2006
## # ℹ 350 more rows

Arrange rows

data %>% 
   arrange(desc(severity))
## # A tibble: 450 × 5
##    service   component severity diagnosed  year
##    <chr>     <chr>     <chr>    <chr>     <dbl>
##  1 Army      Active    Severe   102        2006
##  2 Army      Guard     Severe   26         2006
##  3 Army      Reserve   Severe   11         2006
##  4 Navy      Active    Severe   28         2006
##  5 Navy      Reserve   Severe   1          2006
##  6 Air Force Active    Severe   24         2006
##  7 Air Force Guard     Severe   4          2006
##  8 Air Force Reserve   Severe   2          2006
##  9 Marines   Active    Severe   28         2006
## 10 Marines   Reserve   Severe   5          2006
## # ℹ 440 more rows

Select columns

select(data, service, severity, diagnosed)
## # A tibble: 450 × 3
##    service severity         diagnosed
##    <chr>   <chr>            <chr>    
##  1 Army    Penetrating      189      
##  2 Army    Severe           102      
##  3 Army    Moderate         709      
##  4 Army    Mild             5896     
##  5 Army    Not Classifiable 122      
##  6 Army    Penetrating      33       
##  7 Army    Severe           26       
##  8 Army    Moderate         177      
##  9 Army    Mild             1332     
## 10 Army    Not Classifiable 29       
## # ℹ 440 more rows

Add columns

data %>%
  mutate(severity_level = ifelse(severity == "Mild", "Low", "High"))
## # A tibble: 450 × 6
##    service component severity         diagnosed  year severity_level
##    <chr>   <chr>     <chr>            <chr>     <dbl> <chr>         
##  1 Army    Active    Penetrating      189        2006 High          
##  2 Army    Active    Severe           102        2006 High          
##  3 Army    Active    Moderate         709        2006 High          
##  4 Army    Active    Mild             5896       2006 Low           
##  5 Army    Active    Not Classifiable 122        2006 High          
##  6 Army    Guard     Penetrating      33         2006 High          
##  7 Army    Guard     Severe           26         2006 High          
##  8 Army    Guard     Moderate         177        2006 High          
##  9 Army    Guard     Mild             1332       2006 Low           
## 10 Army    Guard     Not Classifiable 29         2006 High          
## # ℹ 440 more rows

Summarize by groups

data %>%
  # Ensure diagnosed is numeric
  mutate(diagnosed = as.numeric(diagnosed)) %>%
  
  # Group by service
  group_by(service) %>%
  
  # Calculate average diagnoses
  summarise(avg_diagnoses = mean(diagnosed, na.rm = TRUE)) %>%
  
  # Sort it
  arrange(desc(avg_diagnoses))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `diagnosed = as.numeric(diagnosed)`.
## Caused by warning:
## ! NAs introduced by coercion
## # A tibble: 4 × 2
##   service   avg_diagnoses
##   <chr>             <dbl>
## 1 Army              1133.
## 2 Marines            380.
## 3 Navy               312.
## 4 Air Force          229.