Import Data
data <- read_excel("../00_data/my_data.xlsx")
Apply the following dplyr verbs to your data
Filter rows
data %>%
filter(severity != "Not Classifiable")
## # A tibble: 360 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Guard Penetrating 33 2006
## 6 Army Guard Severe 26 2006
## 7 Army Guard Moderate 177 2006
## 8 Army Guard Mild 1332 2006
## 9 Army Reserve Penetrating 12 2006
## 10 Army Reserve Severe 11 2006
## # ℹ 350 more rows
Arrange rows
data %>%
arrange(desc(severity))
## # A tibble: 450 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Severe 102 2006
## 2 Army Guard Severe 26 2006
## 3 Army Reserve Severe 11 2006
## 4 Navy Active Severe 28 2006
## 5 Navy Reserve Severe 1 2006
## 6 Air Force Active Severe 24 2006
## 7 Air Force Guard Severe 4 2006
## 8 Air Force Reserve Severe 2 2006
## 9 Marines Active Severe 28 2006
## 10 Marines Reserve Severe 5 2006
## # ℹ 440 more rows
Select columns
select(data, service, severity, diagnosed)
## # A tibble: 450 × 3
## service severity diagnosed
## <chr> <chr> <chr>
## 1 Army Penetrating 189
## 2 Army Severe 102
## 3 Army Moderate 709
## 4 Army Mild 5896
## 5 Army Not Classifiable 122
## 6 Army Penetrating 33
## 7 Army Severe 26
## 8 Army Moderate 177
## 9 Army Mild 1332
## 10 Army Not Classifiable 29
## # ℹ 440 more rows
Add columns
data %>%
mutate(severity_level = ifelse(severity == "Mild", "Low", "High"))
## # A tibble: 450 × 6
## service component severity diagnosed year severity_level
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 Army Active Penetrating 189 2006 High
## 2 Army Active Severe 102 2006 High
## 3 Army Active Moderate 709 2006 High
## 4 Army Active Mild 5896 2006 Low
## 5 Army Active Not Classifiable 122 2006 High
## 6 Army Guard Penetrating 33 2006 High
## 7 Army Guard Severe 26 2006 High
## 8 Army Guard Moderate 177 2006 High
## 9 Army Guard Mild 1332 2006 Low
## 10 Army Guard Not Classifiable 29 2006 High
## # ℹ 440 more rows
Summarize by groups
data %>%
# Ensure diagnosed is numeric
mutate(diagnosed = as.numeric(diagnosed)) %>%
# Group by service
group_by(service) %>%
# Calculate average diagnoses
summarise(avg_diagnoses = mean(diagnosed, na.rm = TRUE)) %>%
# Sort it
arrange(desc(avg_diagnoses))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `diagnosed = as.numeric(diagnosed)`.
## Caused by warning:
## ! NAs introduced by coercion
## # A tibble: 4 × 2
## service avg_diagnoses
## <chr> <dbl>
## 1 Army 1133.
## 2 Marines 380.
## 3 Navy 312.
## 4 Air Force 229.