# Import the workbook from the sibling data directory (path is relative to
# the working directory) and auto-print the resulting tibble.
data <- read_excel(path = "../00_data/my_data.xlsx")
data
## # A tibble: 450 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Active Not Classifiable 122 2006
## 6 Army Guard Penetrating 33 2006
## 7 Army Guard Severe 26 2006
## 8 Army Guard Moderate 177 2006
## 9 Army Guard Mild 1332 2006
## 10 Army Guard Not Classifiable 29 2006
## # ℹ 440 more rows
# Fix the RNG seed so the random draw below is reproducible.
set.seed(seed = 123456)

# Keep only the three columns of interest, then draw five rows at random.
data_small <- sample_n(
  select(data, service, severity, diagnosed),
  size = 5
)
data_small
## # A tibble: 5 × 3
## service severity diagnosed
## <chr> <chr> <chr>
## 1 Army Not Classifiable 50
## 2 Air Force Mild 344
## 3 Marines Severe 28
## 4 Navy Mild 2637
## 5 Air Force Penetrating 25
# Pull the severity column out of the sample as a plain character vector.
data_small[["severity"]]
## [1] "Not Classifiable" "Mild" "Severe" "Mild"
## [5] "Penetrating"
# Element-wise flag: does each severity value contain the pattern "Mild"?
data_small$severity %>%
  str_detect(pattern = "Mild")
## [1] FALSE TRUE FALSE TRUE FALSE
# Count the matches: summing a logical vector counts its TRUE entries.
data_small$severity %>%
  str_detect(pattern = "Mild") %>%
  sum()
## [1] 2
# Same count as above, computed inside a data-frame summary so the result
# comes back as a one-row tibble with a single column, num_Mild.
summarise(
  data_small,
  num_Mild = sum(str_detect(string = severity, pattern = "Mild"))
)
## # A tibble: 1 × 1
## num_Mild
## <int>
## 1 2
# Extract the matched text "Mild" into a new column (NA where the pattern is
# absent), drop the non-matching rows, and show the original value next to
# the extracted one.
data_small %>%
  mutate(col_Mild = str_extract(string = severity, pattern = "Mild")) %>%
  filter(!is.na(col_Mild)) %>%
  select(severity, col_Mild)
## # A tibble: 2 × 2
## severity col_Mild
## <chr> <chr>
## 1 Mild Mild
## 2 Mild Mild
# Build a side-by-side comparison: the original severity next to a copy in
# which the first occurrence of "Mild" is rewritten as "Moderate". Values
# without a match pass through unchanged.
data_small %>%
  select(severity) %>%
  mutate(
    col_Moderate = str_replace(
      string = severity,
      pattern = "Mild",
      replacement = "Moderate"
    )
  )
## # A tibble: 5 × 2
## severity col_Moderate
## <chr> <chr>
## 1 Not Classifiable Not Classifiable
## 2 Mild Moderate
## 3 Severe Severe
## 4 Mild Moderate
## 5 Penetrating Penetrating