Import your data

# Load the spreadsheet from the project's data folder into `data`.
data <- read_excel(path = "../00_data/my_data.xlsx")
# Show the imported tibble so its columns and types can be checked.
print(data)
## # A tibble: 450 × 5
##    service component severity         diagnosed  year
##    <chr>   <chr>     <chr>            <chr>     <dbl>
##  1 Army    Active    Penetrating      189        2006
##  2 Army    Active    Severe           102        2006
##  3 Army    Active    Moderate         709        2006
##  4 Army    Active    Mild             5896       2006
##  5 Army    Active    Not Classifiable 122        2006
##  6 Army    Guard     Penetrating      33         2006
##  7 Army    Guard     Severe           26         2006
##  8 Army    Guard     Moderate         177        2006
##  9 Army    Guard     Mild             1332       2006
## 10 Army    Guard     Not Classifiable 29         2006
## # ℹ 440 more rows
# Fix the RNG seed so the random draw below is reproducible.
set.seed(123456)
# Keep the three columns used in the string exercises and draw 5 random rows.
# slice_sample() is used because dplyr has superseded sample_n().
data_small <- data %>%
    select(service, severity, diagnosed) %>%
    slice_sample(n = 5)
# Print the sampled rows.
data_small
## # A tibble: 5 × 3
##   service   severity         diagnosed
##   <chr>     <chr>            <chr>    
## 1 Army      Not Classifiable 50       
## 2 Air Force Mild             344      
## 3 Marines   Severe           28       
## 4 Navy      Mild             2637     
## 5 Air Force Penetrating      25

Chapter 14

Tools

Detect matches

# Print the severity column as a plain character vector.
data_small[["severity"]]
## [1] "Not Classifiable" "Mild"             "Severe"           "Mild"            
## [5] "Penetrating"
# One TRUE/FALSE per element: does the severity value contain "Mild"?
str_detect(data_small[["severity"]], pattern = "Mild")
## [1] FALSE  TRUE FALSE  TRUE FALSE
# Count the matches: summing the logical vector counts its TRUE values.
data_small$severity %>%
    str_detect("Mild") %>%
    sum()
## [1] 2
# Same count of "Mild" matches, computed inside summarise() so the result
# comes back as a one-row tibble with a num_Mild column.
summarise(data_small, num_Mild = sum(str_detect(severity, "Mild")))
## # A tibble: 1 × 1
##   num_Mild
##      <int>
## 1        2

Extract matches

# Put the matched text "Mild" from severity into col_Mild, then drop the rows
# where col_Mild is NA so only rows with a match remain.
data_small %>%
    select(severity) %>%
    mutate(col_Mild = str_extract(severity, "Mild")) %>%
    filter(!is.na(col_Mild))
## # A tibble: 2 × 2
##   severity col_Mild
##   <chr>    <chr>   
## 1 Mild     Mild    
## 2 Mild     Mild

Replacing matches

# Build col_Moderate from severity with "Mild" replaced by "Moderate";
# the original severity column is kept alongside for comparison.
data_small %>%
    select(severity) %>%
    mutate(col_Moderate = str_replace(severity, "Mild", "Moderate"))
## # A tibble: 5 × 2
##   severity         col_Moderate    
##   <chr>            <chr>           
## 1 Not Classifiable Not Classifiable
## 2 Mild             Moderate        
## 3 Severe           Severe          
## 4 Mild             Moderate        
## 5 Penetrating      Penetrating