Import your data

library(readxl)
# Read the Excel workbook into a tibble, then auto-print it for a quick look.
data <- read_excel(path = "../00_data/data/myData.xlsx")
data
## # A tibble: 9,355 × 12
##    work_year job_title    job_category      salary_currency salary salary_in_usd
##        <dbl> <chr>        <chr>             <chr>            <dbl>         <dbl>
##  1      2023 AI Architect Machine Learning… USD             305100        305100
##  2      2023 AI Architect Machine Learning… USD             146900        146900
##  3      2023 AI Architect Machine Learning… USD             330000        330000
##  4      2023 AI Architect Machine Learning… USD             204000        204000
##  5      2023 AI Architect Machine Learning… USD             330000        330000
##  6      2023 AI Architect Machine Learning… USD             204000        204000
##  7      2023 AI Architect Machine Learning… EUR             200000        215936
##  8      2023 AI Architect Machine Learning… USD             330000        330000
##  9      2023 AI Architect Machine Learning… USD             204000        204000
## 10      2023 AI Architect Machine Learning… USD             200000        200000
## # ℹ 9,345 more rows
## # ℹ 6 more variables: employee_residence <chr>, experience_level <chr>,
## #   employment_type <chr>, work_setting <chr>, company_location <chr>,
## #   company_size <chr>

Chapter 14

Tools

Detect matches

# Peek at the first ten job categories.
data[["job_category"]][seq_len(10)]
##  [1] "Machine Learning and AI" "Machine Learning and AI"
##  [3] "Machine Learning and AI" "Machine Learning and AI"
##  [5] "Machine Learning and AI" "Machine Learning and AI"
##  [7] "Machine Learning and AI" "Machine Learning and AI"
##  [9] "Machine Learning and AI" "Machine Learning and AI"
# Flag, for the first ten rows, whether the job category contains "AI".
# NOTE(review): str_detect() and %>% are not provided by readxl, the only
# package attached in the visible code; presumably stringr/dplyr (tidyverse)
# are attached in a chunk not shown here -- confirm.
str_detect(string = data[["job_category"]], pattern = "AI")[seq_len(10)]
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Count the rows whose job category contains "AI" (each TRUE sums as 1).
data$job_category %>%
    str_detect("AI") %>%
    sum()
## [1] 1428
# Count categories containing "AI" inside summarise(), so the result comes
# back as a one-row tibble rather than a bare number.
summarise(
    data,
    num_AIjobs = sum(str_detect(job_category, pattern = "AI"))
)
## # A tibble: 1 × 1
##   num_AIjobs
##        <int>
## 1       1428

Extract matches

# Pull the literal match "AI" out of each job category (NA when there is no
# match) and keep only the rows where something was extracted.
data %>%
    select(job_category) %>%
    mutate(col_AIjobs = str_extract(job_category, pattern = "AI")) %>%
    filter(!is.na(col_AIjobs))
## # A tibble: 1,428 × 2
##    job_category            col_AIjobs
##    <chr>                   <chr>     
##  1 Machine Learning and AI AI        
##  2 Machine Learning and AI AI        
##  3 Machine Learning and AI AI        
##  4 Machine Learning and AI AI        
##  5 Machine Learning and AI AI        
##  6 Machine Learning and AI AI        
##  7 Machine Learning and AI AI        
##  8 Machine Learning and AI AI        
##  9 Machine Learning and AI AI        
## 10 Machine Learning and AI AI        
## # ℹ 1,418 more rows

Replacing matches

# Replace the first occurrence of "Senior" or "Mid-level" in experience_level
# with "Sr"; values matching neither alternative pass through unchanged.
# NOTE(review): both levels collapse to the same label "Sr", and
# experience_level itself is not selected next to the result, so the two can
# no longer be told apart in the output -- confirm this is intended.
data %>%
    mutate(
        col_abbreviation = str_replace(
            experience_level,
            pattern = "Senior|Mid-level",
            replacement = "Sr"
        )
    ) %>%
    select(job_category, col_abbreviation)
## # A tibble: 9,355 × 2
##    job_category            col_abbreviation
##    <chr>                   <chr>           
##  1 Machine Learning and AI Sr              
##  2 Machine Learning and AI Sr              
##  3 Machine Learning and AI Sr              
##  4 Machine Learning and AI Sr              
##  5 Machine Learning and AI Sr              
##  6 Machine Learning and AI Sr              
##  7 Machine Learning and AI Executive       
##  8 Machine Learning and AI Sr              
##  9 Machine Learning and AI Sr              
## 10 Machine Learning and AI Sr              
## # ℹ 9,345 more rows