library(readxl)
# Read the salary data set from the Excel workbook (the path is relative to
# the current working directory) and print a preview of the resulting tibble.
# NOTE(review): the statements further down use `%>%`, dplyr verbs and
# stringr helpers, but only readxl is attached in the visible part of this
# file -- confirm those packages are loaded before this point.
data <- read_excel(path = "../00_data/data/myData.xlsx")
data
## # A tibble: 9,355 × 12
## work_year job_title job_category salary_currency salary salary_in_usd
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2023 AI Architect Machine Learning… USD 305100 305100
## 2 2023 AI Architect Machine Learning… USD 146900 146900
## 3 2023 AI Architect Machine Learning… USD 330000 330000
## 4 2023 AI Architect Machine Learning… USD 204000 204000
## 5 2023 AI Architect Machine Learning… USD 330000 330000
## 6 2023 AI Architect Machine Learning… USD 204000 204000
## 7 2023 AI Architect Machine Learning… EUR 200000 215936
## 8 2023 AI Architect Machine Learning… USD 330000 330000
## 9 2023 AI Architect Machine Learning… USD 204000 204000
## 10 2023 AI Architect Machine Learning… USD 200000 200000
## # ℹ 9,345 more rows
## # ℹ 6 more variables: employee_residence <chr>, experience_level <chr>,
## # employment_type <chr>, work_setting <chr>, company_location <chr>,
## # company_size <chr>
# Peek at the first ten job categories.
data$job_category[1:10]
## [1] "Machine Learning and AI" "Machine Learning and AI"
## [3] "Machine Learning and AI" "Machine Learning and AI"
## [5] "Machine Learning and AI" "Machine Learning and AI"
## [7] "Machine Learning and AI" "Machine Learning and AI"
## [9] "Machine Learning and AI" "Machine Learning and AI"
# Flag which of the first ten job categories contain the substring "AI".
# str_detect() is element-wise, so subsetting first gives the same result.
str_detect(data$job_category[1:10], "AI")
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Count the rows whose job category contains "AI".
# str_detect() returns NA for a missing category; na.rm = TRUE keeps a single
# NA from turning the whole count into NA.
sum(str_detect(data$job_category, "AI"), na.rm = TRUE)
## [1] 1428
# Same count as a one-row summary tibble.
# na.rm = TRUE so that a missing job_category is skipped instead of making
# num_AIjobs NA.
data %>%
  summarise(num_AIjobs = sum(str_detect(job_category, "AI"), na.rm = TRUE))
## # A tibble: 1 × 1
## num_AIjobs
## <int>
## 1 1428
# Show each job category next to the "AI" match extracted from it, keeping
# only the rows where a match was found (str_extract() yields NA otherwise).
data %>%
  select(job_category) %>%
  mutate(col_AIjobs = str_extract(job_category, "AI")) %>%
  filter(!is.na(col_AIjobs))
## # A tibble: 1,428 × 2
## job_category col_AIjobs
## <chr> <chr>
## 1 Machine Learning and AI AI
## 2 Machine Learning and AI AI
## 3 Machine Learning and AI AI
## 4 Machine Learning and AI AI
## 5 Machine Learning and AI AI
## 6 Machine Learning and AI AI
## 7 Machine Learning and AI AI
## 8 Machine Learning and AI AI
## 9 Machine Learning and AI AI
## 10 Machine Learning and AI AI
## # ℹ 1,418 more rows
# Abbreviate the experience level.
# Each level gets its own abbreviation: the earlier pattern
# "Senior|Mid-level" collapsed both levels into "Sr", which mislabelled
# mid-level staff as senior. Levels without an entry in the lookup
# (e.g. "Executive") pass through unchanged.
# Re-run to refresh any recorded output after this change.
data %>%
  mutate(
    col_abbreviation = str_replace_all(
      experience_level,
      c("Senior" = "Sr", "Mid-level" = "Mid")
    )
  ) %>%
  select(job_category, col_abbreviation)
## # A tibble: 9,355 × 2
## job_category col_abbreviation
## <chr> <chr>
## 1 Machine Learning and AI Sr
## 2 Machine Learning and AI Sr
## 3 Machine Learning and AI Sr
## 4 Machine Learning and AI Sr
## 5 Machine Learning and AI Sr
## 6 Machine Learning and AI Sr
## 7 Machine Learning and AI Executive
## 8 Machine Learning and AI Sr
## 9 Machine Learning and AI Sr
## 10 Machine Learning and AI Sr
## # ℹ 9,345 more rows