# Load the salary data from the Excel workbook, then display the tibble
data <- read_excel(path = "../00_data/data/myData.xlsx")
data
## # A tibble: 9,355 × 12
## work_year job_title job_category salary_currency salary salary_in_usd
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2023 AI Architect Machine Learning… USD 305100 305100
## 2 2023 AI Architect Machine Learning… USD 146900 146900
## 3 2023 AI Architect Machine Learning… USD 330000 330000
## 4 2023 AI Architect Machine Learning… USD 204000 204000
## 5 2023 AI Architect Machine Learning… USD 330000 330000
## 6 2023 AI Architect Machine Learning… USD 204000 204000
## 7 2023 AI Architect Machine Learning… EUR 200000 215936
## 8 2023 AI Architect Machine Learning… USD 330000 330000
## 9 2023 AI Architect Machine Learning… USD 204000 204000
## 10 2023 AI Architect Machine Learning… USD 200000 200000
## # ℹ 9,345 more rows
## # ℹ 6 more variables: employee_residence <chr>, experience_level <chr>,
## # employment_type <chr>, work_setting <chr>, company_location <chr>,
## # company_size <chr>
# Keep only the rows in the "Data Science and Research" job category
filtered <- data %>%
  filter(job_category == "Data Science and Research")

# Display those rows ordered from highest to lowest salary in USD
arrange(filtered, desc(salary_in_usd))
## # A tibble: 3,014 × 12
## work_year job_title job_category salary_currency salary salary_in_usd
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2020 Research Scienti… Data Scienc… USD 450000 450000
## 2 2021 Principal Data S… Data Scienc… USD 416000 416000
## 3 2020 Data Scientist Data Scienc… USD 412000 412000
## 4 2023 Research Scienti… Data Scienc… USD 405000 405000
## 5 2023 Research Engineer Data Scienc… USD 385000 385000
## 6 2022 Applied Data Sci… Data Scienc… USD 380000 380000
## 7 2023 Director of Data… Data Scienc… USD 375500 375500
## 8 2022 Data Science Tec… Data Scienc… USD 375000 375000
## 9 2023 Research Scienti… Data Scienc… USD 374000 374000
## 10 2023 Data Scientist Data Scienc… USD 370000 370000
## # ℹ 3,004 more rows
## # ℹ 6 more variables: employee_residence <chr>, experience_level <chr>,
## # employment_type <chr>, work_setting <chr>, company_location <chr>,
## # company_size <chr>
# Show only the employee residence and company size columns
data %>%
  select(employee_residence, company_size)
## # A tibble: 9,355 × 2
## employee_residence company_size
## <chr> <chr>
## 1 United States M
## 2 United States M
## 3 United States M
## 4 United States M
## 5 United States M
## 6 United States M
## 7 Belgium L
## 8 United States M
## 9 United States M
## 10 United States L
## # ℹ 9,345 more rows
There is no data to add to my data set.
Summarise the data set by calculating the mean salary (in USD) for each job category.
# Mean salary in USD for each job category, highest first
data %>%
  # Group by job category
  group_by(job_category) %>%
  # Calculate mean salaries in USD; na.rm = TRUE replaces the stray empty
  # argument so a missing salary cannot turn a category's mean into NA
  summarise(salary = mean(salary_in_usd, na.rm = TRUE)) %>%
  # Sort categories from highest to lowest mean salary
  arrange(desc(salary)) %>%
  ungroup()
## # A tibble: 10 × 2
## job_category salary
## <chr> <dbl>
## 1 Machine Learning and AI 178926.
## 2 Data Science and Research 163759.
## 3 Data Architecture and Modeling 156002.
## 4 Cloud and Database 155000
## 5 Data Engineering 146198.
## 6 Leadership and Management 145476.
## 7 BI and Visualization 135092.
## 8 Data Analysis 108506.
## 9 Data Management and Strategy 103140.
## 10 Data Quality and Operations 100879.