library(readxl)
# Read the Excel workbook into `data` and print a preview of it.
# NOTE(review): the plots further down call ggplot(), filter() and %>%, but no
# library() call for ggplot2/dplyr is visible here -- confirm they are attached.
data <- read_excel(path = "../00_data/data/myData.xlsx")
data
## # A tibble: 9,355 × 12
## work_year job_title job_category salary_currency salary salary_in_usd
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2023 AI Architect Machine Learning… USD 305100 305100
## 2 2023 AI Architect Machine Learning… USD 146900 146900
## 3 2023 AI Architect Machine Learning… USD 330000 330000
## 4 2023 AI Architect Machine Learning… USD 204000 204000
## 5 2023 AI Architect Machine Learning… USD 330000 330000
## 6 2023 AI Architect Machine Learning… USD 204000 204000
## 7 2023 AI Architect Machine Learning… EUR 200000 215936
## 8 2023 AI Architect Machine Learning… USD 330000 330000
## 9 2023 AI Architect Machine Learning… USD 204000 204000
## 10 2023 AI Architect Machine Learning… USD 200000 200000
## # ℹ 9,345 more rows
## # ℹ 6 more variables: employee_residence <chr>, experience_level <chr>,
## # employment_type <chr>, work_setting <chr>, company_location <chr>,
## # company_size <chr>
# Bar chart: number of rows in each job category.
ggplot(data, aes(x = job_category)) +
  geom_bar()
# Histogram of salary_in_usd.
# `bins` is stated explicitly: 30 is ggplot2's default, so the plot is
# unchanged, but the "Pick better value with `binwidth`" message goes away.
ggplot(data) +
  geom_histogram(mapping = aes(x = salary_in_usd), bins = 30)
# Frequency polygons of salary_in_usd, one coloured line per job category.
# `bins = 30` matches ggplot2's default, so the lines are unchanged, but
# stating it avoids the "Pick better value with `binwidth`" message.
ggplot(data = data, mapping = aes(x = salary_in_usd, colour = job_category)) +
  geom_freqpoly(bins = 30)
# Count of rows per job category, drawn as bars.
data %>%
  ggplot() +
  geom_bar(mapping = aes(x = job_category))
# Histogram of work_year for rows whose employee residence is the United
# States and whose job category is Data Analysis.
data %>%
  filter(
    employee_residence == "United States",
    job_category == "Data Analysis"
  ) %>%
  ggplot(mapping = aes(x = work_year)) +
  geom_histogram(binwidth = 0.5)
data %>%
  # Keep only the rows whose salary currency is EUR
  filter(salary_currency == "EUR") %>%
  # Histogram of work_year for those rows. work_year appears to hold whole
  # years, so a 0.5-wide bin keeps each year in its own bar and avoids the
  # default-bins message.
  ggplot(aes(x = work_year)) +
  geom_histogram(binwidth = 0.5)
# Histogram of the raw salary column, with the y axis zoomed to counts
# between 0 and 1200 via coord_cartesian().
# `bins = 30` is ggplot2's default made explicit: same plot, but no
# "Pick better value with `binwidth`" message.
data %>%
  ggplot(aes(x = salary)) +
  geom_histogram(bins = 30) +
  coord_cartesian(ylim = c(0, 1200))
data %>%
  # Replace the currency code with NA for rows recorded in USD or CAD.
  # NOTE(review): the plot below maps only work_year and salary_in_usd, so this
  # recoding does not change what is drawn -- confirm it is still wanted.
  mutate(
    salary_currency = ifelse(
      salary_currency %in% c("USD", "CAD"),
      NA,
      salary_currency
    )
  ) %>%
  # Scatter plot of salary_in_usd against work_year
  ggplot(mapping = aes(x = work_year, y = salary_in_usd)) +
  geom_point()
## Covariation
# Box plots of salary_in_usd, one box per work_year.
# work_year is numeric, so without an explicit `group` ggplot2 draws a single
# box for all years and warns "Continuous x aesthetic"; grouping by work_year
# gives one box per year and removes the warning.
data %>%
  ggplot(aes(x = work_year, y = salary_in_usd, group = work_year)) +
  geom_boxplot()
### Two categorical variables
# Heat map of how often each salary_currency / employee_residence pair occurs;
# the tile fill is the pair's row count `n` produced by count().
data %>%
  count(salary_currency, employee_residence) %>%
  ggplot(mapping = aes(x = salary_currency, y = employee_residence)) +
  geom_tile(mapping = aes(fill = n))
### Two continuous variables
library(hexbin)
# Hex-binned counts of salary against salary_in_usd.
# geom_hex() bins two continuous variables, so x is the numeric salary column
# rather than the character salary_currency column.
data %>%
  ggplot(aes(x = salary, y = salary_in_usd)) +
  geom_hex()
# Box plot of salary_in_usd for the "Data Analysis" job category only.
ggplot(
  data = filter(data, job_category == "Data Analysis"),
  mapping = aes(x = job_category, y = salary_in_usd)
) +
  geom_boxplot()
## Patterns and models