library(ggplot2)
library(ggplot2) # for data visualization
library(dplyr) # for data wrangling
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggfortify)
library(moderndive) # package of datasets and regression functions
library(readr)
library(skimr) # provides a simple-to-use functions
# for summary statistics
library(readr)
Data_Science_Jobs_Salaries <- read_csv("~/Project/Data Science Jobs Salaries.csv")
## Rows: 245 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): experience_level, employment_type, job_title, salary_currency, empl...
## dbl (5): work_year, salary, salary_in_usd, salary_in_inr, remote_ratio
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(Data_Science_Jobs_Salaries)
library(readr)
ds_salaries <- read_csv("~/Project/ds_salaries.csv")
## New names:
## Rows: 607 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): experience_level, employment_type, job_title, salary_currency, empl... dbl
## (5): ...1, work_year, salary, salary_in_usd, remote_ratio
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#View(ds_salaries)
Data_Science <- Data_Science_Jobs_Salaries %>%
select(work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)
ds <- ds_salaries %>%
select(...1, work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)
adata <- merge(ds, Data_Science)
rws <- read_csv("~/Project/2020_rws.csv", col_names = FALSE, skip = 1)
## Rows: 1507 Columns: 73
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (63): X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17...
## dbl (10): X1, X2, X34, X35, X36, X37, X38, X39, X40, X41
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rws2 <- rws %>%
select(X1, X2, X3)
ndata <- merge(adata, rws2, by.x = c('...1'), by.y = c('X1'))
data <- ndata %>%
select(X2, X3, work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)
library(readr)
ds_salaries <- read_csv("~/Project/ds_salaries.csv")
## New names:
## Rows: 607 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): experience_level, employment_type, job_title, salary_currency, empl... dbl
## (5): ...1, work_year, salary, salary_in_usd, remote_ratio
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#View(ds_salaries)
ggplot(ds_salaries, aes(x = salary_in_usd, y = employment_type, color = company_size)) +
geom_point() +
labs(x = "Salary (USD)", y = "Employment Type", color = "Company Size")
ggplot(ds_salaries, aes(x = salary_in_usd, y = job_title, color = experience_level)) +
geom_point() +
labs(x = "Salary (USD)", y = "Job Title", color = "Experience Level")
ggplot(ds_salaries, aes(x = salary_in_usd)) +
geom_histogram(binwidth = 300, color = "blue") +
labs(x = "Salaries", y = "Number of employees",
title = "Histogram of distribution of Salaries")
ggplot(ds_salaries, aes(x = salary_in_usd, y = company_size)) +
geom_boxplot() +
xlim(0,600000) +
labs(x = "Salary", y = "Company Size", title = "Salary and Company Size")
ds <- ds_salaries %>%
select(experience_level, employment_type, salary_in_usd, company_size, remote_ratio)
glimpse(ds)
## Rows: 607
## Columns: 5
## $ experience_level <chr> "MI", "SE", "SE", "MI", "SE", "EN", "SE", "MI", "MI",…
## $ employment_type <chr> "FT", "FT", "FT", "FT", "FT", "FT", "FT", "FT", "FT",…
## $ salary_in_usd <dbl> 79833, 260000, 109024, 20000, 150000, 72000, 190000, …
## $ company_size <chr> "L", "S", "M", "S", "L", "L", "S", "L", "L", "S", "S"…
## $ remote_ratio <dbl> 0, 0, 50, 0, 50, 100, 100, 50, 100, 50, 0, 0, 0, 100,…
skim(ds)
Name | ds |
Number of rows | 607 |
Number of columns | 5 |
_______________________ | |
Column type frequency: | |
character | 3 |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
experience_level | 0 | 1 | 2 | 2 | 0 | 4 | 0 |
employment_type | 0 | 1 | 2 | 2 | 0 | 4 | 0 |
company_size | 0 | 1 | 1 | 1 | 0 | 3 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
salary_in_usd | 0 | 1 | 112297.87 | 70957.26 | 2859 | 62726 | 101570 | 150000 | 6e+05 | ▇▅▁▁▁ |
remote_ratio | 0 | 1 | 70.92 | 40.71 | 0 | 50 | 100 | 100 | 1e+02 | ▂▁▂▁▇ |
ggplot(ds, aes(x = company_size, y = employment_type, color = experience_level)) +
geom_jitter() +
labs(x = "Company Size", y = "Employment Type", color = "Experience Level",
title = 'How a Companies size relates to Employment Type and Expereince level')
ggplot(ds, aes(x = company_size, y = remote_ratio, color = experience_level)) +
geom_jitter() +
ylim(0,100) +
labs(x = "Company Size", y = "Remote Work Ratio", color = "Experience Level",
title = "Relationship betweern a Companys size, the amount of Remote work, and Experience level")
## Warning: Removed 257 rows containing missing values (geom_point).
library(ggplot2)
theme_set(theme_minimal())
ggplot(ds, aes(x = company_size, y = remote_ratio, color = salary_in_usd)) +
geom_jitter() +
ylim(0, 100) +
labs(x = "Company Size", y = "Remote Ratio", color = "Salary") +
geom_smooth(method = "lm", se = FALSE) +
scale_color_gradient2(mid = 0.5,
high = "red", space = "Lab" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 250 rows containing missing values (geom_point).
ggplot(ds, aes(x = experience_level, y = remote_ratio, color = salary_in_usd)) +
geom_jitter() +
ylim(0, 100) +
labs(x = "Experience Level", y = "Remote Ratio", color = "Salary") +
geom_smooth(method = "lm", se = FALSE) +
scale_color_gradient2(mid = 0.5,
high = "orange", space = "Lab" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 261 rows containing missing values (geom_point).
ggplot(ds, aes(x = employment_type, y = salary_in_usd, color = experience_level)) +
geom_jitter() +
labs(x = "Company Size", y = "Remote Ratio", color = "Salary") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
#Box plot showing relationship between Salary and Remote work
library(ggplot2)
ggplot(data, aes(y = salary_in_usd)) +
geom_boxplot() +
labs(x = "Remote Work", y = "Salary",
title = "Box plot showing relationship between Salary and Remote work") +
facet_wrap( ~ remote_ratio)
ggplot(data, aes(x = work_year)) +
geom_histogram() +
labs(x = "Remote Work by Year", y = "Number of Companies",
title = "Relationship between Remote Work by Year and the Amount of Companies
Implimenting Remote Work") +
facet_wrap(~ remote_ratio)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data_lm <- lm(salary_in_usd ~ remote_ratio, data = data)
summary(data_lm)
##
## Call:
## lm(formula = salary_in_usd ~ remote_ratio, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -127924 -55381 -19924 33076 468076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94837.9 15793.6 6.005 1.31e-08 ***
## remote_ratio 370.9 193.1 1.920 0.0567 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 93960 on 155 degrees of freedom
## Multiple R-squared: 0.02324, Adjusted R-squared: 0.01694
## F-statistic: 3.688 on 1 and 155 DF, p-value: 0.05666
ggplot(data, aes(x = X2, y = salary_in_usd, color = employment_type)) +
geom_point()
ggplot(data, aes(x = remote_ratio, y = X2, color = X3)) +
facet_wrap( ~ X3) +
geom_boxplot()
How does gender and experience relate to salary?
q1_data <- data %>%
select(X2, X3, experience_level, salary_in_usd)
library(resampledata)
##
## Attaching package: 'resampledata'
## The following object is masked from 'package:datasets':
##
## Titanic
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ tidyr 1.2.0 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(moderndive)
library(infer)
q1_data %>%
group_by(X3) %>%
summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 2 × 3
## X3 Means n
## <chr> <dbl> <int>
## 1 Female 133992. 70
## 2 Male 111504. 87
q1_data %>%
group_by(X2) %>%
summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 41 × 3
## X2 Means n
## <dbl> <dbl> <int>
## 1 1955 131000 2
## 2 1956 41363 2
## 3 1957 132533. 6
## 4 1958 89476 3
## 5 1959 84064 7
## 6 1960 80047 8
## 7 1961 149795. 6
## 8 1962 90430. 8
## 9 1963 137000 6
## 10 1964 108091. 8
## # … with 31 more rows
q1_data %>%
group_by(experience_level) %>%
summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 4 × 3
## experience_level Means n
## <chr> <dbl> <int>
## 1 EN 67513. 37
## 2 EX 265104. 8
## 3 MI 109921. 61
## 4 SE 152083. 51
ggplot(q1_data, aes(x = salary_in_usd, y = experience_level, color = X3)) +
geom_boxplot() +
labs(title = "Experience and Salary", color = "Gender", x = "Salary", y = "Experience")
library(resampledata)
library(lattice)
q1_data %>%group_by(X3)%>%
summarize(Mean = mean(salary_in_usd),SD=sd(salary_in_usd), n = n())
## # A tibble: 2 × 4
## X3 Mean SD n
## <chr> <dbl> <dbl> <int>
## 1 Female 133992. 105301. 70
## 2 Male 111504. 84643. 87
qqmath(~salary_in_usd|X3,data = q1_data, col = rgb(1,0,0,.5))
ggplot(q1_data, aes(x = X2)) +
geom_histogram(binwidth = 10, color = "white")
x_bar <- q1_data %>% summarize(mean_year = mean(X2))
x_bar
## mean_year
## 1 1972.962
set.seed(10)
virtual_resample <- q1_data %>% rep_sample_n(size = 50, replace = TRUE)
virtual_resample
## # A tibble: 50 × 5
## # Groups: replicate [1]
## replicate X2 X3 experience_level salary_in_usd
## <int> <dbl> <chr> <chr> <dbl>
## 1 1 1990 Female MI 110000
## 2 1 1970 Female SE 80000
## 3 1 1983 Male EN 21844
## 4 1 1964 Male MI 110000
## 5 1 1957 Male EN 90000
## 6 1 1959 Male EN 41689
## 7 1 1988 Female MI 93000
## 8 1 1970 Female SE 80000
## 9 1 1962 Male EX 325000
## 10 1 1970 Female MI 85000
## # … with 40 more rows
set.seed(10)
virtual_resampled_means <- q1_data %>%
rep_sample_n(size = 50, replace = FALSE, reps = 1000) %>%
group_by(experience_level) %>%
summarize(mean_salary = mean(salary_in_usd))
ggplot(virtual_resampled_means, aes(x = mean_salary, y = experience_level)) +
geom_violin()
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`:
## replacement has 1 row, data has 0
set.seed(10)
virtual_resampled_means <- q1_data %>%
rep_sample_n(size = 500, replace = TRUE, reps = 10000) %>%
group_by(experience_level)
ggplot(virtual_resampled_means, aes(x = salary_in_usd, y = experience_level, color = experience_level)) +
geom_violin() +
labs(title = "Bootstrapped data on Salary and Experince Level", x = "Salary", y = "Experience", color = "Expereince")
How does the pay distribution look?
salary <- data %>%
select(salary_in_usd)
q2_data <- data %>%
select(salary_in_usd, job_title, company_size)
ggplot(q2_data, aes(x = salary_in_usd)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(q2_data, aes(x = salary_in_usd, color = company_size)) +
geom_histogram(aes(y = ..density..)) +
geom_density() +
labs(title = "Trends within Salary and Comapny Sizes", x = "Salary",
y = "Density", color = "Company Size")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Remote work
q3_data <- data %>%
select(work_year, experience_level, salary_in_usd, company_size, remote_ratio)
library(ggplot2) # for data visualization
library(dplyr) # for data wrangling
library(readr) # for importing spreadsheet data into R
library(moderndive) # package of datasets and regression functions
library(skimr)
glimpse(q3_data)
## Rows: 157
## Columns: 5
## $ work_year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,…
## $ experience_level <chr> "SE", "SE", "SE", "EN", "SE", "MI", "MI", "SE", "EN",…
## $ salary_in_usd <dbl> 260000, 109024, 150000, 72000, 190000, 35735, 135000,…
## $ company_size <chr> "S", "M", "L", "L", "S", "L", "L", "S", "S", "L", "M"…
## $ remote_ratio <dbl> 0, 50, 50, 100, 100, 50, 100, 50, 0, 0, 0, 100, 100, …
q3_data %>%
summarize(mean_salary = mean(salary_in_usd), mean_remote = mean(remote_ratio),
median_salary = median(salary_in_usd), median_remote = median(remote_ratio))
## mean_salary mean_remote median_salary median_remote
## 1 121530.1 71.97452 105000 100
library(knitr)
q3_data %>% select(salary_in_usd, remote_ratio) %>% skim()
Name | Piped data |
Number of rows | 157 |
Number of columns | 2 |
_______________________ | |
Column type frequency: | |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
salary_in_usd | 0 | 1 | 121530.11 | 94761.98 | 4000 | 60000 | 105000 | 160000 | 6e+05 | ▇▃▁▁▁ |
remote_ratio | 0 | 1 | 71.97 | 38.95 | 0 | 50 | 100 | 100 | 1e+02 | ▂▁▂▁▇ |
ggplot(q3_data, aes(x = salary_in_usd, y = remote_ratio)) +
geom_jitter() +
labs(x = "Salary", y = "Remote Ratio",
title = "Relationship between Salary and Remote Work Ratio") +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
#Fit regression model:
score_model <- lm(salary_in_usd ~ remote_ratio, data = q3_data)
#Get regression table:
get_regression_table(score_model)
## # A tibble: 2 × 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 94838. 15794. 6.00 0 63639. 126036.
## 2 remote_ratio 371. 193. 1.92 0.057 -10.6 752.
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 157 × 5
## ID salary_in_usd remote_ratio salary_in_usd_hat residual
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 260000 0 94838. 165162.
## 2 2 109024 50 113381. -4357.
## 3 3 150000 50 113381. 36619.
## 4 4 72000 100 131924. -59924.
## 5 5 190000 100 131924. 58076.
## 6 6 35735 50 113381. -77646.
## 7 7 135000 100 131924. 3076.
## 8 8 125000 50 113381. 11619.
## 9 9 51321 0 94838. -43517.
## 10 10 40481 0 94838. -54357.
## # … with 147 more rows
library(ggfortify)
autoplot(score_model, ncol = 8, nrow = 2, which = 8:2) + theme_bw()
summary(score_model)
##
## Call:
## lm(formula = salary_in_usd ~ remote_ratio, data = q3_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -127924 -55381 -19924 33076 468076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94837.9 15793.6 6.005 1.31e-08 ***
## remote_ratio 370.9 193.1 1.920 0.0567 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 93960 on 155 degrees of freedom
## Multiple R-squared: 0.02324, Adjusted R-squared: 0.01694
## F-statistic: 3.688 on 1 and 155 DF, p-value: 0.05666
ggplot(data = q3_data, aes(sample = salary_in_usd)) +
stat_qq(color = rgb(1,0,0,.1)) +
stat_qq_line() +
facet_grid(cols = vars(remote_ratio)) +
labs(title = "Quantile plot of Salary based on Remote Ratio", x = "Remote Ratio", y = "Salary")
theme_bw()
## List of 93
## $ line :List of 6
## ..$ colour : chr "black"
## ..$ size : num 0.5
## ..$ linetype : num 1
## ..$ lineend : chr "butt"
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ rect :List of 5
## ..$ fill : chr "white"
## ..$ colour : chr "black"
## ..$ size : num 0.5
## ..$ linetype : num 1
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ text :List of 11
## ..$ family : chr ""
## ..$ face : chr "plain"
## ..$ colour : chr "black"
## ..$ size : num 11
## ..$ hjust : num 0.5
## ..$ vjust : num 0.5
## ..$ angle : num 0
## ..$ lineheight : num 0.9
## ..$ margin : 'margin' num [1:4] 0pt 0pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ title : NULL
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.75pt 0pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 2.75pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.bottom : NULL
## $ axis.title.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 2.75pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.y.left : NULL
## $ axis.title.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : num -90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 0pt 2.75pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : chr "grey30"
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.2pt 0pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 2.2pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.bottom : NULL
## $ axis.text.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 2.2pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.y.left : NULL
## $ axis.text.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 0pt 2.2pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.ticks :List of 6
## ..$ colour : chr "grey20"
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ lineend : NULL
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.length : 'unit' num 2.75pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom: NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.line : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ legend.background :List of 5
## ..$ fill : NULL
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ legend.margin : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ legend.spacing : 'unit' num 11pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key :List of 5
## ..$ fill : chr "white"
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ legend.key.size : 'unit' num 1.2lines
## ..- attr(*, "valid.unit")= int 3
## ..- attr(*, "unit")= chr "lines"
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.text.align : NULL
## $ legend.title :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.title.align : NULL
## $ legend.position : chr "right"
## $ legend.direction : NULL
## $ legend.justification : chr "center"
## $ legend.box : NULL
## $ legend.box.just : NULL
## $ legend.box.margin : 'margin' num [1:4] 0cm 0cm 0cm 0cm
## ..- attr(*, "valid.unit")= int 1
## ..- attr(*, "unit")= chr "cm"
## $ legend.box.background : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.box.spacing : 'unit' num 11pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ panel.background :List of 5
## ..$ fill : chr "white"
## ..$ colour : logi NA
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ panel.border :List of 5
## ..$ fill : logi NA
## ..$ colour : chr "grey20"
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ panel.spacing : 'unit' num 5.5pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ panel.spacing.x : NULL
## $ panel.spacing.y : NULL
## $ panel.grid :List of 6
## ..$ colour : chr "grey92"
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ lineend : NULL
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ panel.grid.major : NULL
## $ panel.grid.minor :List of 6
## ..$ colour : NULL
## ..$ size : 'rel' num 0.5
## ..$ linetype : NULL
## ..$ lineend : NULL
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ panel.grid.major.x : NULL
## $ panel.grid.major.y : NULL
## $ panel.grid.minor.x : NULL
## $ panel.grid.minor.y : NULL
## $ panel.ontop : logi FALSE
## $ plot.background :List of 5
## ..$ fill : NULL
## ..$ colour : chr "white"
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ plot.title :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 1.2
## ..$ hjust : num 0
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.title.position : chr "panel"
## $ plot.subtitle :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.caption :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 0.8
## ..$ hjust : num 1
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 5.5pt 0pt 0pt 0pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.caption.position : chr "panel"
## $ plot.tag :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 1.2
## ..$ hjust : num 0.5
## ..$ vjust : num 0.5
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ plot.tag.position : chr "topleft"
## $ plot.margin : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ strip.background :List of 5
## ..$ fill : chr "grey85"
## ..$ colour : chr "grey20"
## ..$ size : NULL
## ..$ linetype : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ strip.background.x : NULL
## $ strip.background.y : NULL
## $ strip.placement : chr "inside"
## $ strip.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : chr "grey10"
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 4.4pt 4.4pt 4.4pt 4.4pt
## .. ..- attr(*, "valid.unit")= int 8
## .. ..- attr(*, "unit")= chr "pt"
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ strip.text.x : NULL
## $ strip.text.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : num -90
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ strip.switch.pad.grid : 'unit' num 2.75pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ strip.switch.pad.wrap : 'unit' num 2.75pt
## ..- attr(*, "valid.unit")= int 8
## ..- attr(*, "unit")= chr "pt"
## $ strip.text.y.left :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi TRUE
## - attr(*, "validate")= logi TRUE
ggplot(q3_data, aes(x = remote_ratio)) +
geom_bar() +
facet_wrap(vars(work_year)) +
labs(title = "Two Year Comparision of the Remote Work Ratio",
x = "Remote Ratio (0%, 50%, and 100%)", y = "Amount of Companies" )
set.seed(75)
library(infer)
q3 <- q3_data %>%
specify(response = remote_ratio) %>%
generate(reps = 10^4, type = "bootstrap") %>%
calculate(stat = "mean")
visualize(q3)
ggplot(q3_data, aes(x = salary_in_usd, color = experience_level)) +
geom_histogram(aes(y = ..density..)) +
geom_density() +
labs(title = "Trends within Salary and Experience Level", x = "Salary",
y = "Density", color = "Experience")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(tidyverse)
library(moderndive)
data
## X2 X3 work_year experience_level employment_type salary_in_usd
## 1 1972 Female 2020 SE FT 260000
## 2 1972 Male 2020 SE FT 109024
## 3 1987 Female 2020 SE FT 150000
## 4 1991 Male 2020 EN FT 72000
## 5 1989 Male 2020 SE FT 190000
## 6 1973 Male 2020 MI FT 35735
## 7 1992 Male 2020 MI FT 135000
## 8 1977 Male 2020 SE FT 125000
## 9 1985 Male 2020 EN FT 51321
## 10 1980 Female 2020 MI FT 40481
## 11 1989 Male 2020 EN FT 39916
## 12 1994 Female 2020 MI FT 87000
## 13 1970 Female 2020 MI FT 85000
## 14 1962 Female 2020 MI FT 8000
## 15 1959 Male 2020 EN FT 41689
## 16 1971 Female 2020 SE FT 114047
## 17 1960 Male 2020 EN FT 5707
## 18 1966 Male 2020 MI FT 56000
## 19 1969 Male 2020 MI FT 43331
## 20 1961 Male 2020 MI FT 6072
## 21 1969 Male 2020 SE FT 47899
## 22 1959 Male 2020 MI FT 98000
## 23 1963 Male 2020 MI FT 115000
## 24 1962 Male 2020 EX FT 325000
## 25 1965 Male 2020 EN FT 42000
## 26 1962 Female 2020 SE FT 33511
## 27 1963 Female 2020 EN CT 100000
## 28 1958 Male 2020 SE FT 68428
## 29 1961 Male 2020 MI FT 450000
## 30 1959 Male 2020 MI FT 46759
## 31 1980 Female 2020 MI FT 74130
## 32 1991 Female 2020 MI FT 103000
## 33 1973 Male 2020 EN FT 250000
## 34 1971 Female 2020 EN FT 10000
## 35 1996 Male 2020 EN FT 138000
## 36 1975 Male 2020 MI FT 45760
## 37 1996 Female 2020 EX FT 79833
## 38 1987 Male 2020 MI FT 50180
## 39 1966 Female 2020 MI FT 106000
## 40 1976 Female 2020 MI FT 112872
## 41 1967 Female 2020 EN PT 15966
## 42 1965 Male 2020 MI FT 76958
## 43 1961 Female 2020 SE FT 188000
## 44 1964 Male 2020 MI FT 105000
## 45 1978 Female 2020 MI FT 70139
## 46 1979 Male 2020 EN FT 6072
## 47 1974 Male 2020 EN FT 91000
## 48 1969 Male 2020 EN FT 45896
## 49 1960 Female 2020 SE FL 60000
## 50 1985 Male 2020 SE FT 148261
## 51 1962 Male 2020 MI FT 38776
## 52 1964 Male 2020 MI FT 118000
## 53 1965 Male 2020 SE FT 120000
## 54 1988 Male 2020 MI FT 138350
## 55 1959 Male 2020 MI FT 110000
## 56 1966 Female 2020 MI FT 130800
## 57 1960 Male 2020 EN PT 21669
## 58 1979 Female 2020 SE FT 412000
## 59 1966 Female 2020 SE FT 45618
## 60 1956 Male 2020 EN FT 62726
## 61 1957 Male 2020 SE FT 190200
## 62 1986 Male 2020 EN FT 105000
## 63 1987 Female 2020 SE FT 91237
## 64 1964 Female 2020 MI FT 62726
## 65 1961 Male 2020 MI FT 42197
## 66 1959 Female 2021 EX FT 150000
## 67 1985 Female 2021 EX FT 235000
## 68 1992 Female 2021 MI FT 100000
## 69 1973 Male 2021 MI CT 270000
## 70 1963 Female 2021 EN FT 80000
## 71 1961 Male 2021 MI FT 140000
## 72 1964 Male 2021 MI FT 110000
## 73 1957 Male 2021 SE FT 170000
## 74 1970 Female 2021 SE FT 80000
## 75 1963 Male 2021 SE FT 276000
## 76 1971 Male 2021 EN PT 12000
## 77 1970 Female 2021 MI FT 450000
## 78 1964 Male 2021 EN FT 70000
## 79 1962 Male 2021 MI FT 75000
## 80 1974 Male 2021 SE FT 150000
## 81 1955 Male 2021 MI FT 62000
## 82 1960 Male 2021 MI FT 73000
## 83 1968 Male 2021 SE FT 115000
## 84 1969 Male 2021 SE FT 150000
## 85 1959 Male 2021 EN PT 12000
## 86 1960 Male 2021 EN FT 225000
## 87 1962 Male 2021 MI FT 50000
## 88 1957 Male 2021 EN FT 90000
## 89 1964 Male 2021 MI FT 200000
## 90 1963 Female 2021 MI FT 151000
## 91 1973 Female 2021 MI FT 90000
## 92 1991 Male 2021 SE FT 153000
## 93 1987 Male 2021 SE FT 160000
## 94 1993 Female 2021 SE FT 168000
## 95 1975 Male 2021 MI FT 150000
## 96 1991 Male 2021 EN FT 13400
## 97 1990 Male 2021 MI FT 423000
## 98 1957 Female 2021 SE FT 120000
## 99 1971 Male 2021 EN FT 125000
## 100 1987 Female 2021 EX FT 230000
## 101 1967 Male 2021 EX FT 85000
## 102 1965 Female 2021 SE FT 165000
## 103 1989 Female 2021 EN FT 60000
## 104 1985 Female 2021 SE FT 235000
## 105 1990 Female 2021 SE FT 174000
## 106 1977 Female 2021 EN FT 81000
## 107 1973 Male 2021 MI FL 12000
## 108 1964 Female 2021 MI FT 4000
## 109 1992 Female 2021 SE FT 50000
## 110 1990 Female 2021 MI FT 74000
## 111 1992 Female 2021 SE FT 152000
## 112 1983 Male 2021 EN FT 21844
## 113 1986 Female 2021 MI FT 18000
## 114 1969 Female 2021 SE FT 174000
## 115 1975 Female 2021 MI FT 147000
## 116 1966 Female 2021 EN FT 9272
## 117 1958 Female 2021 EN FT 90000
## 118 1964 Female 2021 SE FT 195000
## 119 1972 Female 2021 SE FT 50000
## 120 1988 Female 2021 MI FT 160000
## 121 1973 Female 2021 SE FT 200000
## 122 1988 Female 2021 SE FT 165000
## 123 1956 Male 2021 MI FL 20000
## 124 1957 Male 2021 SE FT 120000
## 125 1994 Male 2021 SE FT 185000
## 126 1987 Male 2021 SE FT 140000
## 127 1987 Male 2021 SE FT 225000
## 128 1987 Female 2021 EX CT 416000
## 129 1982 Female 2021 SE FT 135000
## 130 1975 Male 2021 SE FT 256000
## 131 1967 Female 2021 SE FT 200000
## 132 1955 Female 2021 SE FT 200000
## 133 1979 Female 2021 MI FT 180000
## 134 1958 Male 2021 MI FT 110000
## 135 1978 Female 2021 EN FT 4000
## 136 1989 Male 2021 MI FT 80000
## 137 1990 Female 2021 MI FT 110000
## 138 1965 Male 2021 SE FT 165000
## 139 1999 Female 2021 SE FT 170000
## 140 1980 Male 2021 MI FT 115000
## 141 1977 Female 2021 EN FT 90000
## 142 1990 Female 2021 EX FT 600000
## 143 1988 Female 2021 MI FT 93000
## 144 1990 Female 2021 MI FT 200000
## 145 1968 Male 2021 SE FT 185000
## 146 1959 Male 2021 MI FT 130000
## 147 1990 Female 2021 SE FT 160000
## 148 1962 Male 2021 MI FT 93150
## 149 1987 Female 2021 MI FT 111775
## 150 1961 Male 2021 EN FT 72500
## 151 1960 Male 2021 EN FT 85000
## 152 1962 Female 2021 EN FT 100000
## 153 1960 Male 2021 EN FT 58000
## 154 1997 Male 2021 SE FT 55000
## 155 1960 Female 2021 MI FT 112000
## 156 1963 Male 2021 EN FT 100000
## 157 1957 Male 2021 SE CT 105000
## company_size remote_ratio job_title
## 1 S 0 Machine Learning Scientist
## 2 M 50 Big Data Engineer
## 3 L 50 Machine Learning Engineer
## 4 L 100 Data Analyst
## 5 S 100 Lead Data Scientist
## 6 L 50 Data Scientist
## 7 L 100 Business Data Analyst
## 8 S 50 Lead Data Engineer
## 9 S 0 Data Scientist
## 10 L 0 Data Scientist
## 11 M 0 Data Scientist
## 12 L 100 Lead Data Analyst
## 13 L 100 Data Analyst
## 14 L 50 Data Analyst
## 15 S 100 Data Engineer
## 16 S 100 Big Data Engineer
## 17 M 50 Data Science Consultant
## 18 M 100 Lead Data Engineer
## 19 M 0 Machine Learning Engineer
## 20 L 100 Product Data Analyst
## 21 L 50 Data Engineer
## 22 M 0 BI Data Analyst
## 23 L 0 Lead Data Scientist
## 24 L 100 Director of Data Science
## 25 L 50 Research Scientist
## 26 S 0 Data Engineer
## 27 L 100 Business Data Analyst
## 28 L 100 Data Scientist
## 29 M 0 Research Scientist
## 30 L 50 Data Analyst
## 31 L 50 Data Engineer
## 32 L 100 Data Science Consultant
## 33 L 50 Machine Learning Engineer
## 34 S 100 Data Analyst
## 35 S 100 Machine Learning Engineer
## 36 S 100 Data Scientist
## 37 L 50 Data Engineering Manager
## 38 M 0 Machine Learning Infrastructure Engineer
## 39 L 100 Data Engineer
## 40 L 50 Data Engineer
## 41 S 100 ML Engineer
## 42 S 100 Data Scientist
## 43 L 100 Data Engineer
## 44 L 100 Data Scientist
## 45 L 50 Data Engineer
## 46 S 0 Data Analyst
## 47 L 100 Data Analyst
## 48 S 50 AI Scientist
## 49 S 100 Computer Vision Engineer
## 50 M 100 Principal Data Scientist
## 51 M 100 Data Scientist
## 52 M 100 Data Scientist
## 53 L 50 Data Scientist
## 54 M 100 Data Scientist
## 55 L 100 Data Engineer
## 56 M 100 Data Engineer
## 57 S 50 Data Scientist
## 58 L 100 Data Scientist
## 59 S 100 Machine Learning Engineer
## 60 S 50 Data Scientist
## 61 M 100 Data Science Manager
## 62 S 100 Data Scientist
## 63 S 0 Data Scientist
## 64 S 50 Data Scientist
## 65 S 50 Data Scientist
## 66 L 100 BI Data Analyst
## 67 L 100 Head of Data
## 68 M 100 BI Data Analyst
## 69 L 100 ML Engineer
## 70 M 100 Data Analyst
## 71 L 100 Data Engineer
## 72 L 100 Data Analytics Engineer
## 73 L 100 Lead Data Analyst
## 74 S 100 Data Analyst
## 75 L 0 Lead Data Engineer
## 76 S 100 AI Scientist
## 77 L 100 Financial Data Analyst
## 78 M 100 Computer Vision Software Engineer
## 79 L 0 Data Analyst
## 80 L 100 Data Engineer
## 81 L 0 Data Analyst
## 82 L 0 Data Scientist
## 83 S 100 Data Engineer
## 84 M 100 Data Engineer
## 85 M 100 AI Scientist
## 86 L 100 Machine Learning Scientist
## 87 L 100 Data Scientist
## 88 S 100 Data Analyst
## 89 L 100 Data Engineer
## 90 L 100 Principal Data Scientist
## 91 L 100 Data Engineer
## 92 L 100 Data Engineering Manager
## 93 S 100 Cloud Data Engineer
## 94 S 0 Director of Data Science
## 95 M 100 Data Scientist
## 96 L 100 Data Scientist
## 97 L 50 Applied Machine Learning Scientist
## 98 M 100 Data Analytics Manager
## 99 S 100 Machine Learning Engineer
## 100 L 50 Head of Data
## 101 M 0 Head of Data Science
## 102 L 100 Data Specialist
## 103 S 100 Data Analyst
## 104 L 100 Principal Data Scientist
## 105 L 100 Data Engineering Manager
## 106 S 50 Machine Learning Engineer
## 107 M 50 Machine Learning Scientist
## 108 M 100 Data Engineer
## 109 M 100 Data Analytics Engineer
## 110 S 50 Machine Learning Engineer
## 111 L 100 Data Science Manager
## 112 M 50 Machine Learning Engineer
## 113 S 0 Big Data Engineer
## 114 L 100 Data Science Manager
## 115 L 50 Data Scientist
## 116 S 100 BI Data Analyst
## 117 S 100 Data Science Consultant
## 118 M 100 Machine Learning Infrastructure Engineer
## 119 S 100 Research Scientist
## 120 L 100 Data Scientist
## 121 L 100 Machine Learning Engineer
## 122 M 0 Data Engineer
## 123 L 0 Data Engineer
## 124 L 0 Data Analytics Manager
## 125 L 100 Principal Data Engineer
## 126 L 100 Data Analytics Manager
## 127 L 100 Machine Learning Scientist
## 128 S 100 Principal Data Scientist
## 129 L 0 Data Scientist
## 130 S 100 ML Engineer
## 131 L 100 Director of Data Engineering
## 132 L 100 Data Analyst
## 133 L 100 Data Architect
## 134 S 0 Head of Data Science
## 135 M 0 Data Scientist
## 136 L 100 Data Analyst
## 137 L 100 Data Engineer
## 138 L 100 Data Scientist
## 139 M 100 Principal Data Analyst
## 140 L 50 Data Scientist
## 141 S 100 Data Scientist
## 142 L 100 Principal Data Engineer
## 143 L 100 Data Analyst
## 144 L 100 Data Engineer
## 145 L 50 Machine Learning Engineer
## 146 L 50 Data Scientist
## 147 S 50 Lead Data Engineer
## 148 M 0 Data Engineer
## 149 M 0 Data Engineer
## 150 L 100 Data Engineer
## 151 S 100 Machine Learning Engineer
## 152 M 100 Data Scientist
## 153 L 50 Data Scientist
## 154 L 100 AI Scientist
## 155 L 100 Data Engineer
## 156 L 0 Research Scientist
## 157 M 100 Staff Data Scientist
data_sample_wy <- data %>%
select(work_year)
Bootstrapping
virtual_samples_25 <- data_sample_wy %>%
rep_sample_n(size = 25, reps = 1000)
virtual_25 <- virtual_samples_25 %>%
group_by(replicate) %>%
summarize(year = sum(work_year == "2020"))
plot1<-ggplot(virtual_25, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 25 balls that were red", title = "25")
#View(plot1)
virtual_samples_50 <- data_sample_wy %>%
rep_sample_n(size = 50, reps = 1000)
virtual_50 <- virtual_samples_50 %>%
group_by(replicate) %>%
summarize(year = sum(work_year == "2020"))
plot2<-ggplot(virtual_50, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 50 balls that were red", title = "50")
virtual_samples_100 <- data_sample_wy %>%
rep_sample_n(size = 100, reps = 1000)
virtual_100 <- virtual_samples_100 %>%
group_by(replicate) %>%
summarize(year = sum(work_year == "2020"))
plot3<-ggplot(virtual_100, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 100 balls that were red", title = "100")
require(gridExtra)
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(plot1, plot2,plot3, ncol=3)
data_sample_sal <- data %>%
select(salary_in_usd)
virtual_samples_25.1 <- data_sample_sal %>%
rep_sample_n(size = 25, reps = 1000)
plot1<-ggplot(virtual_25, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 25 balls that were red", title = "25")
#View(plot1)
virtual_samples_50 <- data_sample_wy %>%
rep_sample_n(size = 50, reps = 1000)
virtual_50 <- virtual_samples_50 %>%
group_by(replicate) %>%
summarize(year = sum(work_year == "2020"))
plot2<-ggplot(virtual_50, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 50 balls that were red", title = "50")
virtual_samples_100 <- data_sample_wy %>%
rep_sample_n(size = 100, reps = 1000)
virtual_100 <- virtual_samples_100 %>%
group_by(replicate) %>%
summarize(year = sum(work_year == "2020"))
plot3<-ggplot(virtual_100, aes(x = year)) +
geom_histogram(binwidth = 2, boundary = 4, color = "white") +
labs(x = "Proportion of 100 balls that were red", title = "100")
require(gridExtra)
grid.arrange(plot1, plot2,plot3, ncol=3)
#Fit regression model:
Lin_model <- lm(salary_in_usd ~ remote_ratio, data = data)
#Get regression table:
get_regression_table(Lin_model)
## # A tibble: 2 × 7
## term estimate std_error statistic p_value lower_ci upper_ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 94838. 15794. 6.00 0 63639. 126036.
## 2 remote_ratio 371. 193. 1.92 0.057 -10.6 752.