library(ggplot2)
library(ggplot2) # for data visualization
library(dplyr) # for data wrangling
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggfortify)
library(moderndive) # package of datasets and regression functions
library(readr)
library(skimr) # provides a simple-to-use functions
# for summary statistics
library(readr)
Data_Science_Jobs_Salaries <- read_csv("~/Project/Data Science Jobs Salaries.csv")
## Rows: 245 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): experience_level, employment_type, job_title, salary_currency, empl...
## dbl (5): work_year, salary, salary_in_usd, salary_in_inr, remote_ratio
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(Data_Science_Jobs_Salaries)

library(readr)
ds_salaries <- read_csv("~/Project/ds_salaries.csv")
## New names:
## Rows: 607 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): experience_level, employment_type, job_title, salary_currency, empl... dbl
## (5): ...1, work_year, salary, salary_in_usd, remote_ratio
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#View(ds_salaries)

Data_Science <- Data_Science_Jobs_Salaries %>%
  select(work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)

ds <- ds_salaries %>%
  select(...1, work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)

adata <- merge(ds, Data_Science)

rws <- read_csv("~/Project/2020_rws.csv", col_names = FALSE, skip = 1)
## Rows: 1507 Columns: 73
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (63): X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17...
## dbl (10): X1, X2, X34, X35, X36, X37, X38, X39, X40, X41
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rws2 <- rws %>%
  select(X1, X2, X3)

ndata <- merge(adata, rws2, by.x = c('...1'), by.y = c('X1'))

data <- ndata %>%
  select(X2, X3, work_year, experience_level, employment_type, salary_in_usd, company_size, remote_ratio, job_title)



library(readr)
ds_salaries <- read_csv("~/Project/ds_salaries.csv")
## New names:
## Rows: 607 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): experience_level, employment_type, job_title, salary_currency, empl... dbl
## (5): ...1, work_year, salary, salary_in_usd, remote_ratio
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#View(ds_salaries)
ggplot(ds_salaries, aes(x = salary_in_usd, y = employment_type, color = company_size)) +
  geom_point() +
  labs(x = "Salary (USD)", y = "Employment Type", color = "Company Size") 

  ggplot(ds_salaries, aes(x = salary_in_usd, y = job_title, color = experience_level)) +
  geom_point() +
  labs(x = "Salary (USD)", y = "Job Title", color = "Experience Level") 

ggplot(ds_salaries, aes(x = salary_in_usd)) +
  geom_histogram(binwidth = 300, color = "blue") +
  labs(x = "Salaries", y = "Number of employees",
       title = "Histogram of distribution of Salaries")

ggplot(ds_salaries, aes(x = salary_in_usd, y = company_size)) +
  geom_boxplot() +
  xlim(0,600000) +
  labs(x = "Salary", y = "Company Size", title = "Salary and Company Size")

ds <- ds_salaries %>%
  select(experience_level, employment_type, salary_in_usd, company_size, remote_ratio)


glimpse(ds)
## Rows: 607
## Columns: 5
## $ experience_level <chr> "MI", "SE", "SE", "MI", "SE", "EN", "SE", "MI", "MI",…
## $ employment_type  <chr> "FT", "FT", "FT", "FT", "FT", "FT", "FT", "FT", "FT",…
## $ salary_in_usd    <dbl> 79833, 260000, 109024, 20000, 150000, 72000, 190000, …
## $ company_size     <chr> "L", "S", "M", "S", "L", "L", "S", "L", "L", "S", "S"…
## $ remote_ratio     <dbl> 0, 0, 50, 0, 50, 100, 100, 50, 100, 50, 0, 0, 0, 100,…
skim(ds)
Data summary
Name ds
Number of rows 607
Number of columns 5
_______________________
Column type frequency:
character 3
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
experience_level 0 1 2 2 0 4 0
employment_type 0 1 2 2 0 4 0
company_size 0 1 1 1 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
salary_in_usd 0 1 112297.87 70957.26 2859 62726 101570 150000 6e+05 ▇▅▁▁▁
remote_ratio 0 1 70.92 40.71 0 50 100 100 1e+02 ▂▁▂▁▇
ggplot(ds, aes(x = company_size, y = employment_type, color = experience_level)) +
  geom_jitter() +
  labs(x = "Company Size", y = "Employment Type", color = "Experience Level", 
       title = 'How a Companies size relates to Employment Type and Expereince level')

ggplot(ds, aes(x = company_size, y = remote_ratio, color = experience_level)) +
  geom_jitter() +
  ylim(0,100) +
  labs(x = "Company Size", y = "Remote Work Ratio", color = "Experience Level", 
       title = "Relationship betweern a Companys size, the amount of Remote work, and Experience level")
## Warning: Removed 257 rows containing missing values (geom_point).

library(ggplot2)
theme_set(theme_minimal())


ggplot(ds, aes(x = company_size, y = remote_ratio, color = salary_in_usd)) +
  geom_jitter() +
  ylim(0, 100) +
  labs(x = "Company Size", y = "Remote Ratio", color = "Salary") +
  geom_smooth(method = "lm", se = FALSE) + 
  scale_color_gradient2(mid = 0.5,
                        high = "red", space = "Lab" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 250 rows containing missing values (geom_point).

ggplot(ds, aes(x = experience_level, y = remote_ratio, color = salary_in_usd)) +
  geom_jitter() +
  ylim(0, 100) +
  labs(x = "Experience Level", y = "Remote Ratio", color = "Salary") +
  geom_smooth(method = "lm", se = FALSE) + 
  scale_color_gradient2(mid = 0.5,
                        high = "orange", space = "Lab" )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 261 rows containing missing values (geom_point).

ggplot(ds, aes(x = employment_type, y = salary_in_usd, color = experience_level)) +
  geom_jitter() +
  labs(x = "Company Size", y = "Remote Ratio", color = "Salary") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#Box plot showing relationship between Salary and Remote work

library(ggplot2)
ggplot(data, aes(y = salary_in_usd)) +
  geom_boxplot() +
  labs(x = "Remote Work", y = "Salary",
       title = "Box plot showing relationship between Salary and Remote work") +
  facet_wrap( ~ remote_ratio)

ggplot(data, aes(x = work_year)) +
  geom_histogram() + 
  labs(x = "Remote Work by Year", y = "Number of Companies",
       title = "Relationship between Remote Work by Year and the Amount of Companies 
                              Implimenting Remote Work") +
  facet_wrap(~ remote_ratio)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

data_lm <- lm(salary_in_usd ~ remote_ratio, data = data)
summary(data_lm)
## 
## Call:
## lm(formula = salary_in_usd ~ remote_ratio, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -127924  -55381  -19924   33076  468076 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   94837.9    15793.6   6.005 1.31e-08 ***
## remote_ratio    370.9      193.1   1.920   0.0567 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 93960 on 155 degrees of freedom
## Multiple R-squared:  0.02324,    Adjusted R-squared:  0.01694 
## F-statistic: 3.688 on 1 and 155 DF,  p-value: 0.05666
ggplot(data, aes(x = X2, y = salary_in_usd, color = employment_type)) +
  geom_point()

ggplot(data, aes(x = remote_ratio, y = X2, color = X3)) +
  facet_wrap( ~ X3) +
  geom_boxplot()

How does gender and experience relate to salary?

q1_data <- data %>%
  select(X2, X3, experience_level, salary_in_usd)

library(resampledata)
## 
## Attaching package: 'resampledata'
## The following object is masked from 'package:datasets':
## 
##     Titanic
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.0     ✔ forcats 0.5.2
## ✔ purrr   0.3.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(moderndive)
library(infer)

q1_data %>%
  group_by(X3) %>%
  summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 2 × 3
##   X3       Means     n
##   <chr>    <dbl> <int>
## 1 Female 133992.    70
## 2 Male   111504.    87
q1_data %>%
  group_by(X2) %>%
  summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 41 × 3
##       X2   Means     n
##    <dbl>   <dbl> <int>
##  1  1955 131000      2
##  2  1956  41363      2
##  3  1957 132533.     6
##  4  1958  89476      3
##  5  1959  84064      7
##  6  1960  80047      8
##  7  1961 149795.     6
##  8  1962  90430.     8
##  9  1963 137000      6
## 10  1964 108091.     8
## # … with 31 more rows
q1_data %>%
  group_by(experience_level) %>%
  summarize(Means = mean(salary_in_usd), n = n())
## # A tibble: 4 × 3
##   experience_level   Means     n
##   <chr>              <dbl> <int>
## 1 EN                67513.    37
## 2 EX               265104.     8
## 3 MI               109921.    61
## 4 SE               152083.    51
ggplot(q1_data, aes(x = salary_in_usd, y = experience_level, color = X3)) +
  geom_boxplot() +
  labs(title = "Experience and Salary", color = "Gender", x = "Salary", y = "Experience")

library(resampledata)
library(lattice)
q1_data %>%group_by(X3)%>%
  summarize(Mean = mean(salary_in_usd),SD=sd(salary_in_usd), n = n())
## # A tibble: 2 × 4
##   X3        Mean      SD     n
##   <chr>    <dbl>   <dbl> <int>
## 1 Female 133992. 105301.    70
## 2 Male   111504.  84643.    87
qqmath(~salary_in_usd|X3,data = q1_data, col = rgb(1,0,0,.5)) 

ggplot(q1_data, aes(x = X2)) +
  geom_histogram(binwidth = 10, color = "white")

x_bar <- q1_data %>% summarize(mean_year = mean(X2))
x_bar
##   mean_year
## 1  1972.962
set.seed(10)
virtual_resample <- q1_data %>% rep_sample_n(size = 50, replace = TRUE)
virtual_resample
## # A tibble: 50 × 5
## # Groups:   replicate [1]
##    replicate    X2 X3     experience_level salary_in_usd
##        <int> <dbl> <chr>  <chr>                    <dbl>
##  1         1  1990 Female MI                      110000
##  2         1  1970 Female SE                       80000
##  3         1  1983 Male   EN                       21844
##  4         1  1964 Male   MI                      110000
##  5         1  1957 Male   EN                       90000
##  6         1  1959 Male   EN                       41689
##  7         1  1988 Female MI                       93000
##  8         1  1970 Female SE                       80000
##  9         1  1962 Male   EX                      325000
## 10         1  1970 Female MI                       85000
## # … with 40 more rows
set.seed(10)
virtual_resampled_means <- q1_data %>%
  rep_sample_n(size = 50, replace = FALSE, reps = 1000) %>%
  group_by(experience_level) %>%
  summarize(mean_salary = mean(salary_in_usd))
ggplot(virtual_resampled_means, aes(x = mean_salary, y = experience_level)) +
  geom_violin() 
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`:
## replacement has 1 row, data has 0

set.seed(10)
virtual_resampled_means <- q1_data %>%
  rep_sample_n(size = 500, replace = TRUE, reps = 10000) %>%
  group_by(experience_level) 
ggplot(virtual_resampled_means, aes(x = salary_in_usd, y = experience_level, color = experience_level)) +
  geom_violin() +
  labs(title = "Bootstrapped data on Salary and Experince Level", x = "Salary", y = "Experience", color = "Expereince")

How does the pay distribution look?

salary <- data %>%
  select(salary_in_usd)
q2_data <- data %>% 
  select(salary_in_usd, job_title, company_size)

ggplot(q2_data, aes(x = salary_in_usd)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(q2_data, aes(x = salary_in_usd, color = company_size)) + 
  geom_histogram(aes(y = ..density..)) + 
  geom_density() +
  labs(title = "Trends within Salary and Comapny Sizes", x = "Salary", 
       y = "Density", color = "Company Size")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Remote work

q3_data <- data %>%
  select(work_year, experience_level, salary_in_usd, company_size, remote_ratio)

library(ggplot2) # for data visualization
library(dplyr) # for data wrangling
library(readr) # for importing spreadsheet data into R
library(moderndive) # package of datasets and regression functions
library(skimr)

glimpse(q3_data)
## Rows: 157
## Columns: 5
## $ work_year        <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,…
## $ experience_level <chr> "SE", "SE", "SE", "EN", "SE", "MI", "MI", "SE", "EN",…
## $ salary_in_usd    <dbl> 260000, 109024, 150000, 72000, 190000, 35735, 135000,…
## $ company_size     <chr> "S", "M", "L", "L", "S", "L", "L", "S", "S", "L", "M"…
## $ remote_ratio     <dbl> 0, 50, 50, 100, 100, 50, 100, 50, 0, 0, 0, 100, 100, …
q3_data %>%
  summarize(mean_salary = mean(salary_in_usd), mean_remote = mean(remote_ratio),
            median_salary = median(salary_in_usd), median_remote = median(remote_ratio))
##   mean_salary mean_remote median_salary median_remote
## 1    121530.1    71.97452        105000           100
library(knitr)
q3_data %>% select(salary_in_usd, remote_ratio) %>% skim()
Data summary
Name Piped data
Number of rows 157
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
salary_in_usd 0 1 121530.11 94761.98 4000 60000 105000 160000 6e+05 ▇▃▁▁▁
remote_ratio 0 1 71.97 38.95 0 50 100 100 1e+02 ▂▁▂▁▇
ggplot(q3_data, aes(x = salary_in_usd, y = remote_ratio)) +
  geom_jitter() +
  labs(x = "Salary", y = "Remote Ratio",
       title = "Relationship between Salary and Remote Work Ratio") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

 #Fit regression model:
score_model <- lm(salary_in_usd ~ remote_ratio, data = q3_data)
#Get regression table:
get_regression_table(score_model)
## # A tibble: 2 × 7
##   term         estimate std_error statistic p_value lower_ci upper_ci
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept      94838.    15794.      6.00   0      63639.   126036.
## 2 remote_ratio     371.      193.      1.92   0.057    -10.6     752.
regression_points <- get_regression_points(score_model)
regression_points
## # A tibble: 157 × 5
##       ID salary_in_usd remote_ratio salary_in_usd_hat residual
##    <int>         <dbl>        <dbl>             <dbl>    <dbl>
##  1     1        260000            0            94838.  165162.
##  2     2        109024           50           113381.   -4357.
##  3     3        150000           50           113381.   36619.
##  4     4         72000          100           131924.  -59924.
##  5     5        190000          100           131924.   58076.
##  6     6         35735           50           113381.  -77646.
##  7     7        135000          100           131924.    3076.
##  8     8        125000           50           113381.   11619.
##  9     9         51321            0            94838.  -43517.
## 10    10         40481            0            94838.  -54357.
## # … with 147 more rows
library(ggfortify)
autoplot(score_model, ncol = 8, nrow = 2, which = 8:2) + theme_bw()

summary(score_model)
## 
## Call:
## lm(formula = salary_in_usd ~ remote_ratio, data = q3_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -127924  -55381  -19924   33076  468076 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   94837.9    15793.6   6.005 1.31e-08 ***
## remote_ratio    370.9      193.1   1.920   0.0567 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 93960 on 155 degrees of freedom
## Multiple R-squared:  0.02324,    Adjusted R-squared:  0.01694 
## F-statistic: 3.688 on 1 and 155 DF,  p-value: 0.05666
ggplot(data = q3_data, aes(sample = salary_in_usd)) +
  stat_qq(color = rgb(1,0,0,.1)) +
  stat_qq_line() +
  facet_grid(cols = vars(remote_ratio)) +
  labs(title = "Quantile plot of Salary based on Remote Ratio", x = "Remote Ratio", y = "Salary")

theme_bw()
## List of 93
##  $ line                      :List of 6
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                      :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                      :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                     : NULL
##  $ aspect.ratio              : NULL
##  $ axis.title                : NULL
##  $ axis.title.x              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.75pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top          :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 2.75pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.bottom       : NULL
##  $ axis.title.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 2.75pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.left         : NULL
##  $ axis.title.y.right        :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 2.75pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.2pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top           :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 2.2pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.bottom        : NULL
##  $ axis.text.y               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 2.2pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.left          : NULL
##  $ axis.text.y.right         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 0pt 2.2pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                :List of 6
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.ticks.x              : NULL
##  $ axis.ticks.x.top          : NULL
##  $ axis.ticks.x.bottom       : NULL
##  $ axis.ticks.y              : NULL
##  $ axis.ticks.y.left         : NULL
##  $ axis.ticks.y.right        : NULL
##  $ axis.ticks.length         : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ axis.ticks.length.x       : NULL
##  $ axis.ticks.length.x.top   : NULL
##  $ axis.ticks.length.x.bottom: NULL
##  $ axis.ticks.length.y       : NULL
##  $ axis.ticks.length.y.left  : NULL
##  $ axis.ticks.length.y.right : NULL
##  $ axis.line                 : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x               : NULL
##  $ axis.line.x.top           : NULL
##  $ axis.line.x.bottom        : NULL
##  $ axis.line.y               : NULL
##  $ axis.line.y.left          : NULL
##  $ axis.line.y.right         : NULL
##  $ legend.background         :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.margin             : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ legend.spacing            : 'unit' num 11pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ legend.spacing.x          : NULL
##  $ legend.spacing.y          : NULL
##  $ legend.key                :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.key.size           : 'unit' num 1.2lines
##   ..- attr(*, "valid.unit")= int 3
##   ..- attr(*, "unit")= chr "lines"
##  $ legend.key.height         : NULL
##  $ legend.key.width          : NULL
##  $ legend.text               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.align         : NULL
##  $ legend.title              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.align        : NULL
##  $ legend.position           : chr "right"
##  $ legend.direction          : NULL
##  $ legend.justification      : chr "center"
##  $ legend.box                : NULL
##  $ legend.box.just           : NULL
##  $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "valid.unit")= int 1
##   ..- attr(*, "unit")= chr "cm"
##  $ legend.box.background     : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing        : 'unit' num 11pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ panel.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.border              :List of 5
##   ..$ fill         : logi NA
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.spacing             : 'unit' num 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ panel.spacing.x           : NULL
##  $ panel.spacing.y           : NULL
##  $ panel.grid                :List of 6
##   ..$ colour       : chr "grey92"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major          : NULL
##  $ panel.grid.minor          :List of 6
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.5
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major.x        : NULL
##  $ panel.grid.major.y        : NULL
##  $ panel.grid.minor.x        : NULL
##  $ panel.grid.minor.y        : NULL
##  $ panel.ontop               : logi FALSE
##  $ plot.background           :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : chr "white"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.title.position       : chr "panel"
##  $ plot.subtitle             :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0pt 0pt 5.5pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : num 1
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 5.5pt 0pt 0pt 0pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption.position     : chr "panel"
##  $ plot.tag                  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag.position         : chr "topleft"
##  $ plot.margin               : 'margin' num [1:4] 5.5pt 5.5pt 5.5pt 5.5pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ strip.background          :List of 5
##   ..$ fill         : chr "grey85"
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ strip.background.x        : NULL
##  $ strip.background.y        : NULL
##  $ strip.placement           : chr "inside"
##  $ strip.text                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey10"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 4.4pt 4.4pt 4.4pt 4.4pt
##   .. ..- attr(*, "valid.unit")= int 8
##   .. ..- attr(*, "unit")= chr "pt"
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.x              : NULL
##  $ strip.text.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.switch.pad.grid     : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ strip.switch.pad.wrap     : 'unit' num 2.75pt
##   ..- attr(*, "valid.unit")= int 8
##   ..- attr(*, "unit")= chr "pt"
##  $ strip.text.y.left         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE
ggplot(q3_data, aes(x = remote_ratio)) +
  geom_bar() +
  facet_wrap(vars(work_year)) +
  labs(title = "Two Year Comparision of the Remote Work Ratio", 
       x = "Remote Ratio (0%, 50%, and 100%)", y = "Amount of Companies" )

set.seed(75)
library(infer)
q3 <- q3_data %>%
  specify(response = remote_ratio) %>%
  generate(reps = 10^4, type = "bootstrap") %>%
  calculate(stat = "mean")
visualize(q3)

ggplot(q3_data, aes(x = salary_in_usd, color = experience_level)) + 
  geom_histogram(aes(y = ..density..)) + 
  geom_density() +
  labs(title = "Trends within Salary and Experience Level", x = "Salary", 
       y = "Density", color = "Experience")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(tidyverse)
library(moderndive)
data
##       X2     X3 work_year experience_level employment_type salary_in_usd
## 1   1972 Female      2020               SE              FT        260000
## 2   1972   Male      2020               SE              FT        109024
## 3   1987 Female      2020               SE              FT        150000
## 4   1991   Male      2020               EN              FT         72000
## 5   1989   Male      2020               SE              FT        190000
## 6   1973   Male      2020               MI              FT         35735
## 7   1992   Male      2020               MI              FT        135000
## 8   1977   Male      2020               SE              FT        125000
## 9   1985   Male      2020               EN              FT         51321
## 10  1980 Female      2020               MI              FT         40481
## 11  1989   Male      2020               EN              FT         39916
## 12  1994 Female      2020               MI              FT         87000
## 13  1970 Female      2020               MI              FT         85000
## 14  1962 Female      2020               MI              FT          8000
## 15  1959   Male      2020               EN              FT         41689
## 16  1971 Female      2020               SE              FT        114047
## 17  1960   Male      2020               EN              FT          5707
## 18  1966   Male      2020               MI              FT         56000
## 19  1969   Male      2020               MI              FT         43331
## 20  1961   Male      2020               MI              FT          6072
## 21  1969   Male      2020               SE              FT         47899
## 22  1959   Male      2020               MI              FT         98000
## 23  1963   Male      2020               MI              FT        115000
## 24  1962   Male      2020               EX              FT        325000
## 25  1965   Male      2020               EN              FT         42000
## 26  1962 Female      2020               SE              FT         33511
## 27  1963 Female      2020               EN              CT        100000
## 28  1958   Male      2020               SE              FT         68428
## 29  1961   Male      2020               MI              FT        450000
## 30  1959   Male      2020               MI              FT         46759
## 31  1980 Female      2020               MI              FT         74130
## 32  1991 Female      2020               MI              FT        103000
## 33  1973   Male      2020               EN              FT        250000
## 34  1971 Female      2020               EN              FT         10000
## 35  1996   Male      2020               EN              FT        138000
## 36  1975   Male      2020               MI              FT         45760
## 37  1996 Female      2020               EX              FT         79833
## 38  1987   Male      2020               MI              FT         50180
## 39  1966 Female      2020               MI              FT        106000
## 40  1976 Female      2020               MI              FT        112872
## 41  1967 Female      2020               EN              PT         15966
## 42  1965   Male      2020               MI              FT         76958
## 43  1961 Female      2020               SE              FT        188000
## 44  1964   Male      2020               MI              FT        105000
## 45  1978 Female      2020               MI              FT         70139
## 46  1979   Male      2020               EN              FT          6072
## 47  1974   Male      2020               EN              FT         91000
## 48  1969   Male      2020               EN              FT         45896
## 49  1960 Female      2020               SE              FL         60000
## 50  1985   Male      2020               SE              FT        148261
## 51  1962   Male      2020               MI              FT         38776
## 52  1964   Male      2020               MI              FT        118000
## 53  1965   Male      2020               SE              FT        120000
## 54  1988   Male      2020               MI              FT        138350
## 55  1959   Male      2020               MI              FT        110000
## 56  1966 Female      2020               MI              FT        130800
## 57  1960   Male      2020               EN              PT         21669
## 58  1979 Female      2020               SE              FT        412000
## 59  1966 Female      2020               SE              FT         45618
## 60  1956   Male      2020               EN              FT         62726
## 61  1957   Male      2020               SE              FT        190200
## 62  1986   Male      2020               EN              FT        105000
## 63  1987 Female      2020               SE              FT         91237
## 64  1964 Female      2020               MI              FT         62726
## 65  1961   Male      2020               MI              FT         42197
## 66  1959 Female      2021               EX              FT        150000
## 67  1985 Female      2021               EX              FT        235000
## 68  1992 Female      2021               MI              FT        100000
## 69  1973   Male      2021               MI              CT        270000
## 70  1963 Female      2021               EN              FT         80000
## 71  1961   Male      2021               MI              FT        140000
## 72  1964   Male      2021               MI              FT        110000
## 73  1957   Male      2021               SE              FT        170000
## 74  1970 Female      2021               SE              FT         80000
## 75  1963   Male      2021               SE              FT        276000
## 76  1971   Male      2021               EN              PT         12000
## 77  1970 Female      2021               MI              FT        450000
## 78  1964   Male      2021               EN              FT         70000
## 79  1962   Male      2021               MI              FT         75000
## 80  1974   Male      2021               SE              FT        150000
## 81  1955   Male      2021               MI              FT         62000
## 82  1960   Male      2021               MI              FT         73000
## 83  1968   Male      2021               SE              FT        115000
## 84  1969   Male      2021               SE              FT        150000
## 85  1959   Male      2021               EN              PT         12000
## 86  1960   Male      2021               EN              FT        225000
## 87  1962   Male      2021               MI              FT         50000
## 88  1957   Male      2021               EN              FT         90000
## 89  1964   Male      2021               MI              FT        200000
## 90  1963 Female      2021               MI              FT        151000
## 91  1973 Female      2021               MI              FT         90000
## 92  1991   Male      2021               SE              FT        153000
## 93  1987   Male      2021               SE              FT        160000
## 94  1993 Female      2021               SE              FT        168000
## 95  1975   Male      2021               MI              FT        150000
## 96  1991   Male      2021               EN              FT         13400
## 97  1990   Male      2021               MI              FT        423000
## 98  1957 Female      2021               SE              FT        120000
## 99  1971   Male      2021               EN              FT        125000
## 100 1987 Female      2021               EX              FT        230000
## 101 1967   Male      2021               EX              FT         85000
## 102 1965 Female      2021               SE              FT        165000
## 103 1989 Female      2021               EN              FT         60000
## 104 1985 Female      2021               SE              FT        235000
## 105 1990 Female      2021               SE              FT        174000
## 106 1977 Female      2021               EN              FT         81000
## 107 1973   Male      2021               MI              FL         12000
## 108 1964 Female      2021               MI              FT          4000
## 109 1992 Female      2021               SE              FT         50000
## 110 1990 Female      2021               MI              FT         74000
## 111 1992 Female      2021               SE              FT        152000
## 112 1983   Male      2021               EN              FT         21844
## 113 1986 Female      2021               MI              FT         18000
## 114 1969 Female      2021               SE              FT        174000
## 115 1975 Female      2021               MI              FT        147000
## 116 1966 Female      2021               EN              FT          9272
## 117 1958 Female      2021               EN              FT         90000
## 118 1964 Female      2021               SE              FT        195000
## 119 1972 Female      2021               SE              FT         50000
## 120 1988 Female      2021               MI              FT        160000
## 121 1973 Female      2021               SE              FT        200000
## 122 1988 Female      2021               SE              FT        165000
## 123 1956   Male      2021               MI              FL         20000
## 124 1957   Male      2021               SE              FT        120000
## 125 1994   Male      2021               SE              FT        185000
## 126 1987   Male      2021               SE              FT        140000
## 127 1987   Male      2021               SE              FT        225000
## 128 1987 Female      2021               EX              CT        416000
## 129 1982 Female      2021               SE              FT        135000
## 130 1975   Male      2021               SE              FT        256000
## 131 1967 Female      2021               SE              FT        200000
## 132 1955 Female      2021               SE              FT        200000
## 133 1979 Female      2021               MI              FT        180000
## 134 1958   Male      2021               MI              FT        110000
## 135 1978 Female      2021               EN              FT          4000
## 136 1989   Male      2021               MI              FT         80000
## 137 1990 Female      2021               MI              FT        110000
## 138 1965   Male      2021               SE              FT        165000
## 139 1999 Female      2021               SE              FT        170000
## 140 1980   Male      2021               MI              FT        115000
## 141 1977 Female      2021               EN              FT         90000
## 142 1990 Female      2021               EX              FT        600000
## 143 1988 Female      2021               MI              FT         93000
## 144 1990 Female      2021               MI              FT        200000
## 145 1968   Male      2021               SE              FT        185000
## 146 1959   Male      2021               MI              FT        130000
## 147 1990 Female      2021               SE              FT        160000
## 148 1962   Male      2021               MI              FT         93150
## 149 1987 Female      2021               MI              FT        111775
## 150 1961   Male      2021               EN              FT         72500
## 151 1960   Male      2021               EN              FT         85000
## 152 1962 Female      2021               EN              FT        100000
## 153 1960   Male      2021               EN              FT         58000
## 154 1997   Male      2021               SE              FT         55000
## 155 1960 Female      2021               MI              FT        112000
## 156 1963   Male      2021               EN              FT        100000
## 157 1957   Male      2021               SE              CT        105000
##     company_size remote_ratio                                job_title
## 1              S            0               Machine Learning Scientist
## 2              M           50                        Big Data Engineer
## 3              L           50                Machine Learning Engineer
## 4              L          100                             Data Analyst
## 5              S          100                      Lead Data Scientist
## 6              L           50                           Data Scientist
## 7              L          100                    Business Data Analyst
## 8              S           50                       Lead Data Engineer
## 9              S            0                           Data Scientist
## 10             L            0                           Data Scientist
## 11             M            0                           Data Scientist
## 12             L          100                        Lead Data Analyst
## 13             L          100                             Data Analyst
## 14             L           50                             Data Analyst
## 15             S          100                            Data Engineer
## 16             S          100                        Big Data Engineer
## 17             M           50                  Data Science Consultant
## 18             M          100                       Lead Data Engineer
## 19             M            0                Machine Learning Engineer
## 20             L          100                     Product Data Analyst
## 21             L           50                            Data Engineer
## 22             M            0                          BI Data Analyst
## 23             L            0                      Lead Data Scientist
## 24             L          100                 Director of Data Science
## 25             L           50                       Research Scientist
## 26             S            0                            Data Engineer
## 27             L          100                    Business Data Analyst
## 28             L          100                           Data Scientist
## 29             M            0                       Research Scientist
## 30             L           50                             Data Analyst
## 31             L           50                            Data Engineer
## 32             L          100                  Data Science Consultant
## 33             L           50                Machine Learning Engineer
## 34             S          100                             Data Analyst
## 35             S          100                Machine Learning Engineer
## 36             S          100                           Data Scientist
## 37             L           50                 Data Engineering Manager
## 38             M            0 Machine Learning Infrastructure Engineer
## 39             L          100                            Data Engineer
## 40             L           50                            Data Engineer
## 41             S          100                              ML Engineer
## 42             S          100                           Data Scientist
## 43             L          100                            Data Engineer
## 44             L          100                           Data Scientist
## 45             L           50                            Data Engineer
## 46             S            0                             Data Analyst
## 47             L          100                             Data Analyst
## 48             S           50                             AI Scientist
## 49             S          100                 Computer Vision Engineer
## 50             M          100                 Principal Data Scientist
## 51             M          100                           Data Scientist
## 52             M          100                           Data Scientist
## 53             L           50                           Data Scientist
## 54             M          100                           Data Scientist
## 55             L          100                            Data Engineer
## 56             M          100                            Data Engineer
## 57             S           50                           Data Scientist
## 58             L          100                           Data Scientist
## 59             S          100                Machine Learning Engineer
## 60             S           50                           Data Scientist
## 61             M          100                     Data Science Manager
## 62             S          100                           Data Scientist
## 63             S            0                           Data Scientist
## 64             S           50                           Data Scientist
## 65             S           50                           Data Scientist
## 66             L          100                          BI Data Analyst
## 67             L          100                             Head of Data
## 68             M          100                          BI Data Analyst
## 69             L          100                              ML Engineer
## 70             M          100                             Data Analyst
## 71             L          100                            Data Engineer
## 72             L          100                  Data Analytics Engineer
## 73             L          100                        Lead Data Analyst
## 74             S          100                             Data Analyst
## 75             L            0                       Lead Data Engineer
## 76             S          100                             AI Scientist
## 77             L          100                   Financial Data Analyst
## 78             M          100        Computer Vision Software Engineer
## 79             L            0                             Data Analyst
## 80             L          100                            Data Engineer
## 81             L            0                             Data Analyst
## 82             L            0                           Data Scientist
## 83             S          100                            Data Engineer
## 84             M          100                            Data Engineer
## 85             M          100                             AI Scientist
## 86             L          100               Machine Learning Scientist
## 87             L          100                           Data Scientist
## 88             S          100                             Data Analyst
## 89             L          100                            Data Engineer
## 90             L          100                 Principal Data Scientist
## 91             L          100                            Data Engineer
## 92             L          100                 Data Engineering Manager
## 93             S          100                      Cloud Data Engineer
## 94             S            0                 Director of Data Science
## 95             M          100                           Data Scientist
## 96             L          100                           Data Scientist
## 97             L           50       Applied Machine Learning Scientist
## 98             M          100                   Data Analytics Manager
## 99             S          100                Machine Learning Engineer
## 100            L           50                             Head of Data
## 101            M            0                     Head of Data Science
## 102            L          100                          Data Specialist
## 103            S          100                             Data Analyst
## 104            L          100                 Principal Data Scientist
## 105            L          100                 Data Engineering Manager
## 106            S           50                Machine Learning Engineer
## 107            M           50               Machine Learning Scientist
## 108            M          100                            Data Engineer
## 109            M          100                  Data Analytics Engineer
## 110            S           50                Machine Learning Engineer
## 111            L          100                     Data Science Manager
## 112            M           50                Machine Learning Engineer
## 113            S            0                        Big Data Engineer
## 114            L          100                     Data Science Manager
## 115            L           50                           Data Scientist
## 116            S          100                          BI Data Analyst
## 117            S          100                  Data Science Consultant
## 118            M          100 Machine Learning Infrastructure Engineer
## 119            S          100                       Research Scientist
## 120            L          100                           Data Scientist
## 121            L          100                Machine Learning Engineer
## 122            M            0                            Data Engineer
## 123            L            0                            Data Engineer
## 124            L            0                   Data Analytics Manager
## 125            L          100                  Principal Data Engineer
## 126            L          100                   Data Analytics Manager
## 127            L          100               Machine Learning Scientist
## 128            S          100                 Principal Data Scientist
## 129            L            0                           Data Scientist
## 130            S          100                              ML Engineer
## 131            L          100             Director of Data Engineering
## 132            L          100                             Data Analyst
## 133            L          100                           Data Architect
## 134            S            0                     Head of Data Science
## 135            M            0                           Data Scientist
## 136            L          100                             Data Analyst
## 137            L          100                            Data Engineer
## 138            L          100                           Data Scientist
## 139            M          100                   Principal Data Analyst
## 140            L           50                           Data Scientist
## 141            S          100                           Data Scientist
## 142            L          100                  Principal Data Engineer
## 143            L          100                             Data Analyst
## 144            L          100                            Data Engineer
## 145            L           50                Machine Learning Engineer
## 146            L           50                           Data Scientist
## 147            S           50                       Lead Data Engineer
## 148            M            0                            Data Engineer
## 149            M            0                            Data Engineer
## 150            L          100                            Data Engineer
## 151            S          100                Machine Learning Engineer
## 152            M          100                           Data Scientist
## 153            L           50                           Data Scientist
## 154            L          100                             AI Scientist
## 155            L          100                            Data Engineer
## 156            L            0                       Research Scientist
## 157            M          100                     Staff Data Scientist
data_sample_wy <- data %>%
  select(work_year)

Bootstrapping

virtual_samples_25 <- data_sample_wy %>%
  rep_sample_n(size = 25, reps = 1000)

virtual_25 <- virtual_samples_25 %>%
  group_by(replicate) %>%
  summarize(year = sum(work_year == "2020"))

plot1<-ggplot(virtual_25, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 25 balls that were red", title = "25")
#View(plot1)


virtual_samples_50 <- data_sample_wy %>%
  rep_sample_n(size = 50, reps = 1000)

virtual_50 <- virtual_samples_50 %>%
  group_by(replicate) %>%
  summarize(year = sum(work_year == "2020"))

plot2<-ggplot(virtual_50, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 50 balls that were red", title = "50")

virtual_samples_100 <- data_sample_wy %>%
  rep_sample_n(size = 100, reps = 1000)

virtual_100 <- virtual_samples_100 %>%
  group_by(replicate) %>%
  summarize(year = sum(work_year == "2020"))

plot3<-ggplot(virtual_100, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 100 balls that were red", title = "100")

require(gridExtra)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(plot1, plot2,plot3, ncol=3)

data_sample_sal <- data %>% 
  select(salary_in_usd)



virtual_samples_25.1 <- data_sample_sal %>%
  rep_sample_n(size = 25, reps = 1000)

plot1<-ggplot(virtual_25, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 25 balls that were red", title = "25")
#View(plot1)

virtual_samples_50 <- data_sample_wy %>%
  rep_sample_n(size = 50, reps = 1000)

virtual_50 <- virtual_samples_50 %>%
  group_by(replicate) %>%
  summarize(year = sum(work_year == "2020"))

plot2<-ggplot(virtual_50, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 50 balls that were red", title = "50")


virtual_samples_100 <- data_sample_wy %>%
  rep_sample_n(size = 100, reps = 1000)

virtual_100 <- virtual_samples_100 %>%
  group_by(replicate) %>%
  summarize(year = sum(work_year == "2020"))

plot3<-ggplot(virtual_100, aes(x = year)) +
  geom_histogram(binwidth = 2, boundary = 4, color = "white") +
  labs(x = "Proportion of 100 balls that were red", title = "100")

require(gridExtra)
grid.arrange(plot1, plot2,plot3, ncol=3)

#Fit regression model:
Lin_model <- lm(salary_in_usd ~ remote_ratio, data = data)
#Get regression table:
get_regression_table(Lin_model)
## # A tibble: 2 × 7
##   term         estimate std_error statistic p_value lower_ci upper_ci
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
## 1 intercept      94838.    15794.      6.00   0      63639.   126036.
## 2 remote_ratio     371.      193.      1.92   0.057    -10.6     752.