Warning: package 'readxl' was built under R version 4.4.3
Warning: package 'tidyverse' was built under R version 4.4.3
Warning: package 'ggplot2' was built under R version 4.4.3
Warning: package 'tibble' was built under R version 4.4.3
Warning: package 'tidyr' was built under R version 4.4.3
Warning: package 'readr' was built under R version 4.4.3
Warning: package 'purrr' was built under R version 4.4.3
Warning: package 'dplyr' was built under R version 4.4.3
Warning: package 'stringr' was built under R version 4.4.3
Warning: package 'forcats' was built under R version 4.4.3
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the worksheet from an Excel file chosen interactively, then preview
# the first six rows to check the import.
df <- read_excel(path = file.choose())
head(df, n = 6L)
# A tibble: 6 × 6
Observation Wage Female Age Educ Parttime
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 119 32 1 31 1 1
2 2 34 1 42 1 1
3 41 37 1 31 1 1
4 65 38 1 33 1 1
5 246 38 1 21 2 1
6 254 38 0 28 2 1
# Histogram of Wage using 30 bins.
ggplot(data = df, mapping = aes(x = Wage)) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black")
# Box plots of Wage for each value of Female (treated as a discrete x axis).
# The fill colour is mapped to the group and assigned by level name, so each
# colour stays attached to its group instead of relying on a positional
# vector whose length must equal the number of boxes drawn.
ggplot(df, aes(x = factor(Female), y = Wage, fill = factor(Female))) +
  geom_boxplot() +
  scale_fill_manual(
    values = c("0" = "lightblue", "1" = "pink"),
    guide = "none" # the x axis already identifies the groups
  )
# Wage summary statistics (mean, median, sd, min, max) per Female group.
df %>%
  group_by(Female) %>%
  summarise(
    mean_wage = mean(Wage),
    median_wage = median(Wage),
    sd_wage = sd(Wage),
    min_wage = min(Wage),
    max_wage = max(Wage),
    .groups = "drop"
  )
# A tibble: 2 × 6
Female mean_wage median_wage sd_wage min_wage max_wage
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 125. 111 57.3 38 384
2 1 97.3 83.5 46.3 32 364
# Gap in mean Wage: rows with Female == 0 minus rows with Female == 1.
with(df, mean(Wage[Female == 0]) - mean(Wage[Female == 1]))
# Add the natural log of Wage as a new column.
df[["l_wage"]] <- log(df[["Wage"]])

# Histogram of log wage using 30 bins.
ggplot(data = df, mapping = aes(x = l_wage)) +
  geom_histogram(bins = 30, fill = "orange")

# Box plots of log wage for each value of Female.
ggplot(data = df, mapping = aes(x = factor(Female), y = l_wage)) +
  geom_boxplot()
# Gap in mean log wage: rows with Female == 0 minus rows with Female == 1.
with(df, mean(l_wage[Female == 0]) - mean(l_wage[Female == 1]))
# Cross-tabulate Educ (rows) against Female (columns).
table(df[["Educ"]], df[["Female"]])
0 1
1 108 88
2 77 57
3 72 33
4 59 6
# Mean of Parttime within each Female group (a share of part-time workers
# if Parttime is a 0/1 indicator).
df %>%
  group_by(Female) %>%
  summarise(parttime_rate = mean(Parttime), .groups = "drop")
# A tibble: 2 × 2
Female parttime_rate
<dbl> <dbl>
1 0 0.225
2 1 0.560
# Mean and median Age within each Female group.
df %>%
  group_by(Female) %>%
  summarise(
    mean_age = mean(Age),
    median_age = median(Age),
    .groups = "drop"
  )
# A tibble: 2 × 3
Female mean_age median_age
<dbl> <dbl> <dbl>
1 0 40.1 39
2 1 39.9 39
# Labelled factor version of Female: 0 -> "Men", 1 -> "Women".
df[["Gender"]] <- factor(
  df[["Female"]],
  levels = c(0, 1),
  labels = c("Men", "Women")
)