The process of loading packages is hidden.

# Load the full ESS extract from the fst file in the working directory.
ess <- read_fst(path = "All-ESS-Data.fst")

This is my first programming line

# Smoke test: confirm that the R session evaluates code.
print(x = "hell yeah this works!")
## [1] "hell yeah this works!"

Mission 1

# Clean the variables of interest.
# Education codes outside 1-7 and income deciles outside 1-10 are recoded to NA
# (presumably ESS missing/"other" codes -- confirm against the codebook), then
# only the three variables are kept and any row with an NA is dropped.
ess_clean <- ess %>%
  mutate(
    across(c(eisced, eiscedp), ~ if_else(.x %in% 1:7, .x, NA)),
    hinctnta = if_else(hinctnta %in% 1:10, hinctnta, NA)
  ) %>%
  select(eisced, eiscedp, hinctnta) %>%
  na.omit()

# create frequency tables
# Frequency table of the respondent's own education level.
freq1 <- ess_clean$eisced %>%
  table() %>%
  as.data.frame() %>%
  setNames(c("Highest Level of Education", "Frequency"))
kable(
  freq1,
  caption = "Frequency Table for Highest Level of Education",
  align = c("c", "r")
)
Frequency Table for Highest Level of Education
Highest Level of Education Frequency
1 11328
2 20835
3 27768
4 30112
5 22228
6 18490
7 23419
# Frequency table of the partner's education level.
freq2 <- ess_clean$eiscedp %>%
  table() %>%
  as.data.frame() %>%
  setNames(c("Partner Highest Edu", "Frequency"))
kable(
  freq2,
  caption = "Frequency Table for Partner's Highest Level of Education",
  align = c("c", "r")
)
Frequency Table for Partner’s Highest Level of Education
Partner Highest Edu Frequency
1 12294
2 21989
3 29357
4 30577
5 20873
6 17519
7 21571
# Frequency table of the household income decile.
freq3 <- ess_clean$hinctnta %>%
  table() %>%
  as.data.frame() %>%
  setNames(c("Household Income", "Frequency"))
kable(
  freq3,
  caption = "Frequency Table for Household Income Decile",
  align = c("c", "r")
)
Frequency Table for Household Income Decile
Household Income Frequency
1 6048
2 10517
3 14131
4 17054
5 17904
6 18533
7 19413
8 18781
9 16130
10 15669

Mission 3

# Build the GB analysis sample from three variables: highest level of education
# (eisced), partner's highest level of education (eiscedp) and household income
# decile (hinctnta). Out-of-range codes become NA and incomplete rows are
# dropped before the derived column is added.
ess_gb <- ess %>%
  filter(cntry == "GB") %>%
  select(eisced, eiscedp, hinctnta) %>%
  mutate(
    across(c(eisced, eiscedp), ~ if_else(.x %in% 1:7, .x, NA)),
    hinctnta = if_else(hinctnta %in% 1:10, hinctnta, NA)
  ) %>%
  na.omit() %>%
  # Absolute educational difference between respondent and partner.
  mutate(edu_diff = abs(eisced - eiscedp))

Mission 4

# Summary statistics for every column of the GB sample.
ess_gb %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
eisced 7 0 4.2 2.0 1.0 5.0 7.0
eiscedp 7 0 4.0 2.1 1.0 4.0 7.0
hinctnta 10 0 6.3 2.7 1.0 7.0 10.0
edu_diff 7 0 1.4 1.5 0.0 1.0 6.0

Mission 5

# Counts and percentages of GB respondents in each household income decile.
freq(ess_gb[["hinctnta"]])
##      n    % val%
## 1  203  4.1  4.1
## 2  368  7.5  7.5
## 3  399  8.1  8.1
## 4  408  8.3  8.3
## 5  460  9.3  9.3
## 6  532 10.8 10.8
## 7  580 11.7 11.7
## 8  650 13.2 13.2
## 9  589 11.9 11.9
## 10 748 15.2 15.2
# Bar chart of the percentage of GB respondents in each household income decile.
ess_gb %>%
  select(hinctnta) %>%
  freq() %>%
  as.data.frame() %>%
  ggplot(
    aes(
      # freq() returns the decile values as row names; fixing the factor levels
      # to 1..10 keeps the bars in numeric rather than alphabetical order.
      x = factor(rownames(.), levels = 1:10),
      y = `%`
    )
  ) +
  geom_bar(stat = "identity") +
  ggtitle("Distribution of Household Income Deciles") +
  xlab("Household Income Decile")

Note that a greater proportion of the surveyed individuals report relatively high household incomes: in this GB subsample of the ESS data, the upper income deciles are over-represented and the lower income deciles are under-represented.

Mission 6

# Column percentages of own education (rows) by partner's education (columns).
cprop(table(ess_gb$eisced, ess_gb$eiscedp))
##        
##         1     2     3     4     5     6     7     All  
##   1      48.9   9.2  13.6   8.7   6.7   3.1   3.1  14.5
##   2      11.3  26.5  12.8  11.1  11.3   5.6   5.0  11.4
##   3      11.6  15.0  18.0  10.0   9.6   5.2   4.4  10.1
##   4       9.1  13.9  16.6  27.3  13.0  10.2   7.2  13.3
##   5      11.2  17.9  21.8  19.0  29.6  15.3  13.0  18.0
##   6       4.4  11.5  10.1  13.9  17.0  39.2  19.8  16.8
##   7       3.5   5.9   7.1  10.0  12.8  21.4  47.5  15.9
##   Total 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0

This table shows an interesting pattern: people tend to have partners whose education level is the same as, or close to, their own.

Mission 7

# Cross-tabulate income decile (rows) by own education level (columns).
# The surrounding parentheses print the table as well as storing it.
(eduincome <- datasummary_crosstab(formula = hinctnta ~ eisced, data = ess_gb))
hinctnta 1  2  3  4  5  6  7 All
1 N 72 24 26 31 19 21 10 203
% row 35.5 11.8 12.8 15.3 9.4 10.3 4.9 100.0
2 N 157 41 44 41 50 15 20 368
% row 42.7 11.1 12.0 11.1 13.6 4.1 5.4 100.0
3 N 134 50 55 58 60 27 15 399
% row 33.6 12.5 13.8 14.5 15.0 6.8 3.8 100.0
4 N 107 50 46 52 84 33 36 408
% row 26.2 12.3 11.3 12.7 20.6 8.1 8.8 100.0
5 N 70 72 66 66 80 57 49 460
% row 15.2 15.7 14.3 14.3 17.4 12.4 10.7 100.0
6 N 64 77 66 77 115 77 56 532
% row 12.0 14.5 12.4 14.5 21.6 14.5 10.5 100.0
7 N 42 74 63 82 132 99 88 580
% row 7.2 12.8 10.9 14.1 22.8 17.1 15.2 100.0
8 N 33 75 59 106 130 136 111 650
% row 5.1 11.5 9.1 16.3 20.0 20.9 17.1 100.0
9 N 17 51 42 65 108 151 155 589
% row 2.9 8.7 7.1 11.0 18.3 25.6 26.3 100.0
10 N 21 48 31 79 113 211 245 748
% row 2.8 6.4 4.1 10.6 15.1 28.2 32.8 100.0
All N 717 562 498 657 891 827 785 4937
% row 14.5 11.4 10.1 13.3 18.0 16.8 15.9 100.0

The association between education level and household income decile is evident: higher income deciles contain larger shares of highly educated individuals.

Mission 9

# Mean household income decile at each education level (1-7) for the GB
# respondents in `ess_gb`, drawn as a line chart.
edu_levels <- 1:7
incomes <- vapply(
  edu_levels,
  # `%in%` never yields NA, so rows with a missing `eisced` are skipped, just
  # as `filter(eisced == i)` skipped them in the loop this replaces.
  function(level) mean(ess_gb$hinctnta[ess_gb$eisced %in% level]),
  numeric(1)
)

df <- data.frame(
  inc = incomes,
  edu = edu_levels
)

# Map the aesthetics to the columns of `df`, not to the global vectors, so the
# plot depends only on the data frame passed to ggplot().
graph <- ggplot(df, aes(x = edu, y = inc)) +
  geom_line() +
  labs(
    title = "Relationship Between Income Decile and Highest Edu",
    x = "Highest Level of Education, ES-ISCED",
    y = "Total Household Income Decile"
  )

print(graph)

Mission 10

# For each partner-education level, the share of GB respondents that falls in
# each household income decile.
df_summary <- ess_gb %>%
  group_by(eiscedp, hinctnta) %>%
  # Keep the grouping by `eiscedp` explicitly so that sum(Count) below is the
  # total within each partner-education level (and no grouping message is shown).
  summarize(Count = n(), .groups = "drop_last") %>%
  mutate(Proportion = Count / sum(Count)) %>%
  ungroup()

# Plot
# Both variables are converted to factors: a numeric `fill` is a continuous
# aesthetic, which gives ggplot no per-decile groups to dodge, so the bars
# would be drawn on top of each other.
ggplot(
  df_summary,
  aes(x = factor(eiscedp), y = Proportion, fill = factor(hinctnta))
) +
  geom_col(position = "dodge") +
  labs(
    title = "Proportion of Income Decile by Partner Edu",
    y = "Proportion",
    x = "Partner's Highest Level of Education",
    fill = "Household Income Decile"
  ) +
  theme_minimal()