Participation_3

This is my first programming line

# Smoke test: print a fixed message to confirm that the code chunk runs.
print("hell yeah this works!")

## [1] "hell yeah this works!"

Mission 1

# Clean the variables of interest: keep the two education variables and the
# household income decile, set any code outside the accepted range (1-7 for
# education, 1-10 for income) to NA, and drop rows with a missing value.
ess_clean <- ess %>%
  select(eisced, eiscedp, hinctnta) %>%
  mutate(
    across(c(eisced, eiscedp), ~ if_else(.x %in% 1:7, .x, NA)),
    hinctnta = if_else(hinctnta %in% 1:10, hinctnta, NA)
  ) %>%
  na.omit()

# Frequency table: respondent's highest level of education.
freq1 <- setNames(
  as.data.frame(table(ess_clean[["eisced"]])),
  c("Highest Level of Education", "Frequency")
)
kable(
  freq1,
  caption = "Frequency Table for Highest Level of Education",
  align = c("c", "r")
)

Frequency Table for Highest Level of Education
Highest Level of Education	Frequency
1	11328
2	20835
3	27768
4	30112
5	22228
6	18490
7	23419

# Frequency table: partner's highest level of education.
freq2 <- setNames(
  as.data.frame(table(ess_clean[["eiscedp"]])),
  c("Partner Highest Edu", "Frequency")
)
kable(
  freq2,
  caption = "Frequency Table for Partner's Highest Level of Education",
  align = c("c", "r")
)

Frequency Table for Partner’s Highest Level of Education
Partner Highest Edu	Frequency
1	12294
2	21989
3	29357
4	30577
5	20873
6	17519
7	21571

# Frequency table: household income decile.
freq3 <- setNames(
  as.data.frame(table(ess_clean[["hinctnta"]])),
  c("Household Income", "Frequency")
)
kable(
  freq3,
  caption = "Frequency Table for Household Income Decile",
  align = c("c", "r")
)

Frequency Table for Household Income Decile
Household Income	Frequency
1	6048
2	10517
3	14131
4	17054
5	17904
6	18533
7	19413
8	18781
9	16130
10	15669

Mission 3

# Build the GB analysis sample in one pipeline:
#   1. keep GB respondents and the three variables of interest (highest level
#      of education, partner's highest level of education, household income
#      decile);
#   2. set codes outside the accepted ranges to NA and drop incomplete rows;
#   3. add the absolute education gap between respondent and partner.
ess_gb <- ess %>%
  filter(cntry == "GB") %>%
  select(eisced, eiscedp, hinctnta) %>%
  mutate(
    across(c(eisced, eiscedp), ~ if_else(.x %in% 1:7, .x, NA)),
    hinctnta = if_else(hinctnta %in% 1:10, hinctnta, NA)
  ) %>%
  na.omit() %>%
  mutate(edu_diff = abs(eisced - eiscedp))

Mission 4

# Summary statistics for every variable in the GB sample.
datasummary_skim(data = ess_gb)

	Unique (#)	Mean	SD	Min	Median	Max
eisced	7	4.2	2.0	1.0	5.0	7.0
eiscedp	7	4.0	2.1	1.0	4.0	7.0
hinctnta	10	6.3	2.7	1.0	7.0	10.0
edu_diff	7	1.4	1.5	0.0	1.0	6.0

Mission 5

# Counts and percentages of GB respondents in each household income decile.
freq(ess_gb[["hinctnta"]])

##      n    % val%
## 1  203  4.1  4.1
## 2  368  7.5  7.5
## 3  399  8.1  8.1
## 4  408  8.3  8.3
## 5  460  9.3  9.3
## 6  532 10.8 10.8
## 7  580 11.7 11.7
## 8  650 13.2 13.2
## 9  589 11.9 11.9
## 10 748 15.2 15.2

# Bar chart of the percentage of GB respondents in each household income
# decile, built from the frequency table returned by freq().
ess_gb %>%
  select(hinctnta) %>%
  freq() %>%
  as.data.frame() %>%
  # The decile codes are the row names of the frequency table. Converting them
  # to a factor with explicit levels keeps the bars in numeric order
  # (1, 2, ..., 10) rather than alphabetical order (1, 10, 2, ...).
  ggplot(aes(x = factor(rownames(.), levels = 1:10), y = `%`)) +
  geom_col() +
  # Label the y axis explicitly; otherwise it shows the raw column name "%".
  labs(
    title = "Distribution of Household Income Deciles",
    x = "Household Income Decile",
    y = "Share of Respondents (%)"
  )

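Because income is measured in deciles, a representative sample would place roughly 10% of respondents in each category. In this GB subsample (after dropping cases with missing values) the distribution is instead skewed towards the top: 15.2% of respondents are in the 10th decile but only 4.1% are in the 1st. Higher-income households are therefore over-represented and lower-income households under-represented in the data analysed here.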

Mission 6

# Cross-tabulate respondent education (rows) against partner education
# (columns) and show column percentages.
cprop(table(ess_gb$eisced, ess_gb$eiscedp))

##        
##         1     2     3     4     5     6     7     All  
##   1      48.9   9.2  13.6   8.7   6.7   3.1   3.1  14.5
##   2      11.3  26.5  12.8  11.1  11.3   5.6   5.0  11.4
##   3      11.6  15.0  18.0  10.0   9.6   5.2   4.4  10.1
##   4       9.1  13.9  16.6  27.3  13.0  10.2   7.2  13.3
##   5      11.2  17.9  21.8  19.0  29.6  15.3  13.0  18.0
##   6       4.4  11.5  10.1  13.9  17.0  39.2  19.8  16.8
##   7       3.5   5.9   7.1  10.0  12.8  21.4  47.5  15.9
##   Total 100.0 100.0 100.0 100.0 100.0 100.0 100.0 100.0

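Each column of this table shows, for a given partner education level, the percentage distribution of respondents’ own education levels. The diagonal cell is the largest in almost every column (for example, 48.9% for level 1 and 47.5% for level 7), and the cells next to the diagonal are generally larger than those far from it. This suggests that people tend to have partners whose education level is the same as, or similar to, their own.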

Mission 7

# Cross-tabulation of household income decile (rows) by respondent education
# (columns), stored so it can be reused, then auto-printed.
eduincome <- datasummary_crosstab(
  formula = hinctnta ~ eisced,
  data = ess_gb
)
eduincome

hinctnta		1	2	3	4	5	6	7	All
1	N	72	24	26	31	19	21	10	203
	% row	35.5	11.8	12.8	15.3	9.4	10.3	4.9	100.0
2	N	157	41	44	41	50	15	20	368
	% row	42.7	11.1	12.0	11.1	13.6	4.1	5.4	100.0
3	N	134	50	55	58	60	27	15	399
	% row	33.6	12.5	13.8	14.5	15.0	6.8	3.8	100.0
4	N	107	50	46	52	84	33	36	408
	% row	26.2	12.3	11.3	12.7	20.6	8.1	8.8	100.0
5	N	70	72	66	66	80	57	49	460
	% row	15.2	15.7	14.3	14.3	17.4	12.4	10.7	100.0
6	N	64	77	66	77	115	77	56	532
	% row	12.0	14.5	12.4	14.5	21.6	14.5	10.5	100.0
7	N	42	74	63	82	132	99	88	580
	% row	7.2	12.8	10.9	14.1	22.8	17.1	15.2	100.0
8	N	33	75	59	106	130	136	111	650
	% row	5.1	11.5	9.1	16.3	20.0	20.9	17.1	100.0
9	N	17	51	42	65	108	151	155	589
	% row	2.9	8.7	7.1	11.0	18.3	25.6	26.3	100.0
10	N	21	48	31	79	113	211	245	748
	% row	2.8	6.4	4.1	10.6	15.1	28.2	32.8	100.0
All	N	717	562	498	657	891	827	785	4937
	% row	14.5	11.4	10.1	13.3	18.0	16.8	15.9	100.0

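There is a clear positive association between education level and household income decile. In the lowest income decile, 35.5% of respondents have the lowest education level (1) and only 4.9% have the highest (7); in the top income decile the pattern is reversed, with 2.8% at level 1 and 32.8% at level 7. Higher income deciles therefore contain a larger share of highly educated individuals.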

Mission 9

# Mean household income decile at each level of education (codes 1-7).
edu_levels <- 1:7

# vapply() returns one mean per education level as a double vector, so the
# result is built in one step instead of being grown with c() inside a loop.
incomes <- vapply(
  edu_levels,
  function(level) {
    at_level <- ess_gb %>% filter(eisced == level)
    mean(at_level$hinctnta)
  },
  numeric(1)
)

df <- data.frame(
  inc = incomes,
  edu = edu_levels
)

# Map the columns of `df` (edu, inc), not the free-standing vectors: aes()
# looks names up in the data first, so the plot now depends only on the data
# frame passed to ggplot().
graph <- ggplot(df, aes(x = edu, y = inc)) +
  geom_line() +
  labs(
    title = "Relationship Between Income Decile and Highest Edu",
    x = "Highest Level of Education, ES-ISCED",
    y = "Total Household Income Decile"
  )

print(graph)

Mission 10

# Count respondents in each partner-education x income-decile cell, then turn
# the counts into proportions within each partner education level.
df_summary <- ess_gb %>%
  group_by(eiscedp, hinctnta) %>%
  # summarize() drops only the last grouping level (hinctnta), so the result
  # is still grouped by eiscedp and sum(Count) below is a per-eiscedp total.
  summarize(Count = n()) %>%
  mutate(Proportion = Count / sum(Count)) %>%
  # Remove the leftover grouping so later operations on df_summary are not
  # silently performed per group.
  ungroup()

## `summarise()` has grouped output by 'eiscedp'. You can override using the
## `.groups` argument.

# Plot: one cluster of bars per partner education level, with one bar per
# household income decile inside each cluster.
ggplot(
  df_summary,
  # Both variables are numeric codes. With a numeric fill, ggplot2 forms no
  # groups, so position = "dodge" has nothing to separate and the bars at each
  # x value are drawn on top of one another. Converting to factors makes the
  # deciles discrete groups that are placed side by side.
  aes(x = factor(eiscedp), y = Proportion, fill = factor(hinctnta))
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Proportion of Income Decile by Partner Edu",
    y = "Proportion",
    x = "Partner's Highest Level of Education",
    fill = "Household Income Decile"
  ) +
  theme_minimal()

Participation_3_Hou

Yisu

2023-10-05