Appendix for DACSS 601 final project
This appendix contains the tibble of tidied data and the summary statistics (count and percent) used to answer the research question “Is there a positive correlation between the number of languages a student in Spain speaks and how well informed they feel on climate change, global health, migration, international conflict, world hunger, causes of poverty, and gender inequality?”
Programme for International Student Assessment. (2020). Student questionnaire data files (PISA 2018 Database) [Dataset and codebook]. Organisation for Economic Co-operation and Development. https://www.oecd.org/pisa/data/2018database/
# Read the PISA extract from CSV, then print it to examine the data
pisa <- read_csv(file = "pisa_smaller_2022-2-20.csv")
print(pisa)
# A tibble: 35,943 x 17
CNT ST001D01T ST004D01T ST197Q01HA ST197Q02HA ST197Q04HA
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ESP 10 2 4 4 4
2 ESP 9 1 3 2 3
3 ESP 10 2 4 3 3
4 ESP 8 2 2 1 3
5 ESP 10 1 NA NA NA
6 ESP 10 1 4 2 3
7 ESP 9 1 NA NA NA
8 ESP 9 2 3 2 2
9 ESP 9 2 NA NA NA
10 ESP 10 2 3 3 3
# ... with 35,933 more rows, and 11 more variables: ST197Q07HA <dbl>,
# ST197Q08HA <dbl>, ST197Q09HA <dbl>, ST197Q12HA <dbl>,
# ST220Q01HA <dbl>, ST220Q02HA <dbl>, ST220Q03HA <dbl>,
# ST220Q04HA <dbl>, ST177Q01HA <dbl>, ST019AQ01T <dbl>,
# ST021Q01TA <dbl>
# Show the last six rows of the raw data
tail(pisa, n = 6L)
# A tibble: 6 x 17
CNT ST001D01T ST004D01T ST197Q01HA ST197Q02HA ST197Q04HA
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ESP 10 1 4 4 4
2 ESP 9 2 3 3 3
3 ESP 10 2 4 4 4
4 ESP 9 2 2 2 2
5 ESP 8 2 3 3 3
6 ESP 9 1 2 2 2
# ... with 11 more variables: ST197Q07HA <dbl>, ST197Q08HA <dbl>,
# ST197Q09HA <dbl>, ST197Q12HA <dbl>, ST220Q01HA <dbl>,
# ST220Q02HA <dbl>, ST220Q03HA <dbl>, ST220Q04HA <dbl>,
# ST177Q01HA <dbl>, ST019AQ01T <dbl>, ST021Q01TA <dbl>
# Build the tidy analysis table from the raw PISA extract
pisa_tidy <- pisa %>%
  # keep only the variables needed for the research question, renaming them
  select(
    country = CNT,
    informed_climate_change = ST197Q01HA,
    informed_global_health = ST197Q02HA,
    informed_migration = ST197Q04HA,
    informed_international_conflict = ST197Q07HA,
    informed_world_hunger = ST197Q08HA,
    informed_poverty_causes = ST197Q09HA,
    informed_gender_equality = ST197Q12HA,
    language_self = ST177Q01HA
  ) %>%
  # remove rows with an NA in any remaining column
  drop_na() %>%
  # recode numeric codes to text labels
  mutate(
    country = recode(country, ESP = "Spain"),
    # all seven "informed_*" items share the same four-point scale
    across(
      starts_with("informed_"),
      ~ recode(
        .x,
        `1` = "Not informed",
        `2` = "Not well informed",
        `3` = "Informed",
        `4` = "Well informed"
      )
    ),
    language_self = recode(
      language_self,
      `1` = "One",
      `2` = "Two",
      `3` = "Three",
      `4` = "Four +"
    )
  )
# Examine the tidied data
print(pisa_tidy)
# A tibble: 28,022 x 9
country informed_climate_change informed_global_h~ informed_migrat~
<chr> <chr> <chr> <chr>
1 Spain Well informed Well informed Well informed
2 Spain Informed Not well informed Informed
3 Spain Well informed Informed Informed
4 Spain Not well informed Not informed Informed
5 Spain Well informed Not well informed Informed
6 Spain Informed Not well informed Not well inform~
7 Spain Informed Informed Informed
8 Spain Not well informed Not well informed Informed
9 Spain Not well informed Informed Informed
10 Spain Informed Informed Informed
# ... with 28,012 more rows, and 5 more variables:
# informed_international_conflict <chr>,
# informed_world_hunger <chr>, informed_poverty_causes <chr>,
# informed_gender_equality <chr>, language_self <chr>
# Count and percent of students by number of languages spoken
pisa_tidy %>%
  count(language_self, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(match(language_self, c("One", "Two", "Three", "Four +")))
# A tibble: 4 x 3
language_self count percent
<chr> <int> <dbl>
1 One 3996 14.3
2 Two 10732 38.3
3 Three 10370 37.0
4 Four + 2924 10.4
# Count and percent of students by how informed they feel on climate change
pisa_tidy %>%
  count(informed_climate_change, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_climate_change,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_climate_change count percent
<chr> <int> <dbl>
1 Not informed 541 1.93
2 Not well informed 4000 14.3
3 Informed 16623 59.3
4 Well informed 6858 24.5
# Count and percent of students by how informed they feel on global health
pisa_tidy %>%
  count(informed_global_health, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_global_health,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_global_health count percent
<chr> <int> <dbl>
1 Not informed 467 1.67
2 Not well informed 7249 25.9
3 Informed 16315 58.2
4 Well informed 3991 14.2
# Count and percent of students by how informed they feel on migration
pisa_tidy %>%
  count(informed_migration, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_migration,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_migration count percent
<chr> <int> <dbl>
1 Not informed 450 1.61
2 Not well informed 5532 19.7
3 Informed 16583 59.2
4 Well informed 5457 19.5
# Count and percent of students by how informed they feel on international
# conflict
pisa_tidy %>%
  count(informed_international_conflict, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_international_conflict,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_international_conflict count percent
<chr> <int> <dbl>
1 Not informed 733 2.62
2 Not well informed 8349 29.8
3 Informed 13758 49.1
4 Well informed 5182 18.5
# Count and percent of students by how informed they feel on world hunger
pisa_tidy %>%
  count(informed_world_hunger, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_world_hunger,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_world_hunger count percent
<chr> <int> <dbl>
1 Not informed 345 1.23
2 Not well informed 4331 15.5
3 Informed 16459 58.7
4 Well informed 6887 24.6
# Count and percent of students by how informed they feel on causes of poverty
pisa_tidy %>%
  count(informed_poverty_causes, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_poverty_causes,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_poverty_causes count percent
<chr> <int> <dbl>
1 Not informed 391 1.40
2 Not well informed 5230 18.7
3 Informed 15367 54.8
4 Well informed 7034 25.1
# Count and percent of students by how informed they feel on gender equality
pisa_tidy %>%
  count(informed_gender_equality, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(
      informed_gender_equality,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
# A tibble: 4 x 3
informed_gender_equality count percent
<chr> <int> <dbl>
1 Not informed 365 1.30
2 Not well informed 1671 5.96
3 Informed 11550 41.2
4 Well informed 14436 51.5
# language_self & informed_climate_change: count of each response and its
# percent within each number-of-languages group
language_climate_change <- pisa_tidy %>%
  count(language_self, informed_climate_change, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_climate_change,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_climate_change
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_climate_change count percent
<chr> <chr> <int> <dbl>
1 One Not informed 171 4.28
2 One Not well informed 957 23.9
3 One Informed 2275 56.9
4 One Well informed 593 14.8
5 Two Not informed 142 1.32
6 Two Not well informed 1518 14.1
7 Two Informed 6740 62.8
8 Two Well informed 2332 21.7
9 Three Not informed 134 1.29
10 Three Not well informed 1196 11.5
11 Three Informed 6071 58.5
12 Three Well informed 2969 28.6
13 Four + Not informed 94 3.21
14 Four + Not well informed 329 11.3
15 Four + Informed 1537 52.6
16 Four + Well informed 964 33.0
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_climate_change_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_climate_change = recode(
      informed_climate_change,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_climate_change, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_climate_change, c("Not informed", "Informed"))
  )
language_climate_change_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_climate_change count percent
<chr> <chr> <int> <dbl>
1 One Not informed 1128 28.2
2 One Informed 2868 71.8
3 Two Not informed 1660 15.5
4 Two Informed 9072 84.5
5 Three Not informed 1330 12.8
6 Three Informed 9040 87.2
7 Four + Not informed 423 14.5
8 Four + Informed 2501 85.5
# language_self & informed_global_health: count of each response and its
# percent within each number-of-languages group
language_global_health <- pisa_tidy %>%
  count(language_self, informed_global_health, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_global_health,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_global_health
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_global_health count percent
<chr> <chr> <int> <dbl>
1 One Not informed 126 3.15
2 One Not well informed 1303 32.6
3 One Informed 2147 53.7
4 One Well informed 420 10.5
5 Two Not informed 139 1.30
6 Two Not well informed 2894 27.0
7 Two Informed 6361 59.3
8 Two Well informed 1338 12.5
9 Three Not informed 133 1.28
10 Three Not well informed 2450 23.6
11 Three Informed 6204 59.8
12 Three Well informed 1583 15.3
13 Four + Not informed 69 2.36
14 Four + Not well informed 602 20.6
15 Four + Informed 1603 54.8
16 Four + Well informed 650 22.2
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_global_health_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_global_health = recode(
      informed_global_health,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_global_health, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_global_health, c("Not informed", "Informed"))
  )
language_global_health_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_global_health count percent
<chr> <chr> <int> <dbl>
1 One Not informed 1429 35.8
2 One Informed 2567 64.2
3 Two Not informed 3033 28.3
4 Two Informed 7699 71.7
5 Three Not informed 2583 24.9
6 Three Informed 7787 75.1
7 Four + Not informed 671 22.9
8 Four + Informed 2253 77.1
# language_self & informed_migration: count of each response and its percent
# within each number-of-languages group
language_migration <- pisa_tidy %>%
  count(language_self, informed_migration, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_migration,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_migration
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_migration count percent
<chr> <chr> <int> <dbl>
1 One Not informed 134 3.35
2 One Not well informed 948 23.7
3 One Informed 2301 57.6
4 One Well informed 613 15.3
5 Two Not informed 113 1.05
6 Two Not well informed 2248 20.9
7 Two Informed 6548 61.0
8 Two Well informed 1823 17.0
9 Three Not informed 122 1.18
10 Three Not well informed 1881 18.1
11 Three Informed 6196 59.7
12 Three Well informed 2171 20.9
13 Four + Not informed 81 2.77
14 Four + Not well informed 455 15.6
15 Four + Informed 1538 52.6
16 Four + Well informed 850 29.1
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_migration_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_migration = recode(
      informed_migration,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_migration, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_migration, c("Not informed", "Informed"))
  )
language_migration_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_migration count percent
<chr> <chr> <int> <dbl>
1 One Not informed 1082 27.1
2 One Informed 2914 72.9
3 Two Not informed 2361 22.0
4 Two Informed 8371 78.0
5 Three Not informed 2003 19.3
6 Three Informed 8367 80.7
7 Four + Not informed 536 18.3
8 Four + Informed 2388 81.7
# language_self & informed_international_conflict: count of each response and
# its percent within each number-of-languages group
language_international_conflict <- pisa_tidy %>%
  count(language_self, informed_international_conflict, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_international_conflict,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_international_conflict
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_international_conflict count percent
<chr> <chr> <int> <dbl>
1 One Not informed 195 4.88
2 One Not well informed 1480 37.0
3 One Informed 1791 44.8
4 One Well informed 530 13.3
5 Two Not informed 226 2.11
6 Two Not well informed 3319 30.9
7 Two Informed 5477 51.0
8 Two Well informed 1710 15.9
9 Three Not informed 222 2.14
10 Three Not well informed 2870 27.7
11 Three Informed 5157 49.7
12 Three Well informed 2121 20.5
13 Four + Not informed 90 3.08
14 Four + Not well informed 680 23.3
15 Four + Informed 1333 45.6
16 Four + Well informed 821 28.1
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_international_conflict_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_international_conflict = recode(
      informed_international_conflict,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_international_conflict, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_international_conflict, c("Not informed", "Informed"))
  )
language_international_conflict_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_international_conflict count percent
<chr> <chr> <int> <dbl>
1 One Not informed 1675 41.9
2 One Informed 2321 58.1
3 Two Not informed 3545 33.0
4 Two Informed 7187 67.0
5 Three Not informed 3092 29.8
6 Three Informed 7278 70.2
7 Four + Not informed 770 26.3
8 Four + Informed 2154 73.7
# language_self & informed_world_hunger: count of each response and its
# percent within each number-of-languages group
language_world_hunger <- pisa_tidy %>%
  count(language_self, informed_world_hunger, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_world_hunger,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_world_hunger
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_world_hunger count percent
<chr> <chr> <int> <dbl>
1 One Not informed 97 2.43
2 One Not well informed 780 19.5
3 One Informed 2326 58.2
4 One Well informed 793 19.8
5 Two Not informed 92 0.857
6 Two Not well informed 1717 16.0
7 Two Informed 6514 60.7
8 Two Well informed 2409 22.4
9 Three Not informed 95 0.916
10 Three Not well informed 1453 14.0
11 Three Informed 6091 58.7
12 Three Well informed 2731 26.3
13 Four + Not informed 61 2.09
14 Four + Not well informed 381 13.0
15 Four + Informed 1528 52.3
16 Four + Well informed 954 32.6
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_world_hunger_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_world_hunger = recode(
      informed_world_hunger,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_world_hunger, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_world_hunger, c("Not informed", "Informed"))
  )
language_world_hunger_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_world_hunger count percent
<chr> <chr> <int> <dbl>
1 One Not informed 877 21.9
2 One Informed 3119 78.1
3 Two Not informed 1809 16.9
4 Two Informed 8923 83.1
5 Three Not informed 1548 14.9
6 Three Informed 8822 85.1
7 Four + Not informed 442 15.1
8 Four + Informed 2482 84.9
# language_self & informed_poverty_causes: count of each response and its
# percent within each number-of-languages group
language_poverty_causes <- pisa_tidy %>%
  count(language_self, informed_poverty_causes, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_poverty_causes,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_poverty_causes
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_poverty_causes count percent
<chr> <chr> <int> <dbl>
1 One Not informed 108 2.70
2 One Not well informed 894 22.4
3 One Informed 2169 54.3
4 One Well informed 825 20.6
5 Two Not informed 106 0.988
6 Two Not well informed 2113 19.7
7 Two Informed 6104 56.9
8 Two Well informed 2409 22.4
9 Three Not informed 115 1.11
10 Three Not well informed 1789 17.3
11 Three Informed 5664 54.6
12 Three Well informed 2802 27.0
13 Four + Not informed 62 2.12
14 Four + Not well informed 434 14.8
15 Four + Informed 1430 48.9
16 Four + Well informed 998 34.1
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_poverty_causes_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_poverty_causes = recode(
      informed_poverty_causes,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_poverty_causes, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_poverty_causes, c("Not informed", "Informed"))
  )
language_poverty_causes_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_poverty_causes count percent
<chr> <chr> <int> <dbl>
1 One Not informed 1002 25.1
2 One Informed 2994 74.9
3 Two Not informed 2219 20.7
4 Two Informed 8513 79.3
5 Three Not informed 1904 18.4
6 Three Informed 8466 81.6
7 Four + Not informed 496 17.0
8 Four + Informed 2428 83.0
# language_self & informed_gender_equality: count of each response and its
# percent within each number-of-languages group
language_gender_equality <- pisa_tidy %>%
  count(language_self, informed_gender_equality, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  # list rows in scale order rather than alphabetically
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(
      informed_gender_equality,
      c("Not informed", "Not well informed", "Informed", "Well informed")
    )
  )
language_gender_equality
# A tibble: 16 x 4
# Groups: language_self [4]
language_self informed_gender_equality count percent
<chr> <chr> <int> <dbl>
1 One Not informed 123 3.08
2 One Not well informed 393 9.83
3 One Informed 1846 46.2
4 One Well informed 1634 40.9
5 Two Not informed 80 0.745
6 Two Not well informed 634 5.91
7 Two Informed 4734 44.1
8 Two Well informed 5284 49.2
9 Three Not informed 90 0.868
10 Three Not well informed 490 4.73
11 Three Informed 4004 38.6
12 Three Well informed 5786 55.8
13 Four + Not informed 72 2.46
14 Four + Not well informed 154 5.27
15 Four + Informed 966 33.0
16 Four + Well informed 1732 59.2
# Collapse the four responses into two ("Not informed" and "Not well informed"
# become "Not informed"; "Informed" and "Well informed" become "Informed"),
# then compute count and percent within each number-of-languages group
language_gender_equality_2 <- pisa_tidy %>%
  mutate(
    # values not listed here are left unchanged by recode()
    informed_gender_equality = recode(
      informed_gender_equality,
      "Not well informed" = "Not informed",
      "Well informed" = "Informed"
    )
  ) %>%
  count(language_self, informed_gender_equality, name = "count") %>%
  # group explicitly: percent is each response's share of its language group
  group_by(language_self) %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    match(language_self, c("One", "Two", "Three", "Four +")),
    match(informed_gender_equality, c("Not informed", "Informed"))
  )
language_gender_equality_2
# A tibble: 8 x 4
# Groups: language_self [4]
language_self informed_gender_equality count percent
<chr> <chr> <int> <dbl>
1 One Not informed 516 12.9
2 One Informed 3480 87.1
3 Two Not informed 714 6.65
4 Two Informed 10018 93.3
5 Three Not informed 580 5.59
6 Three Informed 9790 94.4
7 Four + Not informed 226 7.73
8 Four + Informed 2698 92.3