Appendix: DACSS 601 Final Project

Appendix for DACSS 601 final project

Laura Collazo
2022-03-19

Appendix

This appendix contains the tibble of tidied data and the summary statistics (count and percent) used to answer the research question “Is there a positive correlation between the number of languages a student in Spain speaks and how well informed they feel on climate change, global health, migration, international conflict, world hunger, causes of poverty, and gender inequality?”

Source

Programme for International Student Assessment. (2020). Student questionnaire data files (PISA 2018 Database) [Dataset and codebook]. Organisation for Economic Co-operation and Development. https://www.oecd.org/pisa/data/2018database/

# read in csv & examine data
# NOTE(review): read_csv() is presumably provided by readr/tidyverse loaded in
# a setup chunk that is not shown in this appendix -- confirm.

pisa <- read_csv("pisa_smaller_2022-2-20.csv")

# print the raw tibble (first ten rows) to inspect the column names and types
pisa
# A tibble: 35,943 x 17
   CNT   ST001D01T ST004D01T ST197Q01HA ST197Q02HA ST197Q04HA
   <chr>     <dbl>     <dbl>      <dbl>      <dbl>      <dbl>
 1 ESP          10         2          4          4          4
 2 ESP           9         1          3          2          3
 3 ESP          10         2          4          3          3
 4 ESP           8         2          2          1          3
 5 ESP          10         1         NA         NA         NA
 6 ESP          10         1          4          2          3
 7 ESP           9         1         NA         NA         NA
 8 ESP           9         2          3          2          2
 9 ESP           9         2         NA         NA         NA
10 ESP          10         2          3          3          3
# ... with 35,933 more rows, and 11 more variables: ST197Q07HA <dbl>,
#   ST197Q08HA <dbl>, ST197Q09HA <dbl>, ST197Q12HA <dbl>,
#   ST220Q01HA <dbl>, ST220Q02HA <dbl>, ST220Q03HA <dbl>,
#   ST220Q04HA <dbl>, ST177Q01HA <dbl>, ST019AQ01T <dbl>,
#   ST021Q01TA <dbl>
# show the last rows of the raw data as a check on the end of the file
tail(pisa)
# A tibble: 6 x 17
  CNT   ST001D01T ST004D01T ST197Q01HA ST197Q02HA ST197Q04HA
  <chr>     <dbl>     <dbl>      <dbl>      <dbl>      <dbl>
1 ESP          10         1          4          4          4
2 ESP           9         2          3          3          3
3 ESP          10         2          4          4          4
4 ESP           9         2          2          2          2
5 ESP           8         2          3          3          3
6 ESP           9         1          2          2          2
# ... with 11 more variables: ST197Q07HA <dbl>, ST197Q08HA <dbl>,
#   ST197Q09HA <dbl>, ST197Q12HA <dbl>, ST220Q01HA <dbl>,
#   ST220Q02HA <dbl>, ST220Q03HA <dbl>, ST220Q04HA <dbl>,
#   ST177Q01HA <dbl>, ST019AQ01T <dbl>, ST021Q01TA <dbl>
# Build the analysis tibble `pisa_tidy` from the raw `pisa` data:
#   1. drop the variables not needed to answer the research question,
#   2. give the remaining variables descriptive names,
#   3. remove every row that still contains an NA,
#   4. replace the numeric response codes with text labels.

pisa_tidy <- pisa %>%
  # remove additional variables not needed to answer research question
  select(-c(
    "ST001D01T", "ST004D01T",
    "ST220Q01HA", "ST220Q02HA", "ST220Q03HA", "ST220Q04HA",
    "ST019AQ01T", "ST021Q01TA"
  )) %>%
  # rename variables
  rename(
    country = CNT,
    informed_climate_change = ST197Q01HA,
    informed_global_health = ST197Q02HA,
    informed_migration = ST197Q04HA,
    informed_international_conflict = ST197Q07HA,
    informed_world_hunger = ST197Q08HA,
    informed_poverty_causes = ST197Q09HA,
    informed_gender_equality = ST197Q12HA,
    language_self = ST177Q01HA
  ) %>%
  # remove NAs: a row is dropped if any of the nine remaining columns is NA
  drop_na() %>%
  # recode values
  mutate(
    country = recode(country, ESP = "Spain"),
    # all seven informed_* items share the same 1-4 response scale, so a
    # single across() applies one mapping instead of seven copies of it
    across(
      starts_with("informed_"),
      ~ recode(
        .x,
        `1` = "Not informed",
        `2` = "Not well informed",
        `3` = "Informed",
        `4` = "Well informed"
      )
    ),
    language_self = recode(
      language_self,
      `1` = "One",
      `2` = "Two",
      `3` = "Three",
      `4` = "Four +"
    )
  )

# examine

pisa_tidy
# A tibble: 28,022 x 9
   country informed_climate_change informed_global_h~ informed_migrat~
   <chr>   <chr>                   <chr>              <chr>           
 1 Spain   Well informed           Well informed      Well informed   
 2 Spain   Informed                Not well informed  Informed        
 3 Spain   Well informed           Informed           Informed        
 4 Spain   Not well informed       Not informed       Informed        
 5 Spain   Well informed           Not well informed  Informed        
 6 Spain   Informed                Not well informed  Not well inform~
 7 Spain   Informed                Informed           Informed        
 8 Spain   Not well informed       Not well informed  Informed        
 9 Spain   Not well informed       Informed           Informed        
10 Spain   Informed                Informed           Informed        
# ... with 28,012 more rows, and 5 more variables:
#   informed_international_conflict <chr>,
#   informed_world_hunger <chr>, informed_poverty_causes <chr>,
#   informed_gender_equality <chr>, language_self <chr>

Univariate Summary Statistics

How many languages do you speak well enough to converse with others?

# calculate count and percent for language_self

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(language_self, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows by number of languages rather than alphabetically
  arrange(factor(language_self, levels = c("One", "Two", "Three", "Four +")))
# A tibble: 4 x 3
  language_self count percent
  <chr>         <int>   <dbl>
1 One            3996    14.3
2 Two           10732    38.3
3 Three         10370    37.0
4 Four +         2924    10.4

How informed are you about the following topics? Climate change and global warming

# calculate count and percent for informed_climate_change

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_climate_change, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_climate_change,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_climate_change count percent
  <chr>                   <int>   <dbl>
1 Not informed              541    1.93
2 Not well informed        4000   14.3 
3 Informed                16623   59.3 
4 Well informed            6858   24.5 

How informed are you about the following topics? Global health (e.g. epidemics)

# calculate count and percent for informed_global_health

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_global_health, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_global_health,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_global_health count percent
  <chr>                  <int>   <dbl>
1 Not informed             467    1.67
2 Not well informed       7249   25.9 
3 Informed               16315   58.2 
4 Well informed           3991   14.2 

How informed are you about the following topics? Migration (movement of people)

# calculate count and percent for informed_migration

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_migration, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_migration,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_migration count percent
  <chr>              <int>   <dbl>
1 Not informed         450    1.61
2 Not well informed   5532   19.7 
3 Informed           16583   59.2 
4 Well informed       5457   19.5 

How informed are you about the following topics? International conflicts

# calculate count and percent for informed_international_conflict

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_international_conflict, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_international_conflict,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_international_conflict count percent
  <chr>                           <int>   <dbl>
1 Not informed                      733    2.62
2 Not well informed                8349   29.8 
3 Informed                        13758   49.1 
4 Well informed                    5182   18.5 

How informed are you about the following topics? Hunger or malnutrition in different parts of the world

# calculate count and percent for informed_world_hunger

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_world_hunger, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_world_hunger,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_world_hunger count percent
  <chr>                 <int>   <dbl>
1 Not informed            345    1.23
2 Not well informed      4331   15.5 
3 Informed              16459   58.7 
4 Well informed          6887   24.6 

How informed are you about the following topics? Causes of poverty

# calculate count and percent for informed_poverty_causes

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_poverty_causes, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_poverty_causes,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_poverty_causes count percent
  <chr>                   <int>   <dbl>
1 Not informed              391    1.40
2 Not well informed        5230   18.7 
3 Informed                15367   54.8 
4 Well informed            7034   25.1 

How informed are you about the following topics? Equality between men and women in different parts of the world

# calculate count and percent for informed_gender_equality

pisa_tidy %>%
  # count() tallies the rows per answer and returns an ungrouped tibble, so
  # sum(count) below is the total number of rows in pisa_tidy
  count(informed_gender_equality, name = "count") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order rows from least to most informed rather than alphabetically
  arrange(factor(
    informed_gender_equality,
    levels = c("Not informed", "Not well informed", "Informed", "Well informed")
  ))
# A tibble: 4 x 3
  informed_gender_equality count percent
  <chr>                    <int>   <dbl>
1 Not informed               365    1.30
2 Not well informed         1671    5.96
3 Informed                 11550   41.2 
4 Well informed            14436   51.5 

Bivariate Summary Statistics

How informed students feel on climate change by number of languages they speak

# calculate percent for language_self & informed_climate_change

language_climate_change <- pisa_tidy %>%
  group_by(language_self, informed_climate_change) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_climate_change,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_climate_change
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_climate_change count percent
   <chr>         <chr>                   <int>   <dbl>
 1 One           Not informed              171    4.28
 2 One           Not well informed         957   23.9 
 3 One           Informed                 2275   56.9 
 4 One           Well informed             593   14.8 
 5 Two           Not informed              142    1.32
 6 Two           Not well informed        1518   14.1 
 7 Two           Informed                 6740   62.8 
 8 Two           Well informed            2332   21.7 
 9 Three         Not informed              134    1.29
10 Three         Not well informed        1196   11.5 
11 Three         Informed                 6071   58.5 
12 Three         Well informed            2969   28.6 
13 Four +        Not informed               94    3.21
14 Four +        Not well informed         329   11.3 
15 Four +        Informed                 1537   52.6 
16 Four +        Well informed             964   33.0 
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_climate_change_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_climate_change = recode(
      informed_climate_change,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_climate_change) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_climate_change, levels = c("Not informed", "Informed"))
  )

language_climate_change_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_climate_change count percent
  <chr>         <chr>                   <int>   <dbl>
1 One           Not informed             1128    28.2
2 One           Informed                 2868    71.8
3 Two           Not informed             1660    15.5
4 Two           Informed                 9072    84.5
5 Three         Not informed             1330    12.8
6 Three         Informed                 9040    87.2
7 Four +        Not informed              423    14.5
8 Four +        Informed                 2501    85.5

How informed students feel on global health by number of languages they speak

# calculate percent for language_self & informed_global_health

language_global_health <- pisa_tidy %>%
  group_by(language_self, informed_global_health) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_global_health,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_global_health
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_global_health count percent
   <chr>         <chr>                  <int>   <dbl>
 1 One           Not informed             126    3.15
 2 One           Not well informed       1303   32.6 
 3 One           Informed                2147   53.7 
 4 One           Well informed            420   10.5 
 5 Two           Not informed             139    1.30
 6 Two           Not well informed       2894   27.0 
 7 Two           Informed                6361   59.3 
 8 Two           Well informed           1338   12.5 
 9 Three         Not informed             133    1.28
10 Three         Not well informed       2450   23.6 
11 Three         Informed                6204   59.8 
12 Three         Well informed           1583   15.3 
13 Four +        Not informed              69    2.36
14 Four +        Not well informed        602   20.6 
15 Four +        Informed                1603   54.8 
16 Four +        Well informed            650   22.2 
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_global_health_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_global_health = recode(
      informed_global_health,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_global_health) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_global_health, levels = c("Not informed", "Informed"))
  )

language_global_health_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_global_health count percent
  <chr>         <chr>                  <int>   <dbl>
1 One           Not informed            1429    35.8
2 One           Informed                2567    64.2
3 Two           Not informed            3033    28.3
4 Two           Informed                7699    71.7
5 Three         Not informed            2583    24.9
6 Three         Informed                7787    75.1
7 Four +        Not informed             671    22.9
8 Four +        Informed                2253    77.1

How informed students feel on migration by number of languages they speak

# calculate percent for language_self & informed_migration

language_migration <- pisa_tidy %>%
  group_by(language_self, informed_migration) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_migration,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_migration
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_migration count percent
   <chr>         <chr>              <int>   <dbl>
 1 One           Not informed         134    3.35
 2 One           Not well informed    948   23.7 
 3 One           Informed            2301   57.6 
 4 One           Well informed        613   15.3 
 5 Two           Not informed         113    1.05
 6 Two           Not well informed   2248   20.9 
 7 Two           Informed            6548   61.0 
 8 Two           Well informed       1823   17.0 
 9 Three         Not informed         122    1.18
10 Three         Not well informed   1881   18.1 
11 Three         Informed            6196   59.7 
12 Three         Well informed       2171   20.9 
13 Four +        Not informed          81    2.77
14 Four +        Not well informed    455   15.6 
15 Four +        Informed            1538   52.6 
16 Four +        Well informed        850   29.1 
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_migration_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_migration = recode(
      informed_migration,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_migration) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_migration, levels = c("Not informed", "Informed"))
  )

language_migration_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_migration count percent
  <chr>         <chr>              <int>   <dbl>
1 One           Not informed        1082    27.1
2 One           Informed            2914    72.9
3 Two           Not informed        2361    22.0
4 Two           Informed            8371    78.0
5 Three         Not informed        2003    19.3
6 Three         Informed            8367    80.7
7 Four +        Not informed         536    18.3
8 Four +        Informed            2388    81.7

How informed students feel on international conflicts by number of languages they speak

# calculate percent for language_self & informed_international_conflict

language_international_conflict <- pisa_tidy %>%
  group_by(language_self, informed_international_conflict) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_international_conflict,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_international_conflict
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_international_conflict count percent
   <chr>         <chr>                           <int>   <dbl>
 1 One           Not informed                      195    4.88
 2 One           Not well informed                1480   37.0 
 3 One           Informed                         1791   44.8 
 4 One           Well informed                     530   13.3 
 5 Two           Not informed                      226    2.11
 6 Two           Not well informed                3319   30.9 
 7 Two           Informed                         5477   51.0 
 8 Two           Well informed                    1710   15.9 
 9 Three         Not informed                      222    2.14
10 Three         Not well informed                2870   27.7 
11 Three         Informed                         5157   49.7 
12 Three         Well informed                    2121   20.5 
13 Four +        Not informed                       90    3.08
14 Four +        Not well informed                 680   23.3 
15 Four +        Informed                         1333   45.6 
16 Four +        Well informed                     821   28.1 
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_international_conflict_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_international_conflict = recode(
      informed_international_conflict,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_international_conflict) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_international_conflict,
      levels = c("Not informed", "Informed")
    )
  )

language_international_conflict_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_international_conflict count percent
  <chr>         <chr>                           <int>   <dbl>
1 One           Not informed                     1675    41.9
2 One           Informed                         2321    58.1
3 Two           Not informed                     3545    33.0
4 Two           Informed                         7187    67.0
5 Three         Not informed                     3092    29.8
6 Three         Informed                         7278    70.2
7 Four +        Not informed                      770    26.3
8 Four +        Informed                         2154    73.7

How informed students feel on world hunger by number of languages they speak

# calculate percent for language_self & informed_world_hunger

language_world_hunger <- pisa_tidy %>%
  group_by(language_self, informed_world_hunger) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_world_hunger,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_world_hunger
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_world_hunger count percent
   <chr>         <chr>                 <int>   <dbl>
 1 One           Not informed             97   2.43 
 2 One           Not well informed       780  19.5  
 3 One           Informed               2326  58.2  
 4 One           Well informed           793  19.8  
 5 Two           Not informed             92   0.857
 6 Two           Not well informed      1717  16.0  
 7 Two           Informed               6514  60.7  
 8 Two           Well informed          2409  22.4  
 9 Three         Not informed             95   0.916
10 Three         Not well informed      1453  14.0  
11 Three         Informed               6091  58.7  
12 Three         Well informed          2731  26.3  
13 Four +        Not informed             61   2.09 
14 Four +        Not well informed       381  13.0  
15 Four +        Informed               1528  52.3  
16 Four +        Well informed           954  32.6  
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_world_hunger_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_world_hunger = recode(
      informed_world_hunger,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_world_hunger) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_world_hunger, levels = c("Not informed", "Informed"))
  )

language_world_hunger_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_world_hunger count percent
  <chr>         <chr>                 <int>   <dbl>
1 One           Not informed            877    21.9
2 One           Informed               3119    78.1
3 Two           Not informed           1809    16.9
4 Two           Informed               8923    83.1
5 Three         Not informed           1548    14.9
6 Three         Informed               8822    85.1
7 Four +        Not informed            442    15.1
8 Four +        Informed               2482    84.9

How informed students feel on causes of poverty by number of languages they speak

# calculate percent for language_self & informed_poverty_causes

language_poverty_causes <- pisa_tidy %>%
  group_by(language_self, informed_poverty_causes) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_poverty_causes,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_poverty_causes
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_poverty_causes count percent
   <chr>         <chr>                   <int>   <dbl>
 1 One           Not informed              108   2.70 
 2 One           Not well informed         894  22.4  
 3 One           Informed                 2169  54.3  
 4 One           Well informed             825  20.6  
 5 Two           Not informed              106   0.988
 6 Two           Not well informed        2113  19.7  
 7 Two           Informed                 6104  56.9  
 8 Two           Well informed            2409  22.4  
 9 Three         Not informed              115   1.11 
10 Three         Not well informed        1789  17.3  
11 Three         Informed                 5664  54.6  
12 Three         Well informed            2802  27.0  
13 Four +        Not informed               62   2.12 
14 Four +        Not well informed         434  14.8  
15 Four +        Informed                 1430  48.9  
16 Four +        Well informed             998  34.1  
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_poverty_causes_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_poverty_causes = recode(
      informed_poverty_causes,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_poverty_causes) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_poverty_causes, levels = c("Not informed", "Informed"))
  )

language_poverty_causes_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_poverty_causes count percent
  <chr>         <chr>                   <int>   <dbl>
1 One           Not informed             1002    25.1
2 One           Informed                 2994    74.9
3 Two           Not informed             2219    20.7
4 Two           Informed                 8513    79.3
5 Three         Not informed             1904    18.4
6 Three         Informed                 8466    81.6
7 Four +        Not informed              496    17.0
8 Four +        Informed                 2428    83.0

How informed students feel on gender equality by number of languages they speak

# calculate percent for language_self & informed_gender_equality

language_gender_equality <- pisa_tidy %>%
  group_by(language_self, informed_gender_equality) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  # order by number of languages, then from least to most informed
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(
      informed_gender_equality,
      levels = c(
        "Not informed", "Not well informed", "Informed", "Well informed"
      )
    )
  )

language_gender_equality
# A tibble: 16 x 4
# Groups:   language_self [4]
   language_self informed_gender_equality count percent
   <chr>         <chr>                    <int>   <dbl>
 1 One           Not informed               123   3.08 
 2 One           Not well informed          393   9.83 
 3 One           Informed                  1846  46.2  
 4 One           Well informed             1634  40.9  
 5 Two           Not informed                80   0.745
 6 Two           Not well informed          634   5.91 
 7 Two           Informed                  4734  44.1  
 8 Two           Well informed             5284  49.2  
 9 Three         Not informed                90   0.868
10 Three         Not well informed          490   4.73 
11 Three         Informed                  4004  38.6  
12 Three         Well informed             5786  55.8  
13 Four +        Not informed                72   2.46 
14 Four +        Not well informed          154   5.27 
15 Four +        Informed                   966  33.0  
16 Four +        Well informed             1732  59.2  
# collapse the four answers into two: "Not informed" + "Not well informed"
# become "Not informed"; "Informed" + "Well informed" become "Informed"

language_gender_equality_2 <- pisa_tidy %>%
  mutate(
    # only the two labels that change are listed; recode() leaves the
    # unmatched "Not informed" and "Informed" values as they are
    informed_gender_equality = recode(
      informed_gender_equality,
      `Not well informed` = "Not informed",
      `Well informed` = "Informed"
    )
  ) %>%
  group_by(language_self, informed_gender_equality) %>%
  # drop only the last grouping level: the result stays grouped by
  # language_self, so percent is each answer's share within a language group
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percent = count / sum(count) * 100) %>%
  arrange(
    factor(language_self, levels = c("One", "Two", "Three", "Four +")),
    factor(informed_gender_equality, levels = c("Not informed", "Informed"))
  )

language_gender_equality_2
# A tibble: 8 x 4
# Groups:   language_self [4]
  language_self informed_gender_equality count percent
  <chr>         <chr>                    <int>   <dbl>
1 One           Not informed               516   12.9 
2 One           Informed                  3480   87.1 
3 Two           Not informed               714    6.65
4 Two           Informed                 10018   93.3 
5 Three         Not informed               580    5.59
6 Three         Informed                  9790   94.4 
7 Four +        Not informed               226    7.73
8 Four +        Informed                  2698   92.3