# Required packages
packages <- c(
"tidyverse",
"sjPlot",
"broom",
"modelsummary",
"gt",
"knitr",
"scales",
"kableExtra",
"flextable",
"car",
"fst"
)
# Install and load packages
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Loading required package: carData
## 
## 
## Attaching package: 'car'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## 
## The following object is masked from 'package:purrr':
## 
##     some
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "sjPlot"    "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "broom"     "sjPlot"    "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "broom"        "sjPlot"       "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "gt"           "modelsummary" "broom"        "sjPlot"       "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[6]]
##  [1] "knitr"        "gt"           "modelsummary" "broom"        "sjPlot"      
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[7]]
##  [1] "scales"       "knitr"        "gt"           "modelsummary" "broom"       
##  [6] "sjPlot"       "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "scales"       "knitr"        "gt"           "modelsummary"
##  [6] "broom"        "sjPlot"       "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "flextable"    "kableExtra"   "scales"       "knitr"        "gt"          
##  [6] "modelsummary" "broom"        "sjPlot"       "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "car"          "carData"      "flextable"    "kableExtra"   "scales"      
##  [6] "knitr"        "gt"           "modelsummary" "broom"        "sjPlot"      
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[11]]
##  [1] "fst"          "car"          "carData"      "flextable"    "kableExtra"  
##  [6] "scales"       "knitr"        "gt"           "modelsummary" "broom"       
## [11] "sjPlot"       "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"

TASK 1: Life Satisfaction In Atlantic Canada Regions

Data Preparation

# Load CHS data
CHS <- read.csv("CHS2021ECL_PUMF.csv")

dim(CHS)
## [1] 40988   109
variables_of_interest <- c("PLIS_05", "PGEOGR")
head(CHS[, variables_of_interest])
##   PLIS_05 PGEOGR
## 1       6      3
## 2       6     26
## 3       1     22
## 4       7     22
## 5       7     16
## 6       7     10
table(CHS$PGEOGR)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 2446 1714 1088 1977 2037 2001 1218 1165 1395 2195  946 1021 1009 1091 1033 2233 
##   17   18   19   20   21   22   23   24   25   26   27   28 
## 1037 1924  997  940 1742 1078  968 2748 1110 1123 1964  788
table(CHS$PGEOGR)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 2446 1714 1088 1977 2037 2001 1218 1165 1395 2195  946 1021 1009 1091 1033 2233 
##   17   18   19   20   21   22   23   24   25   26   27   28 
## 1037 1924  997  940 1742 1078  968 2748 1110 1123 1964  788
table(CHS$PLIS_05)
## 
##    1    2    3    4    5    6    7    8    9   99 
## 1442 1059 1164 3933 3279 6634 9707 5597 7881  292
#Create subset for Atlantic Canada
Atlantic_Canada <- CHS %>%
  filter(PGEOGR %in% c("1", "2", "3", "4", "5", "6")) %>% #
  select(outcome = PLIS_05,
         region = PGEOGR)
Atlantic_Canada <- Atlantic_Canada %>%
  mutate(
# Clean outcome variable 
     outcome = case_when(
      outcome < 0 ~ NA_real_,
      outcome > 10 ~ NA_real_,
      TRUE ~ as.numeric(outcome)
    ), 
    
# Create new factor with descriptive region names
    region_name = factor(region,
                       levels = 1:6,
                       labels = c("Newfoundland and Labrador", 
                                 "Prince Edward Island", 
                                 "Halifax", 
                                 "Outside Halifax - NS", 
                                 "Saint John and Moncton", 
                                 "Outside Saint John and Moncton - NB")),
    
  )
str(Atlantic_Canada)
## 'data.frame':    11263 obs. of  3 variables:
##  $ outcome    : num  6 9 8 9 8 8 9 8 9 4 ...
##  $ region     : int  3 4 1 4 4 1 1 2 5 3 ...
##  $ region_name: Factor w/ 6 levels "Newfoundland and Labrador",..: 3 4 1 4 4 1 1 2 5 3 ...
summary(Atlantic_Canada)
##     outcome          region                                   region_name  
##  Min.   :1.000   Min.   :1.000   Newfoundland and Labrador          :2446  
##  1st Qu.:6.000   1st Qu.:2.000   Prince Edward Island               :1714  
##  Median :7.000   Median :4.000   Halifax                            :1088  
##  Mean   :6.621   Mean   :3.484   Outside Halifax - NS               :1977  
##  3rd Qu.:8.000   3rd Qu.:5.000   Saint John and Moncton             :2037  
##  Max.   :9.000   Max.   :6.000   Outside Saint John and Moncton - NB:2001  
##  NA's   :86
Atlantic_Canada %>%
  group_by(region_name) %>%
  summarize(
    mean_satisfaction = mean(outcome, na.rm = TRUE),
    median_satisfaction = median(outcome, na.rm = TRUE),
    sd_satisfaction = sd(outcome, na.rm = TRUE),
    n = n()
  ) %>%
  arrange(desc(mean_satisfaction)) %>%
  kable(digits = 2, caption = "Life Satisfaction by Region in Atlantic Canada")
Life Satisfaction by Region in Atlantic Canada
region_name mean_satisfaction median_satisfaction sd_satisfaction n
Prince Edward Island 6.77 7 1.94 1714
Outside Saint John and Moncton - NB 6.71 7 2.08 2001
Newfoundland and Labrador 6.69 7 2.07 2446
Outside Halifax - NS 6.62 7 2.05 1977
Saint John and Moncton 6.52 7 2.09 2037
Halifax 6.25 7 2.07 1088

Visualization with Boxplot

# Create Boxplot Visual
 ggplot(Atlantic_Canada, aes(x = region_name, y = outcome, fill = region_name)) +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 20),
        legend.position = "none") +
  labs(x = "Region", y = "Life Satisfaction (0-10)", 
       title = "Life Satisfaction by Region in Atlantic Canada")
## Warning: Removed 86 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Descriptive Analysis

The boxplot above shows a visual of life satisfaction by regions in Atlantic Canada. The life satisfaction is on the y axis and ranges from 0-10, and the regions are on the x axis. Each region has their own box to indicate the level of satisfaction and to show where the scores are mostly similar. In this particular boxplot visual, the median life satisfaction scores across regions of Atlantic Canada are relatively the same. The regions listed as Newfoundland and Labrador and Outside Saint John and Moncton have a higher percent of life satisfaction for their region. Although, the other regions are less satisfied in their region. The IQR between regions listed as Prince Edward Island and Outside Halifax – NS are mostly in the same range. Also, the regions listed Newfoundland and Labrador and Outside Saint John and Moncton have a similar IQR, and Halifax and Saint John and Moncton share IQRs as well. Newfoundland and Labrador, Prince Edward Island, Outside Halifax – NS, and Outside Saint John and Moncton have outliers as well. One difference is that bigger regions in Atlantic Canada show less satisfaction, and smaller regions have higher satisfaction.

Regression Analysis

# Running the model 
Atlantic_Canada_Model1 <- lm(outcome ~ region_name, data = Atlantic_Canada)

summary(Atlantic_Canada_Model1)
## 
## Call:
## lm(formula = outcome ~ region_name, data = Atlantic_Canada)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7721 -0.7721  0.3052  1.4802  2.7500 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                     6.69485    0.04166 160.702
## region_namePrince Edward Island                 0.07727    0.06482   1.192
## region_nameHalifax                             -0.44485    0.07495  -5.935
## region_nameOutside Halifax - NS                -0.07711    0.06230  -1.238
## region_nameSaint John and Moncton              -0.17502    0.06182  -2.831
## region_nameOutside Saint John and Moncton - NB  0.01389    0.06213   0.224
##                                                Pr(>|t|)    
## (Intercept)                                     < 2e-16 ***
## region_namePrince Edward Island                 0.23323    
## region_nameHalifax                             3.03e-09 ***
## region_nameOutside Halifax - NS                 0.21582    
## region_nameSaint John and Moncton               0.00464 ** 
## region_nameOutside Saint John and Moncton - NB  0.82313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.052 on 11171 degrees of freedom
##   (86 observations deleted due to missingness)
## Multiple R-squared:  0.005023,   Adjusted R-squared:  0.004577 
## F-statistic: 11.28 on 5 and 11171 DF,  p-value: 7.196e-11
# creating a visual for the regression model 
plot_model(Atlantic_Canada_Model1, 
           type = "est",
           show.values = TRUE,
           value.offset = 0.3,
           vline.color = "red",
           title = "Life Satisfaction By Region in Atlantic Canada",
           axis.labels = c("Newfoundland and Labrador", 
                                 "Prince Edward Island", 
                                 "Halifax", 
                                 "Outside Halifax - NS", 
                                 "Saint John and Moncton", 
                                 "Outside Saint John and Moncton - NB"),
           axis.title = "Life Satisfaction") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.y = element_text(face = "bold")
  )

# create the regression table 
predictor_labels <- c(
  "(Intercept)" = "Intercept",
  "region_namePrince Edward Island" = "Prince Edward Island",
  "region_nameHalifax" = "Halifax",
  "region_nameOutside Halifax - NS" = "Outside Halifax (NS)",
  "region_nameSaint John and Moncton" = "Saint John & Moncton",
  "region_nameOutside Saint John and Moncton - NB" = "Outside SJ & Moncton (NB)"
)

tab_model(
  Atlantic_Canada_Model1,
  pred.labels = predictor_labels,
  dv.labels = c("Model 1:\nRegion"),
  string.pred = "Predictors",
  string.est = "β",
  string.ci = "95% CI",
  string.p = "p-value",
  p.style = "stars",
  p.threshold = c(0.05, 0.01, 0.001),
  show.reflvl = TRUE,
  show.aic = TRUE,
  show.r2 = TRUE,
  show.fstat = TRUE,
  show.loglik = FALSE,
  digits = 3,
  title = "Table 1. Life Satisfaction by Region in Atlantic Canada",
  CSS = list(
    css.table = "font-size: 12px; width: auto; margin-bottom: 20px;",
    css.thead = "font-weight: bold; border-bottom: 1px solid #ddd;",
    css.tdata = "padding: 5px; border-bottom: 1px solid #f0f0f0;",
    css.caption = "font-weight: bold; color: #333333; font-size: 14px; margin-bottom: 10px;",
    css.footnote = "font-size: 11px; color: #555555;",
    css.depvarhead = "color: #333333; font-weight: bold; text-align: center; border-bottom: 1px solid #ddd;",
    css.centeralign = "text-align: center;",
    css.firsttablecol = "font-weight: bold; font-size: 12px;"
  )
)
Table 1. Life Satisfaction by Region in Atlantic Canada
  Model 1: Region
Predictors β 95% CI
Intercept 6.695 *** 6.613 – 6.777
Prince Edward Island 0.077 -0.050 – 0.204
Halifax -0.445 *** -0.592 – -0.298
Outside Halifax (NS) -0.077 -0.199 – 0.045
Saint John & Moncton -0.175 ** -0.296 – -0.054
Outside SJ & Moncton (NB) 0.014 -0.108 – 0.136
Observations 11177
R2 / R2 adjusted 0.005 / 0.005
AIC 47790.102
  • p<0.05   ** p<0.01   *** p<0.001

Regression Interpretation

The regression model shows Life Satisfaction by Region in Atlantic Canada. Each region has its own calculation about the life satisfaction within that area. From the regression model, Halifax and Saint John and Moncton, have significantly different life satisfaction from the reference region. The reference region has a value of 6.695 and the value for Halifax is -0.445 and Saint John and Moncton have a value of -0.175. This implies that the life satisfaction is different and significantly lower for that area. Other regions do not have significantly different values from the reference region. The magnitude and direction of significant coefficient has negative values, which means a lower and negative result and effect. This suggests that those numbers that are negative, Halifax and Saint John and Moncton, have less satisfaction within their region. Although, the only predictor for life satisfaction in this case is the region within Atlantic Canada. Because of this, adding another predictor such as income, or available resources within each region may result in different numbers. Region is not as strong as other predictors because other predictors like income, and available resources within each region like health, and education will affect an individual’s overall life satisfaction more than the region that they are in. Overall, the results show the data of Life Satisfaction within different regions within Atlantic Canada, even though the region predictor is not the strongest predictor to use when speaking upon life satisfaction.

BONUS By looking at the coefficient visualization, coefficients that are statistically significant can be determined because of their location in comparison to other. For instance, Outside Halifax - NS and PEI are closer and further left than the other regions. Considering they are not relatively close to other regions, they can be determined statistically significant. Also, depending on what side of zero the coefficient is on, we can determine whether it will ahve a positive or negative affect. This visual approach is helpful for quickly interpreting the regression results because knowing that Outside Halifax - NS and PEI are already determined statistically significant, we can expect to see similar results in the regression model. We can also expect to see what has positive effects and negative effects on the outcome.

TASK 2: Populist Attitudes in Portugal

Part A: Data Preparation

# Load the Portugal Data
Portugal <- read_fst("portugal_data.fst")
head(Portugal[, c("trstplt", "trstprl", "trstprt", "gndr")])
##   trstplt trstprl trstprt gndr
## 1      88      88      NA    2
## 2       1      88      NA    2
## 3       2      88      NA    1
## 4       0      88      NA    1
## 5       3      88      NA    2
## 6       0       0      NA    1
table(Portugal$trstprt)
## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88   99 
## 4544 2049 2418 2243 1715 1902  545  360  152   45   37   37  314    9
# Step 1: Clean the trust variables (set values > 10 to NA)
Portugal$trstplt <- ifelse(Portugal$trstplt > 10, NA, Portugal$trstplt)
Portugal$trstprl <- ifelse(Portugal$trstprl > 10, NA, Portugal$trstprl)
Portugal$trstprt <- ifelse(Portugal$trstprt > 10, NA, Portugal$trstprt)

# Step 2: Create a trust scale (0-100)
Portugal$trust <- scales::rescale(
  Portugal$trstplt + Portugal$trstprl + Portugal$trstprt, 
  na.rm = TRUE, 
  to = c(0, 100)
)

# Step 3: Flip the scale to create a populist measure (higher = more populist)
# This reverses the direction so that lower trust = higher populism
Portugal$populist <- scales::rescale(Portugal$trust, na.rm = TRUE, to = c(100, 0))
# Recode gender as a factor (1 = Male, 2 = Female)
Portugal$gender <- factor(Portugal$gndr, 
                           levels = c(1, 2), 
                           labels = c("Male", "Female"))
# Examine the properties of our new scale
summary(Portugal$populist, na.rm = TRUE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   56.67   73.33   72.64   90.00  100.00    2350
table(Portugal$gender)
## 
##   Male Female 
##   7268  10613
table(Portugal$yrbrn)
## 
## 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 
##    1    1    1    3    3    7    8    9   17   20   25   42   39   75   65   88 
## 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 
##   98  115  120  141  172  188  192  218  216  225  236  279  250  281  286  277 
## 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 
##  261  254  302  297  307  287  279  326  298  293  274  317  312  290  266  252 
## 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 
##  274  258  241  319  283  270  240  292  298  285  290  270  279  261  279  271 
## 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 
##  263  300  307  279  274  252  247  261  240  237  210  211  217  221  182  170 
## 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 
##  193  144  145  128  119   93   73   62   64   46   53   46   30   21   22   20 
## 2005 2006 7777 8888 9999 
##    5    9    2    1   11
# Create generation variable 
Portugal <- Portugal %>%
  mutate(
    # Recode year of birth, setting invalid values to NA
    birth_year = ifelse(yrbrn < 1928 | yrbrn > 2005, NA, yrbrn),
    
    # Create generation variable
    Port_Generation = case_when(
      birth_year %in% 1928:1945 ~ "Interwar",
      birth_year %in% 1946:1964 ~ "Baby Boomers",
      birth_year %in% 1965:1980 ~ "Generation X",
      birth_year %in% 1981:1996 ~ "Millennials",
      birth_year %in% 1997:2005 ~ "Gen Z",
      TRUE ~ NA_character_
    ),
    
    # Convert to factor
    Port_Generation = factor(Port_Generation, 
                      levels = c("Interwar", "Baby Boomers", 
                               "Generation X", "Millennials", "Gen Z"))
  )

table(Portugal$Port_Generation, useNA = "ifany")
## 
##     Interwar Baby Boomers Generation X  Millennials        Gen Z         <NA> 
##         4382         5371         4416         2645          307          760

Part B: Descriptive Analysis

Create a boxplot visualization:

Portugal <- Portugal %>%
  filter(!is.na(Port_Generation) & !is.na(populist))
# create boxplot visual
ggplot(Portugal, aes(x = Port_Generation, y = populist, fill = Port_Generation)) +
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(),
        legend.position = "none") +
  labs(x = "Generation", y = "Populist Attitudes (0-10)", 
       title = "Populist Attitudes By Generation",
       subtitle = "Measuring Populist Attitudes Based on ESS Data",
       caption = "Data Source: Portugal ESS Data"
       )

Boxplot Visualization Interpretation:

The boxplot above shows a visual of Populist Attitudes by Generation. The populist attitude scale is on the y axis and ranges from 0-10, and the generations are on the x axis. Each generation has their own box to indicate the populist attitude and to show where the scores are mostly similar. In this boxplot visual, the median populist attitude scores of each generation decreases as the generation increases. As the generation increases, the populist attitudes decrease, however, each IQR is relatively the same. Although, the IQR for Gen Z is lower than other generations. Each generation has outliers; however, past generations have more outliers, and the outliers decrease as time passes. The main pattern and observation of this boxplot visualization is that as generation increases, the populist attitudes decrease.

Part C: Regression Models:

# Model 1: Populist attitudes predicted by gender only
# Make Male the reference category
Portugal$gender <- relevel(factor(Portugal$gender), ref = "Female")

# Run the regression
model1_gender <- lm(populist ~ gender, data = Portugal)

# View the model summary
summary(model1_gender)
## 
## Call:
## lm(formula = populist ~ gender, data = Portugal)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.076 -15.445   1.221  16.924  27.888 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  73.0765     0.2135 342.277  < 2e-16 ***
## genderMale   -0.9645     0.3306  -2.917  0.00354 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.97 on 14999 degrees of freedom
## Multiple R-squared:  0.0005671,  Adjusted R-squared:  0.0005004 
## F-statistic:  8.51 on 1 and 14999 DF,  p-value: 0.003536
# Model 2: Populist attitudes predicted by both gender and generation
# Run the regression
model2_gender_generation <- lm(populist ~ gender + Port_Generation, data = Portugal)

# View the model summary 
summary(model2_gender_generation)
## 
## Call:
## lm(formula = populist ~ gender + Port_Generation, data = Portugal)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.464 -14.304   1.267  16.288  40.207 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  74.4642     0.3537 210.552  < 2e-16 ***
## genderMale                   -0.7526     0.3295  -2.284 0.022366 *  
## Port_GenerationBaby Boomers  -0.6681     0.4377  -1.526 0.126990    
## Port_GenerationGeneration X  -1.6457     0.4573  -3.598 0.000321 ***
## Port_GenerationMillennials   -3.4938     0.5211  -6.705 2.09e-11 ***
## Port_GenerationGen Z        -13.9185     1.2064 -11.537  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.86 on 14995 degrees of freedom
## Multiple R-squared:  0.01163,    Adjusted R-squared:  0.0113 
## F-statistic:  35.3 on 5 and 14995 DF,  p-value: < 2.2e-16
task2models <- list(
  "Gender Only" = lm(populist ~ gender, data = Portugal),
  "Gender + Generation" = lm(populist ~ gender + Port_Generation, data = Portugal)
)

# Create custom coefficient labels
task2coef <- c("(Intercept)" = "Intercept (Interwar)", 
                           "Port_GenerationBaby Boomers" = "Baby Boomers",
                           "Port_GenerationGeneration X" = "Generation X",
                           "Port_GenerationMillennials" = "Millennials",
                           "Port_GenerationGen Z" = "Gen Z",
                           "genderMale" = "Male")

# Create custom goodness-of-fit metrics
task2gofm <- tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R²",            2,
  "adj.r.squared", "Adjusted R²", 2,
  "AIC",       "AIC",           1,
  "BIC",       "BIC",           1,
  "rmse",      "RMSE",          2
)
# create the regression table 
table1 <- modelsummary(
  task2models,
  coef_map = task2coef,
  gof_map = task2gofm,
  stars = TRUE,
  title = "Populist Attitudes Predicted by Gender and/or Generation",
  notes = "Data Source: Portugal ESS Data"
)
table1
Populist Attitudes Predicted by Gender and/or Generation
Gender Only Gender + Generation
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Data Source: Portugal ESS Data
Intercept (Interwar) 73.076*** 74.464***
(0.214) (0.354)
Baby Boomers -0.668
(0.438)
Generation X -1.646***
(0.457)
Millennials -3.494***
(0.521)
Gen Z -13.918***
(1.206)
Male -0.965** -0.753*
(0.331) (0.329)
N 15001 15001
0.00 0.01
Adjusted R² 0.00 0.01
RMSE 19.97 19.85

Part D: Regression Interpretation

For model 1, the populist attitude is 73.076. Since the predictor is male, that number is the female value. However, males have a significantly lower value. The populist attitude intercept is 74.464 for model 2, which consists of the predictors gender and generation. For the bonus question, the intercept changes between we added the generation predictor to gender. It changed from gender only to gender and generation, meaning there is a different in the value because of the addition. This also explains why the coefficient for male changes between the models as well. This tells us that populist attitudes can be determined by these predictors, gender and generation and we can use this data to predict populist attitudes by examining the relationship each variable has with each other. The reference category for both consists of the women, although the category for model 2 is interwar women. Generation X, Millennials, and the Gen Z generations are all significantly lower than the intercept. This indicates that the populist attitudes decrease as generation increases. Model 2 best explains populist attitudes because it is based on more than one predictor. Having a second predictor ensures the results are more accurate and reliable.

TASK 3: Authorian Values In France

Part A: Data Preparation

# Load the France data
France <- read_fst("france_data.fst")
head(France[, c("ipbhprp", "impsafe", "ipstrgv", "imptrad", "ipfrule")])
##   ipbhprp impsafe ipstrgv imptrad ipfrule
## 1       3       1       1       1       2
## 2       4       3       4       4       4
## 3       6       6       6       5       6
## 4       5       6       4       6       5
## 5       1       1       1       1       4
## 6       2       1       2       3       5
table(France$imptrad)
## 
##    1    2    3    4    5    6    7    8    9 
## 3145 3711 3014 3754 3165 1843   48   90  126
# Create authoritarian values scale from Schwartz human values items
France <- France %>%
  mutate(
    # Rename variables for clarity
    behavior = ipbhprp,   # Proper behavior
    security = impsafe,   # Security
    stronggov = ipstrgv,   # Strong government
    tradition = imptrad, # Tradition
    rules = ipfrule     # Following rules
  ) %>%
  # Set invalid values to NA
  mutate(across(c("behavior", "security", "stronggov", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply reverse coding (original scale is 1=very much like me, 6=not like me at all)
  # After reversing: higher scores = more authoritarian
  mutate(across(c("behavior", "security", "stronggov", "tradition", "rules"), ~ 7 - .x))

# Calculate the authoritarian scale (0-100)
France$auth <- scales::rescale(
  France$behavior + 
  France$security + 
  France$stronggov + 
  France$tradition + 
  France$rules, 
  to = c(0, 100), 
  na.rm = TRUE
)

# Examine the distribution
summary(France$auth)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   48.00   60.00   59.75   72.00  100.00     800
table(France$eisced)
## 
##    0    1    2    3    4    5    6    7   55   77   88 
## 3309 2638 1580 3714 2815 2073  961 1915   17   12    4
unique(France$eisced)
##  [1]  0  2  1  3  5  6  4  7 77 55 88
# Create categorical predictor variables
France <- France %>%
  mutate(
    # Create Education_Level variable
    Education_Level = case_when(
      eisced %in% c("0", "1", "2") ~ "Lower Secondary or Less",
      eisced %in% c("3", "4") ~ "Upper Secondary",
      eisced %in% c("5", "6", "7") ~ "Tertiary Education",
      eisced %in% c("55", "77", "88") ~ NA_character_,
      TRUE ~ NA_character_
    ), 
    Education_Level = factor(Education_Level, levels = c("Lower Secondary or Less", "Upper Secondary", "Tertiary Education")))
    
table(France$Education_Level, useNA = "ifany")
## 
## Lower Secondary or Less         Upper Secondary      Tertiary Education 
##                    7527                    6529                    4949 
##                    <NA> 
##                      33
table(France$domicil)
## 
##    1    2    3    4    5    7    8 
## 3584 2361 6257 5668 1162    3    3
unique(France$domicil)
## [1] 3 2 1 4 5 8 7
# create residential area variable 
France <- France %>%
  mutate(
    Residential_Area = case_when(
      domicil %in% c("1", "2") ~ "Urban",
      domicil %in% c("3", "4", "5") ~ "Rural",
      domicil %in% c("7", "8", "9") ~ NA_character_,
      TRUE ~ NA_character_ 
    ), Residential_Area = factor(Residential_Area, levels = c("Urban", "Rural"))
  )
table(France$Residential_Area, useNA = "ifany")
## 
## Urban Rural  <NA> 
##  5945 13087     6
table(France$hincfel)
## 
##    1    2    3    4    7    8    9 
## 4981 7868 2469  356   27   28 1503
unique(France$hincfel)
## [1]  9 NA  4  3  2  1  7  8
# create feeling income variable 
France <- France %>%
  mutate(
    Feeling_Income = case_when(
      hincfel == "1" ~ "Economic Comfort",
      hincfel %in% c("2", "3", "4") ~ "Economic Strain",
      hincfel %in% c("7", "8", "9") ~ NA_character_,
      TRUE ~ NA_character_ 
    ), Feeling_Income = factor(Feeling_Income, levels = c("Economic Comfort", "Economic Strain"))
  )
table(France$Feeling_Income, useNA = "ifany")
## 
## Economic Comfort  Economic Strain             <NA> 
##             4981            10693             3364

Part B: Descriptive Analysis:

# Filter out NA values
France <- France %>%
  filter(!is.na(auth), !is.na(Education_Level))

# Create violin plot with internal boxplot
ggplot(France, aes(x = Education_Level, y = auth, fill = Education_Level)) +
  geom_violin(alpha = 0.6, color = "black", draw_quantiles = c(0.25, 0.5, 0.75)) +  
  geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.8) +
  scale_fill_manual(values = c("#028090", "red", "orange")) +  # Custom colors
  labs(
    title = "Distribution of Authoritarian Values by Education Level",
    x = "Education Level",
    y = "Authoritarian Values (0-100)",
    fill = "Education Level",
    caption = "Data Source: France ESS Data"
  ) +
  theme_minimal() +  
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray50"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"  
  )

Visualization Intepretation:

The visualization above shows that authoritarian values are the most highest in lower secondary or less education, and lowest in tertiary education. This is also known as the central tendency. The wide section of each boxplot shows that there is higher amount of data within that value. Meaning, between 50-75 authoritarian values for lower secondary or less education have a wider shape in that area. This indicates that most of the data is in that range. The same concept applies for each boxplot. This visuaization also that there are little amount of no authoritarian values in each edcucational section. For instance, considering each education level tapers at the end and is narrow, it suggests that there is not much data in that value.

Part C: Progressive Model Building:

# Model 1: Education level only
model1_education <- lm(auth ~ Education_Level, data = France)

summary(model1_education)
## 
## Call:
## lm(formula = auth ~ Education_Level, data = France)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.439 -11.473   0.561  12.561  45.291 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        63.4395     0.2236  283.76   <2e-16 ***
## Education_LevelUpper Secondary     -3.9660     0.3240  -12.24   <2e-16 ***
## Education_LevelTertiary Education  -8.7301     0.3506  -24.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.73 on 18210 degrees of freedom
## Multiple R-squared:  0.03302,    Adjusted R-squared:  0.03292 
## F-statistic:   311 on 2 and 18210 DF,  p-value: < 2.2e-16
# Model 2: Education level + residential area 
model2_education_residential <- lm(auth ~ Education_Level + Residential_Area, data = France)

summary(model2_education_residential)
## 
## Call:
## lm(formula = auth ~ Education_Level + Residential_Area, data = France)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63.34 -11.73   0.61  12.66  45.44 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        63.6811     0.3099 205.470   <2e-16 ***
## Education_LevelUpper Secondary     -3.9482     0.3241 -12.183   <2e-16 ***
## Education_LevelTertiary Education  -8.7727     0.3524 -24.896   <2e-16 ***
## Residential_AreaRural              -0.3432     0.3019  -1.137    0.256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.73 on 18205 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.03312,    Adjusted R-squared:  0.03296 
## F-statistic: 207.9 on 3 and 18205 DF,  p-value: < 2.2e-16
# Model 3: Education level + residential area + economic status
model3_education_residential_economic <- lm(auth ~ Education_Level + Residential_Area + Feeling_Income, data = France)

summary(model3_education_residential_economic)
## 
## Call:
## lm(formula = auth ~ Education_Level + Residential_Area + Feeling_Income, 
##     data = France)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.943 -12.501   0.457  13.058  46.281 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        63.6777     0.4599 138.457  < 2e-16 ***
## Education_LevelUpper Secondary     -4.3999     0.3739 -11.767  < 2e-16 ***
## Education_LevelTertiary Education  -9.0007     0.4104 -21.934  < 2e-16 ***
## Residential_AreaRural              -0.9583     0.3291  -2.912 0.003600 ** 
## Feeling_IncomeEconomic Strain       1.2236     0.3347   3.656 0.000257 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.53 on 15168 degrees of freedom
##   (3040 observations deleted due to missingness)
## Multiple R-squared:  0.03583,    Adjusted R-squared:  0.03557 
## F-statistic: 140.9 on 4 and 15168 DF,  p-value: < 2.2e-16
# create the regression table 
predictor_labels <- c(
  "(Intercept)" = "Intercept",
  "Education_LevelUpper Secondary" = "Upper Secondary",
  "Education_LevelTertiary Education" = "Tertiary Education",
  "Residential_AreaRural" = "Rural",
  "Feeling_IncomeEconomic Strain" = "Economic Strain"
  )

tab_model(
  model1_education, model2_education_residential, model3_education_residential_economic,
  pred.labels = predictor_labels,
  dv.labels = c("Model 1:\nEducation", "Model 2:\n+ Residential", "Model 3:\n+ Income"),
  string.pred = "Predictors",
  string.est = "β",
  string.ci = "95% CI",
  string.p = "p-value",
  p.style = "stars",
  p.threshold = c(0.05, 0.01, 0.001),
  show.reflvl = TRUE,
  show.aic = TRUE,
  show.r2 = TRUE,
  show.fstat = TRUE,
  show.loglik = FALSE,
  digits = 3,
  title = "Table 1. Authoritarian Values in Relation to Education Level, Residential Area, and Economic Status",
  CSS = list(
    css.table = "font-size: 12px; width: auto; margin-bottom: 20px;",
    css.thead = "font-weight: bold; border-bottom: 1px solid #ddd;",
    css.tdata = "padding: 5px; border-bottom: 1px solid #f0f0f0;",
    css.caption = "font-weight: bold; color: #333333; font-size: 14px; margin-bottom: 10px;",
    css.footnote = "font-size: 11px; color: #555555;",
    css.depvarhead = "color: #333333; font-weight: bold; text-align: center; border-bottom: 1px solid #ddd;",
    css.centeralign = "text-align: center;",
    css.firsttablecol = "font-weight: bold; font-size: 12px;"
  )
)
Table 1. Authoritarian Values in Relation to Education Level, Residential Area, and Economic Status
  Model 1: Education Model 2: + Residential Model 3: + Income
Predictors β 95% CI β 95% CI β 95% CI
Intercept 63.439 *** 63.001 – 63.878 63.681 *** 63.074 – 64.289 63.678 *** 62.776 – 64.579
Upper Secondary -3.966 *** -4.601 – -3.331 -3.948 *** -4.583 – -3.313 -4.400 *** -5.133 – -3.667
Tertiary Education -8.730 *** -9.417 – -8.043 -8.773 *** -9.463 – -8.082 -9.001 *** -9.805 – -8.196
Rural -0.343 -0.935 – 0.249 -0.958 ** -1.603 – -0.313
Economic Strain 1.224 *** 0.568 – 1.880
Observations 18213 18209 15173
R2 / R2 adjusted 0.033 / 0.033 0.033 / 0.033 0.036 / 0.036
AIC 158432.974 158392.730 131662.349
  • p<0.05   ** p<0.01   *** p<0.001
# calculate and present the AIC and BIC 
aic_bic_data <- data.frame(
  Model = c("Model 1: Education Level", "Model 2: Education Level and Residential Area", "Model 3: Educational Level, Residential Area, and Economic Income"),
  AIC = c(AIC(model1_education), AIC(model2_education_residential), AIC(model3_education_residential_economic)),
  BIC = c(BIC(model1_education), BIC(model2_education_residential), BIC(model3_education_residential_economic))
)

kable(aic_bic_data, digits = 1, caption = "Comparison of AIC and BIC Across Models")
Comparison of AIC and BIC Across Models
Model AIC BIC
Model 1: Education Level 158433.0 158464.2
Model 2: Education Level and Residential Area 158392.7 158431.8
Model 3: Educational Level, Residential Area, and Economic Income 131662.3 131708.1
# create coefficient plot model
plot_model(model3_education_residential_economic, 
           type = "est",
           show.values = TRUE,
           value.offset = 0.3,
           vline.color = "red",
           title = "Table 1. Authoritarian Values in Relation to Education Level, Residential Area, and Economic Status",
           axis.labels = c("Interception", 
                           "Upper Secondary",
                           "Tertiary Education",
                           "Rural",
                           "Economic Strain"),
           axis.title = "Authoritarian Values") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.y = element_text(face = "bold")
  )

## Part D: Model Comparison and Interpretation: The intercept in model 1 is 63.439 and represents individuals with lower secondary or less education. For model 2 the intercept is 63.681, which is relatively similar to model 1. Considering there is very little change to the intercept when residential area is added to education level of the intercept, it suggests that residential area doesn’t change or affect the authoritarian values of those with lower secondary or less education. For model 3, the intercept is 63.678, and again there is not much change. Although, adding income to the predictors show that it helps explain authoritarian values at education levels. Education is the predictor that is the best out of all three predictors. Using the education predictor would be the best one to describe and predict the authoritarian values because the values and coefficients do not change much when other predictors are added. These models tell us that lower secondary or less education have higher authoritarian values, and higher education has less authoritarian values. For instance, the authoritarian values can be lowered due to higher education because of thinking more deeply and by being open to diverse ideas. Residential area and economic status do not change the much. However, they still can predict the findings. For expample, income that is causing stress can result in higher authoritarian values and living in a rural area can result in a little bit lower values than other areas. Overall, each variable help explain authoritarian values, however, education level is the most reliable and highest predictor variable.

BONUS

model4_interaction_education_residential <- lm(auth ~ Education_Level * Residential_Area, data = France)
summary(model4_interaction_education_residential)
## 
## Call:
## lm(formula = auth ~ Education_Level * Residential_Area, data = France)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.388 -12.654   0.612  12.947  45.771 
## 
## Coefficients:
##                                                         Estimate Std. Error
## (Intercept)                                              63.5582     0.4159
## Education_LevelUpper Secondary                           -2.9040     0.6152
## Education_LevelTertiary Education                        -9.3291     0.5942
## Residential_AreaRural                                    -0.1704     0.4931
## Education_LevelUpper Secondary:Residential_AreaRural     -1.4303     0.7237
## Education_LevelTertiary Education:Residential_AreaRural   0.9685     0.7388
##                                                         t value Pr(>|t|)    
## (Intercept)                                             152.839  < 2e-16 ***
## Education_LevelUpper Secondary                           -4.720 2.37e-06 ***
## Education_LevelTertiary Education                       -15.701  < 2e-16 ***
## Residential_AreaRural                                    -0.345   0.7297    
## Education_LevelUpper Secondary:Residential_AreaRural     -1.976   0.0481 *  
## Education_LevelTertiary Education:Residential_AreaRural   1.311   0.1899    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.73 on 18203 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.03365,    Adjusted R-squared:  0.03339 
## F-statistic: 126.8 on 5 and 18203 DF,  p-value: < 2.2e-16
task3models <- list(
   "Education" = lm(auth ~ Education_Level, data = France),
   "Education + Residential Area" = lm(auth ~ Education_Level + Residential_Area, data = France)

)

# Create custom coefficient labels
task3coef <- c("(Intercept)" = "Intercept ", 
                           "Education_LevelUpper Secondary " = "Upper Secondary",
                           "Education_LevelTertiary Education" = "Tertiary Education",
                           "Residential_AreaRural" = "Rural",
                           "Education_LevelUpper Secondary:Residential_AreaRural" = "Upper Secondary + Rural",
                           "Education_LevelTertiary Education:Residential_AreaRural" = "Tertiary Education + Rural")

# Create custom goodness-of-fit metrics
task3gofm <- tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R²",            2,
  "adj.r.squared", "Adjusted R²", 2,
  "AIC",       "AIC",           1,
  "BIC",       "BIC",           1,
  "rmse",      "RMSE",          2
)
table2 <- modelsummary(
  task3models,
  coef_map = task3coef,
  gof_map = task3gofm,
  stars = TRUE,
  title = "Authoritarian Values of Education Level and Educational + Residential Area",
  notes = "Data source: France ESS Data"
)
table2
Authoritarian Values of Education Level and Educational + Residential Area
Education Education + Residential Area
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Data source: France ESS Data
Intercept 63.439*** 63.681***
(0.224) (0.310)
Tertiary Education -8.730*** -8.773***
(0.351) (0.352)
Rural -0.343
(0.302)
N 18213 18209
0.03 0.03
Adjusted R² 0.03 0.03
RMSE 18.73 18.73
table3 <- modelsummary(
  task3models,
  output = "kableExtra",
  coef_map = task3coef,
  gof_map = task3gofm,
  stars = TRUE,
) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  kableExtra::add_header_above(c(" " = 1, "Authoritarian Values Models" = 2)) %>%
  kableExtra::footnote(
    general = "Data source: France ESS Data",
    number = c(
      "Reference category for education is 'Lower Secondary or less'",
      "Reference category for residential area is 'Urban'"
    ),
    symbol = c("+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001")
  )
table3
Authoritarian Values Models
Education Education + Residential Area
Intercept 63.439*** 63.681***
(0.224) (0.310)
Tertiary Education −8.730*** −8.773***
(0.351) (0.352)
Rural −0.343
(0.302)
N 18213 18209
0.03 0.03
Adjusted R² 0.03 0.03
RMSE 18.73 18.73
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Note:
Data source: France ESS Data
1 Reference category for education is ‘Lower Secondary or less’
2 Reference category for residential area is ‘Urban’
+ p < 0.1, p < 0.05, ** p < 0.01, *** p < 0.001

Education still remains the best predictor for authoritarian values. Where people live, urban or rural, does not overly effect the results. From this table, there is no relationship or interaction between education and education + residential area that is statistically significant.