Assignment 2: Regression Model Building & Interactions

Author

Jingyi Yang

Setup

library(faux) #Creates multi-variate normally distributed variables

Warning: package 'faux' was built under R version 4.4.3


************
Welcome to faux. For support and examples visit:
https://debruine.github.io/faux/
- Get and set global package options with: faux_options()
************

library(haven) #Imports survey data 
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(skimr)
library(lmtest)

Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

library(sandwich)

Warning: package 'sandwich' was built under R version 4.4.3

library(stargazer)


Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(jtools)

Warning: package 'jtools' was built under R version 4.4.3

library(ggplot2)
library(ggeffects)

Warning: package 'ggeffects' was built under R version 4.4.3

library(caret)

Warning: package 'caret' was built under R version 4.4.3

Loading required package: lattice

Attaching package: 'caret'

The following object is masked from 'package:purrr':

    lift

library(MASS)


Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select

library(flextable)

Warning: package 'flextable' was built under R version 4.4.3


Attaching package: 'flextable'

The following object is masked from 'package:jtools':

    theme_apa

The following object is masked from 'package:purrr':

    compose

library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

Data prepare

Import the data

# Read the reduced nine-country WVS extract (Stata .dta format) and preview
# the first six rows. NOTE(review): the path is machine-specific.
WVS_9_Countries_reduced <- haven::read_dta(
  file = "C:/Users/Admin/Downloads/WVS - 9 Countries_reduced.dta"
)
head(WVS_9_Countries_reduced, n = 6)

# A tibble: 6 × 12
  b_country b_country_alpha w_weight   q47   q49   q50   q57  q158  q260  q273
      <dbl> <chr>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1        50 BGD                    1     3     4     2     2     9     2     1
2        50 BGD                    1     3     1     5     2    10     2     1
3        50 BGD                    1     2     8     9     2     9     2     1
4        50 BGD                    1     1     9     9     1     9     2     1
5        50 BGD                    1     3     8     7     2    10     1     1
6        50 BGD                    1     3     8     7     2    10     1     1
# ℹ 2 more variables: age_group4 <dbl+lbl>, educ4 <dbl+lbl>

Recode and Reverse coding

Rename the column.
Reverse-code the variable “How to describe the state of personal health” so that higher values indicate better health. This aligns its direction with the dependent variable “How Satisfied with Life” and with independent variables such as “How satisfied with personal finances,” where higher values mean greater satisfaction.

# Informative replacement names, listed in the same order as the 12 columns
# of the imported data frame.
new_names <- c(
  "country_number", "Country", "Weight",
  "personal_health", "life_satisfaction", "personal_finances",
  "trust_in_people", "science_technology", "biological_sex",
  "marital_status", "age_group", "education"
)

# Rename positionally: element i of new_names replaces the i-th column name.
names(WVS_9_Countries_reduced) <- new_names
head(WVS_9_Countries_reduced, n = 6)

# A tibble: 6 × 12
  country_number Country Weight personal_health life_satisfaction
           <dbl> <chr>    <dbl>           <dbl>             <dbl>
1             50 BGD          1               3                 4
2             50 BGD          1               3                 1
3             50 BGD          1               2                 8
4             50 BGD          1               1                 9
5             50 BGD          1               3                 8
6             50 BGD          1               3                 8
# ℹ 7 more variables: personal_finances <dbl>, trust_in_people <dbl>,
#   science_technology <dbl>, biological_sex <dbl>, marital_status <dbl>,
#   age_group <dbl+lbl>, education <dbl+lbl>

# Reverse-code personal_health so that larger numbers mean better health:
# 1 <-> 5, 2 <-> 4, 3 stays 3. Any value that is not one of 1..5 (including
# NA) becomes NA. The value labels are then attached to the reversed scale.
WVS_9_Countries_reduced <- WVS_9_Countries_reduced %>%
  mutate(
    # match() gives the position of each value in 1:5 (NA when absent); that
    # position selects the mirrored score from the lookup vector.
    personal_health = c(5, 4, 3, 2, 1)[match(personal_health, 1:5)]
  ) %>%
  mutate(
    personal_health = labelled(
      personal_health,
      c(`Very poor` = 1, `Poor` = 2, `Fair` = 3, `Good` = 4, `Very good` = 5)
    )
  )

head(WVS_9_Countries_reduced, n = 6)

# A tibble: 6 × 12
  country_number Country Weight personal_health life_satisfaction
           <dbl> <chr>    <dbl> <dbl+lbl>                   <dbl>
1             50 BGD          1 3 [Fair]                        4
2             50 BGD          1 3 [Fair]                        1
3             50 BGD          1 4 [Good]                        8
4             50 BGD          1 5 [Very good]                   9
5             50 BGD          1 3 [Fair]                        8
6             50 BGD          1 3 [Fair]                        8
# ℹ 7 more variables: personal_finances <dbl>, trust_in_people <dbl>,
#   science_technology <dbl>, biological_sex <dbl>, marital_status <dbl>,
#   age_group <dbl+lbl>, education <dbl+lbl>

# Summarise every column of the data frame; the n_missing and complete_rate
# columns of the output are used to evaluate missing data.
skimr::skim(WVS_9_Countries_reduced) #Checks the variables in your data frame; evaluate for missing data

Data summary
Name	WVS_9_Countries_reduced
Number of rows	16536
Number of columns	12
_______________________
Column type frequency:
character	1
numeric	11
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Country	0	1	3	3	0	9	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
country_number	0	1.00	413.41	297.72	50.00	124.00	504	716	840	▇▁▃▂▆
Weight	0	1.00	1.00	0.46	0.12	0.91	1	1	10	▇▁▁▁▁
personal_health	95	0.99	3.75	0.88	1.00	3.00	4	4	5	▁▁▅▇▃
life_satisfaction	104	0.99	6.90	2.25	1.00	6.00	7	8	10	▁▂▅▇▅
personal_finances	128	0.99	6.20	2.47	1.00	5.00	7	8	10	▂▃▆▇▅
trust_in_people	273	0.98	1.69	0.46	1.00	1.00	2	2	2	▃▁▁▁▇
science_technology	316	0.98	7.60	2.25	1.00	6.00	8	10	10	▁▁▃▆▇
biological_sex	0	1.00	1.50	0.50	1.00	1.00	2	2	2	▇▁▁▁▇
marital_status	148	0.99	2.68	2.14	1.00	1.00	1	5	6	▇▁▁▁▃
age_group	5	1.00	2.25	0.98	1.00	2.00	2	3	4	▅▇▁▅▃
education	279	0.98	1.93	1.05	1.00	1.00	2	3	4	▇▃▁▃▂

Non Interactive Model

Hypothesis and regression model

Directional hypothesis: I think the results would be that compared to the people who are satisfied with their financial and health status, people who are less satisfied with their financial and health status are less likely to be satisfied with life. Regarding the age groups, according to the research, age might positively correlate with life satisfaction (Baird, Lucas, and Donnellan 2010). Accordingly, the hypothesis would be the higher the age, the higher the level of life satisfaction.
In the OLS analysis, I treat the independent variables “How satisfied with personal finances” and “How to describe the state of personal health” as numerical variables, because a one-unit increase is meaningful: it indicates a higher level of satisfaction or a better health status. I treat the variable “age” as a categorical variable because it is recorded as age groups, which makes the comparison between group ranges clearer.
I selected education, biological sex, and trust in people as control variables. Education and biological sex are demographic factors, and the “trust” variable explores the impact of trust on individuals’ life satisfaction.

# Additive (non-interactive) OLS model of life satisfaction. Finances and
# health enter as numeric predictors; age group, education, sex and trust
# enter as factors, so each non-reference level gets its own coefficient.
directional_hypothesis <- lm(
  life_satisfaction ~ personal_finances + personal_health +
    factor(age_group) + factor(education) +
    factor(biological_sex) + factor(trust_in_people),
  data = WVS_9_Countries_reduced
)
summary(directional_hypothesis)


Call:
lm(formula = life_satisfaction ~ personal_finances + personal_health + 
    factor(age_group) + factor(education) + factor(biological_sex) + 
    factor(trust_in_people), data = WVS_9_Countries_reduced)

Residuals:
    Min      1Q  Median      3Q     Max 
-8.3179 -0.9554  0.0525  0.9627  7.2972 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.640768   0.078424  20.922  < 2e-16 ***
personal_finances         0.514881   0.005929  86.847  < 2e-16 ***
personal_health           0.504950   0.016543  30.523  < 2e-16 ***
factor(age_group)2        0.183196   0.034667   5.284 1.28e-07 ***
factor(age_group)3        0.302157   0.039687   7.613 2.82e-14 ***
factor(age_group)4        0.466369   0.046924   9.939  < 2e-16 ***
factor(education)2       -0.027822   0.035357  -0.787   0.4314    
factor(education)3       -0.089450   0.037343  -2.395   0.0166 *  
factor(education)4       -0.082125   0.047777  -1.719   0.0857 .  
factor(biological_sex)2   0.066829   0.027136   2.463   0.0138 *  
factor(trust_in_people)2 -0.024665   0.031387  -0.786   0.4320    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.7 on 15979 degrees of freedom
  (546 observations deleted due to missingness)
Multiple R-squared:  0.4344,    Adjusted R-squared:  0.4341 
F-statistic:  1227 on 10 and 15979 DF,  p-value: < 2.2e-16

# Text regression table for the additive model. The covariate labels follow
# the coefficient order of the model: the two numeric predictors first, then
# the non-reference levels of age group, education, sex and trust.
stargazer(
  directional_hypothesis,
  type = "text",
  digits = 3,
  single.row = TRUE,
  dep.var.labels = c("How Satisfied with Life"),
  covariate.labels = c(
    "Satisfied With Personal Finances",
    "Describe State Of Personal Health",
    "30-49", "50-64", " 65+",
    "Some Post HS education", "BA/BS", "Advanced Degree",
    "Female",
    "Need to be very careful about trusting people"
  )
)


===========================================================================
                                                   Dependent variable:     
                                              -----------------------------
                                                 How Satisfied with Life   
---------------------------------------------------------------------------
Satisfied With Personal Finances                    0.515*** (0.006)       
Describe State Of Personal Health                   0.505*** (0.017)       
30-49                                               0.183*** (0.035)       
50-64                                               0.302*** (0.040)       
65+                                                 0.466*** (0.047)       
Some Post HS education                               -0.028 (0.035)        
BA/BS                                               -0.089** (0.037)       
Advanced Degree                                      -0.082* (0.048)       
Female                                               0.067** (0.027)       
Need to be very careful about trusting people        -0.025 (0.031)        
Constant                                            1.641*** (0.078)       
---------------------------------------------------------------------------
Observations                                             15,990            
R2                                                        0.434            
Adjusted R2                                               0.434            
Residual Std. Error                                1.700 (df = 15979)      
F Statistic                                   1,227.490*** (df = 10; 15979)
===========================================================================
Note:                                           *p<0.1; **p<0.05; ***p<0.01

The table above shows the results of the OLS model. Each coefficient is interpreted holding all other variables in the model constant: a) When financial satisfaction increases by one unit, life satisfaction increases, on average, by 0.515. b) When personal health increases by one unit, life satisfaction increases, on average, by 0.505. c) Compared with the age group “18-29,” life satisfaction is, on average, 0.183 higher in the “30-49” group, 0.302 higher in the “50-64” group, and 0.466 higher in the “65+” group. d) Compared with the education group “HS degree or less,” life satisfaction is, on average, 0.027822 lower in the “Some Post HS education” group (not statistically significant), 0.089450 lower in the “BA/BS” group, and 0.082125 lower in the “Advanced Degree” group (significant only at the 0.1 level). e) Compared with males, life satisfaction is, on average, 0.066829 higher for females. f) Compared with the “Most people can be trusted” group, life satisfaction is, on average, 0.024665 lower in the “Need to be very careful about trusting people” group, but this difference is not statistically significant.

Graphing Regression Results

# Predicted life satisfaction at each observed value of personal_finances
# from the additive model; n = Inf prints every row of the prediction table.
personal_finances_one_term <- ggpredict(
  directional_hypothesis,
  terms = "personal_finances"
)
print(personal_finances_one_term, n = Inf)

# Predicted values of life_satisfaction

personal_finances | Predicted |     95% CI
------------------------------------------
                1 |      4.05 | 3.94, 4.15
                2 |      4.56 | 4.47, 4.66
                3 |      5.08 | 4.99, 5.17
                4 |      5.59 | 5.51, 5.68
                5 |      6.11 | 6.02, 6.20
                6 |      6.62 | 6.54, 6.71
                7 |      7.14 | 7.05, 7.22
                8 |      7.65 | 7.57, 7.74
                9 |      8.17 | 8.08, 8.26
               10 |      8.68 | 8.59, 8.78

Adjusted for:
* personal_health = 3.75
*       age_group =    1
*       education =    1
*  biological_sex =    1
* trust_in_people =    1

# Predictions across personal_finances with personal_health fixed at 5 and
# age_group restricted to levels 1 and 4, so the two age groups can be
# compared at the same health level.
personal_finances_three_term <- ggpredict(
  directional_hypothesis,
  terms = c("personal_finances", "personal_health [5]", "age_group[1,4]")
)
print(personal_finances_three_term, n = Inf)

# Predicted values of life_satisfaction

age_group: 1

personal_finances | Predicted |     95% CI
------------------------------------------
                1 |      4.68 | 4.56, 4.80
                2 |      5.20 | 5.09, 5.30
                3 |      5.71 | 5.61, 5.81
                4 |      6.23 | 6.13, 6.32
                5 |      6.74 | 6.65, 6.83
                6 |      7.25 | 7.16, 7.35
                7 |      7.77 | 7.68, 7.86
                8 |      8.28 | 8.19, 8.38
                9 |      8.80 | 8.71, 8.89
               10 |      9.31 | 9.22, 9.41

age_group: 4

personal_finances | Predicted |     95% CI
------------------------------------------
                1 |      5.15 | 5.02, 5.28
                2 |      5.66 | 5.54, 5.79
                3 |      6.18 | 6.06, 6.29
                4 |      6.69 | 6.58, 6.80
                5 |      7.21 | 7.10, 7.31
                6 |      7.72 | 7.62, 7.82
                7 |      8.24 | 8.14, 8.34
                8 |      8.75 | 8.65, 8.85
                9 |      9.27 | 9.17, 9.37
               10 |      9.78 | 9.68, 9.88

Adjusted for:
*       education = 1
*  biological_sex = 1
* trust_in_people = 1

# Grouped bar chart of the three-term predictions: one bar per finances
# level and age group, with 95% confidence-interval error bars.
footnote_text <- str_wrap(
  "Note: state of personal health is 'Very Good', Age group is set to '18-29' and '65+'",
  width = 70
)
ggplot(
  personal_finances_three_term,
  aes(
    x = factor(x),
    y = predicted,
    group = as.factor(facet),
    fill = as.factor(facet)
  )
) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col(width = 0.7, position = position_dodge()) +
  # Dodge the error bars by the bar width so they sit on their own bars.
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    linewidth = .3,
    width = .2,
    position = position_dodge(width = .7)
  ) +
  scale_fill_manual(
    name = "Age",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "18-29", "4" = "65+")
  ) +
  labs(
    x = "Satisfied With Personal Finances",
    y = "Predicted Satisfied With Life",
    title = "Bar Chart by Response Level and Group",
    subtitle = footnote_text
  ) +
  theme_minimal(base_size = 7)

# Line chart of the same three-term predictions: one line per age group,
# with a shaded ribbon for the 95% confidence interval.
footnote_text <- str_wrap(
  "Note: state of personal health is 'Very Good', Age group is set to '18-29' and '65+'",
  width = 70
)
ggplot(
  personal_finances_three_term,
  aes(
    x = factor(x),
    y = predicted,
    group = as.factor(facet),
    color = as.factor(facet)
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high, fill = as.factor(facet)),
    alpha = 0.2
  ) +
  # Colour and fill scales share name, values and labels.
  scale_color_manual(
    name = "Age",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "18-29", "4" = "65+")
  ) +
  scale_fill_manual(
    name = "Age",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "18-29", "4" = "65+")
  ) +
  labs(
    x = "Satisfied With Personal Finances",
    y = "Predicted Satisfied With Life",
    title = "Line Chart by Response Level and Group",
    subtitle = footnote_text
  ) +
  theme_minimal(base_size = 10)

Using the `ggpredict()` function, I estimate the impact of people’s satisfaction with their finances on their satisfaction with life while holding the state of personal health at “very good,” and show how the predictions differ between two age groups, “18-29” and “65+.”

Coefficient Plots and Interpretation

# Coefficient plot of the additive model with robust standard errors and an
# inner confidence band at the 0.9 level. Each entry of `coefs` maps a
# display label (name) to a model term (value).
plot_summs(
  directional_hypothesis,
  model.names = c("OLS: How Satisfied with Life"),
  coefs = c(
    "Satisfied With Personal Finances" = "personal_finances",
    "Describe State Of Personal Health" = "personal_health",
    "30-49" = "factor(age_group)2",
    "50-64" = "factor(age_group)3",
    " 65+" = "factor(age_group)4",
    "Some Post HS education" = "factor(education)2",
    "BA/BS" = "factor(education)3",
    "Advanced Degree" = "factor(education)4",
    "Female" = "factor(biological_sex)2",
    "Need to be very careful about trusting people" = "factor(trust_in_people)2"
  ),
  inner_ci_level = .9,
  robust = TRUE
)

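According to the plot, the coefficient for “Satisfied With Personal Finances” is 0.515 and the coefficient for “Describe State Of Personal Health” is 0.505. Because finances are measured on a 1–10 scale and health on a 1–5 scale, these per-unit coefficients are not directly comparable. Over the full range of each scale, however, finances account for a larger change in predicted life satisfaction (about 9 × 0.515 ≈ 4.6 points) than health does (about 4 × 0.505 ≈ 2.0 points), which suggests that financial well-being is more important to life satisfaction across these nine countries.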

Re-estimate the model using robust standard errors

# Re-test the additive model's coefficients with heteroskedasticity-
# consistent covariance matrices (HC1 and HC3). The point estimates are
# unchanged; only standard errors, t values and p-values are recomputed.
robust1 <- coeftest(
  directional_hypothesis,
  vcov = vcovHC(directional_hypothesis, type = "HC1")
)
robust3 <- coeftest(
  directional_hypothesis,
  vcov = vcovHC(directional_hypothesis, type = "HC3")
)

# Column (1) is the original OLS fit, (2) uses HC1 and (3) uses HC3.
stargazer(
  directional_hypothesis, robust1, robust3,
  type = "text",
  single.row = TRUE,
  dep.var.labels = c("How Satisfied with Life"),
  covariate.labels = c(
    "Satisfied With Personal Finances",
    "Describe State Of Personal Health",
    "30-49", "50-64", " 65+",
    "Some Post HS education", "BA/BS", "Advanced Degree",
    "Female",
    "Need to be very careful about trusting people"
  )
)


===============================================================================================================
                                                                     Dependent variable:                       
                                              -----------------------------------------------------------------
                                                 How Satisfied with Life                                       
                                                           OLS                          coefficient            
                                                                                           test                
                                                           (1)                     (2)               (3)       
---------------------------------------------------------------------------------------------------------------
Satisfied With Personal Finances                    0.515*** (0.006)        0.515*** (0.008)  0.515*** (0.008) 
Describe State Of Personal Health                   0.505*** (0.017)        0.505*** (0.019)  0.505*** (0.019) 
30-49                                               0.183*** (0.035)        0.183*** (0.036)  0.183*** (0.036) 
50-64                                               0.302*** (0.040)        0.302*** (0.041)  0.302*** (0.041) 
65+                                                 0.466*** (0.047)        0.466*** (0.047)  0.466*** (0.047) 
Some Post HS education                               -0.028 (0.035)          -0.028 (0.035)    -0.028 (0.035)  
BA/BS                                               -0.089** (0.037)        -0.089*** (0.034) -0.089*** (0.034)
Advanced Degree                                      -0.082* (0.048)        -0.082** (0.040)  -0.082** (0.040) 
Female                                               0.067** (0.027)         0.067** (0.027)   0.067** (0.027) 
Need to be very careful about trusting people        -0.025 (0.031)          -0.025 (0.028)    -0.025 (0.028)  
Constant                                            1.641*** (0.078)        1.641*** (0.085)  1.641*** (0.085) 
---------------------------------------------------------------------------------------------------------------
Observations                                             15,990                                                
R2                                                        0.434                                                
Adjusted R2                                               0.434                                                
Residual Std. Error                                1.700 (df = 15979)                                          
F Statistic                                   1,227.490*** (df = 10; 15979)                                    
===============================================================================================================
Note:                                                                               *p<0.1; **p<0.05; ***p<0.01

The table above shows the result that re-estimates the model using robust standard errors.
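The conclusions are: a) the coefficients do not change with the application of robust standard errors, because only the standard errors are re-estimated. b) The robust standard errors (HC1 and HC3 are identical to three decimal places) are close to, but not exactly the same as, those of the model before adjustment: for example, 0.008 versus 0.006 for personal finances and 0.040 versus 0.048 for “Advanced Degree.” The only changes in significance level are for “BA/BS” (from p<0.05 to p<0.01) and “Advanced Degree” (from p<0.1 to p<0.05). Accordingly, the conclusions are essentially the same between model types, and the overall difference in size between the standard errors is small, so the results do not strongly warrant the use of robust standard error adjustments, although the education coefficients are the ones most sensitive to the choice.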

Interactive Model

Hypothesis and regression model.

# Baseline additive model: life satisfaction on finances and health only.
interactive_hypothesis <- lm(
  life_satisfaction ~ personal_finances + personal_health,
  data = WVS_9_Countries_reduced
)

# Same two predictors plus their product term
# (personal_finances:personal_health).
interactive_hypothesis_interaction <- lm(
  life_satisfaction ~ personal_finances * personal_health,
  data = WVS_9_Countries_reduced
)

# Side-by-side table of both models. The third label belongs to the
# interaction coefficient personal_finances:personal_health, so it names both
# variables; the previous label repeated personal health twice and
# mislabelled that row.
stargazer(
  interactive_hypothesis, interactive_hypothesis_interaction,
  type = "text",
  digits = 3,
  single.row = TRUE,
  dep.var.labels = c("Satisfied With Life"),
  covariate.labels = c(
    "Satisfied With Personal Finances",
    "State Of Personal Health",
    "Satisfied With Personal Finances:State Of Personal Health"
  )
)


===========================================================================================================
                                                                     Dependent variable:                   
                                                  ---------------------------------------------------------
                                                                     Satisfied With Life                   
                                                              (1)                          (2)             
-----------------------------------------------------------------------------------------------------------
Satisfied With Personal Finances                        0.524*** (0.006)             0.675*** (0.021)      
State Of Personal Health                                0.470*** (0.016)             0.705*** (0.035)      
state of personal health:State Of Personal Health                                   -0.042*** (0.006)      
Constant                                                1.895*** (0.060)             1.076*** (0.126)      
-----------------------------------------------------------------------------------------------------------
Observations                                                 16,362                       16,362           
R2                                                           0.430                        0.432            
Adjusted R2                                                  0.430                        0.432            
Residual Std. Error                                    1.702 (df = 16359)           1.699 (df = 16358)     
F Statistic                                       6,170.153*** (df = 2; 16359) 4,145.519*** (df = 3; 16358)
===========================================================================================================
Note:                                                                           *p<0.1; **p<0.05; ***p<0.01

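According to the table above, the effect of financial satisfaction on life satisfaction is not stronger for healthier individuals; it is weaker. In model (2), the coefficient of 0.675 is the effect of financial satisfaction when personal health equals 0, a value outside the 1–5 scale, so the increase from 0.524 to 0.675 does not by itself indicate a stronger effect. The interaction term (finances × health, the third row of the table) is −0.042, so the effect of a one-unit increase in financial satisfaction is 0.675 − 0.042 × health: about 0.633 for persons in very poor health (1) and about 0.465 for persons in very good health (5).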

As the p-value is less than 0.05, the interactive term is significant. The significant coefficient on the interaction term indicates that the influence of How satisfied with personal finances on How Satisfied with Life is different based on how people describe their state of personal health.

Graphing Regression Results

# Predictions from the interaction model across personal_finances, with
# personal_health held at its two extremes (1 and 5).
financial_satisfaction <- ggpredict(
  interactive_hypothesis_interaction,
  terms = c("personal_finances", "personal_health [1,5]")
)
financial_satisfaction

# Predicted values of life_satisfaction

personal_health: 1

personal_finances | Predicted |     95% CI
------------------------------------------
                1 |      2.41 | 2.26, 2.57
                2 |      3.05 | 2.92, 3.18
                4 |      4.31 | 4.22, 4.41
                6 |      5.58 | 5.49, 5.67
                7 |      6.21 | 6.11, 6.32
               10 |      8.11 | 7.94, 8.29

personal_health: 5

personal_finances | Predicted |     95% CI
------------------------------------------
                1 |      5.07 | 4.95, 5.18
                2 |      5.53 | 5.44, 5.63
                4 |      6.47 | 6.40, 6.54
                6 |      7.40 | 7.35, 7.45
                7 |      7.87 | 7.82, 7.92
               10 |      9.27 | 9.19, 9.35


Not all rows are shown in the output. Use `print(..., n = Inf)` to show
  all rows.

# Grouped bar chart of the interaction-model predictions: one bar per
# finances level and health group, with 95% confidence-interval error bars.
ggplot(
  financial_satisfaction,
  aes(x = factor(x), y = predicted, group = group, fill = group)
) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col(width = 0.7, position = position_dodge()) +
  # Dodge the error bars by the bar width so they sit on their own bars.
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    linewidth = 0.3,
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  scale_fill_manual(
    name = "State Of Personal Health",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "Very Poor", "5" = "Very Good")
  ) +
  labs(
    x = "Satisfied With Personal Finances",
    y = "Predicted Satisfied with Life",
    title = "Bar Chart by Response Level and Group"
  ) +
  theme_minimal(base_size = 10)

 # Line chart of the interaction-model predictions: one line per health group
 # (1 and 5) with a shaded 95% confidence ribbon. The title now says "Line
 # Chart" (it previously said "Bar Chart"), and the ribbon is filled per group
 # so the two bands are distinguishable, as in the other line charts.
 ggplot(
   financial_satisfaction,
   aes(x = factor(x), y = predicted, group = group, color = group)
 ) +
   geom_line(linewidth = 1) +
   geom_point(size = 2) +
   geom_ribbon(
     aes(ymin = conf.low, ymax = conf.high, fill = group),
     alpha = 0.1
   ) +
   # Colour and fill scales share name, values and labels.
   scale_color_manual(
     name = "State Of Personal Health",
     values = c("lightgrey", "darkgrey"),
     labels = c("1" = "Very Poor", "5" = "Very Good")
   ) +
   scale_fill_manual(
     name = "State Of Personal Health",
     values = c("lightgrey", "darkgrey"),
     labels = c("1" = "Very Poor", "5" = "Very Good")
   ) +
   theme_minimal(base_size = 10) +
   labs(
     x = " Satisfied With Personal Finances",
     y = "Predicted Satisfied with Life",
     title = "Line Chart by Response Level and Group"
   )

According to the predictions and the graphic, the slopes of the two lines are different, which reflects the significant interaction term in the interactive model. The model shows that, at every level of satisfaction with personal finances and holding everything else constant, people with very good personal health are predicted to be significantly more satisfied with their lives than people with very poor personal health. In addition, it is worth noticing that as satisfaction with personal finances increases, the difference in predicted life satisfaction between very good and very poor personal health decreases.

# Predictions from the additive (non-interactive) model across
# personal_finances, with personal_health held at its two extremes (1 and 5).
financial_satisfaction_no_interact <- ggpredict(
  directional_hypothesis,
  terms = c("personal_finances", "personal_health [1,5]")
)

# Grouped bar chart of the NON-interactive predictions. The chart previously
# plotted `financial_satisfaction` (the interaction-model predictions), so it
# repeated the earlier figure instead of showing the object computed above.
ggplot(
  financial_satisfaction_no_interact,
  aes(x = factor(x), y = predicted, group = group, fill = group)
) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge()) +
  scale_fill_manual(
    name = "State Of Personal Health",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "Very Poor", "5" = "Very Good")
  ) +
  theme_minimal(base_size = 10) +
  labs(
    x = "Satisfied With Personal Finances",
    y = "Predicted Satisfied with Life",
    title = "Bar Chart by Response Level and Group"
  ) +
  # Dodge the error bars by the bar width so they sit on their own bars.
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    linewidth = 0.3,
    width = 0.2,
    position = position_dodge(width = 0.7)
  )

# Line chart of the non-interactive predictions: one line per health group
# (1 and 5), with a shaded ribbon for the 95% confidence interval.
ggplot(
  financial_satisfaction_no_interact,
  aes(
    x = factor(x),
    y = predicted,
    group = as.factor(group),
    color = as.factor(group)
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high, fill = as.factor(group)),
    alpha = 0.2
  ) +
  # Colour and fill scales share name, values and labels.
  scale_color_manual(
    name = "State Of Personal Health",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "Very Poor", "5" = "Very Good")
  ) +
  scale_fill_manual(
    name = "State Of Personal Health",
    values = c("lightgrey", "darkgrey"),
    labels = c("1" = "Very Poor", "5" = "Very Good")
  ) +
  labs(
    x = "Satisfied With Personal Finances",
    y = "Predicted Satisfied With Life",
    title = "Line Chart by Response Level and Group"
  ) +
  theme_minimal(base_size = 10)

The graphic shows that the two lines run parallel to each other, which means that in the non-interactive model the relationship between satisfaction with personal finances and satisfaction with life is the same for people with very good and very poor personal health. Accordingly, the interactive model, with its significant interaction term, describes the relationship between satisfaction with personal finances and satisfaction with life more precisely, because it does not miss the moderating impact of personal health.

Conclusion

According to the fit statistics in the regression tables, the R-squared and adjusted R-squared of the interactive model are slightly higher than those of the non-interactive model (0.432 versus 0.430), and the residual standard error of the interactive model is slightly lower (1.699 versus 1.702). The F statistic of the interactive model is lower than that of the non-interactive model (4,145.519 versus 6,170.153), which reflects the additional term in the model rather than a worse fit. Thus, according to the graphic and the fit statistics, the interactive model fits the data slightly more closely.

References

Baird, Brendan M., Richard E. Lucas, and M. Brent Donnellan. 2010. “Life Satisfaction Across the Lifespan: Findings from Two Nationally Representative Panel Studies.” Social Indicators Research 99 (2): 183–203. https://doi.org/10.1007/s11205-010-9584-9.