library(tidyverse)
library(sjstats)
library(haven)
library(lme4)
library(arm)
library(modelsummary)
library(ggplot2)
library(stargazer)
library(janitor)
library(ggeffects)
library(skimr)
library(kableExtra)

wvs_data <- read_dta("wvs_hw4_data.dta")

1 Write a hypothesis for all primary independent variables

People with a higher financial satisfaction will be more satisfied.
People with a higher personal health satisfaction will be more satisfied.

IV speculation for varying slope and intercept model

Financial satisfaction

For the USA I think we have a fairly high level of financial satisfaction compared to most countries thus, I think this variable will have a lower impact compared to places such as KEN or IRQ where the finances would go further or be more impactful there.

Personal Health

For the USA I think we have a reputation of being unhealthy or obese thus, I think Personal health will be more impactful here. I believe GER will follow a similar road as the USA but maybe not as impactful. Places like JAP will not show as much as an impact since their population has a healthier lifestyle.

2 Control Variables

Sex (q260) - Each gender has a different experience when it comes to financial satisfaction and personal health A general idea is that males tend to be more worried about financial satisfaction and females may tend to lean more toward personal health. There is also a difference in the cultures of countries impacting each gender differently.
Immigrant (q263) - Typically, someone moves to another country because of political, health, or financial stress in that region, thus I think immigrants will be more satisfied over all.
Education (q275) - The higher someones education level normally the higher their salary which means they can have a higher financial satisfaction and personal health. However, it also gives them a sense of accomplishment possibly leading to being more satisfied overall.

3 Data Cleaning

# Creating a new data frame for my variables

wvs <- data.frame(wvs_data$q49, wvs_data$q50,wvs_data$q47, wvs_data$q260, wvs_data$q263,wvs_data$q275, wvs_data$w_weight, wvs_data$democ, wvs_data$b_country_alpha)

# giving more intuitive names
new_names <- c("Satisfaction", "Fin_sat", "Per_health", "Sex", "Immigrant", "Educ", "w_weight", "Democratic", "Country")

colnames(wvs) <- new_names
# lots of negative values here that need to be turned into NA
skim(wvs)

Data summary
Name	wvs
Number of rows	14003
Number of columns	9
_______________________
Column type frequency:
character	1
numeric	8
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Country	0	1	3	3	0	9	0

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Satisfaction	1	6.88	2.35	-2.00	5	7	9	10	<U+2581><U+2581><U+2585><U+2585><U+2587>
Fin_sat	1	5.92	2.54	-2.00	4	6	8	10	<U+2581><U+2582><U+2587><U+2586><U+2587>
Per_health	1	2.18	0.90	-2.00	2	2	3	5	<U+2581><U+2581><U+2587><U+2583><U+2581>
Sex	1	1.51	0.51	-2.00	1	2	2	2	<U+2581><U+2581><U+2581><U+2587><U+2587>
Immigrant	1	1.03	0.29	-2.00	1	1	1	2	<U+2581><U+2581><U+2581><U+2587><U+2581>
Educ	1	3.97	1.91	-5.00	3	4	6	8	<U+2581><U+2581><U+2583><U+2587><U+2583>
w_weight	1	1.00	0.36	0.13	1	1	1	10	<U+2587><U+2581><U+2581><U+2581><U+2581>
Democratic	1	-846.85	2796.92	-9999.00	6	8	9	10	<U+2581><U+2581><U+2581><U+2581><U+2587>

# Changing them
wvs <- wvs %>%
  mutate_all(~ifelse(. <= -1, NA, .))  #Non-substantive resposne to NA


wvs %>%                  
  count(Country) %>%                           
  mutate(percent = scales::percent(n / sum(n)))  #Review country scale in sample

##   Country    n percent
## 1     DEU 1528 10.912%
## 2     IRQ 1200  8.570%
## 3     JPN 1353  9.662%
## 4     KEN 1266  9.041%
## 5     LBY 1196  8.541%
## 6     MEX 1741 12.433%
## 7     MYS 1313  9.377%
## 8     RUS 1810 12.926%
## 9     USA 2596 18.539%

wvs %>%                  
  count(Satisfaction) %>%                           
  mutate(percent = scales::percent(n / sum(n))) #Review DV

##    Satisfaction    n percent
## 1             1  269  1.921%
## 2             2  260  1.857%
## 3             3  652  4.656%
## 4             4  872  6.227%
## 5             5 1759 12.562%
## 6             6 1407 10.048%
## 7             7 2217 15.832%
## 8             8 2814 20.096%
## 9             9 1565 11.176%
## 10           10 2092 14.940%
## 11           NA   96  0.686%

# ~7 sat overall
wvs %>% 
  summarise(mean_sat_overall = mean(Satisfaction, na.rm = TRUE))

##   mean_sat_overall
## 1         6.939383

# looking at Covariates
wvs %>%                  
  count(Fin_sat) %>%                            
  mutate(percent = scales::percent(n / sum(n)))

##    Fin_sat    n percent
## 1        1  698   4.98%
## 2        2  547   3.91%
## 3        3  994   7.10%
## 4        4 1340   9.57%
## 5        5 2263  16.16%
## 6        6 1759  12.56%
## 7        7 1979  14.13%
## 8        8 2091  14.93%
## 9        9  921   6.58%
## 10      10 1232   8.80%
## 11      NA  179   1.28%

# this has not been flipped yet so thats why they get lower going up
wvs %>%                  
  count(Per_health) %>%                            
  mutate(percent = scales::percent(n / sum(n)))

##   Per_health    n percent
## 1          1 3091  22.07%
## 2          2 6241  44.57%
## 3          3 3711  26.50%
## 4          4  762   5.44%
## 5          5  156   1.11%
## 6         NA   42   0.30%

# flipping personal health
wvs <- wvs %>% 
  mutate(Per_health = case_when(
    Per_health ==1 ~ 5,
    Per_health ==2 ~ 4,
    Per_health ==3 ~ 3,
    Per_health ==4 ~ 2,
    Per_health ==5 ~ 1
  ))
# Making female variable
wvs <- wvs %>%  
  mutate(female = case_when(
    Sex ==1 ~ 0,
    Sex ==2 ~ 1))

# Making immigrant variable (1=yes)
wvs <- wvs %>%  
  mutate(Immigrant_y = case_when(
    Immigrant ==1 ~ 0,
    Immigrant ==2 ~ 1))

# Making education variable

wvs <- wvs %>%  
  mutate(College = case_when(
    Educ ==1 ~ 0,
    Educ ==2 ~ 0,
    Educ == 3 ~ 0,
    Educ == 4 ~ 0,
    Educ == 5 ~ 0,
    Educ == 6 ~ 1,
    Educ == 7 ~ 1,
    Educ == 8 ~ 1))



wvs <- wvs[complete.cases(wvs$Satisfaction, wvs$Democratic, wvs$College, wvs$Fin_sat, wvs$Per_health, wvs$female, wvs$Immigrant_y, wvs$w_weight, wvs$Country), ] #Only complete cases in analysis
skim(wvs)

Data summary
Name	wvs
Number of rows	12254
Number of columns	12
_______________________
Column type frequency:
character	1
numeric	11
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Country	0	1	3	3	0	8	0

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Satisfaction	1	6.87	2.22	1.00	5.00	7	8	10	<U+2581><U+2582><U+2585><U+2587><U+2585>
Fin_sat	1	5.99	2.41	1.00	4.00	6	8	10	<U+2582><U+2585><U+2587><U+2587><U+2583>
Per_health	1	3.77	0.87	1.00	3.00	4	4	5	<U+2581><U+2581><U+2585><U+2587><U+2583>
Sex	1	1.51	0.50	1.00	1.00	2	2	2	<U+2587><U+2581><U+2581><U+2581><U+2587>
Immigrant	1	1.05	0.21	1.00	1.00	1	1	2	<U+2587><U+2581><U+2581><U+2581><U+2581>
Educ	1	4.06	1.81	1.00	3.00	4	6	8	<U+2586><U+2586><U+2587><U+2583><U+2583>
w_weight	1	1.00	0.37	0.13	0.94	1	1	10	<U+2587><U+2581><U+2581><U+2581><U+2581>
Democratic	1	7.83	1.65	5.00	7.00	8	9	10	<U+2586><U+2582><U+2587><U+2582><U+2585>
female	1	0.51	0.50	0.00	0.00	1	1	1	<U+2587><U+2581><U+2581><U+2581><U+2587>
Immigrant_y	1	0.05	0.21	0.00	0.00	0	0	1	<U+2587><U+2581><U+2581><U+2581><U+2581>
College	1	0.26	0.44	0.00	0.00	0	1	1	<U+2587><U+2581><U+2581><U+2581><U+2583>

# 9 countries
clusters <- length(unique(wvs$Country[!is.na(wvs$Country)]))

xtab_fin<-wvs %>%        #Looks at IV Distribution by Country           
  group_by(Country) %>% 
  count(Fin_sat) %>%                          
  mutate(percent = scales::percent(n / sum(n))) 

xtab_health<-wvs %>%        #Looks at IV Distribution by Country           
  group_by(Country) %>% 
  count(Per_health) %>%                          
  mutate(percent = scales::percent(n / sum(n))) 

# Making a weighted model


#wvs_w <- svydesign(ids = ~1,
 #                         weights =~w_weight,
  #                        data = wvs)

4 MLM model

set.seed(123)
###Varying Intercept Model - For Interclass Correlation 

mlm <- lmer(Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y + (1|Country), data = wvs)
summary(mlm)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y +  
##     (1 | Country)
##    Data: wvs
## 
## REML criterion at convergence: 46493.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2467 -0.5551  0.0423  0.5492  4.3926 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Country  (Intercept) 0.5504   0.7419  
##  Residual             2.5875   1.6086  
## Number of obs: 12254, groups:  Country, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.48493    0.27187   9.140
## Fin_sat      0.45861    0.00665  68.966
## Per_health   0.39853    0.01787  22.307
## College     -0.01154    0.03496  -0.330
## female       0.07128    0.02928   2.435
## Immigrant_y -0.05195    0.07191  -0.722
## 
## Correlation of Fixed Effects:
##             (Intr) Fin_st Pr_hlt Colleg female
## Fin_sat     -0.078                            
## Per_health  -0.207 -0.261                     
## College      0.000 -0.076 -0.095              
## female      -0.063  0.007  0.019  0.077       
## Immigrant_y -0.007  0.009 -0.014 -0.026  0.002

icc<-.5504/(.5504+2.5875)
# ~.175 which is pretty high meaning we want to use a grouped model
print(icc)

## [1] 0.1754039

# Country-level estimates
coef_mlm <-round(coef(mlm)$Country, digits = 3)

# Average estimates
round(fixef(mlm), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       2.485       0.459       0.399      -0.012       0.071      -0.052

# Country-level errors
round(ranef(mlm)$Country, digits = 3)

##     (Intercept)
## DEU       0.465
## IRQ      -1.530
## JPN       0.053
## KEN      -0.479
## MEX       0.946
## MYS       0.088
## RUS       0.080
## USA       0.376

# Standard errors
round(se.fixef(mlm), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       0.272       0.007       0.018       0.035       0.029       0.072

## Multilevel Model (MLM) ## Varying Intercept & Varying Slope Model 
mlm1 <- lmer(Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y + (1 + Fin_sat|Country), data = wvs)
summary(mlm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y +  
##     (1 + Fin_sat | Country)
##    Data: wvs
## 
## REML criterion at convergence: 45992.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2630 -0.5298  0.0324  0.5738  4.4154 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Country  (Intercept) 2.8781   1.6965        
##           Fin_sat     0.0397   0.1993   -0.98
##  Residual             2.4790   1.5745        
## Number of obs: 12254, groups:  Country, 8
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  2.359968   0.603983   3.907
## Fin_sat      0.505792   0.070791   7.145
## Per_health   0.388566   0.017538  22.156
## College     -0.007333   0.034261  -0.214
## female       0.064818   0.028689   2.259
## Immigrant_y -0.061259   0.070439  -0.870
## 
## Correlation of Fixed Effects:
##             (Intr) Fin_st Pr_hlt Colleg female
## Fin_sat     -0.970                            
## Per_health  -0.091 -0.025                     
## College     -0.001 -0.006 -0.093              
## female      -0.027  0.000  0.019  0.076       
## Immigrant_y -0.004  0.001 -0.014 -0.027  0.001
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0157253 (tol = 0.002, component 1)

# Country-level estimates
coef_mlm1 <- round(coef(mlm1)$Country, digits = 3)
coef_mlm1$country<-sort(unique(wvs$Country))

round(se.fixef(mlm1), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       0.604       0.071       0.018       0.034       0.029       0.070

df<- data.frame( se.ranef = round(se.ranef(mlm1)$Country, digits = 3), 
                 country_n = as.vector(table(wvs$Country)))

# Average estimates
round(fixef(mlm1), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       2.360       0.506       0.389      -0.007       0.065      -0.061

# Country-level errors
round(ranef(mlm1)$Country, digits = 3)

##     (Intercept) Fin_sat
## DEU       1.186  -0.125
## IRQ      -3.594   0.448
## JPN      -0.342   0.044
## KEN      -0.204  -0.067
## MEX       2.228  -0.210
## MYS      -0.195   0.025
## RUS       0.155  -0.031
## USA       0.767  -0.084

# Standard errors
round(se.fixef(mlm1), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       0.604       0.071       0.018       0.034       0.029       0.070

mlm_h <- lmer(Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y + (1 + Per_health|Country), data = wvs)
summary(mlm1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## Satisfaction ~ Fin_sat + Per_health + College + female + Immigrant_y +  
##     (1 + Fin_sat | Country)
##    Data: wvs
## 
## REML criterion at convergence: 45992.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2630 -0.5298  0.0324  0.5738  4.4154 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Country  (Intercept) 2.8781   1.6965        
##           Fin_sat     0.0397   0.1993   -0.98
##  Residual             2.4790   1.5745        
## Number of obs: 12254, groups:  Country, 8
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  2.359968   0.603983   3.907
## Fin_sat      0.505792   0.070791   7.145
## Per_health   0.388566   0.017538  22.156
## College     -0.007333   0.034261  -0.214
## female       0.064818   0.028689   2.259
## Immigrant_y -0.061259   0.070439  -0.870
## 
## Correlation of Fixed Effects:
##             (Intr) Fin_st Pr_hlt Colleg female
## Fin_sat     -0.970                            
## Per_health  -0.091 -0.025                     
## College     -0.001 -0.006 -0.093              
## female      -0.027  0.000  0.019  0.076       
## Immigrant_y -0.004  0.001 -0.014 -0.027  0.001
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0157253 (tol = 0.002, component 1)

# Country-level estimates
coef_mlm2 <- round(coef(mlm_h)$Country, digits = 3)
coef_mlm2$country<-sort(unique(wvs$Country))

round(se.fixef(mlm_h), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       0.280       0.007       0.052       0.035       0.029       0.072

df1<- data.frame( se.ranef = round(se.ranef(mlm_h)$Country, digits = 3), 
                 country_n = as.vector(table(wvs$Country)))

# Average estimates
round(fixef(mlm_h), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       2.522       0.455       0.396      -0.018       0.065      -0.051

# Country-level errors
round(ranef(mlm_h)$Country, digits = 3)

##     (Intercept) Per_health
## DEU       0.629     -0.044
## IRQ      -0.948     -0.163
## JPN      -0.407      0.130
## KEN      -0.061     -0.105
## MEX       1.465     -0.134
## MYS      -0.497      0.153
## RUS      -0.037      0.032
## USA      -0.144      0.132

# Standard errors
round(se.fixef(mlm_h), digits = 3)

## (Intercept)     Fin_sat  Per_health     College      female Immigrant_y 
##       0.280       0.007       0.052       0.035       0.029       0.072

# Making the table 
msummary(list("VI Only" = mlm,
                "VIVS fin_sat" = mlm1,
              " VIVS per_health" = mlm_h), 
         coef_rename = c("Fin_sat" = "Financial Satisfaction (Ind level)", 
                         "Per_health" = "Personal Health (Ind level)",
                         "College" = "College and above education (Ind level)",
                         "female" = "Female (Ind level)",
                         "Immigrant_y" = "Immgrant (Ind level)",
"SD (Intercept Country)" = "Country RE Intercept SD",
"SD (Fin_sat Country)" = "Country RE Financial Sat SD", 
"Cor (Intercept~Fin_sat Country)" = "Correlation RE Int & Financial Sat"))

	VI Only	VIVS fin_sat	VIVS per_health
(Intercept)	2.485	2.360	2.522
	(0.272)	(0.604)	(0.280)
Financial Satisfaction (Ind level)	0.459	0.506	0.455
	(0.007)	(0.071)	(0.007)
Personal Health (Ind level)	0.399	0.389	0.396
	(0.018)	(0.018)	(0.052)
College and above education (Ind level)	-0.012	-0.007	-0.018
	(0.035)	(0.034)	(0.035)
Female (Ind level)	0.071	0.065	0.065
	(0.029)	(0.029)	(0.029)
Immgrant (Ind level)	-0.052	-0.061	-0.051
	(0.072)	(0.070)	(0.072)
Country RE Intercept SD	0.742	1.696	0.766
Country RE Financial Sat SD		0.199
Correlation RE Int & Financial Sat		-0.978
SD (Per_health Country)			0.137
Cor (Intercept~Per_health Country)			-0.366
SD (Observations)	1.609	1.574	1.605
Num.Obs.	12254	12254	12254
R2 Marg.	0.328	0.372	0.324
R2 Cond.	0.446	0.490	0.448
AIC	46509.9	46012.8	46473.5
BIC	46569.2	46086.9	46547.6
ICC	0.2	0.2	0.2
RMSE	1.61	1.57	1.60

# Table for Intercepts VIVS

kable(coef_mlm1, digits = 7, align = "ccccccc", col.names = c("Intercept", "Financial Satisfaction", "Personal Health", "College", "Female", "Immigrant", "Country"), caption = "VIVS Financial Satisfaction Intercept and Coeffecient Results") %>%
  kable_styling(font_size = 16)

VIVS Financial Satisfaction Intercept and Coeffecient Results
	Intercept	Financial Satisfaction	Personal Health	College	Female	Immigrant	Country
DEU	3.546	0.381	0.389	-0.007	0.065	-0.061	DEU
IRQ	-1.234	0.954	0.389	-0.007	0.065	-0.061	IRQ
JPN	2.018	0.550	0.389	-0.007	0.065	-0.061	JPN
KEN	2.156	0.439	0.389	-0.007	0.065	-0.061	KEN
MEX	4.587	0.295	0.389	-0.007	0.065	-0.061	MEX
MYS	2.165	0.531	0.389	-0.007	0.065	-0.061	MYS
RUS	2.515	0.475	0.389	-0.007	0.065	-0.061	RUS
USA	3.127	0.422	0.389	-0.007	0.065	-0.061	USA

kable(coef_mlm2, digits = 7, align = "ccccccc", col.names = c("Intercept", "Financial Satisfaction", "Personal Health", "College", "Female", "Immigrant", "Country"), caption = "VIVS Personal Health Intercept and Coeffecient Results") %>%
  kable_styling(font_size = 16)

VIVS Personal Health Intercept and Coeffecient Results
	Intercept	Financial Satisfaction	Personal Health	College	Female	Immigrant	Country
DEU	3.151	0.455	0.352	-0.018	0.065	-0.051	DEU
IRQ	1.574	0.455	0.233	-0.018	0.065	-0.051	IRQ
JPN	2.115	0.455	0.526	-0.018	0.065	-0.051	JPN
KEN	2.461	0.455	0.292	-0.018	0.065	-0.051	KEN
MEX	3.987	0.455	0.262	-0.018	0.065	-0.051	MEX
MYS	2.025	0.455	0.549	-0.018	0.065	-0.051	MYS
RUS	2.485	0.455	0.428	-0.018	0.065	-0.051	RUS
USA	2.378	0.455	0.529	-0.018	0.065	-0.051	USA

5 ICC

After running a test on ICC above we notice a score of .17 which means there is clustering inside of our data and that we would want to run a grouped model to mitigate some bias.

6 Interpret models

Model interpret - Looking at the table and graph we notice that IRQ has the highest impact of financial satisfaction on overall satisfaction. The coefficient is .954 which means that financial satisfaction increases overall satisfaction at a large rate there. However, compared to places like Mexico where the coefficient is .295 the impact is not as severe but still positive. The others fall in a pretty close be consistent dropping line. One interesting point is that the two Asian countries JPN and MYS are in close proximity to each other showing a similar culture or impact level of financial satisfaction on overall satisfaction. This somewhat supported my hypothesis that the higher level the economy of a country the lower the impact of financial satisfaction through IRQ being much higher than the US. However, this theory is not quite correct as the US does have a larger economy than MEX however, its impact is larger in the US. Looking into personal health as suspected it matters more in the US however, not as suspected it matters a lot in Asian countries like JPN. I expected that the healthier overall a country is the less impactful this is however, it appears that the culture may cause an impact pushing the importance of that up. But, looking at the US it seems personal health is more important to satisfaction than financial satisfaction. The impact for Fin_sat for the US is .389 while for per_health it is .529 which shows it leaning more towards that being more impactful. We also notice that IRQ is the lowest in this category so it seems if a country is impacted heavily by one factor the other is probably less important in that country.

For intercepts IRQ is the only one negative on the Fin_sat model while having the highest slope, however MEX is the highest intercept but the lowest slope. all other countries roughly fall around an intercept of 2-3 with DEU being a slight exception in that group. Looking at the personal health model we notice roughly the same theory but IRQ is now positive and just slightly behind the 2 mark. It appears they have been squeezed together a little bit closer to that 2 mark overall as well. The slope range is not as large either and more condensed around the .3-.4 mark.

mlm_graph<-cbind(coef_mlm1, df)
mlm_graph<-clean_names(mlm_graph)
names(mlm_graph)

##  [1] "intercept"          "fin_sat"            "per_health"        
##  [4] "college"            "female"             "immigrant_y"       
##  [7] "country"            "se_ranef_intercept" "se_ranef_fin_sat"  
## [10] "country_n"

###Add CIs for the Random Slope Coefficients
mlm_graph <- mlm_graph %>%
  mutate(
    lower_ci_va = fin_sat - 1.96 * se_ranef_fin_sat,
    upper_ci_va = fin_sat + 1.96 * se_ranef_fin_sat
  )

# creating ggplot in desc order
ggplot(mlm_graph, aes(x = reorder(country, -fin_sat), y = fin_sat)) +
  geom_point(size=3) +   theme_minimal(base_size = 13) +
  labs(x = "Country", y = "OLS Coefficient", 
       title = "Impact of Financial statisfaction on overall satisfaction", 
       subtitle= "Results are OLS Coefficients from Varying Intercept, Varying Slope Model") +
  labs(color = "DP Opinion") +
  theme(legend.position = c(0.05, 0.95), legend.justification = c(0, 1)) +
  geom_errorbar(aes(ymin=lower_ci_va, ymax=upper_ci_va),
                linewidth=.3,    # Thinner lines
                width=.2, position = position_dodge(width=.7))+
  scale_y_continuous(limits = c(0, 1.5), breaks = seq(0, 1.5, by = 0.5))

# plot for personal health

mlm_graph1<-cbind(coef_mlm2, df1)
mlm_graph1<-clean_names(mlm_graph1)
names(mlm_graph1)

##  [1] "intercept"           "fin_sat"             "per_health"         
##  [4] "college"             "female"              "immigrant_y"        
##  [7] "country"             "se_ranef_intercept"  "se_ranef_per_health"
## [10] "country_n"

###Add CIs for the Random Slope Coefficients
mlm_graph1 <- mlm_graph1 %>%
  mutate(
    lower_ci_va = per_health - 1.96 * se_ranef_per_health,
    upper_ci_va = per_health + 1.96 * se_ranef_per_health
  )

# creating ggplot in desc order
ggplot(mlm_graph1, aes(x = reorder(country, -per_health), y = per_health)) +
  geom_point(size=3) +   theme_minimal(base_size = 13) +
  labs(x = "Country", y = "OLS Coefficient", 
       title = "Impact of Personal Health on overall satisfaction", 
       subtitle= "Results are OLS Coefficients from Varying Intercept, Varying Slope Model") +
  labs(color = "DP Opinion") +
  theme(legend.position = c(0.05, 0.95), legend.justification = c(0, 1)) +
  geom_errorbar(aes(ymin=lower_ci_va, ymax=upper_ci_va),
                linewidth=.3,    # Thinner lines
                width=.2, position = position_dodge(width=.7))+
  scale_y_continuous(limits = c(0, 1.5), breaks = seq(0, 1.5, by = 0.5))

Part 2

1. Hypothesis

I believe the more democratic a country the higher the satisfaction levels will be. My reasoning is that these countries have a more consistent and normally pretty high financial and personal health satisfaction levels overall. People also have more rights or freedoms in this type of government compared to a dictatorship or communist country meaning the overall satisfaction levels will more than likely be better.

2. estimate model

## Multilevel Model (MLM) ## Varying Intercept & Varying Slope Model - with group level predictor
mlm3 <- lmer(Satisfaction ~ Fin_sat + Per_health + Democratic + College + female + Immigrant_y + (1 + Fin_sat|Country), data = wvs)
summary(mlm3)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Satisfaction ~ Fin_sat + Per_health + Democratic + College +  
##     female + Immigrant_y + (1 + Fin_sat | Country)
##    Data: wvs
## 
## REML criterion at convergence: 45996
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2631 -0.5299  0.0320  0.5729  4.4148 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Country  (Intercept) 2.94260  1.715         
##           Fin_sat     0.03961  0.199    -0.97
##  Residual             2.47896  1.574         
## Number of obs: 12254, groups:  Country, 8
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  2.529699   0.885983   2.855
## Fin_sat      0.505766   0.070709   7.153
## Per_health   0.388632   0.017539  22.159
## Democratic  -0.021578   0.081661  -0.264
## College     -0.007372   0.034263  -0.215
## female       0.064843   0.028689   2.260
## Immigrant_y -0.061311   0.070443  -0.870
## 
## Correlation of Fixed Effects:
##             (Intr) Fin_st Pr_hlt Dmcrtc Colleg female
## Fin_sat     -0.664                                   
## Per_health  -0.062 -0.025                            
## Democratic  -0.725 -0.004 -0.001                     
## College     -0.003 -0.006 -0.093  0.003              
## female      -0.017  0.000  0.019 -0.002  0.076       
## Immigrant_y  0.002  0.001 -0.014 -0.006 -0.027  0.001
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0625765 (tol = 0.002, component 1)

# Country-level estimates
coef_mlm3 <- round(coef(mlm3)$Country, digits = 3)
coef_mlm3$country<-sort(unique(wvs$Country))

# Average estimates
round(fixef(mlm3), digits = 3)

## (Intercept)     Fin_sat  Per_health  Democratic     College      female 
##       2.530       0.506       0.389      -0.022      -0.007       0.065 
## Immigrant_y 
##      -0.061

# Country-level errors
round(ranef(mlm3)$Country, digits = 3)

##     (Intercept) Fin_sat
## DEU       1.232  -0.125
## IRQ      -3.636   0.449
## JPN      -0.295   0.044
## KEN      -0.178  -0.068
## MEX       2.230  -0.210
## MYS      -0.215   0.025
## RUS       0.092  -0.031
## USA       0.769  -0.084

# Standard errors
round(se.fixef(mlm3), digits = 3)

## (Intercept)     Fin_sat  Per_health  Democratic     College      female 
##       0.886       0.071       0.018       0.082       0.034       0.029 
## Immigrant_y 
##       0.070

df2<- data.frame( se.ranef = round(se.ranef(mlm3)$Country, digits = 3), 
                 country_n = as.vector(table(wvs$Country)))



## Multilevel Model (MLM) ## Varying Intercept & Varying Slope Model - with group level predictor
mlm4 <- lmer(Satisfaction ~ Fin_sat + Per_health + Democratic + College + female + Immigrant_y + (1 + Per_health|Country), data = wvs)
summary(mlm4)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Satisfaction ~ Fin_sat + Per_health + Democratic + College +  
##     female + Immigrant_y + (1 + Per_health | Country)
##    Data: wvs
## 
## REML criterion at convergence: 46454.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.3435 -0.5587  0.0442  0.5524  4.1878 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Country  (Intercept) 0.60956  0.7807        
##           Per_health  0.01881  0.1371   -0.39
##  Residual             2.57584  1.6049        
## Number of obs: 12254, groups:  Country, 8
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  1.403927   1.225897   1.145
## Fin_sat      0.455313   0.006654  68.425
## Per_health   0.396389   0.051773   7.656
## Democratic   0.142024   0.151456   0.938
## College     -0.018026   0.034968  -0.515
## female       0.065387   0.029249   2.235
## Immigrant_y -0.050780   0.071834  -0.707
## 
## Correlation of Fixed Effects:
##             (Intr) Fin_st Pr_hlt Dmcrtc Colleg female
## Fin_sat     -0.008                                   
## Per_health  -0.098 -0.088                            
## Democratic  -0.972 -0.010 -0.001                     
## College     -0.002 -0.073 -0.028  0.001              
## female      -0.018  0.008  0.006  0.004  0.077       
## Immigrant_y -0.001  0.009 -0.004 -0.001 -0.025  0.003

coef_mlm4 <- round(coef(mlm4)$Country, digits = 3)
coef_mlm4$country<-sort(unique(wvs$Country))
# Average estimates
round(fixef(mlm4), digits = 3)

## (Intercept)     Fin_sat  Per_health  Democratic     College      female 
##       1.404       0.455       0.396       0.142      -0.018       0.065 
## Immigrant_y 
##      -0.051

# Country-level errors
round(ranef(mlm4)$Country, digits = 3)

##     (Intercept) Per_health
## DEU       0.336     -0.046
## IRQ      -0.692     -0.161
## JPN      -0.703      0.128
## KEN      -0.214     -0.106
## MEX       1.452     -0.135
## MYS      -0.380      0.154
## RUS       0.362      0.034
## USA      -0.162      0.133

# Standard errors
round(se.fixef(mlm4), digits = 3)

## (Intercept)     Fin_sat  Per_health  Democratic     College      female 
##       1.226       0.007       0.052       0.151       0.035       0.029 
## Immigrant_y 
##       0.072

df3<- data.frame( se.ranef = round(se.ranef(mlm4)$Country, digits = 3), 
                 country_n = as.vector(table(wvs$Country)))

3 interpret

Looks like my group level variable was not significant. Tested the graphs to compare the difference between non group and group and noticed that there was no real difference thus, I left those out as there was no significance. One piece that was noticed that for personal health the democratic number was higher (.142) indicating democratic countries tend to value this more than financial satisfaction as the number was negative in that scale (-.022). However, since it was not significant I do not have anything to suppor this claim.

This looks very similar to the last models above however, there is a small increase in the intercept overall by ~.1-.2 and for some slopes there is a minor increase like IRQ increased from .954 to .955. however, the results are not impactful enough to make any new claims. With this in mind I would want to use a different group level variable to test for something more significant which would require a new project and tests.

Some patterns were noticed such as Asian countries scored very similarly showing a consistent level of satisfaction impact across them. USA and MEX despite being neighbors moved very differently with the USA having a higher impact on both scales especially, personal health. It also appears that countries who value financial satisfaction more tend to not value personal health as much. The group that falls into the middle range stays consistent on both which shows that these countries may have a higher standard of living as they value both to be satisfied overall.

# Making the table 
msummary(list("VIVS fin_sat + Grp level" = mlm3,
              "VIVS per_health + Grp level" = mlm4), 
         coef_rename = c("Fin_sat" = "Financial Satisfaction (Ind level)", 
                         "Per_health" = "Personal Health (Ind level)",
                         "Democratic" = "Democratic (Group level)",
                         "College" = "College and above education (Ind level)",
                         "female" = "Female (Ind level)",
                         "Immigrant_y" = "Immgrant (Ind level)",
"SD (Intercept Country)" = "Country RE Intercept SD",
"SD (Fin_sat Country)" = "Country RE Financial Sat SD", 
"Cor (Intercept~Fin_sat Country)" = "Correlation RE Int & Financial Sat"))

	VIVS fin_sat + Grp level	VIVS per_health + Grp level
(Intercept)	2.530	1.404
	(0.886)	(1.226)
Financial Satisfaction (Ind level)	0.506	0.455
	(0.071)	(0.007)
Personal Health (Ind level)	0.389	0.396
	(0.018)	(0.052)
Democratic (Group level)	-0.022	0.142
	(0.082)	(0.151)
College and above education (Ind level)	-0.007	-0.018
	(0.034)	(0.035)
Female (Ind level)	0.065	0.065
	(0.029)	(0.029)
Immgrant (Ind level)	-0.061	-0.051
	(0.070)	(0.072)
Country RE Intercept SD	1.715	0.781
Country RE Financial Sat SD	0.199
Correlation RE Int & Financial Sat	-0.975
SD (Per_health Country)		0.137
Cor (Intercept~Per_health Country)		-0.387
SD (Observations)	1.574	1.605
Num.Obs.	12254	12254
R2 Marg.	0.367	0.345
R2 Cond.	0.492	0.465
AIC	46018.0	46476.6
BIC	46099.5	46558.1
ICC	0.2	0.2
RMSE	1.57	1.60

kable(coef_mlm3, digits = 7, align = "ccccccc", col.names = c("Intercept", "Financial Satisfaction", "Personal Health", "Democratic", "College", "Female", "Immigrant", "Country"), caption = "VIVS Financial Satisfaction Intercept and Coeffecient Results with Group level") %>%
  kable_styling(font_size = 16)

VIVS Financial Satisfaction Intercept and Coeffecient Results with Group level
	Intercept	Financial Satisfaction	Personal Health	Democratic	College	Female	Immigrant	Country
DEU	3.762	0.381	0.389	-0.022	-0.007	0.065	-0.061	DEU
IRQ	-1.106	0.955	0.389	-0.022	-0.007	0.065	-0.061	IRQ
JPN	2.235	0.550	0.389	-0.022	-0.007	0.065	-0.061	JPN
KEN	2.352	0.438	0.389	-0.022	-0.007	0.065	-0.061	KEN
MEX	4.760	0.296	0.389	-0.022	-0.007	0.065	-0.061	MEX
MYS	2.315	0.531	0.389	-0.022	-0.007	0.065	-0.061	MYS
RUS	2.621	0.475	0.389	-0.022	-0.007	0.065	-0.061	RUS
USA	3.299	0.422	0.389	-0.022	-0.007	0.065	-0.061	USA

kable(coef_mlm4, digits = 7, align = "ccccccc", col.names = c("Intercept", "Financial Satisfaction", "Personal Health", "Democratic", "College", "Female", "Immigrant", "Country"), caption = "VIVS Personal Health Intercept and Coeffecient Results with Group level") %>%
  kable_styling(font_size = 16)

VIVS Personal Health Intercept and Coeffecient Results with Group level
	Intercept	Financial Satisfaction	Personal Health	Democratic	College	Female	Immigrant	Country
DEU	1.740	0.455	0.350	0.142	-0.018	0.065	-0.051	DEU
IRQ	0.712	0.455	0.235	0.142	-0.018	0.065	-0.051	IRQ
JPN	0.701	0.455	0.525	0.142	-0.018	0.065	-0.051	JPN
KEN	1.190	0.455	0.290	0.142	-0.018	0.065	-0.051	KEN
MEX	2.856	0.455	0.261	0.142	-0.018	0.065	-0.051	MEX
MYS	1.024	0.455	0.551	0.142	-0.018	0.065	-0.051	MYS
RUS	1.766	0.455	0.430	0.142	-0.018	0.065	-0.051	RUS
USA	1.242	0.455	0.529	0.142	-0.018	0.065	-0.051	USA

Homework 4

Ethan Campbell

2023-11-03