1 Input:

#Call packages
pacman::p_load(rio,
               here,
               janitor,
               tidyverse,
               dplyr,
               magrittr,
               ggplot2,
               purrr,
               lubridate,
               mice,
               plotly)

#Data:
library(readxl)
df<- read_excel("C:/Users/locca/Downloads/Data_Rác thải điện tử.xlsx", 
    sheet = "Đã mã hóa")
View(df)

2 Output:

2.1 Prepare the dataset:

df_new <-df %>% 
  mutate(SAFETY = (SAFETY1+SAFETY2+SAFETY3+SAFETY4)/4,
         OFFER = (OFFER1+OFFER2+OFFER3+OFFER4)/4,
         AWARENESS = (AWARENESS1+AWARENESS2+AWARENESS3+AWARENESS4+SUPPLYINPUT1+SUPPLYINPUT2+SUPPLYINPUT3+SUPPLYINPUT4)/8,
         REVERSE_DECISION = (REVERSE_DECISION1+REVERSE_DECISION2+REVERSE_DECISION3+REVERSE_DECISION4)/4)


m<-df %>% select(contains(c("SAFETY",
                            "OFFER",
                            "SUPPLY",
                            "AWARE",
                            "REVERSE")))

2.2 Some preliminary analysis data:

2.2.1 Likert chart:

#===========================
# Simulate data for ploting 
#===========================

responses <- c("Hoàn toàn không đồng ý", 
               "Không đồng ý", 
               "Bình thường", 
               "Đồng ý", 
               "Hoàn toàn đồng ý")

size <- nrow(df_new)

brand <- as.vector(unlist(purrr::map(.x = unique(names(m)),
                                     .f = ~rep(.x,size)))
)

prob = c(sum(df_new$SAFETY1 == 1)/size,
         sum(df_new$SAFETY1 == 2)/size,
         sum(df_new$SAFETY1 == 3)/size,
         sum(df_new$SAFETY1 == 4)/size,
         sum(df_new$SAFETY1 == 5)/size)

cus_res <- c(
      ### For SAFETY variable:
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SAFETY1 == 1)/size,
                             sum(df_new$SAFETY1 == 2)/size,
                              sum(df_new$SAFETY1 == 3)/size,
                              sum(df_new$SAFETY1 == 4)/size,
                              sum(df_new$SAFETY1 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SAFETY2 == 1)/size,
                             sum(df_new$SAFETY2 == 2)/size,
                              sum(df_new$SAFETY2 == 3)/size,
                              sum(df_new$SAFETY2 == 4)/size,
                              sum(df_new$SAFETY2 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SAFETY3 == 1)/size,
                             sum(df_new$SAFETY3 == 2)/size,
                              sum(df_new$SAFETY3 == 3)/size,
                              sum(df_new$SAFETY3 == 4)/size,
                              sum(df_new$SAFETY3 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SAFETY4 == 1)/size,
                             sum(df_new$SAFETY4 == 2)/size,
                              sum(df_new$SAFETY4 == 3)/size,
                              sum(df_new$SAFETY4 == 4)/size,
                              sum(df_new$SAFETY4 == 5)/size)),

     ### For OFFER variable:
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$OFFER1 == 1)/size,
                             sum(df_new$OFFER1 == 2)/size,
                              sum(df_new$OFFER1 == 3)/size,
                              sum(df_new$OFFER1 == 4)/size,
                              sum(df_new$OFFER1 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$OFFER2 == 1)/size,
                             sum(df_new$OFFER2 == 2)/size,
                              sum(df_new$OFFER2 == 3)/size,
                              sum(df_new$OFFER2 == 4)/size,
                              sum(df_new$OFFER2 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$OFFER3 == 1)/size,
                             sum(df_new$OFFER3 == 2)/size,
                              sum(df_new$OFFER3 == 3)/size,
                              sum(df_new$OFFER3 == 4)/size,
                              sum(df_new$OFFER3 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$OFFER4 == 1)/size,
                             sum(df_new$OFFER4 == 2)/size,
                              sum(df_new$OFFER4 == 3)/size,
                              sum(df_new$OFFER4 == 4)/size,
                              sum(df_new$OFFER4 == 5)/size)),
      ### For SUPPLYINPUT variable:
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SUPPLYINPUT1 == 1)/size,
                             sum(df_new$SUPPLYINPUT1 == 2)/size,
                              sum(df_new$SUPPLYINPUT1 == 3)/size,
                              sum(df_new$SUPPLYINPUT1 == 4)/size,
                              sum(df_new$SUPPLYINPUT1 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SUPPLYINPUT2 == 1)/size,
                             sum(df_new$SUPPLYINPUT2 == 2)/size,
                              sum(df_new$SUPPLYINPUT2 == 3)/size,
                              sum(df_new$SUPPLYINPUT2 == 4)/size,
                              sum(df_new$SUPPLYINPUT2 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SUPPLYINPUT3 == 1)/size,
                             sum(df_new$SUPPLYINPUT3 == 2)/size,
                              sum(df_new$SUPPLYINPUT3 == 3)/size,
                              sum(df_new$SUPPLYINPUT3 == 4)/size,
                              sum(df_new$SUPPLYINPUT3 == 5)/size)),
              sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$SUPPLYINPUT4 == 1)/size,
                             sum(df_new$SUPPLYINPUT4 == 2)/size,
                              sum(df_new$SUPPLYINPUT4 == 3)/size,
                              sum(df_new$SUPPLYINPUT4 == 4)/size,
                              sum(df_new$SUPPLYINPUT4 == 5)/size)),
       ### For AWARENESS variable:
            sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$AWARENESS1 == 1)/size,
                             sum(df_new$AWARENESS1 == 2)/size,
                              sum(df_new$AWARENESS1 == 3)/size,
                              sum(df_new$AWARENESS1 == 4)/size,
                              sum(df_new$AWARENESS1 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$AWARENESS2 == 1)/size,
                             sum(df_new$AWARENESS2 == 2)/size,
                              sum(df_new$AWARENESS2 == 3)/size,
                              sum(df_new$AWARENESS2 == 4)/size,
                              sum(df_new$AWARENESS2 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$AWARENESS3 == 1)/size,
                             sum(df_new$AWARENESS3 == 2)/size,
                              sum(df_new$AWARENESS3 == 3)/size,
                              sum(df_new$AWARENESS3 == 4)/size,
                              sum(df_new$AWARENESS3 == 5)/size)),
              sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$AWARENESS4 == 1)/size,
                             sum(df_new$AWARENESS4 == 2)/size,
                              sum(df_new$AWARENESS4 == 3)/size,
                              sum(df_new$AWARENESS4 == 4)/size,
                              sum(df_new$AWARENESS4 == 5)/size)),

      ### For AWARENESS variable:
            sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$REVERSE_DECISION1 == 1)/size,
                             sum(df_new$REVERSE_DECISION1 == 2)/size,
                              sum(df_new$REVERSE_DECISION1 == 3)/size,
                              sum(df_new$REVERSE_DECISION1 == 4)/size,
                              sum(df_new$REVERSE_DECISION1 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$REVERSE_DECISION2 == 1)/size,
                             sum(df_new$REVERSE_DECISION2 == 2)/size,
                              sum(df_new$REVERSE_DECISION2 == 3)/size,
                              sum(df_new$REVERSE_DECISION2 == 4)/size,
                              sum(df_new$REVERSE_DECISION2 == 5)/size)),
             sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$REVERSE_DECISION3 == 1)/size,
                             sum(df_new$REVERSE_DECISION3 == 2)/size,
                              sum(df_new$REVERSE_DECISION3 == 3)/size,
                              sum(df_new$REVERSE_DECISION3 == 4)/size,
                              sum(df_new$REVERSE_DECISION3 == 5)/size)),
              sample(responses, 
                    size = size, 
                    replace = TRUE, 
                    prob = c(sum(df_new$REVERSE_DECISION4 == 1)/size,
                             sum(df_new$REVERSE_DECISION4 == 2)/size,
                              sum(df_new$REVERSE_DECISION4 == 3)/size,
                              sum(df_new$REVERSE_DECISION4 == 4)/size,
                              sum(df_new$REVERSE_DECISION4 == 5)/size))
)
#===========================
# Prepare data for ploting
#===========================

data_cus <- tibble(brand = brand, cus_res = cus_res)

data_cus %>% 
  group_by(brand, cus_res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(brand) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 0)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting

df_for_ploting %>% 
  filter(cus_res == responses[5]) %>% 
  arrange(percent) %>% 
  pull(brand) -> order_y

df_for_ploting %>% 
  mutate(brand = factor(brand, levels = order_y)) %>% 
  mutate(cus_res = factor(cus_res, levels = responses[5:1])) -> df_odered

#---------------------
# Data Vis: Version 1
#---------------------

## Select Font for the graph: 

col_dislike_alot <- "#e36c33"

col_dislike <- "#edad88"

col_neutral <- "#c7cdd1"

col_like <- "#829cb2"

col_like_alot <- "#3e6487"

  
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
my_font <- "Roboto Condensed"
font_add_google(name = my_font, family = my_font)

showtext_auto()

theme_set(theme_minimal())

df_odered %>% 
  ggplot(aes(y = brand, x = percent, fill = cus_res)) + 
  geom_col(width = 0.8, position = "fill") + 
  theme(legend.position = "top") + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) +
  scale_fill_manual(values = c('Hoàn toàn đồng ý' = col_like_alot,
                               'Đồng ý'= col_like, 
                               'Bình thường' = col_neutral,
                               'Không đồng ý'  = col_dislike, 
                               'Hoàn toàn không đồng ý' = col_dislike_alot),
                   guide = guide_legend(reverse = TRUE)) +
  theme(text = element_text(family = my_font)) + 
  theme(legend.title = element_blank()) + 
  theme(legend.text = element_text(size = 11, family = my_font, color = "grey10")) + 
  theme(legend.key.height = unit(0.35, "cm")) +  
  theme(legend.key.width = unit(0.27*3, "cm")) + 
  theme(axis.title = element_blank()) + 
  theme(panel.grid.minor = element_blank()) + 
  theme(panel.grid.major.x = element_line(color = "grey70", size = 0.8)) + 
  scale_x_continuous(expand = c(0, 0), labels = paste0(seq(0, 100, 25), "%")) + 
  scale_y_discrete(expand = c(0, 0)) + 
  theme(axis.text = element_text(color = "grey30", size = 11, family = my_font)) + 
  theme(plot.title = ggtext::element_markdown(size = 16, face = "bold")) + 
  theme(plot.caption = element_text(size = 10.5, color = "grey40", vjust = -1.5, hjust = 0)) + 
  theme(plot.subtitle = element_text(size = 11.5, color = "grey10")) + 
  theme(plot.title.position = "plot") +  
  theme(plot.caption.position = "plot")->gg1
gg1

2.2.2 The distribution plot of likert scales:

m_new <-m %>% pivot_longer(cols = everything(),
                           names_to = "variable",
                           values_to = "value")

ggplot()+
  ggridges::geom_density_ridges(data  = m_new, 
                                aes(x  = value,
                                    y  = as.factor(variable),
                                    fill = as.factor(variable),
                                    height = ..density..), 
                                scale = 3, 
                                alpha = .6) +
  scale_x_continuous(limits = c(0,6))+
  geom_vline(xintercept = 3, col = "red",size = 2)+
  geom_point(data = data.frame(list(variable = unique(m_new$variable), value = colMeans(m))),
             aes(x = value, 
                 y = as.factor(variable)),
             size = 3, 
             col  = "blue")+
  theme(legend.position="none")+
  viridis::scale_fill_viridis(discrete = TRUE)+
  labs(x        = "Gía trị Likert",
       y        = "Biến trong thang đo")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## Picking joint bandwidth of 0.287

2.2.3 The Correlation coefficients analyst:

library(corrplot)
## corrplot 0.92 loaded
library(grDevices)
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# matrix of the p-value of the correlation
p.mat <- cor.mtest(df_new %>% 
                     select(c("SAFETY",
                    "OFFER",
                    "AWARENESS",
                    "REVERSE_DECISION")))

col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(cor(df_new %>% 
                     select(c("SAFETY",
                    "OFFER",
                    "AWARENESS",
                    "REVERSE_DECISION"))), method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=45, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat, sig.level = 0.01, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE 
)

2.3 Linear regression analyst:

2.3.1 First model:

#Convert the RETURN_EW into factor class:
df_new$RETURN_EW<-as.factor(df_new$RETURN_EW)
class(df_new$RETURN_EW)   ##Check again
## [1] "factor"
#Assume X3 is a moderation variable:
library(jtools)
summ(reg2<-lm(REVERSE_DECISION ~ SAFETY+ AWARENESS + OFFER + RETURN_EW,
         data = df_new))
Observations 208
Dependent variable REVERSE_DECISION
Type OLS linear regression
F(4,203) 57.20
0.53
Adj. R² 0.52
Est. S.E. t val. p
(Intercept) 0.17 0.23 0.77 0.44
SAFETY 0.15 0.08 2.04 0.04
AWARENESS 0.50 0.08 6.18 0.00
OFFER 0.20 0.07 3.00 0.00
RETURN_EW1 0.21 0.10 2.01 0.05
Standard errors: OLS
#Kiểm định phương sai:
knitr::kable(anova(reg2), digits = 3)
Df Sum Sq Mean Sq F value Pr(>F)
SAFETY 1 71.549 71.549 155.183 0.000
AWARENESS 1 28.455 28.455 61.716 0.000
OFFER 1 3.618 3.618 7.848 0.006
RETURN_EW 1 1.862 1.862 4.038 0.046
Residuals 203 93.595 0.461 NA NA

2.3.2 Some output plots:

library(jtools)
interactions::interact_plot(reg2, 
                            pred = "AWARENESS", 
                            modx = "RETURN_EW")

effect_plot(reg2, 
            pred = AWARENESS, 
            interval = TRUE, 
            plot.points = TRUE, 
            jitter = 0.05)

plot_summs(reg2)
## Loading required namespace: broom.mixed

2.4 Check model assumptions:

2.4.1 Test the variance hypothesis:

#Kiểm định phương sai:
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
ncvTest(reg2)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 8.520334, Df = 1, p = 0.003512
spreadLevelPlot(reg2)

## 
## Suggested power transformation:  0.4295601
## -->Vì p < 0 nên giả định phương sai không đổi bị phủ định. (Thường kết quả kiểm định này khá nhạy)

2.4.2 Test for linearity hypothesis:

# Kiểm định tính tuyến tính
## Gỉa định mối tương quan là tuyến tính: 
crPlots(reg2,
        col.lines = c("blue", "red"),
        ylab = "Component + Residual")

2.4.3 Test for Autocorrelated Errors hypothesis:

## Kiểm định sự độc lập giữa các biến:
durbinWatsonTest(reg2)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.02950111      2.052554   0.694
##  Alternative hypothesis: rho != 0
### Vì p > 0 nên chấp nhận giả thuyết H0: Không có hiện tượng tự tương quan trong mô hình.

2.4.4 Test for the Outlier hypothesis

## Kiểm định giá trị outlier:
outlierTest(reg2)
##      rstudent unadjusted p-value Bonferroni p
## 170 -5.350007         2.3731e-07    4.936e-05
library(car)
leveragePlots(reg2,
              lwd = 3)

##Các điểm có label là những điểm outliers.

2.4.5 Test for importance affect in model

#Evaluate the relative importance:
##Use traditional method:
library(relaimpo)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:car':
## 
##     logit
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
## Loading required package: mitools
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
metrics <-calc.relimp(reg2,
                      type = c("lmg"))

(x<-data.frame(name = c("SAFETY","OFFER","AWARENESS","RETURN_EW"),
              value = metrics@lmg) %>% 
  mutate(name = fct_reorder(name, 
                            value)) %>%
  ggplot( aes(x=name, 
              y=value)) +
    geom_bar(stat="identity", 
             fill="#f68060", 
             alpha=.6, 
             width=.4) +
    coord_flip() +
    xlab("") +
    theme_bw())

##Using bootstrap:
boot<-boot.relimp(reg2,
                  b = 1000,
                  type = c("lmg"),
                  fixed = F)
booteval.relimp(boot,
                typesel = c("lmg"),
                level = 0.9,
                bty = "perc",
                nodiff = T)
## Response variable: REVERSE_DECISION 
## Total response variance: 0.9617344 
## Analysis based on 208 observations 
## 
## 4 Regressors: 
## SAFETY AWARENESS OFFER RETURN_EW 
## Proportion of variance explained by model: 52.99%
## Metrics are not normalized (rela=FALSE). 
## 
## Relative importance metrics: 
## 
##                 lmg
## SAFETY    0.1446348
## AWARENESS 0.2451139
## OFFER     0.1264465
## RETURN_EW 0.0136629
## 
## Average coefficients for different model sizes: 
## 
##                  1X       2Xs       3Xs       4Xs
## SAFETY    0.6271583 0.4278895 0.2673299 0.1532378
## AWARENESS 0.7448869 0.6437029 0.5611999 0.5006647
## OFFER     0.5793144 0.3777011 0.2528506 0.1964011
## RETURN_EW 0.3129917 0.2163038 0.1989478 0.2058563
## 
##  
##  Confidence interval information ( 1000 bootstrap replicates, bty= perc ): 
## Relative Contributions with confidence intervals: 
##  
##                               Lower  Upper
##               percentage 0.9  0.9    0.9   
## SAFETY.lmg    0.1446     _BC_ 0.0910 0.1963
## AWARENESS.lmg 0.2451     A___ 0.1707 0.3223
## OFFER.lmg     0.1264     _BC_ 0.0782 0.1821
## RETURN_EW.lmg 0.0137     ___D 0.0027 0.0383
## 
## Letters indicate the ranks covered by bootstrap CIs. 
## (Rank bootstrap confidence intervals always obtained by percentile method) 
## CAUTION: Bootstrap confidence intervals can be somewhat liberal.

2.5 Mediation analyst approach:

2.5.1 Modeling with {lavaan} package:

#Assume X3 is a mediation variable:
##Define the model:
library(lavaan)
## This is lavaan 0.6-14
## lavaan is FREE software! Please report any bugs.
mediation_model <- '
  # Direct effects
  AWARENESS ~ a*SAFETY  +  b*OFFER
  REVERSE_DECISION ~ c*AWARENESS + d*SAFETY + e*OFFER

  # Indirect effect:
  indirect := c*(a + b)

  # Total effect:
  total := d + e + indirect'

##Estimate the mediation model
reg3 <- sem(mediation_model, 
            data = df_new)
##Summarize the results
summary(reg3, 
        standardized = TRUE, 
        fit.measures = TRUE)
## lavaan 0.6.14 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                           208
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               338.836
##   Degrees of freedom                                 5
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -396.821
##   Loglikelihood unrestricted model (H1)       -396.821
##                                                       
##   Akaike (AIC)                                 807.642
##   Bayesian (BIC)                               831.005
##   Sample-size adjusted Bayesian (SABIC)        808.825
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                      Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   AWARENESS ~                                                             
##     SAFETY     (a)      0.525    0.051   10.257    0.000    0.525    0.539
##     OFFER      (b)      0.321    0.051    6.269    0.000    0.321    0.330
##   REVERSE_DECISION ~                                                      
##     AWARENESS  (c)      0.492    0.081    6.102    0.000    0.492    0.458
##     SAFETY     (d)      0.186    0.073    2.539    0.011    0.186    0.177
##     OFFER      (e)      0.182    0.065    2.808    0.005    0.182    0.174
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .AWARENESS         0.339    0.033   10.198    0.000    0.339    0.409
##    .REVERSE_DECISI    0.459    0.045   10.198    0.000    0.459    0.479
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     indirect          0.417    0.072    5.750    0.000    0.417    0.398
##     total             0.785    0.062   12.613    0.000    0.785    0.750

2.5.2 Modeling with {mediation} package:

##Or we can use Preacher & Hayes (2004) approach:
###Using meidation package:
library(mediation)
## Warning: package 'mediation' was built under R version 4.2.3
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
fitM <- lm(AWARENESS ~ SAFETY + OFFER, 
           data=df_new) 
fitY <- lm(REVERSE_DECISION ~ AWARENESS + SAFETY + OFFER, 
           data=df_new)
fitMed <- mediate(fitM, 
                  fitY, 
                  sims = 1000, 
                  boot = TRUE,
                  treat= "SAFETY", 
                  mediator="AWARENESS")
## Running nonparametric bootstrap
summary(fitMed)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             0.2585       0.1312         0.41  <2e-16 ***
## ADE              0.1856       0.0289         0.35   0.026 *  
## Total Effect     0.4441       0.3168         0.58  <2e-16 ***
## Prop. Mediated   0.5821       0.3145         0.92  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 208 
## 
## 
## Simulations: 1000
plot(fitMed)

The mediate function gives us:

  • Average Direct Effects (ADE) means direct effect of SAFETY and OFFER on REVERSE_DECISION without AWARENESS.

  • Combined indirect and direct effects (Total Effect) means total effect of SAFETY and OFFER on REVERSE_DECISION plus the indirect effect of AWARENESS.

  • Average Causal Mediation Effects (ACME) = Total Effect - ADE

  • The ratio of these estimates (Prop. Mediated).

The ACME here is the indirect effect of M (total effect - direct effect) and thus this value tells us if our mediation effect is significant. (Source: Alyssa Blair)