#1 - Import Dataset

# Load the speed-dating data from the working directory.
data <- read.csv(file = "speeddate_mod.csv")

#1A - Remove rows containing missing values (NA) and use the complete cases for the remaining two questions.

# Keep only the complete cases: na.omit() drops every row that has an NA in
# any column.
data_1 <- na.omit(object = data)

#1B - Conduct a moderation analysis to investigate whether perceived intelligence moderates the effect of perceived attractiveness on partner’s likability ratings? Print out the output from lavaan. Is the interaction between attractiveness and intelligence significant?

#Independent variable (X) - perceived attractiveness
#Dependent variable (y) - likeability
#Moderator (Z) - intelligence

# Moderation model on the raw (uncentered) scores: Y ~ X + Z + X:Z.
# The X:Z product term carries the test of moderation.
mod_1 <- lm(
  other_like ~ other_attr + other_intel + other_attr:other_intel,
  data = data_1
)

# Coefficient table and overall fit statistics.
summary(mod_1)
## 
## Call:
## lm(formula = other_like ~ other_attr + other_intel + other_attr:other_intel, 
##     data = data_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9241 -0.7147  0.0759  0.7249  6.3658 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.791311   0.446842  -1.771   0.0768 .  
## other_attr              0.657284   0.076975   8.539  < 2e-16 ***
## other_intel             0.488173   0.059996   8.137 8.42e-16 ***
## other_attr:other_intel -0.017145   0.009832  -1.744   0.0814 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.248 on 1505 degrees of freedom
## Multiple R-squared:  0.5354, Adjusted R-squared:  0.5345 
## F-statistic: 578.2 on 3 and 1505 DF,  p-value: < 2.2e-16
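#Intercept: the predicted likeability when attractiveness and intelligence are both 0 is -0.79. Both predictors are continuous ratings, so there is no "reference group"; if 0 falls outside the range of the rating scales, this value is an extrapolation rather than a meaningful baseline.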

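#Significant conditional (simple) effect of attractiveness: when intelligence = 0, each 1-unit increase in attractiveness is associated with a 0.657 increase in likeability. Because the model contains the interaction term, this is the effect at intelligence = 0 rather than a main effect averaged over intelligence.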

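#Significant conditional (simple) effect of intelligence: when attractiveness = 0, each 1-unit increase in intelligence is associated with a 0.488 increase in likeability. Because the model contains the interaction term, this is the effect at attractiveness = 0 rather than a main effect averaged over attractiveness.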

#NON-SIGNIFICANT interaction for attractiveness & intelligence: The interaction between attractiveness and intelligence was non-significant at the .05 level (b = -0.017, p = 0.081). This indicates that intelligence did not significantly moderate the effect of attractiveness on likeability; the effect of attractiveness on likeability did not differ reliably based on level of intelligence.

#1C- Redo the moderation analysis after centering other_attr and other_intel. Print out the output from lavaan. Does centering change the significance of the interaction between attractiveness and intelligence?

# Mean-center both predictors on the complete-case sample (data_1), i.e. on
# the same rows the model is fitted to, so each centered variable has mean
# zero in the analysis data. The means were previously taken from the raw
# `data`, which still contains the rows na.omit() dropped, so the centering
# value could differ from the mean of the rows actually analysed.
# data_1 has no missing values, so na.rm is not needed.
data_1$other_attr_center <- data_1$other_attr - mean(data_1$other_attr)
data_1$other_intel_center <- data_1$other_intel - mean(data_1$other_intel)

# Same moderation model as mod_1, refitted on the centered predictors.
# Shifting the predictors by constants changes the intercept and the
# lower-order slopes but leaves the interaction term (estimate, SE, t, p)
# unchanged.
mod_2 <- lm(
  other_like ~ other_attr_center + other_intel_center +
    other_attr_center:other_intel_center,
  data = data_1
)

# NOTE: any output pasted below from an earlier run used the raw-data means
# for centering; after re-running, the intercept and lower-order slopes may
# differ slightly, while the interaction row stays the same.
summary(mod_2)
## 
## Call:
## lm(formula = other_like ~ other_attr_center + other_intel_center + 
##     other_attr_center:other_intel_center, data = data_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9241 -0.7147  0.0759  0.7249  6.3658 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           6.213392   0.033692 184.417   <2e-16 ***
## other_attr_center                     0.528357   0.017922  29.480   <2e-16 ***
## other_intel_center                    0.379998   0.024180  15.715   <2e-16 ***
## other_attr_center:other_intel_center -0.017145   0.009832  -1.744   0.0814 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.248 on 1505 degrees of freedom
## Multiple R-squared:  0.5354, Adjusted R-squared:  0.5345 
## F-statistic: 578.2 on 3 and 1505 DF,  p-value: < 2.2e-16
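#Centering does NOT change the interaction between attractiveness and intelligence: its estimate, standard error, t value, and p value are identical in both models (b = -0.017, p = 0.081). It remains non-significant, which suggests that intelligence does not moderate the relationship between attractiveness and likeability. Centering only changes the intercept and the lower-order coefficients, which now describe effects at the centering value (the mean) of the other predictor instead of at 0.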

#2- Import new dataset

# Load the mediation data set from the working directory.
data_2 <- read.csv(file = "nlsy_med.csv")

#2A - Conduct the mediation analysis using the lavaan package and obtain the confidence intervals using bootstrapping. Print the output from lavaan.

#total effect - Mother Education (X) - Children's Math Achievement (Y)
# Path c (total effect): regress math on ME alone.
path_c <- lm(math ~ ME, data = data_2)

# ME is a significant predictor of math in this model.
summary(path_c)
## 
## Call:
## lm(formula = math ~ ME, data = data_2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6224  -2.9299  -0.4574   2.4876  29.4876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.1824     1.3709   4.510 8.73e-06 ***
## ME            0.5275     0.1199   4.398 1.43e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.618 on 369 degrees of freedom
## Multiple R-squared:  0.04981,    Adjusted R-squared:  0.04723 
## F-statistic: 19.34 on 1 and 369 DF,  p-value: 1.432e-05
# Path a: regress the mediator HE (home environment) on ME (mother education).
path_a <- lm(HE ~ ME, data = data_2)

# Significant: mother education is related to home environment.
summary(path_a)
## 
## Call:
## lm(formula = HE ~ ME, data = data_2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5020 -0.7805  0.2195  1.2195  3.3587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.10944    0.49127   8.365 1.25e-15 ***
## ME           0.13926    0.04298   3.240   0.0013 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.655 on 369 degrees of freedom
## Multiple R-squared:  0.02766,    Adjusted R-squared:  0.02502 
## F-statistic:  10.5 on 1 and 369 DF,  p-value: 0.001305
# Paths b and c' together: regress math on ME and HE in one model.
# The HE slope is path b; the ME slope is the direct effect c'.
path_bc <- lm(math ~ ME + HE, data = data_2)

# Both HE (path b) and ME (path c') are significant predictors here.
summary(path_bc)
## 
## Call:
## lm(formula = math ~ ME + HE, data = data_2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.9302  -3.0045  -0.2226   2.2386  29.3856 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.2736     1.4764   2.895 0.004022 ** 
## ME            0.4628     0.1201   3.853 0.000137 ***
## HE            0.4645     0.1434   3.238 0.001311 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.559 on 368 degrees of freedom
## Multiple R-squared:  0.07613,    Adjusted R-squared:  0.07111 
## F-statistic: 15.16 on 2 and 368 DF,  p-value: 4.7e-07

#2A - Bootstrapping

library(lavaan)
## This is lavaan 0.6-21
## lavaan is FREE software! Please report any bugs.
# lavaan model syntax for the simple mediation ME -> HE -> math.
#   `lhs ~ label*rhs` regresses lhs on rhs and names the slope `label`:
#     a       : ME -> HE    (X -> M)
#     b       : HE -> math  (M -> Y, with ME in the model)
#     c_prime : ME -> math  (direct effect; the in-string note writes "c")
#   `name := expr` defines a derived parameter from the labelled slopes:
#     indirect = a*b, total = c_prime + a*b
# Text after `#` inside the string is a lavaan comment and is ignored by the
# parser.
med_model<-'
HE~a*ME #M ~ a*X
math~b*HE #Y ~ b*M
math~c_prime*ME #Y ~ c*X
indirect:=a*b
total:=c_prime+(a*b)
'

# Fix the RNG seed so the bootstrap resamples, and therefore the bootstrap
# standard errors and confidence intervals, are the same on every run.
# The seed value itself is arbitrary. Output pasted below from an earlier,
# unseeded run may differ slightly from what this produces.
set.seed(1234)

# Fit the mediation model with bootstrap standard errors based on 5000
# resamples.
fit_model <- sem(
  med_model,
  data = data_2,
  se = "bootstrap",
  bootstrap = 5000
)
## Warning: lavaan->lav_model_nvcov_bootstrap():  
##    26 bootstrap runs failed or did not converge.
# Show the estimates with 95% bootstrap confidence intervals
# (boot.ci.type = "bca.simple"). Columns are selected by name rather than by
# position (previously c(1:5, 8:10)) so the table cannot silently pick up the
# wrong columns if the layout of parameterEstimates() changes, e.g. when a
# model has no labels and the `label` column is absent.
parameterEstimates(
  fit_model,
  level = 0.95,
  boot.ci.type = "bca.simple"
)[, c("lhs", "op", "rhs", "label", "est", "pvalue", "ci.lower", "ci.upper")]
##        lhs op           rhs    label    est pvalue ci.lower ci.upper
## 1       HE  ~            ME        a  0.139  0.001    0.062    0.219
## 2     math  ~            HE        b  0.465  0.000    0.218    0.711
## 3     math  ~            ME  c_prime  0.463  0.000    0.225    0.696
## 4       HE ~~            HE           2.724  0.000    2.351    3.174
## 5     math ~~          math          20.621  0.000   15.955   28.014
## 6       ME ~~            ME           3.995     NA    3.995    3.995
## 7 indirect :=           a*b indirect  0.065  0.010    0.025    0.127
## 8    total := c_prime+(a*b)    total  0.528  0.000    0.284    0.766

#2B = Are the three effects (i.e., a, c’, and ab) significant? Does a mediation effect exist? If yes, is it a complete or partial mediation?

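#path A is significant (p = 0.001)
#path B is significant (p < 0.001)
#path C prime is significant (p < 0.001)
#the indirect effect (a*b) is significant (p = 0.01; the 95% bootstrap CI [0.025, 0.127] excludes 0)
#the direct effect (path C prime, i.e. ME -> math with HE in the model) is significant (p < 0.001)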

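#since the indirect effect (a*b) is significant, a mediation effect exists. Because the direct effect (c prime) also remains significant with HE in the model, this is a partial mediation, not a complete one (complete mediation would require the direct effect to be non-significant).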

#2C = Display the path plot using the semPaths() function from the semPlot package.

# semPlot supplies semPaths() for drawing path diagrams of fitted models.
library(semPlot)

# Draw the fitted mediation model, labelling each edge with its parameter
# estimate.
semPaths(fit_model, whatLabels = "par")