Jacob Souch

Including libraries

library(readxl)
library(car)
## Loading required package: carData
library(janitor) 
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
#install.packages("janitor")
library(ggplot2)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
#install.packages("stargazer")
library(ggpubr)
#install.packages("ggpubr")
library(ggrepel)
#install.packages("ggpmisc")
library(ggpmisc)
## Loading required package: ggpp
## 
## Attaching package: 'ggpp'
## The following object is masked from 'package:ggplot2':
## 
##     annotate

#

Reading in the Excel file

Junkins <- read_excel("C:/Users/jacob/OneDrive - University of Texas at San Antonio/Courses/Stats for Demographic Data 2/Homework 1/Junkins Data.xlsx")

Creation of variables

Junkins$south<- recode(Junkins$region, "3=1; else=0")
tabyl(Junkins$south)
##  Junkins$south  n percent
##              0 34    0.68
##              1 16    0.32
Junkins$northeast<- recode(Junkins$region, "1=1; else=0")
tabyl(Junkins$northeast)
##  Junkins$northeast  n percent
##                  0 41    0.82
##                  1  9    0.18
Junkins$midwest<- recode(Junkins$region, "2=1; else=0")
tabyl(Junkins$midwest)
##  Junkins$midwest  n percent
##                0 38    0.76
##                1 12    0.24
Junkins$west<- recode(Junkins$region, "4=1; else=0")
tabyl(Junkins$west)
##  Junkins$west  n percent
##             0 37    0.74
##             1 13    0.26
Junkins$relconssq<-Junkins$relcons*Junkins$relcons

Junkins$relconscu<-Junkins$relconssq*Junkins$relcons

Junkins$relconsln<-log(Junkins$relcons)

Junkins$relconsrec<-1/(Junkins$relcons)

geographic variation in age at marriage

model <-lm(t_ageFM~northeast + midwest + west, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ northeast + midwest + west, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43846 -0.55625  0.06563  0.75677  2.23750 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.20625    0.26624 102.188   <2e-16 ***
## northeast    1.03264    0.44373   2.327   0.0244 *  
## midwest      0.00625    0.40669   0.015   0.9878    
## west        -0.31779    0.39765  -0.799   0.4283    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.065 on 46 degrees of freedom
## Multiple R-squared:  0.1657, Adjusted R-squared:  0.1113 
## F-statistic: 3.045 on 3 and 46 DF,  p-value: 0.03805
model <-lm(t_ageFM~northeast, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ northeast, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.65732 -0.50271  0.01768  0.74268  2.34268 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  27.1073     0.1642 165.054  < 2e-16 ***
## northeast     1.1316     0.3871   2.923  0.00527 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.052 on 48 degrees of freedom
## Multiple R-squared:  0.1511, Adjusted R-squared:  0.1334 
## F-statistic: 8.545 on 1 and 48 DF,  p-value: 0.005272

#scatterplot and correlation of DV with IV

plot(Junkins$relcons,Junkins$t_ageFM)

cor.test(Junkins$relcons,Junkins$t_ageFM)
## 
##  Pearson's product-moment correlation
## 
## data:  Junkins$relcons and Junkins$t_ageFM
## t = -4.8748, df = 48, p-value = 1.233e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7359186 -0.3537616
## sample estimates:
##        cor 
## -0.5754459

tests of different specifications of religious concentration

#Linear
model <-lm(t_ageFM~relcons, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ relcons, data = Junkins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8183 -0.4097  0.0782  0.5650  1.8037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.27091    0.23707 119.252  < 2e-16 ***
## relcons     -0.04911    0.01007  -4.875 1.23e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9335 on 48 degrees of freedom
## Multiple R-squared:  0.3311, Adjusted R-squared:  0.3172 
## F-statistic: 23.76 on 1 and 48 DF,  p-value: 1.233e-05
#Quadratic 
model <-lm(t_ageFM~relcons + relconssq, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ relcons + relconssq, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.81884 -0.41111  0.08564  0.56738  1.80413 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.824e+01  3.623e-01  77.952   <2e-16 ***
## relcons     -4.607e-02  2.878e-02  -1.601    0.116    
## relconssq   -5.175e-05  4.593e-04  -0.113    0.911    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9432 on 47 degrees of freedom
## Multiple R-squared:  0.3313, Adjusted R-squared:  0.3029 
## F-statistic: 11.64 on 2 and 47 DF,  p-value: 7.81e-05
#Cubic 
model <-lm(t_ageFM~relcons + relconssq + relconscu, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ relcons + relconssq + relconscu, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.74980 -0.53189  0.02847  0.54631  1.86677 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.859e+01  5.415e-01  52.791   <2e-16 ***
## relcons     -1.017e-01  7.065e-02  -1.439    0.157    
## relconssq    2.049e-03  2.481e-03   0.826    0.413    
## relconscu   -2.026e-05  2.351e-05  -0.862    0.393    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9458 on 46 degrees of freedom
## Multiple R-squared:  0.3419, Adjusted R-squared:  0.299 
## F-statistic: 7.968 on 3 and 46 DF,  p-value: 0.0002207
#Natural Log 
model <-lm(t_ageFM~relconsln, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ relconsln, data = Junkins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6441 -0.5908  0.0393  0.6119  1.9690 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  29.6203     0.5419  54.655  < 2e-16 ***
## relconsln    -0.8412     0.1911  -4.402 5.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9633 on 48 degrees of freedom
## Multiple R-squared:  0.2876, Adjusted R-squared:  0.2728 
## F-statistic: 19.38 on 1 and 48 DF,  p-value: 5.958e-05
#Reciprocal
model <-lm(t_ageFM~relconsrec, Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ relconsrec, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44794 -0.49354 -0.09158  0.80604  2.17950 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  26.7445     0.2233 119.789  < 2e-16 ***
## relconsrec    6.6906     2.0013   3.343  0.00161 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.028 on 48 degrees of freedom
## Multiple R-squared:  0.1889, Adjusted R-squared:  0.172 
## F-statistic: 11.18 on 1 and 48 DF,  p-value: 0.001612

test of mediation

#AAFM ~ Northeast + Relcons
model.1 <-lm(t_ageFM~northeast + relcons, Junkins)
summary(model.1)
## 
## Call:
## lm(formula = t_ageFM ~ northeast + relcons, data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.72217 -0.50792 -0.02583  0.54625  1.90289 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.10361    0.30729  91.457  < 2e-16 ***
## northeast    0.34786    0.40488   0.859 0.394607    
## relcons     -0.04375    0.01187  -3.686 0.000589 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.936 on 47 degrees of freedom
## Multiple R-squared:  0.3415, Adjusted R-squared:  0.3135 
## F-statistic: 12.19 on 2 and 47 DF,  p-value: 5.45e-05
#Northeast ~ Relcons (Mediation?)
model.2 <-lm(relcons~northeast, Junkins)
summary(model.2)
## 
## Call:
## lm(formula = relcons ~ northeast, data = Junkins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.032  -8.037  -2.772   3.938  48.628 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   22.772      1.778  12.810  < 2e-16 ***
## northeast    -17.914      4.190  -4.275 9.02e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.38 on 48 degrees of freedom
## Multiple R-squared:  0.2758, Adjusted R-squared:  0.2607 
## F-statistic: 18.28 on 1 and 48 DF,  p-value: 9.019e-05
plot(Junkins$relcons,Junkins$northeast)

#Yes, a path exists. 

#AAAFM ~ Relcons
model.3 <-lm(t_ageFM ~ relcons, Junkins)
summary(model.3)
## 
## Call:
## lm(formula = t_ageFM ~ relcons, data = Junkins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8183 -0.4097  0.0782  0.5650  1.8037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.27091    0.23707 119.252  < 2e-16 ***
## relcons     -0.04911    0.01007  -4.875 1.23e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9335 on 48 degrees of freedom
## Multiple R-squared:  0.3311, Adjusted R-squared:  0.3172 
## F-statistic: 23.76 on 1 and 48 DF,  p-value: 1.233e-05
plot(Junkins$t_ageFM,Junkins$relcons)

#Yes, a relationship exists. 

#Relcons ~ AAFM + Northeast
model.4 <- lm(relcons ~ t_ageFM + northeast,Junkins)
summary(model.4)
## 
## Call:
## lm(formula = relcons ~ t_ageFM + northeast, data = Junkins)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.309  -6.399  -1.857   5.070  35.007 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  161.717     37.729   4.286 8.94e-05 ***
## t_ageFM       -5.126      1.391  -3.686 0.000589 ***
## northeast    -12.113      4.048  -2.993 0.004398 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.13 on 47 degrees of freedom
## Multiple R-squared:  0.4382, Adjusted R-squared:  0.4143 
## F-statistic: 18.33 on 2 and 47 DF,  p-value: 1.304e-06
plot(Junkins$t_ageFM,Junkins$northeast)

#Yes, a relationship exists between t_ageFM and northeast and relcons. 

stargazer(model.1, model.2, model.3, model.4, type = "text", title = "Baron and Kenny Method")
## 
## Baron and Kenny Method
## ===============================================================================================================
##                                                         Dependent variable:                                    
##                     -------------------------------------------------------------------------------------------
##                            t_ageFM                relcons                t_ageFM                relcons        
##                              (1)                    (2)                    (3)                    (4)          
## ---------------------------------------------------------------------------------------------------------------
## t_ageFM                                                                                        -5.126***       
##                                                                                                 (1.391)        
##                                                                                                                
## northeast                   0.348                -17.914***                                    -12.113***      
##                            (0.405)                (4.190)                                       (4.048)        
##                                                                                                                
## relcons                   -0.044***                                     -0.049***                              
##                            (0.012)                                       (0.010)                               
##                                                                                                                
## Constant                  28.104***              22.772***              28.271***              161.717***      
##                            (0.307)                (1.778)                (0.237)                (37.729)       
##                                                                                                                
## ---------------------------------------------------------------------------------------------------------------
## Observations                  50                     50                     50                     50          
## R2                          0.341                  0.276                  0.331                  0.438         
## Adjusted R2                 0.313                  0.261                  0.317                  0.414         
## Residual Std. Error    0.936 (df = 47)        11.383 (df = 48)       0.933 (df = 48)        10.132 (df = 47)   
## F Statistic         12.186*** (df = 2; 47) 18.278*** (df = 1; 48) 23.764*** (df = 1; 48) 18.329*** (df = 2; 47)
## ===============================================================================================================
## Note:                                                                               *p<0.1; **p<0.05; ***p<0.01
library(bda)
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:car':
## 
##     logit
## bda v15 (Bin Wang, 2021)
mediation.test(Junkins$northeast,Junkins$relcons,Junkins$t_ageFM)
##              Sobel     Aroian    Goodman
## z.value -0.8423315 -0.8210210 -0.8653924
## p.value  0.3996024  0.4116343  0.3868234
#The results of the Sobel test indicate that there is no statistically significant mediation. 

Interpretation

The state-level average age at first marriage is 28.10 years for the region group: South, Midwest, and West. A one-unit (percent) increase in Religious Concentration is associated with a 0.044 year decrease in Average Age at First Marriage, and this result is statistically significant (β = -0.04375,t = -3.686, P< 0.001). The change of region from South, Midwest,and West to its counterpart—Northeastis associated with a 0.35 year increase in the average age at first marriage, but this result is not statistically significant (β = 0.34786, t = 0.859, P = ns).The results of the Sobel test indicate that there is no statistically significant mediation from Northeast on Religious Concentration on Average Age at First Marriage.

test of moderation

model <-lm(t_ageFM~northeast + relcons + (northeast*relcons), Junkins)

summary(model)
## 
## Call:
## lm(formula = t_ageFM ~ northeast + relcons + (northeast * relcons), 
##     data = Junkins)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.72617 -0.50502 -0.02841  0.57561  1.89866 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       28.11320    0.30988  90.724  < 2e-16 ***
## northeast         -0.23321    1.06750  -0.218 0.828031    
## relcons           -0.04417    0.01197  -3.689 0.000594 ***
## northeast:relcons  0.11804    0.20041   0.589 0.558754    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9426 on 46 degrees of freedom
## Multiple R-squared:  0.3464, Adjusted R-squared:  0.3038 
## F-statistic: 8.127 on 3 and 46 DF,  p-value: 0.0001898

Interpretation

The average age at first marriage is 28.11 years for the region group: South, Midwest, and West. A one-unit (percent) increase in religious concentration is associated with a -0.04417 year decrease in Average Age at First Marriage, and this result is statistically significant (β = -0.04375,t = -3.686, P< 0.001). The change of region from South, Midwest,and West to its counterpart—Northeastis associated with a 0.23321 year decrease in the average age at first marriage, but this result is not statistically significant (β = 0.34786, t = 0.859, P = ns). The interaction of Northeast × Religious Concentration demonstrates the effect of Northeast on Religious Concentration. The Average Age at First Marriage increased by 0.11804 years for a one percent increase in religious concentration for the Northeast compared to its counterpart (β=0.11804, t = 0.589, P= ns, but this result is not statistically significant.

Junkins %>%
ggplot(aes(x = relcons, y = t_ageFM, group = region, color = factor(region)))+
  #geom_point()+
  geom_smooth(method = lm, se = F)+
  stat_poly_line(se=F) +
  theme_classic()+
  ggtitle("Simple Slopes for Average Age at First Marriage by Region")
## `geom_smooth()` using formula = 'y ~ x'

Junkins %>%
ggplot(aes(x = relcons, y = t_ageFM, group = northeast, color = factor(northeast)))+
  #geom_point()+
  geom_smooth(method = lm, se = F)+
  stat_poly_line(se=F) +
  theme_classic()+
  ggtitle("Simple Slopes for Average Age at First Marriage by Northeast")
## `geom_smooth()` using formula = 'y ~ x'

Junkins %>%
ggplot(aes(x = region, y = relcons, color = region))+
  geom_point()+
  geom_smooth(method = lm, se = F)+
  stat_poly_line(se=F) +
  theme_classic()+
  ggtitle("State-Level Religious Concentration by Region")
## `geom_smooth()` using formula = 'y ~ x'

creating a nice summary table

Model.1 <- lm(t_ageFM~northeast, Junkins)
Model.2 <- lm (t_ageFM~northeast + relcons, Junkins)


#https://ademos.people.uic.edu/Chapter13.html

stargazer(Model.1, Model.2,type="text", 
column.labels = c("Model 1", "Model 2"), 
intercept.bottom = FALSE, 
single.row=FALSE,     
notes.append = FALSE, 
header=FALSE) 
## 
## ================================================================
##                                 Dependent variable:             
##                     --------------------------------------------
##                                       t_ageFM                   
##                            Model 1               Model 2        
##                              (1)                   (2)          
## ----------------------------------------------------------------
## Constant                  27.107***             28.104***       
##                            (0.164)               (0.307)        
##                                                                 
## northeast                 1.132***                0.348         
##                            (0.387)               (0.405)        
##                                                                 
## relcons                                         -0.044***       
##                                                  (0.012)        
##                                                                 
## ----------------------------------------------------------------
## Observations                 50                     50          
## R2                          0.151                 0.341         
## Adjusted R2                 0.133                 0.313         
## Residual Std. Error    1.052 (df = 48)       0.936 (df = 47)    
## F Statistic         8.545*** (df = 1; 48) 12.186*** (df = 2; 47)
## ================================================================
## Note:                                *p<0.1; **p<0.05; ***p<0.01

Interpretation:

Model 1 is statistically significant and it estimates the Average Age at First Marriage such that it is 27.107 + 1.132 * Northeast. This means that the average at at first marriage is estimated to be 1.132 years older for the Northeast compared to its regional counterpart. Model 1 has an R2 of 0.133, meaning that 13.3% of the variation in Average Age at First Marriage can be accounted for by Northeast. In contrast, Model 2 estimates Average Age at First Marriage to be 28.104 + 0.348 * Northeast - 0.044 * Religious Concentation, and the model is statistically significant. 31.3% of the variation in Average Age at First Marriage can be explained by the model. Model 2 is superior to Model 1 due to the greater explanatory capacity and reduced error. Approximately, 135% more variation is explained by Model 2 compared to Model 1.