library(readxl)
library(car)
## Loading required package: carData
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("janitor")
library(ggplot2)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
#install.packages("stargazer")
library(ggpubr)
#install.packages("ggpubr")
library(ggrepel)
#install.packages("ggpmisc")
library(ggpmisc)
## Loading required package: ggpp
##
## Attaching package: 'ggpp'
## The following object is masked from 'package:ggplot2':
##
## annotate
#
Junkins <- read_excel("C:/Users/jacob/OneDrive - University of Texas at San Antonio/Courses/Stats for Demographic Data 2/Homework 1/Junkins Data.xlsx")
Junkins$south<- recode(Junkins$region, "3=1; else=0")
tabyl(Junkins$south)
## Junkins$south n percent
## 0 34 0.68
## 1 16 0.32
Junkins$northeast<- recode(Junkins$region, "1=1; else=0")
tabyl(Junkins$northeast)
## Junkins$northeast n percent
## 0 41 0.82
## 1 9 0.18
Junkins$midwest<- recode(Junkins$region, "2=1; else=0")
tabyl(Junkins$midwest)
## Junkins$midwest n percent
## 0 38 0.76
## 1 12 0.24
Junkins$west<- recode(Junkins$region, "4=1; else=0")
tabyl(Junkins$west)
## Junkins$west n percent
## 0 37 0.74
## 1 13 0.26
Junkins$relconssq<-Junkins$relcons*Junkins$relcons
Junkins$relconscu<-Junkins$relconssq*Junkins$relcons
Junkins$relconsln<-log(Junkins$relcons)
Junkins$relconsrec<-1/(Junkins$relcons)
model <-lm(t_ageFM~northeast + midwest + west, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ northeast + midwest + west, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43846 -0.55625 0.06563 0.75677 2.23750
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.20625 0.26624 102.188 <2e-16 ***
## northeast 1.03264 0.44373 2.327 0.0244 *
## midwest 0.00625 0.40669 0.015 0.9878
## west -0.31779 0.39765 -0.799 0.4283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.065 on 46 degrees of freedom
## Multiple R-squared: 0.1657, Adjusted R-squared: 0.1113
## F-statistic: 3.045 on 3 and 46 DF, p-value: 0.03805
model <-lm(t_ageFM~northeast, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ northeast, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.65732 -0.50271 0.01768 0.74268 2.34268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.1073 0.1642 165.054 < 2e-16 ***
## northeast 1.1316 0.3871 2.923 0.00527 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.052 on 48 degrees of freedom
## Multiple R-squared: 0.1511, Adjusted R-squared: 0.1334
## F-statistic: 8.545 on 1 and 48 DF, p-value: 0.005272
#scatterplot and correlation of DV with IV
plot(Junkins$relcons,Junkins$t_ageFM)
cor.test(Junkins$relcons,Junkins$t_ageFM)
##
## Pearson's product-moment correlation
##
## data: Junkins$relcons and Junkins$t_ageFM
## t = -4.8748, df = 48, p-value = 1.233e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7359186 -0.3537616
## sample estimates:
## cor
## -0.5754459
#Linear
model <-lm(t_ageFM~relcons, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ relcons, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8183 -0.4097 0.0782 0.5650 1.8037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.27091 0.23707 119.252 < 2e-16 ***
## relcons -0.04911 0.01007 -4.875 1.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9335 on 48 degrees of freedom
## Multiple R-squared: 0.3311, Adjusted R-squared: 0.3172
## F-statistic: 23.76 on 1 and 48 DF, p-value: 1.233e-05
#Quadratic
model <-lm(t_ageFM~relcons + relconssq, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ relcons + relconssq, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.81884 -0.41111 0.08564 0.56738 1.80413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.824e+01 3.623e-01 77.952 <2e-16 ***
## relcons -4.607e-02 2.878e-02 -1.601 0.116
## relconssq -5.175e-05 4.593e-04 -0.113 0.911
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9432 on 47 degrees of freedom
## Multiple R-squared: 0.3313, Adjusted R-squared: 0.3029
## F-statistic: 11.64 on 2 and 47 DF, p-value: 7.81e-05
#Cubic
model <-lm(t_ageFM~relcons + relconssq + relconscu, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ relcons + relconssq + relconscu, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.74980 -0.53189 0.02847 0.54631 1.86677
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.859e+01 5.415e-01 52.791 <2e-16 ***
## relcons -1.017e-01 7.065e-02 -1.439 0.157
## relconssq 2.049e-03 2.481e-03 0.826 0.413
## relconscu -2.026e-05 2.351e-05 -0.862 0.393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9458 on 46 degrees of freedom
## Multiple R-squared: 0.3419, Adjusted R-squared: 0.299
## F-statistic: 7.968 on 3 and 46 DF, p-value: 0.0002207
#Natural Log
model <-lm(t_ageFM~relconsln, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ relconsln, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6441 -0.5908 0.0393 0.6119 1.9690
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.6203 0.5419 54.655 < 2e-16 ***
## relconsln -0.8412 0.1911 -4.402 5.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9633 on 48 degrees of freedom
## Multiple R-squared: 0.2876, Adjusted R-squared: 0.2728
## F-statistic: 19.38 on 1 and 48 DF, p-value: 5.958e-05
#Reciprocal
model <-lm(t_ageFM~relconsrec, Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ relconsrec, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.44794 -0.49354 -0.09158 0.80604 2.17950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.7445 0.2233 119.789 < 2e-16 ***
## relconsrec 6.6906 2.0013 3.343 0.00161 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.028 on 48 degrees of freedom
## Multiple R-squared: 0.1889, Adjusted R-squared: 0.172
## F-statistic: 11.18 on 1 and 48 DF, p-value: 0.001612
#AAFM ~ Northeast + Relcons
model.1 <-lm(t_ageFM~northeast + relcons, Junkins)
summary(model.1)
##
## Call:
## lm(formula = t_ageFM ~ northeast + relcons, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.72217 -0.50792 -0.02583 0.54625 1.90289
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.10361 0.30729 91.457 < 2e-16 ***
## northeast 0.34786 0.40488 0.859 0.394607
## relcons -0.04375 0.01187 -3.686 0.000589 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.936 on 47 degrees of freedom
## Multiple R-squared: 0.3415, Adjusted R-squared: 0.3135
## F-statistic: 12.19 on 2 and 47 DF, p-value: 5.45e-05
#Northeast ~ Relcons (Mediation?)
model.2 <-lm(relcons~northeast, Junkins)
summary(model.2)
##
## Call:
## lm(formula = relcons ~ northeast, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.032 -8.037 -2.772 3.938 48.628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.772 1.778 12.810 < 2e-16 ***
## northeast -17.914 4.190 -4.275 9.02e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.38 on 48 degrees of freedom
## Multiple R-squared: 0.2758, Adjusted R-squared: 0.2607
## F-statistic: 18.28 on 1 and 48 DF, p-value: 9.019e-05
plot(Junkins$relcons,Junkins$northeast)
#Yes, a path exists.
#AAAFM ~ Relcons
model.3 <-lm(t_ageFM ~ relcons, Junkins)
summary(model.3)
##
## Call:
## lm(formula = t_ageFM ~ relcons, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8183 -0.4097 0.0782 0.5650 1.8037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.27091 0.23707 119.252 < 2e-16 ***
## relcons -0.04911 0.01007 -4.875 1.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9335 on 48 degrees of freedom
## Multiple R-squared: 0.3311, Adjusted R-squared: 0.3172
## F-statistic: 23.76 on 1 and 48 DF, p-value: 1.233e-05
plot(Junkins$t_ageFM,Junkins$relcons)
#Yes, a relationship exists.
#Relcons ~ AAFM + Northeast
model.4 <- lm(relcons ~ t_ageFM + northeast,Junkins)
summary(model.4)
##
## Call:
## lm(formula = relcons ~ t_ageFM + northeast, data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.309 -6.399 -1.857 5.070 35.007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 161.717 37.729 4.286 8.94e-05 ***
## t_ageFM -5.126 1.391 -3.686 0.000589 ***
## northeast -12.113 4.048 -2.993 0.004398 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.13 on 47 degrees of freedom
## Multiple R-squared: 0.4382, Adjusted R-squared: 0.4143
## F-statistic: 18.33 on 2 and 47 DF, p-value: 1.304e-06
plot(Junkins$t_ageFM,Junkins$northeast)
#Yes, a relationship exists between t_ageFM and northeast and relcons.
stargazer(model.1, model.2, model.3, model.4, type = "text", title = "Baron and Kenny Method")
##
## Baron and Kenny Method
## ===============================================================================================================
## Dependent variable:
## -------------------------------------------------------------------------------------------
## t_ageFM relcons t_ageFM relcons
## (1) (2) (3) (4)
## ---------------------------------------------------------------------------------------------------------------
## t_ageFM -5.126***
## (1.391)
##
## northeast 0.348 -17.914*** -12.113***
## (0.405) (4.190) (4.048)
##
## relcons -0.044*** -0.049***
## (0.012) (0.010)
##
## Constant 28.104*** 22.772*** 28.271*** 161.717***
## (0.307) (1.778) (0.237) (37.729)
##
## ---------------------------------------------------------------------------------------------------------------
## Observations 50 50 50 50
## R2 0.341 0.276 0.331 0.438
## Adjusted R2 0.313 0.261 0.317 0.414
## Residual Std. Error 0.936 (df = 47) 11.383 (df = 48) 0.933 (df = 48) 10.132 (df = 47)
## F Statistic 12.186*** (df = 2; 47) 18.278*** (df = 1; 48) 23.764*** (df = 1; 48) 18.329*** (df = 2; 47)
## ===============================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(bda)
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:car':
##
## logit
## bda v15 (Bin Wang, 2021)
mediation.test(Junkins$northeast,Junkins$relcons,Junkins$t_ageFM)
## Sobel Aroian Goodman
## z.value -0.8423315 -0.8210210 -0.8653924
## p.value 0.3996024 0.4116343 0.3868234
#The results of the Sobel test indicate that there is no statistically significant mediation.
The state-level average age at first marriage is 28.10 years for the region group: South, Midwest, and West. A one-unit (percent) increase in Religious Concentration is associated with a 0.044 year decrease in Average Age at First Marriage, and this result is statistically significant (β = -0.04375,t = -3.686, P< 0.001). The change of region from South, Midwest,and West to its counterpart—Northeast—is associated with a 0.35 year increase in the average age at first marriage, but this result is not statistically significant (β = 0.34786, t = 0.859, P = ns).The results of the Sobel test indicate that there is no statistically significant mediation from Northeast on Religious Concentration on Average Age at First Marriage.
model <-lm(t_ageFM~northeast + relcons + (northeast*relcons), Junkins)
summary(model)
##
## Call:
## lm(formula = t_ageFM ~ northeast + relcons + (northeast * relcons),
## data = Junkins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.72617 -0.50502 -0.02841 0.57561 1.89866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.11320 0.30988 90.724 < 2e-16 ***
## northeast -0.23321 1.06750 -0.218 0.828031
## relcons -0.04417 0.01197 -3.689 0.000594 ***
## northeast:relcons 0.11804 0.20041 0.589 0.558754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9426 on 46 degrees of freedom
## Multiple R-squared: 0.3464, Adjusted R-squared: 0.3038
## F-statistic: 8.127 on 3 and 46 DF, p-value: 0.0001898
The average age at first marriage is 28.11 years for the region group: South, Midwest, and West. A one-unit (percent) increase in religious concentration is associated with a -0.04417 year decrease in Average Age at First Marriage, and this result is statistically significant (β = -0.04375,t = -3.686, P< 0.001). The change of region from South, Midwest,and West to its counterpart—Northeast—is associated with a 0.23321 year decrease in the average age at first marriage, but this result is not statistically significant (β = 0.34786, t = 0.859, P = ns). The interaction of Northeast × Religious Concentration demonstrates the effect of Northeast on Religious Concentration. The Average Age at First Marriage increased by 0.11804 years for a one percent increase in religious concentration for the Northeast compared to its counterpart (β=0.11804, t = 0.589, P= ns, but this result is not statistically significant.
Junkins %>%
ggplot(aes(x = relcons, y = t_ageFM, group = region, color = factor(region)))+
#geom_point()+
geom_smooth(method = lm, se = F)+
stat_poly_line(se=F) +
theme_classic()+
ggtitle("Simple Slopes for Average Age at First Marriage by Region")
## `geom_smooth()` using formula = 'y ~ x'
Junkins %>%
ggplot(aes(x = relcons, y = t_ageFM, group = northeast, color = factor(northeast)))+
#geom_point()+
geom_smooth(method = lm, se = F)+
stat_poly_line(se=F) +
theme_classic()+
ggtitle("Simple Slopes for Average Age at First Marriage by Northeast")
## `geom_smooth()` using formula = 'y ~ x'
Junkins %>%
ggplot(aes(x = region, y = relcons, color = region))+
geom_point()+
geom_smooth(method = lm, se = F)+
stat_poly_line(se=F) +
theme_classic()+
ggtitle("State-Level Religious Concentration by Region")
## `geom_smooth()` using formula = 'y ~ x'
Model.1 <- lm(t_ageFM~northeast, Junkins)
Model.2 <- lm (t_ageFM~northeast + relcons, Junkins)
#https://ademos.people.uic.edu/Chapter13.html
stargazer(Model.1, Model.2,type="text",
column.labels = c("Model 1", "Model 2"),
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## ================================================================
## Dependent variable:
## --------------------------------------------
## t_ageFM
## Model 1 Model 2
## (1) (2)
## ----------------------------------------------------------------
## Constant 27.107*** 28.104***
## (0.164) (0.307)
##
## northeast 1.132*** 0.348
## (0.387) (0.405)
##
## relcons -0.044***
## (0.012)
##
## ----------------------------------------------------------------
## Observations 50 50
## R2 0.151 0.341
## Adjusted R2 0.133 0.313
## Residual Std. Error 1.052 (df = 48) 0.936 (df = 47)
## F Statistic 8.545*** (df = 1; 48) 12.186*** (df = 2; 47)
## ================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Model 1 is statistically significant and it estimates the Average Age at First Marriage such that it is 27.107 + 1.132 * Northeast. This means that the average at at first marriage is estimated to be 1.132 years older for the Northeast compared to its regional counterpart. Model 1 has an R2 of 0.133, meaning that 13.3% of the variation in Average Age at First Marriage can be accounted for by Northeast. In contrast, Model 2 estimates Average Age at First Marriage to be 28.104 + 0.348 * Northeast - 0.044 * Religious Concentation, and the model is statistically significant. 31.3% of the variation in Average Age at First Marriage can be explained by the model. Model 2 is superior to Model 1 due to the greater explanatory capacity and reduced error. Approximately, 135% more variation is explained by Model 2 compared to Model 1.