###install.packages("car")
###install.packages("sjstats")
###install.packages("compute.es")
###install.packages("pwr")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(EnvStats)
## 
## Attaching package: 'EnvStats'
## 
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
## 
## The following object is masked from 'package:base':
## 
##     print.default
library(infer)
library(car)### This gives Levene's Test
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:EnvStats':
## 
##     qqPlot
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(sjstats)### This produces anova_stats
## 
## Attaching package: 'sjstats'
## 
## The following object is masked from 'package:infer':
## 
##     p_value
## 
## The following object is masked from 'package:EnvStats':
## 
##     cv
library(compute.es)### This computes effect size, but if you use anova_stats you don't need this. 
library(pwr)### This allows anova_stats
getwd()
## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Morgan/000_Psychometrics_Courses/PSYM_570/HW_6_24July23"
lab3_msd <- read.csv("msdlabs_race_arrest_clean.csv")
### There are four assumptions for factorial ANOVA: 1. the variables are normally distributed. 2. The variables are independent. 3. The variances are equal. 4. The sample sizes are equal. 
### In Lab 1, I ran Shapiro-Wilks tests on the 3 dependent variables and found none of them were normally distributed. They are what they are; I can't change them. I assume the variables are independent; I had no involvement in their collection. For self concept and family connectedness, variances are assumed to be equal because of the non-significant results of the Levene Tests, as shown below. The variances for depression were not equal, as shown below.  
### Check for homogeneity of variances

leveneTest(lab3_msd$selfconcept,
           interaction (lab3_msd$hisp,
                        lab3_msd$female),
           center = mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value Pr(>F)
## group    3  1.2956 0.2744
##       1552
leveneTest(lab3_msd$depress,
           interaction (lab3_msd$hisp,
                        lab3_msd$female),
           center = mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value  Pr(>F)  
## group    3  3.1519 0.02409 *
##       1552                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(lab3_msd$famconnect,
           interaction (lab3_msd$hisp,
                        lab3_msd$female),
           center = mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##         Df F value Pr(>F)
## group    3  0.3126 0.8163
##       1552
### Factorial ANOVA for self concept by Hisp and Female
selfcon <- aov(selfconcept ~ hisp*female, data = lab3_msd)
summary (selfcon)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## hisp           1    7.4   7.436  18.280 2.02e-05 ***
## female         1   15.2  15.248  37.488 1.16e-09 ***
## hisp:female    1    0.5   0.466   1.146    0.285    
## Residuals   1552  631.3   0.407                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_stats(selfcon)
## term        |   df |   sumsq | meansq | statistic | p.value | etasq | partial.etasq | omegasq | partial.omegasq | epsilonsq | cohens.f | power
## ----------------------------------------------------------------------------------------------------------------------------------------------
## hisp        |    1 |   7.436 |  7.436 |    18.280 |  < .001 | 0.011 |         0.012 |   0.011 |           0.011 |     0.011 |    0.109 | 0.990
## female      |    1 |  15.248 | 15.248 |    37.488 |  < .001 | 0.023 |         0.024 |   0.023 |           0.023 |     0.023 |    0.155 | 1.000
## hisp:female |    1 |   0.466 |  0.466 |     1.146 |   0.285 | 0.001 |         0.001 |   0.000 |           0.000 |     0.000 |    0.027 | 0.188
## Residuals   | 1552 | 631.282 |  0.407 |           |         |       |               |         |                 |           |          |
interaction.plot(x.factor = lab3_msd$hisp,
                 trace.factor = lab3_msd$female,
                 response = lab3_msd$selfconcept, 
                 fun = mean,
                 ylab = "Self Concept",
                 xlab = "Hispanic (1) vs non-Hispanic (0)",
                 main = "Interaction Plot of Self Concept by Hispanic and Gender",
                 trace.label = "Gender")

### Factorial ANOVA for family connectedness by Hisp and Female
famcon <- aov(famconnect ~ hisp*female, data = lab3_msd)
summary (famcon)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## hisp           1    1.3   1.268   2.493    0.115    
## female         1    8.7   8.722  17.152 3.64e-05 ***
## hisp:female    1    0.9   0.903   1.775    0.183    
## Residuals   1552  789.2   0.509                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_stats(famcon)
## term        |   df |   sumsq | meansq | statistic | p.value | etasq | partial.etasq | omegasq | partial.omegasq | epsilonsq | cohens.f | power
## ----------------------------------------------------------------------------------------------------------------------------------------------
## hisp        |    1 |   1.268 |  1.268 |     2.493 |   0.115 | 0.002 |         0.002 |   0.001 |           0.001 |     0.001 |    0.040 | 0.352
## female      |    1 |   8.722 |  8.722 |    17.152 |  < .001 | 0.011 |         0.011 |   0.010 |           0.010 |     0.010 |    0.105 | 0.985
## hisp:female |    1 |   0.903 |  0.903 |     1.775 |   0.183 | 0.001 |         0.001 |   0.000 |           0.000 |     0.000 |    0.034 | 0.266
## Residuals   | 1552 | 789.213 |  0.509 |           |         |       |               |         |                 |           |          |
interaction.plot(x.factor = lab3_msd$hisp,
                 trace.factor = lab3_msd$female,
                 response = lab3_msd$famconnect, 
                 fun = mean,
                 ylab = "Family Connectedness",
                 xlab = "Hispanic (1) vs non-Hispanic (0)",
                 main = "Interaction Plot of Family Connectedness by Hispanic and Gender",
                 trace.label = "Gender")

### Factorial ANOVA for depression by Hisp and Female
depr <- aov(depress ~ hisp*female, data = lab3_msd)
summary(depr)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## hisp           1   0.02  0.0168   0.155 0.6935  
## female         1   0.62  0.6216   5.748 0.0166 *
## hisp:female    1   0.04  0.0427   0.395 0.5297  
## Residuals   1552 167.82  0.1081                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_stats (depr)
## term        |   df |   sumsq | meansq | statistic | p.value | etasq | partial.etasq | omegasq | partial.omegasq | epsilonsq | cohens.f | power
## ----------------------------------------------------------------------------------------------------------------------------------------------
## hisp        |    1 |   0.017 |  0.017 |     0.155 |   0.694 | 0.000 |         0.000 |  -0.001 |          -0.001 |    -0.001 |    0.010 | 0.068
## female      |    1 |   0.622 |  0.622 |     5.748 |   0.017 | 0.004 |         0.004 |   0.003 |           0.003 |     0.003 |    0.061 | 0.669
## hisp:female |    1 |   0.043 |  0.043 |     0.395 |   0.530 | 0.000 |         0.000 |   0.000 |           0.000 |     0.000 |    0.016 | 0.096
## Residuals   | 1552 | 167.820 |  0.108 |           |         |       |               |         |                 |           |          |
interaction.plot(x.factor = lab3_msd$hisp,
                 trace.factor = lab3_msd$female,
                 response = lab3_msd$depress, 
                 fun = mean,
                 ylab = "Depression",
                 xlab = "Hispanic (1) vs non-Hispanic (0)",
                 main = "Interaction Plot of Depression by Hispanic and Gender",
                 trace.label = "Gender")

### Now begins the linear regression part of the lab.
### Four assumptions of linear regression: 1. The relationship between the dependent and independent variables is linear. 2. Homogeneity of variances of the dependent variables. 3. The observations are independent. 4. For any value of  the independent variable, the dependent variable is normally distributed. 
### The regressions will show if the relationship is linear. The independence of the variables is assumed. As noted above, the dependent variables are not normally distributed. Also as noted above, the depession variable does not have equal variances; the other dependent variables do.
### I will run 6 regressions, two for each of the dependent variables. One will be for Hispanic ethnicity; the other will be for gender. 
sc_hisp <- lm(formula = selfconcept ~ hisp, data = lab3_msd)
summary(sc_hisp)
## 
## Call:
## lm(formula = selfconcept ~ hisp, data = lab3_msd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1357 -0.3427 -0.3427  0.6573  0.8643 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.34267    0.01752 190.835  < 2e-16 ***
## hisp        -0.20699    0.04898  -4.226 2.52e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6452 on 1554 degrees of freedom
## Multiple R-squared:  0.01136,    Adjusted R-squared:  0.01073 
## F-statistic: 17.86 on 1 and 1554 DF,  p-value: 2.517e-05
fc_hisp <- lm(formula = famconnect ~ hisp, data = lab3_msd)
summary(fc_hisp)
## 
## Call:
## lm(formula = famconnect ~ hisp, data = lab3_msd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2513 -0.2513 -0.2513  0.7487  0.8342 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.25129    0.01946  167.05   <2e-16 ***
## hisp        -0.08546    0.05442   -1.57    0.117    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.717 on 1554 degrees of freedom
## Multiple R-squared:  0.001584,   Adjusted R-squared:  0.0009417 
## F-statistic: 2.466 on 1 and 1554 DF,  p-value: 0.1166
dp_hisp <- lm(formula = depress ~ hisp, data = lab3_msd)
summary(dp_hisp)
## 
## Call:
## lm(formula = depress ~ hisp, data = lab3_msd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.93467  0.07517  0.07517  0.07517  1.07517 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.924834   0.008938 103.467   <2e-16 ***
## hisp        0.009839   0.024994   0.394    0.694    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3293 on 1554 degrees of freedom
## Multiple R-squared:  9.971e-05,  Adjusted R-squared:  -0.0005437 
## F-statistic: 0.155 on 1 and 1554 DF,  p-value: 0.6939
sc_gender <- lm(formula = selfconcept ~ female, data = lab3_msd)
summary (sc_gender)
## 
## Call:
## lm(formula = selfconcept ~ female, data = lab3_msd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2246 -0.4254 -0.2246  0.5746  0.7754 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.42535    0.02406 142.348  < 2e-16 ***
## female      -0.20077    0.03263  -6.152 9.71e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6412 on 1554 degrees of freedom
## Multiple R-squared:  0.02378,    Adjusted R-squared:  0.02315 
## F-statistic: 37.85 on 1 and 1554 DF,  p-value: 9.712e-10
fc_gender <- lm(formula = famconnect ~ female, data = lab3_msd)
summary(fc_gender)
## 
## Call:
## lm(formula = famconnect ~ female, data = lab3_msd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3225 -0.3225 -0.1714  0.6775  0.8286 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.32254    0.02678 124.067  < 2e-16 ***
## female      -0.15114    0.03632  -4.161 3.33e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7136 on 1554 degrees of freedom
## Multiple R-squared:  0.01102,    Adjusted R-squared:  0.01038 
## F-statistic: 17.32 on 1 and 1554 DF,  p-value: 3.335e-05
fc_dep <- lm(formula = depress ~ female, data = lab3_msd)
summary(fc_dep)
## 
## Call:
## lm(formula = depress ~ female, data = lab3_msd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94444  0.05556  0.05556  0.09577  1.09577 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.90423    0.01234  73.306   <2e-16 ***
## female       0.04022    0.01673   2.404   0.0163 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3287 on 1554 degrees of freedom
## Multiple R-squared:  0.003706,   Adjusted R-squared:  0.003065 
## F-statistic:  5.78 on 1 and 1554 DF,  p-value: 0.01632