Import the data and create a new data frame that contains only the needed variables

library(rio)
library(ggplot2)
library(GGally)
library(CCA)
library(dplyr)
library(CCP)
library(candisc)
library(lessR)
FullData <- read.csv("/Users/Lorraine/Desktop/Project 2.csv")
PJ2 <- subset(FullData, select=c(Rigorous.Instruction.., Collaborative.Teachers.., Supportive.Environment.., Effective.School.Leadership.., Strong.Family.Community.Ties.., Trust.., Average.ELA.Proficiency, Average.Math.Proficiency))

Convert the variables from factors to numerics and change the percentages to decimals. To convert a factor to numeric, we first convert it to character.

PJ2$Rigorous.Instruction.. <- as.numeric(sub("%", "",PJ2$Rigorous.Instruction..,fixed=TRUE))/100
PJ2$Collaborative.Teachers.. <- as.numeric(sub("%", "",PJ2$Collaborative.Teachers..,fixed=TRUE))/100
PJ2$Supportive.Environment.. <- as.numeric(sub("%", "",PJ2$Supportive.Environment..,fixed=TRUE))/100
PJ2$Effective.School.Leadership.. <- as.numeric(sub("%", "",PJ2$Effective.School.Leadership..,fixed=TRUE))/100
PJ2$Strong.Family.Community.Ties.. <- as.numeric(sub("%", "",PJ2$Strong.Family.Community.Ties..,fixed=TRUE))/100
PJ2$Trust.. <- as.numeric(sub("%", "",PJ2$Trust.. ,fixed=TRUE))/100
PJ2$Average.ELA.Proficiency <-as.character(PJ2$Average.ELA.Proficiency)
PJ2$Average.ELA.Proficiency <- as.numeric(PJ2$Average.ELA.Proficiency)
PJ2$Average.Math.Proficiency <- as.character(PJ2$Average.Math.Proficiency)
PJ2$Average.Math.Proficiency <- as.numeric(PJ2$Average.Math.Proficiency)
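
The six percentage conversions above can also be written as one pass over the columns. The following is only a sketch of an equivalent alternative (run it instead of, not in addition to, the per-column calls); it assumes the same PJ2 column names, and pct.cols is just an illustrative object name.

pct.cols <- c("Rigorous.Instruction..", "Collaborative.Teachers..",
              "Supportive.Environment..", "Effective.School.Leadership..",
              "Strong.Family.Community.Ties..", "Trust..")
PJ2[pct.cols] <- lapply(PJ2[pct.cols],
                        function(x) as.numeric(sub("%", "", x, fixed = TRUE)) / 100)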

Running basic descriptives on the data

summary(PJ2)
##  Rigorous.Instruction.. Collaborative.Teachers.. Supportive.Environment..
##  Min.   :0.0000         Min.   :0.0000           Min.   :0.0000          
##  1st Qu.:0.8600         1st Qu.:0.8500           1st Qu.:0.8400          
##  Median :0.9000         Median :0.9000           Median :0.8900          
##  Mean   :0.8948         Mean   :0.8844           Mean   :0.8875          
##  3rd Qu.:0.9400         3rd Qu.:0.9400           3rd Qu.:0.9400          
##  Max.   :1.0000         Max.   :1.0000           Max.   :1.0000          
##  NA's   :25             NA's   :25               NA's   :25              
##  Effective.School.Leadership.. Strong.Family.Community.Ties..
##  Min.   :0.0000                Min.   :0.0000                
##  1st Qu.:0.7600                1st Qu.:0.8000                
##  Median :0.8300                Median :0.8300                
##  Mean   :0.8162                Mean   :0.8309                
##  3rd Qu.:0.8900                3rd Qu.:0.8700                
##  Max.   :0.9900                Max.   :0.9900                
##  NA's   :25                    NA's   :25                    
##     Trust..       Average.ELA.Proficiency Average.Math.Proficiency
##  Min.   :0.0000   Min.   :1.810           Min.   :1.830           
##  1st Qu.:0.8700   1st Qu.:2.250           1st Qu.:2.300           
##  Median :0.9200   Median :2.450           Median :2.580           
##  Mean   :0.9042   Mean   :2.534           Mean   :2.669           
##  3rd Qu.:0.9400   3rd Qu.:2.760           3rd Qu.:2.980           
##  Max.   :1.0000   Max.   :3.930           Max.   :4.200           
##  NA's   :25       NA's   :55              NA's   :55
sum(is.na(PJ2)) #260 missing values, less than 5% of the total data, so run listwise deletion
## [1] 260
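#As a quick check on the "less than 5%" claim (a sketch), the share of missing cells is:
mean(is.na(PJ2)) * 100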
PJ2 <- na.omit(PJ2)
summary(PJ2)
##  Rigorous.Instruction.. Collaborative.Teachers.. Supportive.Environment..
##  Min.   :0.0000         Min.   :0.0000           Min.   :0.0000          
##  1st Qu.:0.8600         1st Qu.:0.8500           1st Qu.:0.8400          
##  Median :0.9000         Median :0.9000           Median :0.8900          
##  Mean   :0.8939         Mean   :0.8832           Mean   :0.8854          
##  3rd Qu.:0.9400         3rd Qu.:0.9300           3rd Qu.:0.9300          
##  Max.   :1.0000         Max.   :1.0000           Max.   :1.0000          
##  Effective.School.Leadership.. Strong.Family.Community.Ties..
##  Min.   :0.0000                Min.   :0.0000                
##  1st Qu.:0.7600                1st Qu.:0.8000                
##  Median :0.8300                Median :0.8300                
##  Mean   :0.8146                Mean   :0.8296                
##  3rd Qu.:0.8800                3rd Qu.:0.8700                
##  Max.   :0.9900                Max.   :0.9900                
##     Trust..       Average.ELA.Proficiency Average.Math.Proficiency
##  Min.   :0.0000   Min.   :1.810           Min.   :1.830           
##  1st Qu.:0.8700   1st Qu.:2.250           1st Qu.:2.300           
##  Median :0.9100   Median :2.450           Median :2.580           
##  Mean   :0.9031   Mean   :2.534           Mean   :2.669           
##  3rd Qu.:0.9400   3rd Qu.:2.760           3rd Qu.:2.980           
##  Max.   :1.0000   Max.   :3.930           Max.   :4.200
boxplot(PJ2$Average.Math.Proficiency)

boxplot.stats(PJ2$Average.Math.Proficiency)$out
## [1] 4.03 4.15 4.19 4.07 4.16 4.20
#Only Average Math Proficiency has outliers (statistically defined). However, I do not have enough information to judge whether they are true outliers in their practical context, and there are only 6 of them, so I am keeping them in the data.
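
If the flagged values did need attention, the affected rows could be inspected or dropped as sketched below; this is not applied here, consistent with the decision to keep them.

PJ2[PJ2$Average.Math.Proficiency %in% boxplot.stats(PJ2$Average.Math.Proficiency)$out, ]   #inspect the flagged rows
#PJ2 <- PJ2[!PJ2$Average.Math.Proficiency %in% boxplot.stats(PJ2$Average.Math.Proficiency)$out, ]   #would drop them (not done here)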

Test Kurtosis and Skewness

library(moments)
skewness(PJ2)
##         Rigorous.Instruction..       Collaborative.Teachers.. 
##                     -5.3958196                     -2.2033102 
##       Supportive.Environment..  Effective.School.Leadership.. 
##                     -2.2939757                     -1.2900541 
## Strong.Family.Community.Ties..                        Trust.. 
##                     -2.0293937                     -3.4608423 
##        Average.ELA.Proficiency       Average.Math.Proficiency 
##                      0.9040115                      0.6586586
kurtosis(PJ2)
##         Rigorous.Instruction..       Collaborative.Teachers.. 
##                      66.967941                      19.500977 
##       Supportive.Environment..  Effective.School.Leadership.. 
##                      29.949557                       7.561101 
## Strong.Family.Community.Ties..                        Trust.. 
##                      27.772212                      42.435323 
##        Average.ELA.Proficiency       Average.Math.Proficiency 
##                       3.488466                       2.806103
agostino.test(PJ2$Rigorous.Instruction.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Rigorous.Instruction..
## skew = -5.3958, z = -30.4398, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Collaborative.Teachers.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Collaborative.Teachers..
## skew = -2.2033, z = -20.1892, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Supportive.Environment.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Supportive.Environment..
## skew = -2.294, z = -20.636, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Effective.School.Leadership.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Effective.School.Leadership..
## skew = -1.2901, z = -14.5295, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Strong.Family.Community.Ties.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Strong.Family.Community.Ties..
## skew = -2.0294, z = -19.2846, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Trust.., alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Trust..
## skew = -3.4608, z = -25.2918, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Average.ELA.Proficiency, alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Average.ELA.Proficiency
## skew = 0.90401, z = 11.19746, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Average.Math.Proficiency, alternative = c("two.sided", "less", "greater"))
## 
##  D'Agostino skewness test
## 
## data:  PJ2$Average.Math.Proficiency
## skew = 0.65866, z = 8.64434, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
# The D'Agostino skewness test indicates that all variables are significantly skewed, with the two criterion variables moderately skewed and the predictor variables highly skewed. Since the criterion variables are only moderately skewed, I will not transform any of the variables.
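
The eight D'Agostino tests above can also be produced in one pass over the data frame; a minimal sketch (same two-sided test, which is the default alternative):

lapply(PJ2, agostino.test)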

Multiple regression analysis with Average ELA Proficiency as the criterion variable

#Rename all variables so the code can run without problems
library(reshape) #reshape's rename() masks dplyr::rename()
PJ2 <- rename(PJ2,c(Rigorous.Instruction..="RI",Collaborative.Teachers..="CT",Supportive.Environment..="SE", Effective.School.Leadership..="ESL",Strong.Family.Community.Ties.. ="FCT",Trust..="Trust", Average.ELA.Proficiency = "AEP", Average.Math.Proficiency = "AMP"))
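#An equivalent base-R renaming (a sketch, kept commented out; column order follows the subset() call above):
#names(PJ2) <- c("RI", "CT", "SE", "ESL", "FCT", "Trust", "AEP", "AMP")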
#Run regression using the lm function
Reg1 <- lm(AEP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
summary(Reg1)
## 
## Call:
## lm(formula = AEP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8227 -0.2367 -0.0515  0.1830  1.4328 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   1.1712     0.1787   6.553      0.0000000000831
## RI            0.5632     0.1891   2.978              0.00296
## CT           -0.1324     0.3721  -0.356              0.72202
## SE            2.1311     0.2264   9.413 < 0.0000000000000002
## ESL           1.3929     0.2513   5.543      0.0000000364993
## FCT          -0.2718     0.1903  -1.428              0.15361
## Trust        -2.0147     0.3856  -5.225      0.0000002044152
## 
## Residual standard error: 0.3309 on 1210 degrees of freedom
## Multiple R-squared:  0.1756, Adjusted R-squared:  0.1715 
## F-statistic: 42.97 on 6 and 1210 DF,  p-value: < 0.00000000000000022
#Hierarchical Regression
Model1 <- lm(AEP ~ RI, data = PJ2)
summary(Model1)
## 
## Call:
## lm(formula = AEP ~ RI, data = PJ2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68372 -0.26241 -0.06811  0.20628  1.42750 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)
## (Intercept)   1.1307     0.1274   8.875 <0.0000000000000002
## RI            1.5701     0.1421  11.049 <0.0000000000000002
## 
## Residual standard error: 0.3467 on 1215 degrees of freedom
## Multiple R-squared:  0.09131,    Adjusted R-squared:  0.09056 
## F-statistic: 122.1 on 1 and 1215 DF,  p-value: < 0.00000000000000022
Model2 <- lm(AEP ~ RI + CT, data = PJ2)
Model3 <- lm(AEP ~ RI + CT + SE, data = PJ2)
Model4 <- lm(AEP ~ RI + CT + SE + ESL, data = PJ2)
Model5 <- lm(AEP ~ RI + CT + SE + ESL + FCT, data = PJ2)
#Testing change in R-squared
rsq.delta1 <- summary(Model2)$r.squared - summary(Model1)$r.squared
rsq.delta1
anova(Model1, Model2, test = "F")
## Analysis of Variance Table
## 
## Model 1: AEP ~ RI
## Model 2: AEP ~ RI + CT
##   Res.Df    RSS Df Sum of Sq      F      Pr(>F)
## 1   1215 146.07                                
## 2   1214 143.43  1    2.6457 22.394 0.000002482
rsq.delta2 <- summary(Model3)$r.squared - summary(Model2)$r.squared
rsq.delta2
anova(Model2, Model3, test = "F")
## Analysis of Variance Table
## 
## Model 1: AEP ~ RI + CT
## Model 2: AEP ~ RI + CT + SE
##   Res.Df    RSS Df Sum of Sq     F             Pr(>F)
## 1   1214 143.43                                      
## 2   1213 137.52  1    5.9056 52.09 0.0000000000009325
rsq.delta3 <- summary(Model4)$r.squared - summary(Model3)$r.squared
rsq.delta3
anova(Model3, Model4, test = "F")
## Analysis of Variance Table
## 
## Model 1: AEP ~ RI + CT + SE
## Model 2: AEP ~ RI + CT + SE + ESL
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)
## 1   1213 137.52                              
## 2   1212 136.08  1    1.4396 12.822 0.0003562
rsq.delta4 <- summary(Model5)$r.squared - summary(Model4)$r.squared
rsq.delta4
anova(Model4, Model5, test = "F")
## Analysis of Variance Table
## 
## Model 1: AEP ~ RI + CT + SE + ESL
## Model 2: AEP ~ RI + CT + SE + ESL + FCT
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)
## 1   1212 136.08                            
## 2   1211 135.51  1   0.57427 5.1321 0.02366
rsq.delta5 <- summary(Reg1)$r.squared - summary(Model5)$r.squared
rsq.delta5
anova(Model5, Reg1, test = "F")
## Analysis of Variance Table
## 
## Model 1: AEP ~ RI + CT + SE + ESL + FCT
## Model 2: AEP ~ RI + CT + SE + ESL + FCT + Trust
##   Res.Df    RSS Df Sum of Sq      F       Pr(>F)
## 1   1211 135.51                                 
## 2   1210 132.52  1    2.9905 27.306 0.0000002044
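#A sketch (with illustrative object names) that collects the stepwise R-squared changes in one pass:
r2.AEP <- sapply(list(Model1, Model2, Model3, Model4, Model5, Reg1),
                 function(m) summary(m)$r.squared)
diff(r2.AEP)   #change in R-squared at each step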
#checking for multicollinearity 
vif(Reg1)
##       RI       CT       SE      ESL      FCT    Trust 
## 1.943860 8.560109 2.414204 6.769008 1.571616 6.170379
#Multiple regression without CT (highest VIF) and FCT (non-significant in Reg1)
New <- lm(AEP ~ RI + SE + ESL + Trust, data = PJ2)
summary(New)
## 
## Call:
## lm(formula = AEP ~ RI + SE + ESL + Trust, data = PJ2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81437 -0.23605 -0.04935  0.18320  1.42567 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   1.1098     0.1736   6.392      0.0000000002331
## RI            0.5202     0.1772   2.936              0.00339
## SE            2.0304     0.2149   9.447 < 0.0000000000000002
## ESL           1.3450     0.1969   6.831      0.0000000000133
## Trust        -2.1415     0.3599  -5.950      0.0000000035089
## 
## Residual standard error: 0.331 on 1212 degrees of freedom
## Multiple R-squared:  0.1742, Adjusted R-squared:  0.1715 
## F-statistic: 63.91 on 4 and 1212 DF,  p-value: < 0.00000000000000022

Multiple regression analysis with Average Math Proficiency as the criterion variable

#Run regression using the lm function
Reg2 <- lm(AMP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
summary(Reg2)
## 
## Call:
## lm(formula = AMP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9596 -0.3114 -0.0492  0.2806  2.3432 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   0.5068     0.2264   2.239              0.02534
## RI            0.6233     0.2395   2.602              0.00938
## CT           -0.1100     0.4713  -0.233              0.81548
## SE            3.2908     0.2868  11.475 < 0.0000000000000002
## ESL           1.4970     0.3183   4.703          0.000002860
## FCT          -0.1472     0.2411  -0.610              0.54175
## Trust        -2.5566     0.4884  -5.235          0.000000195
## 
## Residual standard error: 0.4192 on 1210 degrees of freedom
## Multiple R-squared:   0.21,  Adjusted R-squared:  0.2061 
## F-statistic: 53.61 on 6 and 1210 DF,  p-value: < 0.00000000000000022
#Hierarchical Regression
Model6 <- lm(AMP ~ RI, data = PJ2)
summary(Model6)
## 
## Call:
## lm(formula = AMP ~ RI, data = PJ2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86372 -0.35219 -0.07913  0.27616  2.11605 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   0.7339     0.1637   4.482           0.00000808
## RI            2.1647     0.1826  11.854 < 0.0000000000000002
## 
## Residual standard error: 0.4456 on 1215 degrees of freedom
## Multiple R-squared:  0.1037, Adjusted R-squared:  0.1029 
## F-statistic: 140.5 on 1 and 1215 DF,  p-value: < 0.00000000000000022
Model7 <- lm(AMP ~ RI + CT, data = PJ2)
Model8 <- lm(AMP ~ RI + CT + SE, data = PJ2)
Model9 <- lm(AMP ~ RI + CT + SE + ESL, data = PJ2)
Model10 <- lm(AMP ~ RI + CT + SE + ESL + FCT, data = PJ2)
#Testing change in R-squared
rsq.delta6 <- summary(Model7)$r.squared - summary(Model6)$r.squared
rsq.delta6
anova(Model6, Model7, test = "F")
## Analysis of Variance Table
## 
## Model 1: AMP ~ RI
## Model 2: AMP ~ RI + CT
##   Res.Df    RSS Df Sum of Sq      F      Pr(>F)
## 1   1215 241.25                                
## 2   1214 237.36  1    3.8916 19.904 0.000008899
rsq.delta7 <- summary(Model8)$r.squared - summary(Model7)$r.squared
rsq.delta7
anova(Model7, Model8, test = "F")
## Analysis of Variance Table
## 
## Model 1: AMP ~ RI + CT
## Model 2: AMP ~ RI + CT + SE
##   Res.Df    RSS Df Sum of Sq      F                Pr(>F)
## 1   1214 237.36                                          
## 2   1213 219.16  1    18.203 100.75 < 0.00000000000000022
rsq.delta8 <- summary(Model9)$r.squared - summary(Model8)$r.squared
rsq.delta8
anova(Model8, Model9, test = "F")
## Analysis of Variance Table
## 
## Model 1: AMP ~ RI + CT + SE
## Model 2: AMP ~ RI + CT + SE + ESL
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)
## 1   1213 219.16                             
## 2   1212 217.82  1    1.3369 7.4385 0.006476
rsq.delta9 <- summary(Model10)$r.squared - summary(Model9)$r.squared
rsq.delta9
anova(Model9, Model10, test = "F")
## Analysis of Variance Table
## 
## Model 1: AMP ~ RI + CT + SE + ESL
## Model 2: AMP ~ RI + CT + SE + ESL + FCT
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   1212 217.82                           
## 2   1211 217.44  1   0.37631 2.0957  0.148
rsq.delta10 <- summary(Reg2)$r.squared - summary(Model10)$r.squared
rsq.delta10
anova(Model10, Reg2, test = "F")
## Analysis of Variance Table
## 
## Model 1: AMP ~ RI + CT + SE + ESL + FCT
## Model 2: AMP ~ RI + CT + SE + ESL + FCT + Trust
##   Res.Df    RSS Df Sum of Sq      F       Pr(>F)
## 1   1211 217.44                                 
## 2   1210 212.63  1    4.8153 27.402 0.0000001946
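#The same one-pass sketch (illustrative names) for the AMP hierarchy:
r2.AMP <- sapply(list(Model6, Model7, Model8, Model9, Model10, Reg2),
                 function(m) summary(m)$r.squared)
diff(r2.AMP)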
#multiple regression without FCT
New1 <- lm(AMP ~ RI + CT + SE + ESL + Trust, data = PJ2)
summary(New1)
## 
## Call:
## lm(formula = AMP ~ RI + CT + SE + ESL + Trust, data = PJ2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97225 -0.31364 -0.04711  0.27974  2.37584 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   0.4742     0.2199   2.156               0.0313
## RI            0.6109     0.2386   2.560               0.0106
## CT           -0.1017     0.4710  -0.216               0.8291
## SE            3.2418     0.2752  11.778 < 0.0000000000000002
## ESL           1.4977     0.3182   4.706         0.0000028118
## Trust        -2.6041     0.4820  -5.402         0.0000000791
## 
## Residual standard error: 0.4191 on 1211 degrees of freedom
## Multiple R-squared:  0.2098, Adjusted R-squared:  0.2065 
## F-statistic: 64.29 on 5 and 1211 DF,  p-value: < 0.00000000000000022
New2 <- lm(AMP ~ RI + SE + ESL + Trust, data = PJ2)
summary(New2)
## 
## Call:
## lm(formula = AMP ~ RI + SE + ESL + Trust, data = PJ2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97766 -0.31238 -0.04729  0.27784  2.37673 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)
## (Intercept)   0.4733     0.2198   2.153              0.03148
## RI            0.5934     0.2243   2.646              0.00825
## SE            3.2329     0.2720  11.884 < 0.0000000000000002
## ESL           1.4550     0.2492   5.838        0.00000000677
## Trust        -2.6380     0.4556  -5.790        0.00000000895
## 
## Residual standard error: 0.4189 on 1212 degrees of freedom
## Multiple R-squared:  0.2097, Adjusted R-squared:  0.2071 
## F-statistic: 80.41 on 4 and 1212 DF,  p-value: < 0.00000000000000022

Canonical Correlation Analysis

#Identify the predictor and criterion variable sets using dplyr's select()
pred <- PJ2 %>% select(RI, CT, SE, ESL, FCT, Trust)
crit <- PJ2 %>% select(AEP, AMP)
ggpairs(pred) #some high correlations

ggpairs(crit) #high correlation

matcor(pred, crit)
## $Xcor
##              RI        CT        SE       ESL       FCT     Trust
## RI    1.0000000 0.6130197 0.6048823 0.4726465 0.4252463 0.5269134
## CT    0.6130197 1.0000000 0.6019965 0.8992480 0.4707950 0.8824691
## SE    0.6048823 0.6019965 1.0000000 0.4597990 0.5605202 0.6362909
## ESL   0.4726465 0.8992480 0.4597990 1.0000000 0.4073891 0.8605750
## FCT   0.4252463 0.4707950 0.5605202 0.4073891 1.0000000 0.5199311
## Trust 0.5269134 0.8824691 0.6362909 0.8605750 0.5199311 1.0000000
## 
## $Ycor
##           AEP       AMP
## AEP 1.0000000 0.9359562
## AMP 0.9359562 1.0000000
## 
## $XYcor
##              RI        CT        SE       ESL       FCT     Trust
## RI    1.0000000 0.6130197 0.6048823 0.4726465 0.4252463 0.5269134
## CT    0.6130197 1.0000000 0.6019965 0.8992480 0.4707950 0.8824691
## SE    0.6048823 0.6019965 1.0000000 0.4597990 0.5605202 0.6362909
## ESL   0.4726465 0.8992480 0.4597990 1.0000000 0.4073891 0.8605750
## FCT   0.4252463 0.4707950 0.5605202 0.4073891 1.0000000 0.5199311
## Trust 0.5269134 0.8824691 0.6362909 0.8605750 0.5199311 1.0000000
## AEP   0.3021701 0.2865938 0.3622688 0.2681362 0.1777522 0.2368743
## AMP   0.3219612 0.2923694 0.4225323 0.2563458 0.2216340 0.2499606
##             AEP       AMP
## RI    0.3021701 0.3219612
## CT    0.2865938 0.2923694
## SE    0.3622688 0.4225323
## ESL   0.2681362 0.2563458
## FCT   0.1777522 0.2216340
## Trust 0.2368743 0.2499606
## AEP   1.0000000 0.9359562
## AMP   0.9359562 1.0000000
CC1 <- cc(pred, crit)
CC1$cor
## [1] 0.4605990 0.1844779
CC1[3:4]
## $xcoef
##              [,1]       [,2]
## RI     -2.6494684  -5.518006
## CT      0.4061550   2.362393
## SE    -15.5582870   6.324098
## ESL    -6.2887145 -14.943308
## FCT     0.3670306   7.143441
## Trust  11.4182399  10.200002
## 
## $ycoef
##          [,1]      [,2]
## AEP  0.857947 -7.763662
## AMP -2.733250  5.382202
CC2 <- comput(pred, crit, CC1)
CC2[3:6]
## $corr.X.xscores
##             [,1]       [,2]
## RI    -0.6942150 -0.2043748
## CT    -0.6221488 -0.3722150
## SE    -0.9342900  0.2564751
## ESL   -0.5340775 -0.5842506
## FCT   -0.4983808  0.3223009
## Trust -0.5374234 -0.1935407
## 
## $corr.Y.xscores
##           [,1]        [,2]
## AEP -0.4106778 -0.08352988
## AMP -0.4578121 -0.02026293
## 
## $corr.X.yscores
##             [,1]        [,2]
## RI    -0.3197547 -0.03770262
## CT    -0.2865611 -0.06866542
## SE    -0.4303331  0.04731399
## ESL   -0.2459956 -0.10778131
## FCT   -0.2295537  0.05945739
## Trust -0.2475367 -0.03570397
## 
## $corr.Y.yscores
##           [,1]       [,2]
## AEP -0.8916168 -0.4527908
## AMP -0.9939494 -0.1098394
rho <- CC1$cor
n <- dim(pred)[1]
p <- length(pred)
q <- length(crit)
p.asym(rho, n, p, q, tstat = "Wilks")
## Wilks' Lambda, using F-approximation (Rao's F):
##               stat    approx df1  df2          p.value
## 1 to 2:  0.7610364 29.478912  12 2418 0.00000000000000
## 2 to 2:  0.9659679  8.525919   5 1210 0.00000006035999
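#p.asym() also accepts other test statistics; a sketch with the same rho, n, p, q:
p.asym(rho, n, p, q, tstat = "Hotelling")
p.asym(rho, n, p, q, tstat = "Pillai")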
#Standardized canonical coefficients: scale the raw coefficients by the variables' standard deviations
s1 <- diag(sqrt(diag(cov(pred))))
s1 %*% CC1$xcoef
##            [,1]       [,2]
## [1,] -0.1853930 -0.3861150
## [2,]  0.0303112  0.1763045
## [3,] -1.0133588  0.4119078
## [4,] -0.6179166 -1.4682998
## [5,]  0.0229423  0.4465212
## [6,]  0.6981402  0.6236540
s2 <- diag(sqrt(diag(cov(crit))))
s2 %*% CC1$ycoef
##            [,1]      [,2]
## [1,]  0.3119404 -2.822786
## [2,] -1.2859119  2.532164