Import Data and creating a new data frame that only contains the needed variables
library(rio)
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(CCA)
## Loading required package: fda
## Warning: package 'fda' was built under R version 3.6.2
## Loading required package: Matrix
##
## Attaching package: 'fda'
## The following object is masked from 'package:graphics':
##
## matplot
## Loading required package: fields
## Warning: package 'fields' was built under R version 3.6.2
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.5-1 (2019-12-12) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following object is masked from 'package:Matrix':
##
## det
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## See https://github.com/NCAR/Fields for
## an extensive vignette, other supplements and source code
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:GGally':
##
## nasa
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(CCP)
library(candisc)
## Warning: package 'candisc' was built under R version 3.6.2
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: heplots
##
## Attaching package: 'candisc'
## The following object is masked from 'package:stats':
##
## cancor
library(lessR)
##
## lessR 3.8.9 feedback: gerbing@pdx.edu web: lessRstats.com/new
## ---------------------------------------------------------------------
## 1. d <- Read("") Read text, Excel, SPSS, SAS or R data file
## d: default data frame, no need for data=
## 2. l <- Read("", var_labels=TRUE) Read variable labels into l,
## required name for data frame of labels
## 3. Help() Get help, and, e.g., Help(Read)
## 4. hs(), bc(), or ca() All histograms, all bar charts, or both
## 5. Plot(X) or Plot(X,Y) For continuous and categorical variables
## 6. by1= , by2= Trellis graphics, a plot for each by1, by2
## 7. reg(Y ~ X, Rmd="eg") Regression with full interpretative output
## 8. style("gray") Grayscale theme, + many others available
## style(show=TRUE) all color/style options and current values
## 9. getColors() create many styles of color palettes
##
## lessR parameter names now use _'s. Names with a period are deprecated.
## Ex: bin_width instead of bin.width
##
## Attaching package: 'lessR'
## The following objects are masked from 'package:car':
##
## bc, Recode, sp
FullData <- read.csv("/Users/Lorraine/Desktop/Project 2.csv")
PJ2 <- subset(FullData, select=c(Rigorous.Instruction.., Collaborative.Teachers.., Supportive.Environment.., Effective.School.Leadership.., Strong.Family.Community.Ties.., Trust.., Average.ELA.Proficiency, Average.Math.Proficiency))
Change the variables from factors to numerics. Let’s change percentages to decimals. In order to change factors to numerics, we need to change them to characters first.
PJ2$Rigorous.Instruction.. <- as.numeric(sub("%", "",PJ2$Rigorous.Instruction..,fixed=TRUE))/100
PJ2$Collaborative.Teachers.. <- as.numeric(sub("%", "",PJ2$Collaborative.Teachers..,fixed=TRUE))/100
PJ2$Supportive.Environment.. <- as.numeric(sub("%", "",PJ2$Supportive.Environment..,fixed=TRUE))/100
PJ2$Effective.School.Leadership.. <- as.numeric(sub("%", "",PJ2$Effective.School.Leadership..,fixed=TRUE))/100
PJ2$Strong.Family.Community.Ties.. <- as.numeric(sub("%", "",PJ2$Strong.Family.Community.Ties..,fixed=TRUE))/100
PJ2$Trust.. <- as.numeric(sub("%", "",PJ2$Trust.. ,fixed=TRUE))/100
PJ2$Average.ELA.Proficiency <-as.character(PJ2$Average.ELA.Proficiency)
PJ2$Average.ELA.Proficiency <- as.numeric(PJ2$Average.ELA.Proficiency)
PJ2$Average.Math.Proficiency <- as.character(PJ2$Average.Math.Proficiency)
PJ2$Average.Math.Proficiency <- as.numeric(PJ2$Average.Math.Proficiency)
Running basic descriptives on the data
summary(PJ2)
## Rigorous.Instruction.. Collaborative.Teachers.. Supportive.Environment..
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8600 1st Qu.:0.8500 1st Qu.:0.8400
## Median :0.9000 Median :0.9000 Median :0.8900
## Mean :0.8948 Mean :0.8844 Mean :0.8875
## 3rd Qu.:0.9400 3rd Qu.:0.9400 3rd Qu.:0.9400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :25 NA's :25 NA's :25
## Effective.School.Leadership.. Strong.Family.Community.Ties..
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.7600 1st Qu.:0.8000
## Median :0.8300 Median :0.8300
## Mean :0.8162 Mean :0.8309
## 3rd Qu.:0.8900 3rd Qu.:0.8700
## Max. :0.9900 Max. :0.9900
## NA's :25 NA's :25
## Trust.. Average.ELA.Proficiency Average.Math.Proficiency
## Min. :0.0000 Min. :1.810 Min. :1.830
## 1st Qu.:0.8700 1st Qu.:2.250 1st Qu.:2.300
## Median :0.9200 Median :2.450 Median :2.580
## Mean :0.9042 Mean :2.534 Mean :2.669
## 3rd Qu.:0.9400 3rd Qu.:2.760 3rd Qu.:2.980
## Max. :1.0000 Max. :3.930 Max. :4.200
## NA's :25 NA's :55 NA's :55
sum(is.na(PJ2)) #260 missing data, less than 5% of the total data, run lisewise deletion
## [1] 260
PJ2 <- na.omit(PJ2)
summary(PJ2)
## Rigorous.Instruction.. Collaborative.Teachers.. Supportive.Environment..
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8600 1st Qu.:0.8500 1st Qu.:0.8400
## Median :0.9000 Median :0.9000 Median :0.8900
## Mean :0.8939 Mean :0.8832 Mean :0.8854
## 3rd Qu.:0.9400 3rd Qu.:0.9300 3rd Qu.:0.9300
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## Effective.School.Leadership.. Strong.Family.Community.Ties..
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.7600 1st Qu.:0.8000
## Median :0.8300 Median :0.8300
## Mean :0.8146 Mean :0.8296
## 3rd Qu.:0.8800 3rd Qu.:0.8700
## Max. :0.9900 Max. :0.9900
## Trust.. Average.ELA.Proficiency Average.Math.Proficiency
## Min. :0.0000 Min. :1.810 Min. :1.830
## 1st Qu.:0.8700 1st Qu.:2.250 1st Qu.:2.300
## Median :0.9100 Median :2.450 Median :2.580
## Mean :0.9031 Mean :2.534 Mean :2.669
## 3rd Qu.:0.9400 3rd Qu.:2.760 3rd Qu.:2.980
## Max. :1.0000 Max. :3.930 Max. :4.200
boxplot(PJ2$Average.Math.Proficiency)

boxplot.stats(PJ2$Average.Math.Proficiency)$out
## [1] 4.03 4.15 4.19 4.07 4.16 4.20
#Only Average Math Proficiency has outliers(statistically defined). However, I do not have more information to determine whether they are actual outliers given its practical context. Also, there are only 6 of them. Therefore, I am keeping them in my data.
Test Kurtosis and Skewness
library(moments)
skewness(PJ2)
## Rigorous.Instruction.. Collaborative.Teachers..
## -5.3958196 -2.2033102
## Supportive.Environment.. Effective.School.Leadership..
## -2.2939757 -1.2900541
## Strong.Family.Community.Ties.. Trust..
## -2.0293937 -3.4608423
## Average.ELA.Proficiency Average.Math.Proficiency
## 0.9040115 0.6586586
kurtosis(PJ2)
## Rigorous.Instruction.. Collaborative.Teachers..
## 66.967941 19.500977
## Supportive.Environment.. Effective.School.Leadership..
## 29.949557 7.561101
## Strong.Family.Community.Ties.. Trust..
## 27.772212 42.435323
## Average.ELA.Proficiency Average.Math.Proficiency
## 3.488466 2.806103
agostino.test(PJ2$Rigorous.Instruction.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Rigorous.Instruction..
## skew = -5.3958, z = -30.4398, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Collaborative.Teachers.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Collaborative.Teachers..
## skew = -2.2033, z = -20.1892, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Supportive.Environment.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Supportive.Environment..
## skew = -2.294, z = -20.636, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Effective.School.Leadership.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Effective.School.Leadership..
## skew = -1.2901, z = -14.5295, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Strong.Family.Community.Ties.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Strong.Family.Community.Ties..
## skew = -2.0294, z = -19.2846, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Trust.., alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Trust..
## skew = -3.4608, z = -25.2918, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Average.ELA.Proficiency, alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Average.ELA.Proficiency
## skew = 0.90401, z = 11.19746, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
agostino.test(PJ2$Average.Math.Proficiency, alternative = c("two.sided", "less", "greater"))
##
## D'Agostino skewness test
##
## data: PJ2$Average.Math.Proficiency
## skew = 0.65866, z = 8.64434, p-value < 0.00000000000000022
## alternative hypothesis: data have a skewness
# D'Agostino skewness test indicates that all variables are significantly skewed, with two criterion variable moderately skewed and the predictor variables highly skewed. Since the criterion variables are moderately skewed, I would not transform any of the variables.
Mutiple regression analysis with Average ELA Proficiency as the criterion variable
#renaming all variables so the codes could be ran without a problem
library(reshape) #detached package dplyr
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following object is masked from 'package:Matrix':
##
## expand
PJ2 <- rename(PJ2,c(Rigorous.Instruction..="RI",Collaborative.Teachers..="CT",Supportive.Environment..="SE", Effective.School.Leadership..="ESL",Strong.Family.Community.Ties.. ="FCT",Trust..="Trust", Average.ELA.Proficiency = "AEP", Average.Math.Proficiency = "AMP"))
#Run regression using the lm function
Reg1 <- lm(AEP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
summary(Reg1)
##
## Call:
## lm(formula = AEP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8227 -0.2367 -0.0515 0.1830 1.4328
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1712 0.1787 6.553 0.0000000000831
## RI 0.5632 0.1891 2.978 0.00296
## CT -0.1324 0.3721 -0.356 0.72202
## SE 2.1311 0.2264 9.413 < 0.0000000000000002
## ESL 1.3929 0.2513 5.543 0.0000000364993
## FCT -0.2718 0.1903 -1.428 0.15361
## Trust -2.0147 0.3856 -5.225 0.0000002044152
##
## Residual standard error: 0.3309 on 1210 degrees of freedom
## Multiple R-squared: 0.1756, Adjusted R-squared: 0.1715
## F-statistic: 42.97 on 6 and 1210 DF, p-value: < 0.00000000000000022
#Hierarchical Regression
Model1 <- lm(AEP ~ RI, data = PJ2)
summary(Model1)
##
## Call:
## lm(formula = AEP ~ RI, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68372 -0.26241 -0.06811 0.20628 1.42750
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1307 0.1274 8.875 <0.0000000000000002
## RI 1.5701 0.1421 11.049 <0.0000000000000002
##
## Residual standard error: 0.3467 on 1215 degrees of freedom
## Multiple R-squared: 0.09131, Adjusted R-squared: 0.09056
## F-statistic: 122.1 on 1 and 1215 DF, p-value: < 0.00000000000000022
Model2 <- lm(AEP ~ RI + CT, data = PJ2)
Model3 <- lm(AEP ~ RI + CT + SE, data = PJ2)
Model4 <- lm(AEP ~ RI + CT + SE + ESL, data = PJ2)
Model5 <- lm(AEP ~ RI + CT + SE + ESL + FCT, data = PJ2)
#Testing change in R-suqare
rsq.delta1 <- summary(Model2)$r.sqiared - summary(Model1)$r.squared
rsq.delta1
## numeric(0)
anova(Model1, Model2, test = "F")
## Analysis of Variance Table
##
## Model 1: AEP ~ RI
## Model 2: AEP ~ RI + CT
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1215 146.07
## 2 1214 143.43 1 2.6457 22.394 0.000002482
rsq.delta2 <- summary(Model3)$r.sqiared - summary(Model2)$r.squared
rsq.delta2
## numeric(0)
anova(Model2, Model3, test = "F")
## Analysis of Variance Table
##
## Model 1: AEP ~ RI + CT
## Model 2: AEP ~ RI + CT + SE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1214 143.43
## 2 1213 137.52 1 5.9056 52.09 0.0000000000009325
rsq.delta3 <- summary(Model4)$r.sqiared - summary(Model3)$r.squared
rsq.delta3
## numeric(0)
anova(Model3, Model4, test = "F")
## Analysis of Variance Table
##
## Model 1: AEP ~ RI + CT + SE
## Model 2: AEP ~ RI + CT + SE + ESL
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1213 137.52
## 2 1212 136.08 1 1.4396 12.822 0.0003562
rsq.delta4 <- summary(Model5)$r.sqiared - summary(Model4)$r.squared
rsq.delta4
## numeric(0)
anova(Model4, Model5, test = "F")
## Analysis of Variance Table
##
## Model 1: AEP ~ RI + CT + SE + ESL
## Model 2: AEP ~ RI + CT + SE + ESL + FCT
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1212 136.08
## 2 1211 135.51 1 0.57427 5.1321 0.02366
rsq.delta5 <- summary(Reg1)$r.sqiared - summary(Model5)$r.squared
rsq.delta5
## numeric(0)
anova(Model5, Reg1, test = "F")
## Analysis of Variance Table
##
## Model 1: AEP ~ RI + CT + SE + ESL + FCT
## Model 2: AEP ~ RI + CT + SE + ESL + FCT + Trust
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1211 135.51
## 2 1210 132.52 1 2.9905 27.306 0.0000002044
#checking for multicollinearity
vif(Reg1)
## RI CT SE ESL FCT Trust
## 1.943860 8.560109 2.414204 6.769008 1.571616 6.170379
#multiple regression without CT and FCT
New <- lm(AEP ~ RI + SE + ESL + Trust, data = PJ2)
summary(New)
##
## Call:
## lm(formula = AEP ~ RI + SE + ESL + Trust, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81437 -0.23605 -0.04935 0.18320 1.42567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1098 0.1736 6.392 0.0000000002331
## RI 0.5202 0.1772 2.936 0.00339
## SE 2.0304 0.2149 9.447 < 0.0000000000000002
## ESL 1.3450 0.1969 6.831 0.0000000000133
## Trust -2.1415 0.3599 -5.950 0.0000000035089
##
## Residual standard error: 0.331 on 1212 degrees of freedom
## Multiple R-squared: 0.1742, Adjusted R-squared: 0.1715
## F-statistic: 63.91 on 4 and 1212 DF, p-value: < 0.00000000000000022
Mutiple regression analysis with Average Math Proficiency as the criterion variable
#Run regression using the lm function
Reg2 <- lm(AMP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
summary(Reg2)
##
## Call:
## lm(formula = AMP ~ RI + CT + SE + ESL + FCT + Trust, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9596 -0.3114 -0.0492 0.2806 2.3432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5068 0.2264 2.239 0.02534
## RI 0.6233 0.2395 2.602 0.00938
## CT -0.1100 0.4713 -0.233 0.81548
## SE 3.2908 0.2868 11.475 < 0.0000000000000002
## ESL 1.4970 0.3183 4.703 0.000002860
## FCT -0.1472 0.2411 -0.610 0.54175
## Trust -2.5566 0.4884 -5.235 0.000000195
##
## Residual standard error: 0.4192 on 1210 degrees of freedom
## Multiple R-squared: 0.21, Adjusted R-squared: 0.2061
## F-statistic: 53.61 on 6 and 1210 DF, p-value: < 0.00000000000000022
#Hierarchical Regression
Model6 <- lm(AMP ~ RI, data = PJ2)
summary(Model6)
##
## Call:
## lm(formula = AMP ~ RI, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86372 -0.35219 -0.07913 0.27616 2.11605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7339 0.1637 4.482 0.00000808
## RI 2.1647 0.1826 11.854 < 0.0000000000000002
##
## Residual standard error: 0.4456 on 1215 degrees of freedom
## Multiple R-squared: 0.1037, Adjusted R-squared: 0.1029
## F-statistic: 140.5 on 1 and 1215 DF, p-value: < 0.00000000000000022
Model7 <- lm(AMP ~ RI + CT, data = PJ2)
Model8 <- lm(AMP ~ RI + CT + SE, data = PJ2)
Model9 <- lm(AMP ~ RI + CT + SE + ESL, data = PJ2)
Model10 <- lm(AMP ~ RI + CT + SE + ESL + FCT, data = PJ2)
#Testing change in R-suqare
rsq.delta6 <- summary(Model7)$r.sqiared - summary(Model6)$r.squared
rsq.delta6
## numeric(0)
anova(Model6, Model7, test = "F")
## Analysis of Variance Table
##
## Model 1: AMP ~ RI
## Model 2: AMP ~ RI + CT
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1215 241.25
## 2 1214 237.36 1 3.8916 19.904 0.000008899
rsq.delta7 <- summary(Model8)$r.sqiared - summary(Model7)$r.squared
rsq.delta7
## numeric(0)
anova(Model7, Model8, test = "F")
## Analysis of Variance Table
##
## Model 1: AMP ~ RI + CT
## Model 2: AMP ~ RI + CT + SE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1214 237.36
## 2 1213 219.16 1 18.203 100.75 < 0.00000000000000022
rsq.delta8 <- summary(Model9)$r.sqiared - summary(Model8)$r.squared
rsq.delta8
## numeric(0)
anova(Model8, Model9, test = "F")
## Analysis of Variance Table
##
## Model 1: AMP ~ RI + CT + SE
## Model 2: AMP ~ RI + CT + SE + ESL
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1213 219.16
## 2 1212 217.82 1 1.3369 7.4385 0.006476
rsq.delta9 <- summary(Model10)$r.sqiared - summary(Model9)$r.squared
rsq.delta9
## numeric(0)
anova(Model9, Model10, test = "F")
## Analysis of Variance Table
##
## Model 1: AMP ~ RI + CT + SE + ESL
## Model 2: AMP ~ RI + CT + SE + ESL + FCT
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1212 217.82
## 2 1211 217.44 1 0.37631 2.0957 0.148
rsq.delta10 <- summary(Reg2)$r.sqiared - summary(Model10)$r.squared
rsq.delta10
## numeric(0)
anova(Model10, Reg2, test = "F")
## Analysis of Variance Table
##
## Model 1: AMP ~ RI + CT + SE + ESL + FCT
## Model 2: AMP ~ RI + CT + SE + ESL + FCT + Trust
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1211 217.44
## 2 1210 212.63 1 4.8153 27.402 0.0000001946
#multiple regression without FCT
New1 <- lm(AMP ~ RI + CT + SE + ESL + Trust, data = PJ2)
summary(New1)
##
## Call:
## lm(formula = AMP ~ RI + CT + SE + ESL + Trust, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97225 -0.31364 -0.04711 0.27974 2.37584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4742 0.2199 2.156 0.0313
## RI 0.6109 0.2386 2.560 0.0106
## CT -0.1017 0.4710 -0.216 0.8291
## SE 3.2418 0.2752 11.778 < 0.0000000000000002
## ESL 1.4977 0.3182 4.706 0.0000028118
## Trust -2.6041 0.4820 -5.402 0.0000000791
##
## Residual standard error: 0.4191 on 1211 degrees of freedom
## Multiple R-squared: 0.2098, Adjusted R-squared: 0.2065
## F-statistic: 64.29 on 5 and 1211 DF, p-value: < 0.00000000000000022
New2 <- lm(AMP ~ RI + SE + ESL + Trust, data = PJ2)
summary(New2)
##
## Call:
## lm(formula = AMP ~ RI + SE + ESL + Trust, data = PJ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97766 -0.31238 -0.04729 0.27784 2.37673
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4733 0.2198 2.153 0.03148
## RI 0.5934 0.2243 2.646 0.00825
## SE 3.2329 0.2720 11.884 < 0.0000000000000002
## ESL 1.4550 0.2492 5.838 0.00000000677
## Trust -2.6380 0.4556 -5.790 0.00000000895
##
## Residual standard error: 0.4189 on 1212 degrees of freedom
## Multiple R-squared: 0.2097, Adjusted R-squared: 0.2071
## F-statistic: 80.41 on 4 and 1212 DF, p-value: < 0.00000000000000022
Canonical Regression
#Identifying predictor and criterion variables, library dplyr
pred <- PJ2 %>% select(RI, CT, SE, ESL, FCT, Trust)
crit <- PJ2 %>% select(AEP, AMP)
ggpairs(pred) #some high correlations

ggpairs(crit) #high correlation

matcor(pred, crit)
## $Xcor
## RI CT SE ESL FCT Trust
## RI 1.0000000 0.6130197 0.6048823 0.4726465 0.4252463 0.5269134
## CT 0.6130197 1.0000000 0.6019965 0.8992480 0.4707950 0.8824691
## SE 0.6048823 0.6019965 1.0000000 0.4597990 0.5605202 0.6362909
## ESL 0.4726465 0.8992480 0.4597990 1.0000000 0.4073891 0.8605750
## FCT 0.4252463 0.4707950 0.5605202 0.4073891 1.0000000 0.5199311
## Trust 0.5269134 0.8824691 0.6362909 0.8605750 0.5199311 1.0000000
##
## $Ycor
## AEP AMP
## AEP 1.0000000 0.9359562
## AMP 0.9359562 1.0000000
##
## $XYcor
## RI CT SE ESL FCT Trust
## RI 1.0000000 0.6130197 0.6048823 0.4726465 0.4252463 0.5269134
## CT 0.6130197 1.0000000 0.6019965 0.8992480 0.4707950 0.8824691
## SE 0.6048823 0.6019965 1.0000000 0.4597990 0.5605202 0.6362909
## ESL 0.4726465 0.8992480 0.4597990 1.0000000 0.4073891 0.8605750
## FCT 0.4252463 0.4707950 0.5605202 0.4073891 1.0000000 0.5199311
## Trust 0.5269134 0.8824691 0.6362909 0.8605750 0.5199311 1.0000000
## AEP 0.3021701 0.2865938 0.3622688 0.2681362 0.1777522 0.2368743
## AMP 0.3219612 0.2923694 0.4225323 0.2563458 0.2216340 0.2499606
## AEP AMP
## RI 0.3021701 0.3219612
## CT 0.2865938 0.2923694
## SE 0.3622688 0.4225323
## ESL 0.2681362 0.2563458
## FCT 0.1777522 0.2216340
## Trust 0.2368743 0.2499606
## AEP 1.0000000 0.9359562
## AMP 0.9359562 1.0000000
CC1 <- cc(pred, crit)
CC1$cor
## [1] 0.4605990 0.1844779
CC1[3:4]
## $xcoef
## [,1] [,2]
## RI -2.6494684 -5.518006
## CT 0.4061550 2.362393
## SE -15.5582870 6.324098
## ESL -6.2887145 -14.943308
## FCT 0.3670306 7.143441
## Trust 11.4182399 10.200002
##
## $ycoef
## [,1] [,2]
## AEP 0.857947 -7.763662
## AMP -2.733250 5.382202
CC2 <- comput(pred, crit, CC1)
CC2[3:6]
## $corr.X.xscores
## [,1] [,2]
## RI -0.6942150 -0.2043748
## CT -0.6221488 -0.3722150
## SE -0.9342900 0.2564751
## ESL -0.5340775 -0.5842506
## FCT -0.4983808 0.3223009
## Trust -0.5374234 -0.1935407
##
## $corr.Y.xscores
## [,1] [,2]
## AEP -0.4106778 -0.08352988
## AMP -0.4578121 -0.02026293
##
## $corr.X.yscores
## [,1] [,2]
## RI -0.3197547 -0.03770262
## CT -0.2865611 -0.06866542
## SE -0.4303331 0.04731399
## ESL -0.2459956 -0.10778131
## FCT -0.2295537 0.05945739
## Trust -0.2475367 -0.03570397
##
## $corr.Y.yscores
## [,1] [,2]
## AEP -0.8916168 -0.4527908
## AMP -0.9939494 -0.1098394
rho <- CC1$cor
n <- dim(pred)[1]
p <- length(pred)
q <- length(crit)
p.asym(rho, n, p, q, tstat = "Wilks")
## Wilks' Lambda, using F-approximation (Rao's F):
## stat approx df1 df2 p.value
## 1 to 2: 0.7610364 29.478912 12 2418 0.00000000000000
## 2 to 2: 0.9659679 8.525919 5 1210 0.00000006035999
#standard coefficient for canonical regression
s1 <- diag(sqrt(diag(cov(pred))))
s1 %*% CC1$xcoef
## [,1] [,2]
## [1,] -0.1853930 -0.3861150
## [2,] 0.0303112 0.1763045
## [3,] -1.0133588 0.4119078
## [4,] -0.6179166 -1.4682998
## [5,] 0.0229423 0.4465212
## [6,] 0.6981402 0.6236540
s2 <- diag(sqrt(diag(cov(crit))))
s2 %*% CC1$ycoef
## [,1] [,2]
## [1,] 0.3119404 -2.822786
## [2,] -1.2859119 2.532164