setwd("C:/Work Files/Dissertations and Theses/Zenaida Dissertation")
# Set CRAN mirror directly (e.g., USA)
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# Install and load the polycor package
if (!require(polycor)) {
install.packages("polycor")
}
## Loading required package: polycor
library(polycor)
Bring in the data
Zenaida.Data <-read.csv("CBT11.6.23CO.csv")
names(Zenaida.Data)
## [1] "CBvN" "PCL" "IH" "CB_Unaware" "INCOME"
## [6] "EDU" "DUM_REL" "AGET"
run correlations between PCL and covariates.
First run pearson corr,
covariate.corr <-subset(Zenaida.Data[,c(2,5:7)])
correlation_matrix <- cor(covariate.corr, method = "pearson")
print(correlation_matrix)
## PCL INCOME EDU DUM_REL
## PCL 1.00000000 -0.1135630 -0.19897485 0.08215935
## INCOME -0.11356301 1.0000000 0.39539027 0.12573443
## EDU -0.19897485 0.3953903 1.00000000 0.05793602
## DUM_REL 0.08215935 0.1257344 0.05793602 1.00000000
…then run polychoric correlations. Because there are so many categories R is reading this data as integer / continuous and defaulting to Pearson Corr. So the data needs to be restructured. With so many categories I am going to make a table in order to define the right number of levels.
table(covariate.corr$INCOME)
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 23 22 14 18 11 14 11 11 3 1 11 8
table(covariate.corr$EDU)
##
## 1 2 3 4 5 6 7
## 1 14 26 6 73 22 5
table(covariate.corr$DUM_REL)
##
## 0 1
## 122 25
Now I can create a set of orderd categories in order to calculate the polychoric correlations.
covariate.corr$INCOME<-ordered(covariate.corr$INCOME, levels =c(1:12))
covariate.corr$EDU<-ordered(covariate.corr$EDU,levels = c(1:7))
covariate.corr$DUM_REL <-ordered(covariate.corr$DUM_REL, levels = c(0,1),
labels = c('Not Religious', 'Religious'))
str(covariate.corr)
## 'data.frame': 147 obs. of 4 variables:
## $ PCL : num 1.5 1.8 0.5 3.9 1.4 3.3 1.3 1.9 4.1 0.2 ...
## $ INCOME : Ord.factor w/ 12 levels "1"<"2"<"3"<"4"<..: 7 1 12 1 4 7 11 6 1 1 ...
## $ EDU : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 3 5 3 5 5 5 3 1 2 ...
## $ DUM_REL: Ord.factor w/ 2 levels "Not Religious"<..: 1 1 1 1 1 1 1 1 1 1 ...
Now I can run the polychoric correlations
polychoric_corr <- hetcor(covariate.corr,cor.method = 'polychoric')
## Warning in FUN(X[[i]], ...): polychoric correlation between variables INCOME and EDU produced warnings:
## NaNs produced
## NaNs produced
## NaNs produced
print("Polychoric Correlation Matrix:")
## [1] "Polychoric Correlation Matrix:"
print(polychoric_corr$correlations)
## PCL INCOME EDU DUM_REL
## PCL 1.0000000 -0.1045444 -0.1921542 0.1135223
## INCOME -0.1045444 1.0000000 0.4491525 0.2070088
## EDU -0.1921542 0.4491525 1.0000000 0.1023163
## DUM_REL 0.1135223 0.2070088 0.1023163 1.0000000
print("Polychoric Correlation Summary:")
## [1] "Polychoric Correlation Summary:"
print(polychoric_corr)
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## PCL INCOME EDU DUM_REL
## PCL 1 Polyserial Polyserial Polyserial
## INCOME -0.1045 1 Polychoric Polychoric
## EDU -0.1922 0.4492 1 Polychoric
## DUM_REL 0.1135 0.207 0.1023 1
##
## Standard Errors:
## PCL INCOME EDU
## PCL
## INCOME 0.08351
## EDU 0.08302 0.06778
## DUM_REL 0.1166 0.1211 0.1262
##
## n = 147
##
## P-values for Tests of Bivariate Normality:
## PCL INCOME EDU
## PCL
## INCOME 7.674e-08
## EDU 0.08049 0.6032
## DUM_REL 0.1137 0.9175 0.8453
If you compare the pearson correlations and the polychoric correlations you can see that there is not any meaningful difference and none of these potential covariates are significant.