library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(corrplot)
## corrplot 0.92 loaded
library(dagitty)
d <- read.csv("HAVANA_ModelData.csv")
dfMatrix <- as.matrix(d)
rcorr(dfMatrix, type = "pearson")
## Age PCS_Total Interference.Total IEQ_Total MeanNPRS lnHFP
## Age 1.00 -0.05 0.02 0.11 -0.17 -0.31
## PCS_Total -0.05 1.00 0.56 0.57 0.37 0.03
## Interference.Total 0.02 0.56 1.00 0.57 0.19 0.01
## IEQ_Total 0.11 0.57 0.57 1.00 0.14 -0.12
## MeanNPRS -0.17 0.37 0.19 0.14 1.00 0.24
## lnHFP -0.31 0.03 0.01 -0.12 0.24 1.00
##
## n= 94
##
##
## P
## Age PCS_Total Interference.Total IEQ_Total MeanNPRS
## Age 0.6153 0.8441 0.2980 0.1042
## PCS_Total 0.6153 0.0000 0.0000 0.0003
## Interference.Total 0.8441 0.0000 0.0000 0.0678
## IEQ_Total 0.2980 0.0000 0.0000 0.1794
## MeanNPRS 0.1042 0.0003 0.0678 0.1794
## lnHFP 0.0024 0.7465 0.8970 0.2341 0.0186
## lnHFP
## Age 0.0024
## PCS_Total 0.7465
## Interference.Total 0.8970
## IEQ_Total 0.2341
## MeanNPRS 0.0186
## lnHFP
f<-cor(d)
corrplot(f, method = 'ellipse', type = 'upper')

g <- downloadGraph("dagitty.net/mMXvNmD")
plot(g)

# evaluate d-separation implications of the dag
# - note - got an error when tried to run with the latent variable so I deleted it from the model for now
# - second note - still got the error, turns out that when you export a csv from excel it has the bad habit of adding rows with 'NA' data -
r <- localTests(g,d)
# Bonferroni correction
r$p.value <- p.adjust(r$p.value)
# focus on tests with p-values below a threshold
r_threshold <- r[r$p.value < 0.05,]
# plot all results - regardless of threshold
plotLocalTestResults(r)

# plot results - meeting threshold -
# note - not many of these tests are significant - I'm still working on what this all means
plotLocalTestResults(r_threshold)

Next steps
- Figure out what the above means
- Consider regression (based on above DAG analysis) - vs. SEM (with lavaan package, my next attempt…)
Regression - just for kicks
regression_model <- lm(lnHFP ~ MeanNPRS + PCS_Total + IEQ_Total + Interference.Total + Age, d)
summary(regression_model)
##
## Call:
## lm(formula = lnHFP ~ MeanNPRS + PCS_Total + IEQ_Total + Interference.Total +
## Age, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0310 -1.0158 0.0572 1.0806 3.6953
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.544002 0.832869 7.857 9.06e-12 ***
## MeanNPRS 0.237392 0.121695 1.951 0.0543 .
## PCS_Total -0.001108 0.032183 -0.034 0.9726
## IEQ_Total -0.033796 0.026580 -1.272 0.2069
## Interference.Total 0.023319 0.038730 0.602 0.5487
## Age -0.053693 0.020972 -2.560 0.0122 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.55 on 88 degrees of freedom
## Multiple R-squared: 0.1521, Adjusted R-squared: 0.1039
## F-statistic: 3.157 on 5 and 88 DF, p-value: 0.01143