library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(corrplot)
## corrplot 0.92 loaded
library(dagitty)
d <- read.csv("HAVANA_ModelData.csv")
dfMatrix <- as.matrix(d)
rcorr(dfMatrix, type = "pearson")
##                      Age PCS_Total Interference.Total IEQ_Total MeanNPRS lnHFP
## Age                 1.00     -0.05               0.02      0.11    -0.17 -0.31
## PCS_Total          -0.05      1.00               0.56      0.57     0.37  0.03
## Interference.Total  0.02      0.56               1.00      0.57     0.19  0.01
## IEQ_Total           0.11      0.57               0.57      1.00     0.14 -0.12
## MeanNPRS           -0.17      0.37               0.19      0.14     1.00  0.24
## lnHFP              -0.31      0.03               0.01     -0.12     0.24  1.00
## 
## n= 94 
## 
## 
## P
##                    Age    PCS_Total Interference.Total IEQ_Total MeanNPRS
## Age                       0.6153    0.8441             0.2980    0.1042  
## PCS_Total          0.6153           0.0000             0.0000    0.0003  
## Interference.Total 0.8441 0.0000                       0.0000    0.0678  
## IEQ_Total          0.2980 0.0000    0.0000                       0.1794  
## MeanNPRS           0.1042 0.0003    0.0678             0.1794            
## lnHFP              0.0024 0.7465    0.8970             0.2341    0.0186  
##                    lnHFP 
## Age                0.0024
## PCS_Total          0.7465
## Interference.Total 0.8970
## IEQ_Total          0.2341
## MeanNPRS           0.0186
## lnHFP
f<-cor(d)
corrplot(f, method = 'ellipse', type = 'upper')

g <- downloadGraph("dagitty.net/mMXvNmD")
plot(g)

# evaluate d-separation implications of the dag
# - note - got an error when tried to run with the latent variable so I deleted it from the model for now
# - second note - still got the error, turns out that when you export a csv from excel it has the bad habit of adding rows with 'NA' data - 

r <- localTests(g,d)

# Bonferroni correction
r$p.value <- p.adjust(r$p.value)

# focus on tests with p-values below a threshold

r_threshold <- r[r$p.value < 0.05,]
# plot all results - regardless of threshold
plotLocalTestResults(r)

# plot results - meeting threshold - 

# note - not many of these tests are significant - I'm still working on what this all means
plotLocalTestResults(r_threshold)

Next steps

  1. Figure out what the above means
  2. Consider regression (based on above DAG analysis) - vs. SEM (with lavaan package, my next attempt…)

Regression - just for kicks

regression_model <- lm(lnHFP ~ MeanNPRS + PCS_Total + IEQ_Total + Interference.Total + Age, d)
summary(regression_model)
## 
## Call:
## lm(formula = lnHFP ~ MeanNPRS + PCS_Total + IEQ_Total + Interference.Total + 
##     Age, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0310 -1.0158  0.0572  1.0806  3.6953 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         6.544002   0.832869   7.857 9.06e-12 ***
## MeanNPRS            0.237392   0.121695   1.951   0.0543 .  
## PCS_Total          -0.001108   0.032183  -0.034   0.9726    
## IEQ_Total          -0.033796   0.026580  -1.272   0.2069    
## Interference.Total  0.023319   0.038730   0.602   0.5487    
## Age                -0.053693   0.020972  -2.560   0.0122 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.55 on 88 degrees of freedom
## Multiple R-squared:  0.1521, Adjusted R-squared:  0.1039 
## F-statistic: 3.157 on 5 and 88 DF,  p-value: 0.01143