#####################Read and Pre-Clean the Data#######################
require(Amelia)
## Loading required package: Amelia
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(car)
## Loading required package: carData
library(corrplot)
## corrplot 0.84 loaded
library(ggcorrplot)
## Loading required package: ggplot2
library(heplots)
library(kableExtra)
library(MANOVA.RM)
library(MASS)
library(MVN)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## sROC 0.1-2 loaded
library(mvtnorm)
require(psych) #to describe
## Loading required package: psych
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
require(ggplot2)
library(ggcorrplot)
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
require(ResourceSelection)
## Loading required package: ResourceSelection
## ResourceSelection 0.3-5 2019-07-22
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggcorrplot':
##
## cor_pmat
## The following object is masked from 'package:stats':
##
## filter
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x dplyr::filter() masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x dplyr::select() masks rstatix::select(), MASS::select()
## x purrr::some() masks car::some()
corfunction=function(d){
mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
print(myplot)}
mydata=read.csv("D:/PaulaEcho/global.csv", stringsAsFactors = TRUE)
colnames(mydata)
## [1] "PrePost" "Duration..in.seconds." "Date"
## [4] "Profession" "Specialization" "Practice"
## [7] "Years" "Knowledge" "Recent"
## [10] "Texas" "Cause" "Saliva"
## [13] "Location" "Prevalence" "Onset"
## [16] "Symptoms" "Diagnosis" "AntiT_Drugs"
## [19] "Prevention" "Score" "Age"
## [22] "Ethnicity" "Gender" "Screened"
## [25] "Tested" "MatchID"
#########################################################################
We have true missing for Quantitative Methods, as that section was recently added.
#########################################################################
missmap(mydata)
#########################################################################
#########################################################################
as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)
| Gender | % |
|---|---|
| Female | 77.17 |
| Male | 22.83 |
#########################################################################
#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%
kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)
| Ethnicity | % |
|---|---|
| Hispanic | 58.27 |
| Non-Hispanic | 38.19 |
| Non -Hispanic | 0.39 |
| Prefer not to respond | 3.15 |
#########################################################################
#########################################################################
as.data.frame(100*round(table(mydata$Age)/length(mydata$Age),4))%>%
kbl(col.names = c("Age", "%"))%>%kable_classic(full_width=F)
| Age | % |
|---|---|
| 18 to 24 | 5.51 |
| 25 to 34 | 18.90 |
| 35 to 44 | 26.38 |
| 45 to 64 | 41.73 |
| 65 and over | 6.30 |
| Prefer not to respond | 1.18 |
#########################################################################
#########################################################################
as.data.frame(100*round(table(mydata$Profession)/length(mydata$Profession),4))%>%
kbl(col.names = c("Profession", "%"))%>%kable_classic(full_width=F)
| Profession | % |
|---|---|
| CHW | 37.01 |
| DVM | 3.54 |
| MD/DO | 28.74 |
| NP/PA | 2.36 |
| Other | 28.35 |
#########################################################################
#########################################################################
as.data.frame(100*round(table(mydata$Specialization)/length(mydata$Specialization),4))%>%
kbl(col.names = c("Specialization", "%"))%>%kable_classic(full_width=F)
| Specialization | % |
|---|---|
| 42.91 | |
| Air Force idmt | 0.39 |
| Air Force IDMT | 0.39 |
| Biomedicine | 0.39 |
| Community health | 28.35 |
| Community health,Other | 0.39 |
| DC | 0.39 |
| Does not apply | 2.76 |
| economist | 0.39 |
| Economist | 0.39 |
| Epidemiologist | 1.18 |
| FSC | 0.39 |
| Infectious disease,Cardiology,General practitioner/Family physician,Community health | 0.79 |
| LVN | 0.39 |
| Microbiologist | 0.39 |
| MLS | 0.39 |
| Other | 1.18 |
| Other / Does not apply | 4.33 |
| Other,Does not apply | 0.39 |
| Ph.D. / Post-Grad / Faculty | 4.33 |
| PharmD / MPharm / Pharm Student | 4.72 |
| PT, MHA, PhD | 0.39 |
| Public Health | 1.57 |
| R&D Industry | 0.39 |
| RCS | 0.39 |
| Student | 1.57 |
| Zoonosis Control Program Specialist | 0.39 |
#########################################################################
#########################################################################
myt=table(mydata$PrePost,mydata$Knowledge)
myt=myt/rowSums(myt)
colnames(myt)=c("None", "Very Limited", "Limited", "Good", "Excellent")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
kbl()%>%kable_classic(full_width=F)
| None | Very Limited | Limited | Good | Excellent | |
|---|---|---|---|---|---|
| Pre | 12.5683 | 19.1257 | 32.2404 | 30.0546 | 6.0109 |
| Post | 0.0000 | 5.6338 | 21.1268 | 57.7465 | 15.4930 |
fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.010514
##
## Fisher's Exact Test for Count Data with simulated p-value (based on
## 2000 replicates)
##
## data: myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################
#########################################################################
myt=table(mydata$PrePost,mydata$Recent)
myt=myt/rowSums(myt)
colnames(myt)=c("Not at all Confident", "Somewhat Confident", "Confident", "Very Confident")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
kbl()%>%kable_classic(full_width=F)
| Not at all Confident | Somewhat Confident | Confident | Very Confident | |
|---|---|---|---|---|
| Pre | 40.9836 | 34.4262 | 17.4863 | 7.1038 |
| Post | 1.4085 | 16.9014 | 54.9296 | 26.7606 |
fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.009248
##
## Fisher's Exact Test for Count Data with simulated p-value (based on
## 2000 replicates)
##
## data: myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################
mydata$PrePost2=mydata$PrePost
mydata$PrePost=as.factor(mydata$PrePost)
levels(mydata$PrePost)=c("Pre", "Post")
boxplot(mydata$Score~mydata$PrePost, main="Knowledge Score, Pre vs. Post",notch=TRUE, col=c("red", "dark green"), horizontal = TRUE)
Test questions are uncorrelated.
#########################################################################
mycorr=cor(mydata[, c(10:19)])
corfunction(mycorr)
#########################################################################
We would expect poor performance on the pre-test scores. Students are likely to have only 3308 as the basis for knowledge.
#########################################################################
pre=round(describe(mydata[mydata$PrePost=="Pre",10:19]), 3)
pre%>%kbl(caption="Pre-Test")%>%kable_classic(full_width = F, html_font = "Cambria")
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Texas | 1 | 183 | 0.973 | 0.163 | 1 | 1.000 | 0 | 0 | 1 | 1 | -5.752 | 31.251 | 0.012 |
| Cause | 2 | 183 | 0.760 | 0.429 | 1 | 0.823 | 0 | 0 | 1 | 1 | -1.205 | -0.551 | 0.032 |
| Saliva | 3 | 183 | 0.361 | 0.482 | 0 | 0.327 | 0 | 0 | 1 | 1 | 0.576 | -1.678 | 0.036 |
| Location | 4 | 183 | 0.891 | 0.313 | 1 | 0.986 | 0 | 0 | 1 | 1 | -2.484 | 4.193 | 0.023 |
| Prevalence | 5 | 183 | 0.426 | 0.496 | 0 | 0.408 | 0 | 0 | 1 | 1 | 0.296 | -1.923 | 0.037 |
| Onset | 6 | 183 | 0.634 | 0.483 | 1 | 0.667 | 0 | 0 | 1 | 1 | -0.551 | -1.705 | 0.036 |
| Symptoms | 7 | 183 | 0.235 | 0.425 | 0 | 0.170 | 0 | 0 | 1 | 1 | 1.240 | -0.465 | 0.031 |
| Diagnosis | 8 | 183 | 0.585 | 0.494 | 1 | 0.605 | 0 | 0 | 1 | 1 | -0.341 | -1.894 | 0.037 |
| AntiT_Drugs | 9 | 183 | 0.694 | 0.462 | 1 | 0.741 | 0 | 0 | 1 | 1 | -0.835 | -1.310 | 0.034 |
| Prevention | 10 | 183 | 0.279 | 0.450 | 0 | 0.224 | 0 | 0 | 1 | 1 | 0.979 | -1.047 | 0.033 |
#########################################################################
We would hope that our work teaching the students resulted in better scores. These are the raw descriptives. We will look at pre-post later.
#########################################################################
post=round(describe(mydata[mydata$PrePost=="Post",10:19]), 3)
pre%>%kbl(caption="Post-Test")%>%kable_classic(full_width = F, html_font = "Cambria")
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Texas | 1 | 183 | 0.973 | 0.163 | 1 | 1.000 | 0 | 0 | 1 | 1 | -5.752 | 31.251 | 0.012 |
| Cause | 2 | 183 | 0.760 | 0.429 | 1 | 0.823 | 0 | 0 | 1 | 1 | -1.205 | -0.551 | 0.032 |
| Saliva | 3 | 183 | 0.361 | 0.482 | 0 | 0.327 | 0 | 0 | 1 | 1 | 0.576 | -1.678 | 0.036 |
| Location | 4 | 183 | 0.891 | 0.313 | 1 | 0.986 | 0 | 0 | 1 | 1 | -2.484 | 4.193 | 0.023 |
| Prevalence | 5 | 183 | 0.426 | 0.496 | 0 | 0.408 | 0 | 0 | 1 | 1 | 0.296 | -1.923 | 0.037 |
| Onset | 6 | 183 | 0.634 | 0.483 | 1 | 0.667 | 0 | 0 | 1 | 1 | -0.551 | -1.705 | 0.036 |
| Symptoms | 7 | 183 | 0.235 | 0.425 | 0 | 0.170 | 0 | 0 | 1 | 1 | 1.240 | -0.465 | 0.031 |
| Diagnosis | 8 | 183 | 0.585 | 0.494 | 1 | 0.605 | 0 | 0 | 1 | 1 | -0.341 | -1.894 | 0.037 |
| AntiT_Drugs | 9 | 183 | 0.694 | 0.462 | 1 | 0.741 | 0 | 0 | 1 | 1 | -0.835 | -1.310 | 0.034 |
| Prevention | 10 | 183 | 0.279 | 0.450 | 0 | 0.224 | 0 | 0 | 1 | 1 | 0.979 | -1.047 | 0.033 |
#########################################################################
We compare pre-means versus post-means
mydf=data.frame("Pre-test"=pre$mean, "Post-test"=post$mean)
rownames(mydf)=colnames(mydata[10:19])
mydf%>%kbl(caption="Comparison of Means")%>%kable_classic(full_width = F, html_font = "Cambria")
| Pre.test | Post.test | |
|---|---|---|
| Texas | 0.973 | 1.000 |
| Cause | 0.760 | 0.986 |
| Saliva | 0.361 | 0.634 |
| Location | 0.891 | 0.986 |
| Prevalence | 0.426 | 0.803 |
| Onset | 0.634 | 0.761 |
| Symptoms | 0.235 | 0.296 |
| Diagnosis | 0.585 | 0.507 |
| AntiT_Drugs | 0.694 | 0.887 |
| Prevention | 0.279 | 0.169 |
#########################################################################
myf=function(x,y){
myt=t.test(x, y)
newp=round(p.adjust(myt$p.value),3)
a=c(round(myt$estimate[1],3),round(myt$estimate[2],3),
round(myt$conf.int[1],3),
round(myt$conf.int[2],3),round(myt$statistic,3),
round(myt$parameter,3),round(newp,3))
return(a)
}
p1=mydata[mydata$PrePost=="Pre",]
p2=mydata[mydata$PrePost=="Post",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean Pre","Mean Post","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()
| Mean Pre | Mean Post | Lower 95% CI | Upper 95% CI | t-Value | df | Holm-Adjusted p | |
|---|---|---|---|---|---|---|---|
| Texas | 0.973 | 1.000 | -0.051 | -0.003 | -2.261 | 182.000 | 0.025 |
| Cause | 0.760 | 0.986 | -0.295 | -0.158 | -6.529 | 236.991 | 0.000 |
| Saliva | 0.361 | 0.634 | -0.407 | -0.139 | -4.035 | 126.606 | 0.000 |
| Location | 0.891 | 0.986 | -0.149 | -0.042 | -3.516 | 251.934 | 0.001 |
| Prevalence | 0.426 | 0.803 | -0.495 | -0.258 | -6.272 | 156.631 | 0.000 |
| Onset | 0.634 | 0.761 | -0.250 | -0.004 | -2.035 | 142.291 | 0.044 |
| Symptoms | 0.235 | 0.296 | -0.185 | 0.064 | -0.966 | 119.134 | 0.336 |
| Diagnosis | 0.585 | 0.507 | -0.061 | 0.216 | 1.109 | 125.353 | 0.270 |
| AntiT_Drugs | 0.694 | 0.887 | -0.294 | -0.093 | -3.795 | 183.890 | 0.000 |
| Prevention | 0.279 | 0.169 | -0.001 | 0.220 | 1.966 | 150.715 | 0.051 |
| Score | 0.584 | 0.703 | -0.162 | -0.076 | -5.407 | 184.400 | 0.000 |
p1=mydata[mydata$Profession=="CHW",]
p2=mydata[mydata$Profession!="CHW",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean CHW","Mean non-CHW","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()
| Mean CHW | Mean non-CHW | Lower 95% CI | Upper 95% CI | t-Value | df | Holm-Adjusted p | |
|---|---|---|---|---|---|---|---|
| Texas | 0.957 | 0.994 | -0.080 | 0.007 | -1.662 | 109.813 | 0.099 |
| Cause | 0.723 | 0.881 | -0.263 | -0.053 | -2.978 | 150.371 | 0.003 |
| Saliva | 0.468 | 0.419 | -0.079 | 0.177 | 0.761 | 192.875 | 0.448 |
| Location | 0.915 | 0.919 | -0.075 | 0.067 | -0.107 | 191.351 | 0.915 |
| Prevalence | 0.500 | 0.550 | -0.178 | 0.078 | -0.767 | 193.866 | 0.444 |
| Onset | 0.404 | 0.825 | -0.538 | -0.304 | -7.114 | 158.271 | 0.000 |
| Symptoms | 0.117 | 0.331 | -0.313 | -0.116 | -4.281 | 246.116 | 0.000 |
| Diagnosis | 0.447 | 0.631 | -0.311 | -0.058 | -2.873 | 189.961 | 0.005 |
| AntiT_Drugs | 0.755 | 0.744 | -0.100 | 0.123 | 0.205 | 197.086 | 0.838 |
| Prevention | 0.160 | 0.300 | -0.244 | -0.037 | -2.672 | 229.007 | 0.008 |
| Score | 0.545 | 0.659 | -0.160 | -0.068 | -4.908 | 212.273 | 0.000 |
#########################################################################
prepost=read.csv("D:/PaulaEcho/prepost.csv")
myf2=function(x){
myt=t.test(x)
newp=round(p.adjust(myt$p.value),3)
a=c(round(myt$estimate[1],3),
round(myt$conf.int[1],3),
round(myt$conf.int[2],3),round(myt$statistic,3),
round(myt$parameter,3),round(newp,3))
return(a)
}
a=matrix(rep(NA, 6*13), 13)
for (i in 1:13){a[i,1:6]=myf2(prepost[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(prepost)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="CHW Pre-Post")%>%kable_classic()
| Mean | Lower 95% CI | Upper 95% CI | t-Value | df | Holm-Adjusted p | |
|---|---|---|---|---|---|---|
| Knowledge | 1.882 | 1.341 | 2.424 | 7.366 | 16 | 0.000 |
| Recent | 1.706 | 1.309 | 2.103 | 9.114 | 16 | 0.000 |
| Texas | 0.059 | -0.066 | 0.184 | 1.000 | 16 | 0.332 |
| Cause | 0.412 | 0.151 | 0.673 | 3.347 | 16 | 0.004 |
| Saliva | 0.471 | 0.102 | 0.839 | 2.704 | 16 | 0.016 |
| Location | 0.176 | -0.026 | 0.379 | 1.852 | 16 | 0.083 |
| Prevalence | 0.647 | 0.394 | 0.900 | 5.416 | 16 | 0.000 |
| Onset | 0.471 | 0.206 | 0.735 | 3.771 | 16 | 0.002 |
| Symptoms | 0.412 | 0.151 | 0.673 | 3.347 | 16 | 0.004 |
| Diagnosis | -0.118 | -0.519 | 0.284 | -0.621 | 16 | 0.543 |
| AntiT_Drugs | 0.235 | -0.054 | 0.524 | 1.725 | 16 | 0.104 |
| Prevent | -0.294 | -0.536 | -0.053 | -2.582 | 16 | 0.020 |
| Score | 0.247 | 0.146 | 0.348 | 5.165 | 16 | 0.000 |
fu=read.csv("D:/PaulaEcho/followup1.csv")
describe(fu)%>%kbl(caption="CHW Follow-Up")%>%kable_classic(html_font = "Cambria")
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Well_Organized | 1 | 20 | 4.55 | 0.5104178 | 5.0 | 4.5625 | 0.0000 | 4 | 5 | 1 | -0.1861220 | -2.061035 | 0.1141329 |
| LOs_Met | 2 | 20 | 4.60 | 0.5026247 | 5.0 | 4.6250 | 0.0000 | 4 | 5 | 1 | -0.3780157 | -1.947083 | 0.1123903 |
| Accurate_Relevant | 3 | 20 | 4.65 | 0.4893605 | 5.0 | 4.6875 | 0.0000 | 4 | 5 | 1 | -0.5823928 | -1.740467 | 0.1094243 |
| As_Expected | 4 | 20 | 4.50 | 0.5129892 | 4.5 | 4.5000 | 0.7413 | 4 | 5 | 1 | 0.0000000 | -2.097500 | 0.1147079 |
| Will_Apply | 5 | 20 | 4.60 | 0.5026247 | 5.0 | 4.6250 | 0.0000 | 4 | 5 | 1 | -0.3780157 | -1.947083 | 0.1123903 |
| Overall | 6 | 20 | 4.85 | 0.3663475 | 5.0 | 4.9375 | 0.0000 | 4 | 5 | 1 | -1.8152162 | 1.370931 | 0.0819178 |
| Lecturers_Effective | 7 | 20 | 4.55 | 0.5104178 | 5.0 | 4.5625 | 0.0000 | 4 | 5 | 1 | -0.1861220 | -2.061035 | 0.1141329 |
| Encouraged_Feedback | 8 | 20 | 4.55 | 0.5104178 | 5.0 | 4.5625 | 0.0000 | 4 | 5 | 1 | -0.1861220 | -2.061035 | 0.1141329 |
| Overall_Lecturers | 9 | 20 | 4.80 | 0.4103913 | 5.0 | 4.8750 | 0.0000 | 4 | 5 | 1 | -1.3889182 | -0.066875 | 0.0917663 |
| Recommend | 10 | 20 | 1.00 | 0.0000000 | 1.0 | 1.0000 | 0.0000 | 1 | 1 | 0 | NaN | NaN | 0.0000000 |
| Missing_Content | 11 | 20 | 0.20 | 0.4103913 | 0.0 | 0.1250 | 0.0000 | 0 | 1 | 1 | 1.3889182 | -0.066875 | 0.0917663 |
| X | 12 | 0 | NaN | NA | NA | NaN | NA | Inf | -Inf | -Inf | NA | NA | NA |
| X.1 | 13 | 0 | NaN | NA | NA | NaN | NA | Inf | -Inf | -Inf | NA | NA | NA |
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
mydata$CHW=mydata$Profession
mydata$CHW=rep(0, nrow(mydata))
mydata$CHW[mydata$Profession=="CHW"]=1
mydata$CHW=as.numeric(mydata$CHW)
mydata$Female=as.numeric(mydata$Gender)
mydata$Female[mydata$Female==2]=0
mylm=lm(Score~Age+Female+Ethnicity+Knowledge+Recent+CHW+PrePost2, data=mydata)
summary(mylm)$coefficients%>%kbl()%>%kable_classic(html_font = 'Cambria')
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 0.2335087 | 0.0527751 | 4.4245992 | 0.0000147 |
| Age25 to 34 | 0.2333747 | 0.0461218 | 5.0599674 | 0.0000008 |
| Age35 to 44 | 0.1564692 | 0.0458485 | 3.4127409 | 0.0007547 |
| Age45 to 64 | 0.1625478 | 0.0442242 | 3.6755411 | 0.0002928 |
| Age65 and over | 0.1740629 | 0.0568740 | 3.0605018 | 0.0024609 |
| AgePrefer not to respond | 0.1671223 | 0.0981488 | 1.7027435 | 0.0899108 |
| Female | -0.0203290 | 0.0239566 | -0.8485756 | 0.3969630 |
| EthnicityNon-Hispanic | 0.1081104 | 0.0266992 | 4.0492042 | 0.0000694 |
| EthnicityNon -Hispanic | -0.0193866 | 0.1501385 | -0.1291251 | 0.8973668 |
| EthnicityPrefer not to respond | -0.0100277 | 0.0588353 | -0.1704374 | 0.8648098 |
| Knowledge | 0.0475018 | 0.0132948 | 3.5729522 | 0.0004265 |
| Recent | 0.0151635 | 0.0151710 | 0.9995027 | 0.3185579 |
| CHW | -0.0541472 | 0.0284608 | -1.9025173 | 0.0583005 |
| PrePost2 | 0.0918741 | 0.0258072 | 3.5600231 | 0.0004470 |
summary(mylm)
##
## Call:
## lm(formula = Score ~ Age + Female + Ethnicity + Knowledge + Recent +
## CHW + PrePost2, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43167 -0.10528 -0.00337 0.10343 0.42416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.23351 0.05278 4.425 1.47e-05 ***
## Age25 to 34 0.23337 0.04612 5.060 8.34e-07 ***
## Age35 to 44 0.15647 0.04585 3.413 0.000755 ***
## Age45 to 64 0.16255 0.04422 3.676 0.000293 ***
## Age65 and over 0.17406 0.05687 3.061 0.002461 **
## AgePrefer not to respond 0.16712 0.09815 1.703 0.089911 .
## Female -0.02033 0.02396 -0.849 0.396963
## EthnicityNon-Hispanic 0.10811 0.02670 4.049 6.94e-05 ***
## EthnicityNon -Hispanic -0.01939 0.15014 -0.129 0.897367
## EthnicityPrefer not to respond -0.01003 0.05884 -0.170 0.864810
## Knowledge 0.04750 0.01329 3.573 0.000427 ***
## Recent 0.01516 0.01517 1.000 0.318558
## CHW -0.05415 0.02846 -1.903 0.058301 .
## PrePost2 0.09187 0.02581 3.560 0.000447 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1473 on 240 degrees of freedom
## Multiple R-squared: 0.4404, Adjusted R-squared: 0.4101
## F-statistic: 14.53 on 13 and 240 DF, p-value: < 2.2e-16
hist(mylm$residuals, col="blue", main="Residuals")
Ethnicity, previous knowledge, and pre-post status are the primary predictors for the subscores.
attach(mydata)
myt=powerTransform(cbind(Texas, Cause, Saliva, Location, Prevalence,
Onset, Symptoms, Diagnosis, AntiT_Drugs, Prevention)+.1)
mydata$nTexas=(mydata$Texas+.1)^myt$lambda[1]
mydata$nCause=(mydata$Cause+.1)^myt$lambda[2]
mydata$nSaliva=(mydata$Saliva+.1)^myt$lambda[3]
mydata$nLocation=(mydata$Location+.1)^myt$lambda[4]
mydata$nPrevalence=(mydata$Prevalence+.1)^myt$lambda[5]
mydata$nOnset=(mydata$Onset+.1)^myt$lambda[6]
mydata$nSymptoms=(mydata$Symptoms+.1)^myt$lambda[7]
mydata$nDiagnosis=(mydata$Diagnosis+.1)^myt$lambda[8]
mydata$nAntiT_Drugs=(mydata$AntiT_Drugs+.1)^myt$lambda[9]
mydata$nPrevention=(mydata$Prevention+.1)^myt$lambda[10]
res.man <- manova(cbind(nTexas, nCause, nSaliva, nLocation, nPrevalence,
nOnset, nSymptoms, nDiagnosis, nAntiT_Drugs, nPrevention) ~
Gender+Ethnicity+PrePost, data = mydata)
summary(res.man, intercept=TRUE)
## Df Pillai approx F num Df den Df Pr(>F)
## (Intercept) 1 0.99248 3155.29 10 239 < 2.2e-16 ***
## Gender 1 0.09483 2.50 10 239 0.007072 **
## Ethnicity 3 0.27874 2.47 30 723 2.729e-05 ***
## PrePost 1 0.23546 7.36 10 239 3.692e-10 ***
## Residuals 248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(res.man, intercept = TRUE)
## Response nTexas :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 13846.6 13846.6 12598.1919 < 2e-16 ***
## Gender 1 1.7 1.7 1.5032 0.22134
## Ethnicity 3 0.7 0.2 0.2220 0.88108
## PrePost 1 3.1 3.1 2.8066 0.09514 .
## Residuals 248 272.6 1.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nCause :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 267.146 267.146 1326.832 < 2.2e-16 ***
## Gender 1 0.003 0.003 0.013 0.90944
## Ethnicity 3 1.972 0.657 3.264 0.02206 *
## PrePost 1 5.054 5.054 25.102 1.034e-06 ***
## Residuals 248 49.933 0.201
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nSaliva :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 646.67 646.67 2311.5183 < 2.2e-16 ***
## Gender 1 0.00 0.00 0.0118 0.91347
## Ethnicity 3 2.14 0.71 2.5480 0.05645 .
## PrePost 1 5.61 5.61 20.0618 1.145e-05 ***
## Residuals 248 69.38 0.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nLocation :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 559.03 559.03 2846.8935 < 2.2e-16 ***
## Gender 1 0.04 0.04 0.1882 0.664777
## Ethnicity 3 0.21 0.07 0.3559 0.784879
## PrePost 1 1.44 1.44 7.3278 0.007262 **
## Residuals 248 48.70 0.20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nPrevalence :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 190.127 190.127 8971.8949 < 2.2e-16 ***
## Gender 1 0.040 0.040 1.8822 0.171323
## Ethnicity 3 0.305 0.102 4.7968 0.002883 **
## PrePost 1 0.883 0.883 41.6567 5.676e-10 ***
## Residuals 248 5.255 0.021
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nOnset :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 150.814 150.814 866.8873 < 2.2e-16 ***
## Gender 1 3.110 3.110 17.8784 3.314e-05 ***
## Ethnicity 3 4.540 1.513 8.6988 1.650e-05 ***
## PrePost 1 1.950 1.950 11.2059 0.0009421 ***
## Residuals 248 43.145 0.174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nSymptoms :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 133192 133192 939.8276 < 2.2e-16 ***
## Gender 1 562 562 3.9669 0.04750 *
## Ethnicity 3 5185 1728 12.1944 1.805e-07 ***
## PrePost 1 634 634 4.4710 0.03547 *
## Residuals 248 35147 142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nDiagnosis :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 158.650 158.650 2169.7643 < 2e-16 ***
## Gender 1 0.193 0.193 2.6451 0.10514
## Ethnicity 3 0.565 0.188 2.5756 0.05446 .
## PrePost 1 0.032 0.032 0.4375 0.50895
## Residuals 248 18.133 0.073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nAntiT_Drugs :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 192.167 192.167 853.5847 < 2.2e-16 ***
## Gender 1 0.159 0.159 0.7070 0.4012485
## Ethnicity 3 1.119 0.373 1.6573 0.1768025
## PrePost 1 2.804 2.804 12.4533 0.0004972 ***
## Residuals 248 55.832 0.225
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nPrevention :
## Df Sum Sq Mean Sq F value Pr(>F)
## (Intercept) 1 157077 157077 839.9246 < 2e-16 ***
## Gender 1 9 9 0.0459 0.83050
## Ethnicity 3 1483 494 2.6439 0.04981 *
## PrePost 1 396 396 2.1170 0.14693
## Residuals 248 46379 187
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1