Exploratory Data Analysis

  1. Methods. Chagas Disease training was provided in an Echo session to two different primary populations of interest, CHW and physicians. These populations completed a pre-examination, a post-examination, and a follow-up assessment of the training. The training was given on the following days. This study was approved by Institutional Review Board protocol number XXXX on XXXXX. Participants were provided informed consent. A copy of the survey is available as Appendix A.

Load libraries and data

#####################Read and Pre-Clean the Data#######################
require(Amelia)
## Loading required package: Amelia
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
require(car)
## Loading required package: car
## Loading required package: carData
require(carData)
require(corrplot)
## Loading required package: corrplot
## corrplot 0.84 loaded
require(ggcorrplot)
## Loading required package: ggcorrplot
## Loading required package: ggplot2
require(ggpubr)
## Loading required package: ggpubr
require(gridExtra)
## Loading required package: gridExtra
require(gtable)
## Loading required package: gtable
require(heplots)
## Loading required package: heplots
require(kableExtra)
## Loading required package: kableExtra
require(leaps)
## Loading required package: leaps
require(MANOVA.RM)
## Loading required package: MANOVA.RM
require(MASS)
## Loading required package: MASS
require(MVN)
## Loading required package: MVN
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## sROC 0.1-2 loaded
require(mvtnorm)
## Loading required package: mvtnorm
require(nnet)
## Loading required package: nnet
require(psych) #to describe
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:car':
## 
##     logit
require(ggplot2)
require(ggcorrplot)
require(qcc)
## Loading required package: qcc
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
require(rcompanion)
## Loading required package: rcompanion
## Error: package or namespace load failed for 'rcompanion' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
##  there is no package called 'coin'
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
require(reshape2)
## Loading required package: reshape2
require(ResourceSelection)
## Loading required package: ResourceSelection
## ResourceSelection 0.3-5   2019-07-22
require(rstatix)
## Loading required package: rstatix
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat
## The following object is masked from 'package:stats':
## 
##     filter
require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## v purrr   0.3.4
## Warning: package 'stringr' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()        masks ggplot2::%+%()
## x psych::alpha()      masks ggplot2::alpha()
## x dplyr::combine()    masks gridExtra::combine()
## x dplyr::filter()     masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks rstatix::select(), MASS::select()
## x purrr::some()       masks car::some()
corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}

#Barplot Function, Vertical

mybar2=function(y,title){
  fu%>%count(fu[,y])%>%mutate(perc = n / nrow(fu)) -> mydf1
  a=ggplot(mydf1, aes(x='',n, fill=`fu[, y]`,label=paste0(n," (", scales::percent(perc), ")")))+
  geom_bar(position="stack", stat="identity", width=.5)+
  geom_text(size = 3, angle=90, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("")+
    coord_flip()+
  theme(legend.title = element_blank())+
  ggtitle(title)+
  theme(legend.text = element_text(size = 8))+
  theme(legend.position="right")
  return(a)
}

mybar3=function(y,varname,rotate=270){
  myt=table(y, mydata$PrePost)
  myt=apply(myt,2,function(z) z/sum(z))
  myt=as.data.frame(myt)
  myt$Prepost=rownames(myt)
  myt=melt(myt, id.vars="Prepost")
  colnames(myt)=c("Prepost", varname,"Percent")
  a=ggplot(myt, aes(myt[,2], Percent, fill=Prepost, label=scales::percent(Percent)))+ 
  geom_bar( stat = "identity", position = "stack" ) +
    geom_text(size = 3, angle=rotate,position = position_stack(vjust = 0.5))+
  coord_flip() +
    xlab("")+
    ylab("")+
   theme_minimal() + theme(legend.title=element_blank(),legend.position = "bottom" )+
    ggtitle(varname)
  return(a)
}




setwd("D:/PaulaEcho")
mydata=read.csv("global.csv", stringsAsFactors = TRUE)


#########################################################################

Check Missing

3.1. Missing Data.

After eliminating those who did not fully complete the pre-post examination or follow-ups or who opted out during informed consent, there were n1=183 pretests and n2=71 post tests. For the follow-up survey, there were 54 completed surveys. Due to optional anonymity, only n3=17 pretests were matched with post-tests. The available data were complete.

#########################################################################
missmap(mydata)

#########################################################################

Descriptives

  1. Results

4.1. Descriptive Statistics

Figure 1 provides the distribution of the gender, ethnicity, age, and profession of both the pre-post groups. This information was not gathered for the follow-up survey. About 74% of the pre-test participants were female as well as 85% of the post-test participants. Tha majority of both of these populations were Hispanic (55% pre-test, 70% post-test), and the age distribution was modally 45-64 for both groups. The majority of the individuals completing the pre-test were MD’s or other (32% each), while the majority completing the post-test were CHWs (56%).

Gender

#########################################################################
pre=mydata[mydata$PrePost=="Pre",]
post=mydata[mydata$PrePost=="Post",]

as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)
Gender %
Female 77.17
Male 22.83
as.data.frame(round(table(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "n"))%>%kable_classic(full_width=F)
Gender n
Female 196
Male 58
as.data.frame(round(table(pre$Gender),4))%>%
  kbl(col.names = c("Pre Gender", "n"))%>%kable_classic(full_width=F)
Pre Gender n
Female 136
Male 47
as.data.frame(round(table(post$Gender),4))%>%
  
  kbl(col.names = c("Post Gender", "n"))%>%kable_classic(full_width=F)
Post Gender n
Female 60
Male 11
mybar3(mydata$Gender,"Gender")

#########################################################################

Ethnicity

#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%  kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)
Ethnicity %
Hispanic 58.27
Non-Hispanic 38.19
Non -Hispanic 0.39
Prefer not to respond 3.15
as.data.frame(round(table(mydata$Ethnicity),4))%>%  kbl(col.names = c("Ethnicity", "n"))%>%kable_classic(full_width=F)
Ethnicity n
Hispanic 148
Non-Hispanic 97
Non -Hispanic 1
Prefer not to respond 8
as.data.frame(round(table(pre$Ethnicity),4))%>%  kbl(col.names = c("Pre Ethnicity", "n"))%>%kable_classic(full_width=F)
Pre Ethnicity n
Hispanic 98
Non-Hispanic 78
Non -Hispanic 1
Prefer not to respond 6
as.data.frame(round(table(post$Ethnicity),4))%>%  kbl(col.names = c("Post Ethnicity", "n"))%>%kable_classic(full_width=F)
Post Ethnicity n
Hispanic 50
Non-Hispanic 19
Non -Hispanic 0
Prefer not to respond 2
mybar3(mydata$Ethnicity,"Ethnicity")

#########################################################################

Age

#########################################################################
as.data.frame(100*round(table(mydata$Age)/length(mydata$Age),4))%>%
  kbl(col.names = c("Age", "%"))%>%kable_classic(full_width=F)
Age %
18 to 24 5.51
25 to 34 18.90
35 to 44 26.38
45 to 64 41.73
65 and over 6.30
Prefer not to respond 1.18
as.data.frame(round(table(mydata$Age),4))%>%
  kbl(col.names = c("Age", "n"))%>%kable_classic(full_width=F)
Age n
18 to 24 14
25 to 34 48
35 to 44 67
45 to 64 106
65 and over 16
Prefer not to respond 3
as.data.frame(round(table(pre$Age),4))%>%
  kbl(col.names = c("Pre Age", "n"))%>%kable_classic(full_width=F)
Pre Age n
18 to 24 12
25 to 34 38
35 to 44 54
45 to 64 68
65 and over 10
Prefer not to respond 1
as.data.frame(round(table(post$Age),4))%>%
  kbl(col.names = c("Post Age", "n"))%>%kable_classic(full_width=F)
Post Age n
18 to 24 2
25 to 34 10
35 to 44 13
45 to 64 38
65 and over 6
Prefer not to respond 2
mybar3(mydata$Age,"Age")

#########################################################################

Profession

#########################################################################
as.data.frame(100*round(table(mydata$Profession)/length(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "%"))%>%kable_classic(full_width=F)
Profession %
CHW 37.01
DVM 3.54
MD/DO 28.74
NP/PA 2.36
Other 28.35
as.data.frame(round(table(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "n"))%>%kable_classic(full_width=F)
Profession n
CHW 94
DVM 9
MD/DO 73
NP/PA 6
Other 72
as.data.frame(round(table(pre$Profession),4))%>%
  kbl(col.names = c("Pre Profession", "n"))%>%kable_classic(full_width=F)
Pre Profession n
CHW 54
DVM 7
MD/DO 58
NP/PA 6
Other 58
as.data.frame(round(table(post$Profession),4))%>%
  kbl(col.names = c("Post Profession", "n"))%>%kable_classic(full_width=F)
Post Profession n
CHW 40
DVM 2
MD/DO 15
NP/PA 0
Other 14
mybar3(mydata$Profession,"Profession")

#########################################################################

Specialization

#########################################################################
as.data.frame(100*round(table(mydata$Specialization)/length(mydata$Specialization),4))%>%
  kbl(col.names = c("Specialization", "%"))%>%kable_classic(full_width=F)
Specialization %
42.91
Air Force idmt 0.39
Air Force IDMT 0.39
Biomedicine 0.39
Community health 28.35
Community health,Other 0.39
DC 0.39
Does not apply 2.76
economist 0.39
Economist 0.39
Epidemiologist 1.18
FSC 0.39
Infectious disease,Cardiology,General practitioner/Family physician,Community health 0.79
LVN 0.39
Microbiologist 0.39
MLS 0.39
Other 1.18
Other / Does not apply 4.33
Other,Does not apply 0.39
Ph.D. / Post-Grad / Faculty 4.33
PharmD / MPharm / Pharm Student 4.72
PT, MHA, PhD 0.39
Public Health 1.57
R&D Industry 0.39
RCS 0.39
Student 1.57
Zoonosis Control Program Specialist 0.39
as.data.frame(table(mydata$Specialization))%>%
  kbl(col.names = c("Specialization", "n"))%>%kable_classic(full_width=F)
Specialization n
109
Air Force idmt 1
Air Force IDMT 1
Biomedicine 1
Community health 72
Community health,Other 1
DC 1
Does not apply 7
economist 1
Economist 1
Epidemiologist 3
FSC 1
Infectious disease,Cardiology,General practitioner/Family physician,Community health 2
LVN 1
Microbiologist 1
MLS 1
Other 3
Other / Does not apply 11
Other,Does not apply 1
Ph.D. / Post-Grad / Faculty 11
PharmD / MPharm / Pharm Student 12
PT, MHA, PhD 1
Public Health 4
R&D Industry 1
RCS 1
Student 4
Zoonosis Control Program Specialist 1
as.data.frame(table(pre$Specialization))%>%
  kbl(col.names = c("Pre Specialization", "n"))%>%kable_classic(full_width=F)
Pre Specialization n
89
Air Force idmt 0
Air Force IDMT 1
Biomedicine 1
Community health 39
Community health,Other 1
DC 1
Does not apply 5
economist 0
Economist 1
Epidemiologist 2
FSC 1
Infectious disease,Cardiology,General practitioner/Family physician,Community health 0
LVN 1
Microbiologist 1
MLS 1
Other 3
Other / Does not apply 8
Other,Does not apply 0
Ph.D. / Post-Grad / Faculty 9
PharmD / MPharm / Pharm Student 10
PT, MHA, PhD 1
Public Health 3
R&D Industry 1
RCS 1
Student 2
Zoonosis Control Program Specialist 1
as.data.frame(table(post$Specialization))%>%
  kbl(col.names = c("Post Specialization", "n"))%>%kable_classic(full_width=F)
Post Specialization n
20
Air Force idmt 1
Air Force IDMT 0
Biomedicine 0
Community health 33
Community health,Other 0
DC 0
Does not apply 2
economist 1
Economist 0
Epidemiologist 1
FSC 0
Infectious disease,Cardiology,General practitioner/Family physician,Community health 2
LVN 0
Microbiologist 0
MLS 0
Other 0
Other / Does not apply 3
Other,Does not apply 1
Ph.D. / Post-Grad / Faculty 2
PharmD / MPharm / Pharm Student 2
PT, MHA, PhD 0
Public Health 1
R&D Industry 0
RCS 0
Student 2
Zoonosis Control Program Specialist 0
#########################################################################

For the pre-test, the modal response when asked about their knowledge of CD was “Good” (30%) with only 8% professing “Excellent” knowledge. After training, that self-assessment increased to 58% “Good” and 16% Excellent (Fisher’s Exact Test p-value<0.001).

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Knowledge)
myt=myt/rowSums(myt)
colnames(myt)=c("None", "Very Limited", "Limited", "Good", "Excellent")
row.names(myt)=c("Post", "Pre")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)
None Very Limited Limited Good Excellent
Post 0.0000 5.6338 21.1268 57.7465 15.4930
Pre 12.5683 19.1257 32.2404 30.0546 6.0109
mybar3(mydata$Knowledge,"Knowledge")

fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.010514
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################

Prior to the training 75% were not confident or only somewhat confident in the recency of their knowledge. Post-training, 80% were either confident or very confident (Fisher’s Exact Test p-value < .001).

Recency

#########################################################################

myt=table(mydata$PrePost,mydata$Recent)
myt=myt/rowSums(myt)
colnames(myt)=c("Not at all Confident", "Somewhat Confident", "Confident", "Very Confident")
row.names(myt)=c("Post","Pre")
myt=round(100*myt, 4)
myt%>%kbl()%>%kable_classic(full_width=F)
Not at all Confident Somewhat Confident Confident Very Confident
Post 1.4085 16.9014 54.9296 26.7606
Pre 40.9836 34.4262 17.4863 7.1038
mybar3(mydata$Recent, "Recency of Knowledge")

fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.009248
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################

For the pre-post, students were provided 10 questions to test their knowledge. The distribution of the pre-post scores is shown in Figure 2 (unmatched) with notched boxplots, boxplots that provide a visual median test. Without controlling for individual ability, median post-scores were statistically better than pre-test scores at the alpha=0.05 level.

Score Pre-Post

#########################################################################
mydata$PrePost2=mydata$PrePost
mydata$PrePost=as.factor(mydata$PrePost)
levels(mydata$PrePost)=c("Post", "Pre")
boxplot(mydata$Score~mydata$PrePost, main="Knowledge Score, Pre vs. Post",notch=TRUE, col=c("red", "dark green"), horizontal = TRUE)

#########################################################################

For the majority of questions, the pre-post scores appeared to improve (Figure 3). The exception to this statement was for “who should be treated with anti-T drugs” and for “prevention” methods. Both of these topics will be reinforced in future sessions.

Subscores

#########################################################################
mynames=c("True or False: Chagas Disease Present in Texas", 
          "Chagas Disease Caused By...",
          "T Cruzi Transmitted by Saliva.", 
          "Parts of World for Chagas Disease", 
          "What % of Patients Develop Clinical Disease",
          "Chagas Disease Symptoms...", 
          "Methods to Confirm Chagas Disease", 
          "Who should be Treated with Antitrypanosomal Drugs", 
          "EKG Findings Typical of Chagas Disease",
          "Steps for Prevention")
for (i in 10:19){
  print(mybar3(mydata[,i],mynames[i-9]))
}

Test questions were found to be uncorrelated, so that performance on one question was statistically not linked to any others. This finding suggests that the topics are sufficiently different.

Correlations

#########################################################################
newdat=mydata[, c(10:19)]
newdat2=pre[, c(10:19)]
newdat3=post[, c(10:19)]
for (i in 1:10){ newdat[,i]=as.numeric(newdat[,i])-1
newdat2[,i]=as.numeric(newdat2[,i])-1
newdat3[,i]=as.numeric(newdat3[,i])-1
}
mycorr=cor(newdat)
mycorr2=cor(newdat2)
mycorr3=cor(newdat3)
## Warning in cor(newdat3): the standard deviation is zero
corfunction(mycorr)

corfunction(mycorr2)

#########################################################################

Table 1 provides the descriptive statistics for the comparison between the pre-test and post-test. These scores are not matched.

4.2. Inferential Testing

Both paired and unpaired, Holm-adjusted t-tests were used to evaluate performance differences for the students. The unpaired t-tests do not account for individual knowledge coming into the class; however, they allow for a larger sample size.

For the unpaired t-tests (Table 2), we found statistically significant differences for all but three of the questions associated with symptoms, diagnoses, and prevention. These are some of the more difficult concepts taught in the course.

When comparing CHWs with non-CHWs (Table 3), we noticed that the CHW workers performed worse than the non-CHWs (many of whom were doctors) as indicated by the negative difference between the scores. There were no statistically significant differences on 4 of the 10 questions, though.

Simple Inferentials

Paired t-test (Small Subset)

prepost=read.csv("D:/PaulaEcho/prepost.csv", stringsAsFactors = T)
MD=prepost[prepost$Group=="MD",]
CHW=prepost[prepost$Group=="CHW",]


prepost$Group=MD$Group=CHW$Group=NULL

myf2=function(x){
  
  myt=t.test(x)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(prepost[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(prepost)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post ALL, n=45")%>%kable_classic()
Pre-Post ALL, n=45
Mean Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Knowledge 0.867 0.469 1.265 4.389 44 0.000
Recent 1.044 0.731 1.358 6.714 44 0.000
Texas 0.022 -0.023 0.067 1.000 44 0.323
Cause 0.200 0.063 0.337 2.934 44 0.005
Saliva 0.289 0.112 0.466 3.292 44 0.002
Location 0.067 -0.033 0.166 1.354 44 0.183
Prevalence 0.356 0.173 0.538 3.917 44 0.000
ClinDisease 0.133 -0.018 0.285 1.773 44 0.083
Symptoms 0.156 -0.025 0.336 1.735 44 0.090
Diagnosis -0.022 -0.220 0.175 -0.227 44 0.821
AntiT_Drugs 0.111 -0.049 0.271 1.402 44 0.168
Prevent -0.178 -0.351 -0.005 -2.072 44 0.044
a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(MD[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(MD)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post MD, n=28")%>%kable_classic()
Pre-Post MD, n=28
Mean Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Knowledge 0.250 -0.167 0.667 1.230 27 0.229
Recent 0.643 0.259 1.026 3.438 27 0.002
Texas 0.000 NaN NaN NaN 27 NaN
Cause 0.071 -0.075 0.218 1.000 27 0.326
Saliva 0.179 -0.006 0.363 1.987 27 0.057
Location 0.000 -0.106 0.106 0.000 27 1.000
Prevalence 0.179 -0.059 0.416 1.544 27 0.134
ClinDisease -0.071 -0.218 0.075 -1.000 27 0.326
Symptoms 0.000 -0.236 0.236 0.000 27 1.000
Diagnosis 0.036 -0.188 0.259 0.328 27 0.745
AntiT_Drugs 0.036 -0.161 0.233 0.372 27 0.713
Prevent -0.107 -0.351 0.137 -0.902 27 0.375
a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(CHW[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(CHW)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post CHW, n=17")%>%kable_classic()
Pre-Post CHW, n=17
Mean Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Knowledge 1.882 1.341 2.424 7.366 16 0.000
Recent 1.706 1.309 2.103 9.114 16 0.000
Texas 0.059 -0.066 0.184 1.000 16 0.332
Cause 0.412 0.151 0.673 3.347 16 0.004
Saliva 0.471 0.102 0.839 2.704 16 0.016
Location 0.176 -0.026 0.379 1.852 16 0.083
Prevalence 0.647 0.394 0.900 5.416 16 0.000
ClinDisease 0.471 0.206 0.735 3.771 16 0.002
Symptoms 0.412 0.151 0.673 3.347 16 0.004
Diagnosis -0.118 -0.519 0.284 -0.621 16 0.543
AntiT_Drugs 0.235 -0.054 0.524 1.725 16 0.104
Prevent -0.294 -0.536 -0.053 -2.582 16 0.020

A course satisfaction survey revealed wide-spread approval of the content and instructors. The individual questions are Appendix B, and the results of the survey are in Table 5.

Follow-Up Satisfaction

fu=read.csv("D:/PaulaEcho/total followup.csv")

describe(fu)%>%kbl(caption="CHW Follow-Up")%>%kable_classic(html_font = "Cambria")
CHW Follow-Up
vars n mean sd median trimmed mad min max range skew kurtosis se
Occupation* 1 54 2.574074 1.6437630 3 2.318182 1.4826 1 8 7 1.1847971 1.3818618 0.2236878
Learning.objectives.were.clearly.stated* 2 54 2.555556 0.5378738 3 2.590909 0.0000 1 3 2 -0.5729870 -0.9732335 0.0731953
Content.was.well.organized* 3 54 2.574074 0.5697352 3 2.636364 0.0000 1 3 2 -0.8856959 -0.2884584 0.0775311
Stated.learning.objectives.were.met* 4 54 1.555556 0.5015699 2 1.568182 0.0000 1 2 1 -0.2174243 -1.9885288 0.0682550
Content.was.accurate.and.relevant* 5 54 3.518518 0.6656176 4 3.613636 0.0000 1 4 3 -1.3849570 2.0907518 0.0905791
Course.corresponded.to.my.expectations* 6 54 3.351852 0.6488715 3 3.409091 0.0000 1 4 3 -0.8748751 1.3626716 0.0883002
I.intend.to.apply.what.I.learned* 7 54 2.481482 0.6062797 3 2.545454 0.0000 1 3 2 -0.6786852 -0.5699189 0.0825042
The.lecturers.demonstrated.knowledge.of.the.subject.matter* 8 54 1.703704 0.4609109 2 1.750000 0.0000 1 2 1 -0.8675491 -1.2698519 0.0627220
The.lecturers.were.effective.in.communicating.the.content.of.the.training.program* 9 54 1.648148 0.4820322 2 1.681818 0.0000 1 2 1 -0.6032986 -1.6658575 0.0655963
The.lecturers.encouraged.feedback.from.the.participants* 10 54 2.592593 0.5669687 3 2.659091 0.0000 1 3 2 -0.9678568 -0.1285472 0.0771547
Overall..I.would.rate.this.program* 11 54 2.722222 0.4920756 3 2.795454 0.0000 1 3 2 -1.4218180 0.9710169 0.0669630
Overall..I.would.rate.the.lecturers* 12 54 1.777778 0.4196435 2 1.840909 0.0000 1 2 1 -1.2993590 -0.3165050 0.0571062
Did.you.find.this.activity.enhanced.your.understanding.of.Chagas.disease.* 13 54 1.981481 0.1360828 2 2.000000 0.0000 1 2 1 -6.9452610 47.1100823 0.0185185
Would.you.recommend.this.training.program.to.other.clinicians..clinical.staff.members..medical.students..or.community.health.workers.* 14 54 1.000000 0.0000000 1 1.000000 0.0000 1 1 0 NaN NaN 0.0000000
Was.there.content.you.expected.to.be.covered.that.was.not.* 15 54 1.240741 0.4315477 1 1.181818 0.0000 1 2 1 1.1792818 -0.6197488 0.0587262
If.yes..please.detail.below.* 16 53 1.849057 2.1785336 1 1.232558 0.0000 1 10 9 2.4890049 5.0463348 0.2992446
Please.provide.additional.comments.and.feedback..if.any..* 17 54 3.833333 5.0497525 1 2.772727 0.0000 1 18 17 1.5377847 0.9188453 0.6871843
fu=fu[, 1:15]

for (i in 2:15){
  print(mybar2(i,colnames(fu)[i]))
}

Models

A linear model of performance score as a function of age, ethnicity, gender, self-assessed knowledge, recency of knowledge, CHW status, and pre-post status of examination was estimated using ordinary least squares on the n1+n2=254 tests. The results showed that age groupings, ethnicity, self-assessment of knowledge, and pre-post examination were important in predicting test results (F(13,240)=13.3, p<.001, R^2=.440, Adj. R^2=.410).

Regression

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
mydata$CHW=mydata$Profession
mydata$CHW=rep(0, nrow(mydata))
mydata$CHW[mydata$Profession=="CHW"]=1
mydata$CHW=as.numeric(mydata$CHW)
mydata$Female=as.numeric(mydata$Gender)
mydata$Female[mydata$Female==2]=0
mylm=lm(Score~Age+Female+Ethnicity+Knowledge+Recent+CHW+PrePost2, data=mydata)
summary(mylm)$coefficients%>%kbl()%>%kable_classic(html_font = 'Cambria')
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4072243 0.0636530 6.3975652 0.0000000
Age25 to 34 0.2332343 0.0464262 5.0237633 0.0000010
Age35 to 44 0.1639557 0.0461276 3.5543908 0.0004579
Age45 to 64 0.1678968 0.0445374 3.7697960 0.0002067
Age65 and over 0.1799692 0.0576812 3.1200660 0.0020342
AgePrefer not to respond 0.1781863 0.0987719 1.8040180 0.0725090
Female -0.0175723 0.0248363 -0.7075240 0.4799420
EthnicityNon-Hispanic 0.0981494 0.0271304 3.6176931 0.0003638
EthnicityNon -Hispanic -0.0323416 0.1509386 -0.2142698 0.8305225
EthnicityPrefer not to respond -0.0165068 0.0593320 -0.2782100 0.7810958
Knowledge2. Very Limited -0.0171143 0.0402922 -0.4247539 0.6714047
Knowledge3. Limited 0.0790134 0.0420758 1.8778821 0.0616357
Knowledge4. Good 0.1289828 0.0460063 2.8035876 0.0054760
Knowledge5. Excellent 0.1441796 0.0594176 2.4265454 0.0159967
Recent2. Somewhat Confident 0.0195944 0.0301687 0.6494920 0.5166548
Recent3. Confident 0.0388984 0.0395262 0.9841180 0.3260696
Recent4. Very Confident 0.0293292 0.0498102 0.5888186 0.5565485
CHW -0.0592721 0.0290590 -2.0397173 0.0424971
PrePost2Pre -0.0891525 0.0266958 -3.3395752 0.0009757
summary(mylm)
## 
## Call:
## lm(formula = Score ~ Age + Female + Ethnicity + Knowledge + Recent + 
##     CHW + PrePost2, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39158 -0.09124 -0.00025  0.09226  0.40799 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.40722    0.06365   6.398 8.46e-10 ***
## Age25 to 34                     0.23323    0.04643   5.024 1.00e-06 ***
## Age35 to 44                     0.16396    0.04613   3.554 0.000458 ***
## Age45 to 64                     0.16790    0.04454   3.770 0.000207 ***
## Age65 and over                  0.17997    0.05768   3.120 0.002034 ** 
## AgePrefer not to respond        0.17819    0.09877   1.804 0.072509 .  
## Female                         -0.01757    0.02484  -0.708 0.479942    
## EthnicityNon-Hispanic           0.09815    0.02713   3.618 0.000364 ***
## EthnicityNon -Hispanic         -0.03234    0.15094  -0.214 0.830522    
## EthnicityPrefer not to respond -0.01651    0.05933  -0.278 0.781096    
## Knowledge2. Very Limited       -0.01711    0.04029  -0.425 0.671405    
## Knowledge3. Limited             0.07901    0.04208   1.878 0.061636 .  
## Knowledge4. Good                0.12898    0.04601   2.804 0.005476 ** 
## Knowledge5. Excellent           0.14418    0.05942   2.427 0.015997 *  
## Recent2. Somewhat Confident     0.01959    0.03017   0.649 0.516655    
## Recent3. Confident              0.03890    0.03953   0.984 0.326070    
## Recent4. Very Confident         0.02933    0.04981   0.589 0.556548    
## CHW                            -0.05927    0.02906  -2.040 0.042497 *  
## PrePost2Pre                    -0.08915    0.02670  -3.340 0.000976 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.147 on 235 degrees of freedom
## Multiple R-squared:  0.4546, Adjusted R-squared:  0.4128 
## F-statistic: 10.88 on 18 and 235 DF,  p-value: < 2.2e-16
hist(mylm$residuals, col="blue", main="Residuals")