Exploratory Data Analysis

Methods. Chagas Disease training was provided in an Echo session to two different primary populations of interest, CHW and physicians. These populations completed a pre-examination, a post-examination, and a follow-up assessment of the training. The training was given on the following days. This study was approved by Institutional Review Board protocol number XXXX on XXXXX. Participants were provided informed consent. A copy of the survey is available as Appendix A.

Load libraries and data

#####################Read and Pre-Clean the Data#######################
require(Amelia)

## Loading required package: Amelia

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

require(car)

## Loading required package: car

## Loading required package: carData

require(carData)
require(corrplot)

## Loading required package: corrplot

## corrplot 0.84 loaded

require(ggcorrplot)

## Loading required package: ggcorrplot

## Loading required package: ggplot2

require(ggpubr)

## Loading required package: ggpubr

require(gridExtra)

## Loading required package: gridExtra

require(gtable)

## Loading required package: gtable

require(heplots)

## Loading required package: heplots

require(kableExtra)

## Loading required package: kableExtra

require(leaps)

## Loading required package: leaps

require(MANOVA.RM)

## Loading required package: MANOVA.RM

require(MASS)

## Loading required package: MASS

require(MVN)

## Loading required package: MVN

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## sROC 0.1-2 loaded

require(mvtnorm)

## Loading required package: mvtnorm

require(nnet)

## Loading required package: nnet

require(psych) #to describe

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:car':
## 
##     logit

require(ggplot2)
require(ggcorrplot)
require(qcc)

## Loading required package: qcc

## Package 'qcc' version 2.7

## Type 'citation("qcc")' for citing this R package in publications.

require(rcompanion)

## Loading required package: rcompanion

## Error: package or namespace load failed for 'rcompanion' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
##  there is no package called 'coin'

require(reticulate) #to use Python in R as well

## Loading required package: reticulate

require(reshape2)

## Loading required package: reshape2

require(ResourceSelection)

## Loading required package: ResourceSelection

## ResourceSelection 0.3-5   2019-07-22

require(rstatix)

## Loading required package: rstatix

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:MASS':
## 
##     select

## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat

## The following object is masked from 'package:stats':
## 
##     filter

require(tidyverse)

## Loading required package: tidyverse

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## v purrr   0.3.4

## Warning: package 'stringr' was built under R version 4.0.4

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()        masks ggplot2::%+%()
## x psych::alpha()      masks ggplot2::alpha()
## x dplyr::combine()    masks gridExtra::combine()
## x dplyr::filter()     masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks rstatix::select(), MASS::select()
## x purrr::some()       masks car::some()

corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}

#Barplot Function, Vertical

mybar2=function(y,title){
  fu%>%count(fu[,y])%>%mutate(perc = n / nrow(fu)) -> mydf1
  a=ggplot(mydf1, aes(x='',n, fill=`fu[, y]`,label=paste0(n," (", scales::percent(perc), ")")))+
  geom_bar(position="stack", stat="identity", width=.5)+
  geom_text(size = 3, angle=90, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("")+
    coord_flip()+
  theme(legend.title = element_blank())+
  ggtitle(title)+
  theme(legend.text = element_text(size = 8))+
  theme(legend.position="right")
  return(a)
}

mybar3=function(y,varname,rotate=270){
  myt=table(y, mydata$PrePost)
  myt=apply(myt,2,function(z) z/sum(z))
  myt=as.data.frame(myt)
  myt$Prepost=rownames(myt)
  myt=melt(myt, id.vars="Prepost")
  colnames(myt)=c("Prepost", varname,"Percent")
  a=ggplot(myt, aes(myt[,2], Percent, fill=Prepost, label=scales::percent(Percent)))+ 
  geom_bar( stat = "identity", position = "stack" ) +
    geom_text(size = 3, angle=rotate,position = position_stack(vjust = 0.5))+
  coord_flip() +
    xlab("")+
    ylab("")+
   theme_minimal() + theme(legend.title=element_blank(),legend.position = "bottom" )+
    ggtitle(varname)
  return(a)
}




setwd("D:/PaulaEcho")
mydata=read.csv("global.csv", stringsAsFactors = TRUE)


#########################################################################

Check Missing

3.1. Missing Data.

After eliminating those who did not fully complete the pre-post examination or follow-ups or who opted out during informed consent, there were n1=183 pretests and n2=71 post tests. For the follow-up survey, there were 54 completed surveys. Due to optional anonymity, only n3=17 pretests were matched with post-tests. The available data were complete.

#########################################################################
missmap(mydata)

#########################################################################

Descriptives

Results

4.1. Descriptive Statistics

Figure 1 provides the distribution of the gender, ethnicity, age, and profession of both the pre-post groups. This information was not gathered for the follow-up survey. About 74% of the pre-test participants were female as well as 85% of the post-test participants. Tha majority of both of these populations were Hispanic (55% pre-test, 70% post-test), and the age distribution was modally 45-64 for both groups. The majority of the individuals completing the pre-test were MD’s or other (32% each), while the majority completing the post-test were CHWs (56%).

Gender

#########################################################################
pre=mydata[mydata$PrePost=="Pre",]
post=mydata[mydata$PrePost=="Post",]

as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)

Gender	%
Female	77.17
Male	22.83

as.data.frame(round(table(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "n"))%>%kable_classic(full_width=F)

Gender	n
Female	196
Male	58

as.data.frame(round(table(pre$Gender),4))%>%
  kbl(col.names = c("Pre Gender", "n"))%>%kable_classic(full_width=F)

Pre Gender	n
Female	136
Male	47

as.data.frame(round(table(post$Gender),4))%>%
  
  kbl(col.names = c("Post Gender", "n"))%>%kable_classic(full_width=F)

Post Gender	n
Female	60
Male	11

mybar3(mydata$Gender,"Gender")

#########################################################################

Ethnicity

#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%  kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)

Ethnicity	%
Hispanic	58.27
Non-Hispanic	38.19
Non -Hispanic	0.39
Prefer not to respond	3.15

as.data.frame(round(table(mydata$Ethnicity),4))%>%  kbl(col.names = c("Ethnicity", "n"))%>%kable_classic(full_width=F)

Ethnicity	n
Hispanic	148
Non-Hispanic	97
Non -Hispanic	1
Prefer not to respond	8

as.data.frame(round(table(pre$Ethnicity),4))%>%  kbl(col.names = c("Pre Ethnicity", "n"))%>%kable_classic(full_width=F)

Pre Ethnicity	n
Hispanic	98
Non-Hispanic	78
Non -Hispanic	1
Prefer not to respond	6

as.data.frame(round(table(post$Ethnicity),4))%>%  kbl(col.names = c("Post Ethnicity", "n"))%>%kable_classic(full_width=F)

Post Ethnicity	n
Hispanic	50
Non-Hispanic	19
Non -Hispanic	0
Prefer not to respond	2

mybar3(mydata$Ethnicity,"Ethnicity")

#########################################################################

Age

#########################################################################
as.data.frame(100*round(table(mydata$Age)/length(mydata$Age),4))%>%
  kbl(col.names = c("Age", "%"))%>%kable_classic(full_width=F)

Age	%
18 to 24	5.51
25 to 34	18.90
35 to 44	26.38
45 to 64	41.73
65 and over	6.30
Prefer not to respond	1.18

as.data.frame(round(table(mydata$Age),4))%>%
  kbl(col.names = c("Age", "n"))%>%kable_classic(full_width=F)

Age	n
18 to 24	14
25 to 34	48
35 to 44	67
45 to 64	106
65 and over	16
Prefer not to respond	3

as.data.frame(round(table(pre$Age),4))%>%
  kbl(col.names = c("Pre Age", "n"))%>%kable_classic(full_width=F)

Pre Age	n
18 to 24	12
25 to 34	38
35 to 44	54
45 to 64	68
65 and over	10
Prefer not to respond	1

as.data.frame(round(table(post$Age),4))%>%
  kbl(col.names = c("Post Age", "n"))%>%kable_classic(full_width=F)

Post Age	n
18 to 24	2
25 to 34	10
35 to 44	13
45 to 64	38
65 and over	6
Prefer not to respond	2

mybar3(mydata$Age,"Age")

#########################################################################

Profession

#########################################################################
as.data.frame(100*round(table(mydata$Profession)/length(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "%"))%>%kable_classic(full_width=F)

Profession	%
CHW	37.01
DVM	3.54
MD/DO	28.74
NP/PA	2.36
Other	28.35

as.data.frame(round(table(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "n"))%>%kable_classic(full_width=F)

Profession	n
CHW	94
DVM	9
MD/DO	73
NP/PA	6
Other	72

as.data.frame(round(table(pre$Profession),4))%>%
  kbl(col.names = c("Pre Profession", "n"))%>%kable_classic(full_width=F)

Pre Profession	n
CHW	54
DVM	7
MD/DO	58
NP/PA	6
Other	58

as.data.frame(round(table(post$Profession),4))%>%
  kbl(col.names = c("Post Profession", "n"))%>%kable_classic(full_width=F)

Post Profession	n
CHW	40
DVM	2
MD/DO	15
NP/PA	0
Other	14

mybar3(mydata$Profession,"Profession")

#########################################################################

Specialization

#########################################################################
as.data.frame(100*round(table(mydata$Specialization)/length(mydata$Specialization),4))%>%
  kbl(col.names = c("Specialization", "%"))%>%kable_classic(full_width=F)

Specialization	%
	42.91
Air Force idmt	0.39
Air Force IDMT	0.39
Biomedicine	0.39
Community health	28.35
Community health,Other	0.39
DC	0.39
Does not apply	2.76
economist	0.39
Economist	0.39
Epidemiologist	1.18
FSC	0.39
Infectious disease,Cardiology,General practitioner/Family physician,Community health	0.79
LVN	0.39
Microbiologist	0.39
MLS	0.39
Other	1.18
Other / Does not apply	4.33
Other,Does not apply	0.39
Ph.D. / Post-Grad / Faculty	4.33
PharmD / MPharm / Pharm Student	4.72
PT, MHA, PhD	0.39
Public Health	1.57
R&D Industry	0.39
RCS	0.39
Student	1.57
Zoonosis Control Program Specialist	0.39

as.data.frame(table(mydata$Specialization))%>%
  kbl(col.names = c("Specialization", "n"))%>%kable_classic(full_width=F)

Specialization	n
	109
Air Force idmt	1
Air Force IDMT	1
Biomedicine	1
Community health	72
Community health,Other	1
DC	1
Does not apply	7
economist	1
Economist	1
Epidemiologist	3
FSC	1
Infectious disease,Cardiology,General practitioner/Family physician,Community health	2
LVN	1
Microbiologist	1
MLS	1
Other	3
Other / Does not apply	11
Other,Does not apply	1
Ph.D. / Post-Grad / Faculty	11
PharmD / MPharm / Pharm Student	12
PT, MHA, PhD	1
Public Health	4
R&D Industry	1
RCS	1
Student	4
Zoonosis Control Program Specialist	1

as.data.frame(table(pre$Specialization))%>%
  kbl(col.names = c("Pre Specialization", "n"))%>%kable_classic(full_width=F)

Pre Specialization	n
	89
Air Force idmt	0
Air Force IDMT	1
Biomedicine	1
Community health	39
Community health,Other	1
DC	1
Does not apply	5
economist	0
Economist	1
Epidemiologist	2
FSC	1
Infectious disease,Cardiology,General practitioner/Family physician,Community health	0
LVN	1
Microbiologist	1
MLS	1
Other	3
Other / Does not apply	8
Other,Does not apply	0
Ph.D. / Post-Grad / Faculty	9
PharmD / MPharm / Pharm Student	10
PT, MHA, PhD	1
Public Health	3
R&D Industry	1
RCS	1
Student	2
Zoonosis Control Program Specialist	1

as.data.frame(table(post$Specialization))%>%
  kbl(col.names = c("Post Specialization", "n"))%>%kable_classic(full_width=F)

Post Specialization	n
	20
Air Force idmt	1
Air Force IDMT	0
Biomedicine	0
Community health	33
Community health,Other	0
DC	0
Does not apply	2
economist	1
Economist	0
Epidemiologist	1
FSC	0
Infectious disease,Cardiology,General practitioner/Family physician,Community health	2
LVN	0
Microbiologist	0
MLS	0
Other	0
Other / Does not apply	3
Other,Does not apply	1
Ph.D. / Post-Grad / Faculty	2
PharmD / MPharm / Pharm Student	2
PT, MHA, PhD	0
Public Health	1
R&D Industry	0
RCS	0
Student	2
Zoonosis Control Program Specialist	0

#########################################################################

For the pre-test, the modal response when asked about their knowledge of CD was “Good” (30%) with only 8% professing “Excellent” knowledge. After training, that self-assessment increased to 58% “Good” and 16% Excellent (Fisher’s Exact Test p-value<0.001).

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Knowledge)
myt=myt/rowSums(myt)
colnames(myt)=c("None", "Very Limited", "Limited", "Good", "Excellent")
row.names(myt)=c("Post", "Pre")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)

	None	Very Limited	Limited	Good	Excellent
Post	0.0000	5.6338	21.1268	57.7465	15.4930
Pre	12.5683	19.1257	32.2404	30.0546	6.0109

mybar3(mydata$Knowledge,"Knowledge")

fisher.test(myt, simulate.p.value = TRUE)

## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.010514

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided

#########################################################################

Prior to the training 75% were not confident or only somewhat confident in the recency of their knowledge. Post-training, 80% were either confident or very confident (Fisher’s Exact Test p-value < .001).

Recency

#########################################################################

myt=table(mydata$PrePost,mydata$Recent)
myt=myt/rowSums(myt)
colnames(myt)=c("Not at all Confident", "Somewhat Confident", "Confident", "Very Confident")
row.names(myt)=c("Post","Pre")
myt=round(100*myt, 4)
myt%>%kbl()%>%kable_classic(full_width=F)

	Not at all Confident	Somewhat Confident	Confident	Very Confident
Post	1.4085	16.9014	54.9296	26.7606
Pre	40.9836	34.4262	17.4863	7.1038

mybar3(mydata$Recent, "Recency of Knowledge")

fisher.test(myt, simulate.p.value = TRUE)

## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.009248

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided

#########################################################################

For the pre-post, students were provided 10 questions to test their knowledge. The distribution of the pre-post scores is shown in Figure 2 (unmatched) with notched boxplots, boxplots that provide a visual median test. Without controlling for individual ability, median post-scores were statistically better than pre-test scores at the alpha=0.05 level.

Score Pre-Post

#########################################################################
mydata$PrePost2=mydata$PrePost
mydata$PrePost=as.factor(mydata$PrePost)
levels(mydata$PrePost)=c("Post", "Pre")
boxplot(mydata$Score~mydata$PrePost, main="Knowledge Score, Pre vs. Post",notch=TRUE, col=c("red", "dark green"), horizontal = TRUE)

#########################################################################

For the majority of questions, the pre-post scores appeared to improve (Figure 3). The exception to this statement was for “who should be treated with anti-T drugs” and for “prevention” methods. Both of these topics will be reinforced in future sessions.

Subscores

#########################################################################
mynames=c("True or False: Chagas Disease Present in Texas", 
          "Chagas Disease Caused By...",
          "T Cruzi Transmitted by Saliva.", 
          "Parts of World for Chagas Disease", 
          "What % of Patients Develop Clinical Disease",
          "Chagas Disease Symptoms...", 
          "Methods to Confirm Chagas Disease", 
          "Who should be Treated with Antitrypanosomal Drugs", 
          "EKG Findings Typical of Chagas Disease",
          "Steps for Prevention")
for (i in 10:19){
  print(mybar3(mydata[,i],mynames[i-9]))
}

Test questions were found to be uncorrelated, so that performance on one question was statistically not linked to any others. This finding suggests that the topics are sufficiently different.

Correlations

#########################################################################
newdat=mydata[, c(10:19)]
newdat2=pre[, c(10:19)]
newdat3=post[, c(10:19)]
for (i in 1:10){ newdat[,i]=as.numeric(newdat[,i])-1
newdat2[,i]=as.numeric(newdat2[,i])-1
newdat3[,i]=as.numeric(newdat3[,i])-1
}
mycorr=cor(newdat)
mycorr2=cor(newdat2)
mycorr3=cor(newdat3)

## Warning in cor(newdat3): the standard deviation is zero

corfunction(mycorr)

corfunction(mycorr2)

#########################################################################

Table 1 provides the descriptive statistics for the comparison between the pre-test and post-test. These scores are not matched.

4.2. Inferential Testing

Both paired and unpaired, Holm-adjusted t-tests were used to evaluate performance differences for the students. The unpaired t-tests do not account for individual knowledge coming into the class; however, they allow for a larger sample size.

For the unpaired t-tests (Table 2), we found statistically significant differences for all but three of the questions associated with symptoms, diagnoses, and prevention. These are some of the more difficult concepts taught in the course.

When comparing CHWs with non-CHWs (Table 3), we noticed that the CHW workers performed worse than the non-CHWs (many of whom were doctors) as indicated by the negative difference between the scores. There were no statistically significant differences on 4 of the 10 questions, though.

Simple Inferentials

Paired t-test (Small Subset)

prepost=read.csv("D:/PaulaEcho/prepost.csv", stringsAsFactors = T)
MD=prepost[prepost$Group=="MD",]
CHW=prepost[prepost$Group=="CHW",]


prepost$Group=MD$Group=CHW$Group=NULL

myf2=function(x){
  
  myt=t.test(x)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(prepost[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(prepost)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post ALL, n=45")%>%kable_classic()

Pre-Post ALL, n=45
	Mean	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Knowledge	0.867	0.469	1.265	4.389	44	0.000
Recent	1.044	0.731	1.358	6.714	44	0.000
Texas	0.022	-0.023	0.067	1.000	44	0.323
Cause	0.200	0.063	0.337	2.934	44	0.005
Saliva	0.289	0.112	0.466	3.292	44	0.002
Location	0.067	-0.033	0.166	1.354	44	0.183
Prevalence	0.356	0.173	0.538	3.917	44	0.000
ClinDisease	0.133	-0.018	0.285	1.773	44	0.083
Symptoms	0.156	-0.025	0.336	1.735	44	0.090
Diagnosis	-0.022	-0.220	0.175	-0.227	44	0.821
AntiT_Drugs	0.111	-0.049	0.271	1.402	44	0.168
Prevent	-0.178	-0.351	-0.005	-2.072	44	0.044

a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(MD[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(MD)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post MD, n=28")%>%kable_classic()

Pre-Post MD, n=28
	Mean	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Knowledge	0.250	-0.167	0.667	1.230	27	0.229
Recent	0.643	0.259	1.026	3.438	27	0.002
Texas	0.000	NaN	NaN	NaN	27	NaN
Cause	0.071	-0.075	0.218	1.000	27	0.326
Saliva	0.179	-0.006	0.363	1.987	27	0.057
Location	0.000	-0.106	0.106	0.000	27	1.000
Prevalence	0.179	-0.059	0.416	1.544	27	0.134
ClinDisease	-0.071	-0.218	0.075	-1.000	27	0.326
Symptoms	0.000	-0.236	0.236	0.000	27	1.000
Diagnosis	0.036	-0.188	0.259	0.328	27	0.745
AntiT_Drugs	0.036	-0.161	0.233	0.372	27	0.713
Prevent	-0.107	-0.351	0.137	-0.902	27	0.375

a=matrix(rep(NA, 6*12), 12)
for (i in 1:12){a[i,1:6]=myf2(CHW[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(CHW)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="Pre-Post CHW, n=17")%>%kable_classic()

Pre-Post CHW, n=17
	Mean	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Knowledge	1.882	1.341	2.424	7.366	16	0.000
Recent	1.706	1.309	2.103	9.114	16	0.000
Texas	0.059	-0.066	0.184	1.000	16	0.332
Cause	0.412	0.151	0.673	3.347	16	0.004
Saliva	0.471	0.102	0.839	2.704	16	0.016
Location	0.176	-0.026	0.379	1.852	16	0.083
Prevalence	0.647	0.394	0.900	5.416	16	0.000
ClinDisease	0.471	0.206	0.735	3.771	16	0.002
Symptoms	0.412	0.151	0.673	3.347	16	0.004
Diagnosis	-0.118	-0.519	0.284	-0.621	16	0.543
AntiT_Drugs	0.235	-0.054	0.524	1.725	16	0.104
Prevent	-0.294	-0.536	-0.053	-2.582	16	0.020

A course satisfaction survey revealed wide-spread approval of the content and instructors. The individual questions are Appendix B, and the results of the survey are in Table 5.

Follow-Up Satisfaction

fu=read.csv("D:/PaulaEcho/total followup.csv")

describe(fu)%>%kbl(caption="CHW Follow-Up")%>%kable_classic(html_font = "Cambria")

CHW Follow-Up
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Occupation*	1	54	2.574074	1.6437630	3	2.318182	1.4826	1	8	7	1.1847971	1.3818618	0.2236878
Learning.objectives.were.clearly.stated*	2	54	2.555556	0.5378738	3	2.590909	0.0000	1	3	2	-0.5729870	-0.9732335	0.0731953
Content.was.well.organized*	3	54	2.574074	0.5697352	3	2.636364	0.0000	1	3	2	-0.8856959	-0.2884584	0.0775311
Stated.learning.objectives.were.met*	4	54	1.555556	0.5015699	2	1.568182	0.0000	1	2	1	-0.2174243	-1.9885288	0.0682550
Content.was.accurate.and.relevant*	5	54	3.518518	0.6656176	4	3.613636	0.0000	1	4	3	-1.3849570	2.0907518	0.0905791
Course.corresponded.to.my.expectations*	6	54	3.351852	0.6488715	3	3.409091	0.0000	1	4	3	-0.8748751	1.3626716	0.0883002
I.intend.to.apply.what.I.learned*	7	54	2.481482	0.6062797	3	2.545454	0.0000	1	3	2	-0.6786852	-0.5699189	0.0825042
The.lecturers.demonstrated.knowledge.of.the.subject.matter*	8	54	1.703704	0.4609109	2	1.750000	0.0000	1	2	1	-0.8675491	-1.2698519	0.0627220
The.lecturers.were.effective.in.communicating.the.content.of.the.training.program*	9	54	1.648148	0.4820322	2	1.681818	0.0000	1	2	1	-0.6032986	-1.6658575	0.0655963
The.lecturers.encouraged.feedback.from.the.participants*	10	54	2.592593	0.5669687	3	2.659091	0.0000	1	3	2	-0.9678568	-0.1285472	0.0771547
Overall..I.would.rate.this.program*	11	54	2.722222	0.4920756	3	2.795454	0.0000	1	3	2	-1.4218180	0.9710169	0.0669630
Overall..I.would.rate.the.lecturers*	12	54	1.777778	0.4196435	2	1.840909	0.0000	1	2	1	-1.2993590	-0.3165050	0.0571062
Did.you.find.this.activity.enhanced.your.understanding.of.Chagas.disease.*	13	54	1.981481	0.1360828	2	2.000000	0.0000	1	2	1	-6.9452610	47.1100823	0.0185185
Would.you.recommend.this.training.program.to.other.clinicians..clinical.staff.members..medical.students..or.community.health.workers.*	14	54	1.000000	0.0000000	1	1.000000	0.0000	1	1	0	NaN	NaN	0.0000000
Was.there.content.you.expected.to.be.covered.that.was.not.*	15	54	1.240741	0.4315477	1	1.181818	0.0000	1	2	1	1.1792818	-0.6197488	0.0587262
If.yes..please.detail.below.*	16	53	1.849057	2.1785336	1	1.232558	0.0000	1	10	9	2.4890049	5.0463348	0.2992446
Please.provide.additional.comments.and.feedback..if.any..*	17	54	3.833333	5.0497525	1	2.772727	0.0000	1	18	17	1.5377847	0.9188453	0.6871843

fu=fu[, 1:15]

for (i in 2:15){
  print(mybar2(i,colnames(fu)[i]))
}

Models

A linear model of performance score as a function of age, ethnicity, gender, self-assessed knowledge, recency of knowledge, CHW status, and pre-post status of examination was estimated using ordinary least squares on the n1+n2=254 tests. The results showed that age groupings, ethnicity, self-assessment of knowledge, and pre-post examination were important in predicting test results (F(13,240)=13.3, p<.001, R^2=.440, Adj. R^2=.410).

Regression

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

mydata$CHW=mydata$Profession
mydata$CHW=rep(0, nrow(mydata))
mydata$CHW[mydata$Profession=="CHW"]=1
mydata$CHW=as.numeric(mydata$CHW)
mydata$Female=as.numeric(mydata$Gender)
mydata$Female[mydata$Female==2]=0
mylm=lm(Score~Age+Female+Ethnicity+Knowledge+Recent+CHW+PrePost2, data=mydata)
summary(mylm)$coefficients%>%kbl()%>%kable_classic(html_font = 'Cambria')

	Estimate	Std. Error	t value	Pr(>\|t\|)
(Intercept)	0.4072243	0.0636530	6.3975652	0.0000000
Age25 to 34	0.2332343	0.0464262	5.0237633	0.0000010
Age35 to 44	0.1639557	0.0461276	3.5543908	0.0004579
Age45 to 64	0.1678968	0.0445374	3.7697960	0.0002067
Age65 and over	0.1799692	0.0576812	3.1200660	0.0020342
AgePrefer not to respond	0.1781863	0.0987719	1.8040180	0.0725090
Female	-0.0175723	0.0248363	-0.7075240	0.4799420
EthnicityNon-Hispanic	0.0981494	0.0271304	3.6176931	0.0003638
EthnicityNon -Hispanic	-0.0323416	0.1509386	-0.2142698	0.8305225
EthnicityPrefer not to respond	-0.0165068	0.0593320	-0.2782100	0.7810958
Knowledge2. Very Limited	-0.0171143	0.0402922	-0.4247539	0.6714047
Knowledge3. Limited	0.0790134	0.0420758	1.8778821	0.0616357
Knowledge4. Good	0.1289828	0.0460063	2.8035876	0.0054760
Knowledge5. Excellent	0.1441796	0.0594176	2.4265454	0.0159967
Recent2. Somewhat Confident	0.0195944	0.0301687	0.6494920	0.5166548
Recent3. Confident	0.0388984	0.0395262	0.9841180	0.3260696
Recent4. Very Confident	0.0293292	0.0498102	0.5888186	0.5565485
CHW	-0.0592721	0.0290590	-2.0397173	0.0424971
PrePost2Pre	-0.0891525	0.0266958	-3.3395752	0.0009757

summary(mylm)

## 
## Call:
## lm(formula = Score ~ Age + Female + Ethnicity + Knowledge + Recent + 
##     CHW + PrePost2, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39158 -0.09124 -0.00025  0.09226  0.40799 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.40722    0.06365   6.398 8.46e-10 ***
## Age25 to 34                     0.23323    0.04643   5.024 1.00e-06 ***
## Age35 to 44                     0.16396    0.04613   3.554 0.000458 ***
## Age45 to 64                     0.16790    0.04454   3.770 0.000207 ***
## Age65 and over                  0.17997    0.05768   3.120 0.002034 ** 
## AgePrefer not to respond        0.17819    0.09877   1.804 0.072509 .  
## Female                         -0.01757    0.02484  -0.708 0.479942    
## EthnicityNon-Hispanic           0.09815    0.02713   3.618 0.000364 ***
## EthnicityNon -Hispanic         -0.03234    0.15094  -0.214 0.830522    
## EthnicityPrefer not to respond -0.01651    0.05933  -0.278 0.781096    
## Knowledge2. Very Limited       -0.01711    0.04029  -0.425 0.671405    
## Knowledge3. Limited             0.07901    0.04208   1.878 0.061636 .  
## Knowledge4. Good                0.12898    0.04601   2.804 0.005476 ** 
## Knowledge5. Excellent           0.14418    0.05942   2.427 0.015997 *  
## Recent2. Somewhat Confident     0.01959    0.03017   0.649 0.516655    
## Recent3. Confident              0.03890    0.03953   0.984 0.326070    
## Recent4. Very Confident         0.02933    0.04981   0.589 0.556548    
## CHW                            -0.05927    0.02906  -2.040 0.042497 *  
## PrePost2Pre                    -0.08915    0.02670  -3.340 0.000976 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.147 on 235 degrees of freedom
## Multiple R-squared:  0.4546, Adjusted R-squared:  0.4128 
## F-statistic: 10.88 on 18 and 235 DF,  p-value: < 2.2e-16

hist(mylm$residuals, col="blue", main="Residuals")

Echo Analysis

Doc Larry Fulton

7 November 2020