Exploratory Data Analysis

Load libraries and data

#####################Read and Pre-Clean the Data#######################
require(Amelia)

## Loading required package: Amelia

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

library(car)

## Loading required package: carData

library(corrplot)

## corrplot 0.84 loaded

library(ggcorrplot)

## Loading required package: ggplot2

library(heplots)
library(kableExtra)
library(MANOVA.RM)
library(MASS)
library(MVN)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## sROC 0.1-2 loaded

library(mvtnorm)
require(psych) #to describe

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:car':
## 
##     logit

require(ggplot2)
library(ggcorrplot)
library(qcc)

## Package 'qcc' version 2.7

## Type 'citation("qcc")' for citing this R package in publications.

require(reticulate) #to use Python in R as well

## Loading required package: reticulate

require(ResourceSelection)

## Loading required package: ResourceSelection

## ResourceSelection 0.3-5   2019-07-22

library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:MASS':
## 
##     select

## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat

## The following object is masked from 'package:stats':
## 
##     filter

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()        masks ggplot2::%+%()
## x psych::alpha()      masks ggplot2::alpha()
## x dplyr::filter()     masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks rstatix::select(), MASS::select()
## x purrr::some()       masks car::some()

corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}

mydata=read.csv("D:/PaulaEcho/global.csv", stringsAsFactors = TRUE)
colnames(mydata)

##  [1] "PrePost"               "Duration..in.seconds." "Date"                 
##  [4] "Profession"            "Specialization"        "Practice"             
##  [7] "Years"                 "Knowledge"             "Recent"               
## [10] "Texas"                 "Cause"                 "Saliva"               
## [13] "Location"              "Prevalence"            "Onset"                
## [16] "Symptoms"              "Diagnosis"             "AntiT_Drugs"          
## [19] "Prevention"            "Score"                 "Age"                  
## [22] "Ethnicity"             "Gender"                "Screened"             
## [25] "Tested"                "MatchID"

#########################################################################

Check Missing

We have true missing for Quantitative Methods, as that section was recently added.

#########################################################################
missmap(mydata)

#########################################################################

Descriptives

Gender

#########################################################################
as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)

Gender	%
Female	77.17
Male	22.83

#########################################################################

Ethnicity

#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%
  kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)

Ethnicity	%
Hispanic	58.27
Non-Hispanic	38.19
Non -Hispanic	0.39
Prefer not to respond	3.15

#########################################################################

Age

#########################################################################
as.data.frame(100*round(table(mydata$Age)/length(mydata$Age),4))%>%
  kbl(col.names = c("Age", "%"))%>%kable_classic(full_width=F)

Age	%
18 to 24	5.51
25 to 34	18.90
35 to 44	26.38
45 to 64	41.73
65 and over	6.30
Prefer not to respond	1.18

#########################################################################

Profession

#########################################################################
as.data.frame(100*round(table(mydata$Profession)/length(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "%"))%>%kable_classic(full_width=F)

Profession	%
CHW	37.01
DVM	3.54
MD/DO	28.74
NP/PA	2.36
Other	28.35

#########################################################################

Specialization

#########################################################################
as.data.frame(100*round(table(mydata$Specialization)/length(mydata$Specialization),4))%>%
  kbl(col.names = c("Specialization", "%"))%>%kable_classic(full_width=F)

Specialization	%
	42.91
Air Force idmt	0.39
Air Force IDMT	0.39
Biomedicine	0.39
Community health	28.35
Community health,Other	0.39
DC	0.39
Does not apply	2.76
economist	0.39
Economist	0.39
Epidemiologist	1.18
FSC	0.39
Infectious disease,Cardiology,General practitioner/Family physician,Community health	0.79
LVN	0.39
Microbiologist	0.39
MLS	0.39
Other	1.18
Other / Does not apply	4.33
Other,Does not apply	0.39
Ph.D. / Post-Grad / Faculty	4.33
PharmD / MPharm / Pharm Student	4.72
PT, MHA, PhD	0.39
Public Health	1.57
R&D Industry	0.39
RCS	0.39
Student	1.57
Zoonosis Control Program Specialist	0.39

#########################################################################

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Knowledge)
myt=myt/rowSums(myt)
colnames(myt)=c("None", "Very Limited", "Limited", "Good", "Excellent")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)

	None	Very Limited	Limited	Good	Excellent
Pre	12.5683	19.1257	32.2404	30.0546	6.0109
Post	0.0000	5.6338	21.1268	57.7465	15.4930

fisher.test(myt, simulate.p.value = TRUE)

## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.010514

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided

#########################################################################

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Recent)
myt=myt/rowSums(myt)
colnames(myt)=c("Not at all Confident", "Somewhat Confident", "Confident", "Very Confident")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)

	Not at all Confident	Somewhat Confident	Confident	Very Confident
Pre	40.9836	34.4262	17.4863	7.1038
Post	1.4085	16.9014	54.9296	26.7606

fisher.test(myt, simulate.p.value = TRUE)

## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.009248

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided

#########################################################################

Score Pre-Post

mydata$PrePost2=mydata$PrePost
mydata$PrePost=as.factor(mydata$PrePost)
levels(mydata$PrePost)=c("Pre", "Post")
boxplot(mydata$Score~mydata$PrePost, main="Knowledge Score, Pre vs. Post",notch=TRUE, col=c("red", "dark green"), horizontal = TRUE)

Subscores

Correlations

Test questions are uncorrelated.

#########################################################################
mycorr=cor(mydata[, c(10:19)])
corfunction(mycorr)

#########################################################################

Pre-Test

We would expect poor performance on the pre-test scores. Students are likely to have only 3308 as the basis for knowledge.

#########################################################################
pre=round(describe(mydata[mydata$PrePost=="Pre",10:19]), 3)

pre%>%kbl(caption="Pre-Test")%>%kable_classic(full_width = F, html_font = "Cambria")

Pre-Test
	vars	n	mean	sd	median	trimmed	max	range	skew	kurtosis	se
Texas	1	183	0.973	0.163	1	1.000	1	1	-5.752	31.251	0.012
Cause	2	183	0.760	0.429	1	0.823	1	1	-1.205	-0.551	0.032
Saliva	3	183	0.361	0.482	0	0.327	1	1	0.576	-1.678	0.036
Location	4	183	0.891	0.313	1	0.986	1	1	-2.484	4.193	0.023
Prevalence	5	183	0.426	0.496	0	0.408	1	1	0.296	-1.923	0.037
Onset	6	183	0.634	0.483	1	0.667	1	1	-0.551	-1.705	0.036
Symptoms	7	183	0.235	0.425	0	0.170	1	1	1.240	-0.465	0.031
Diagnosis	8	183	0.585	0.494	1	0.605	1	1	-0.341	-1.894	0.037
AntiT_Drugs	9	183	0.694	0.462	1	0.741	1	1	-0.835	-1.310	0.034
Prevention	10	183	0.279	0.450	0	0.224	1	1	0.979	-1.047	0.033

#########################################################################

Post-Test

We would hope that our work teaching the students resulted in better scores. These are the raw descriptives. We will look at pre-post later.

#########################################################################
post=round(describe(mydata[mydata$PrePost=="Post",10:19]), 3)

pre%>%kbl(caption="Post-Test")%>%kable_classic(full_width = F, html_font = "Cambria")

Post-Test
	vars	n	mean	sd	median	trimmed	max	range	skew	kurtosis	se
Texas	1	183	0.973	0.163	1	1.000	1	1	-5.752	31.251	0.012
Cause	2	183	0.760	0.429	1	0.823	1	1	-1.205	-0.551	0.032
Saliva	3	183	0.361	0.482	0	0.327	1	1	0.576	-1.678	0.036
Location	4	183	0.891	0.313	1	0.986	1	1	-2.484	4.193	0.023
Prevalence	5	183	0.426	0.496	0	0.408	1	1	0.296	-1.923	0.037
Onset	6	183	0.634	0.483	1	0.667	1	1	-0.551	-1.705	0.036
Symptoms	7	183	0.235	0.425	0	0.170	1	1	1.240	-0.465	0.031
Diagnosis	8	183	0.585	0.494	1	0.605	1	1	-0.341	-1.894	0.037
AntiT_Drugs	9	183	0.694	0.462	1	0.741	1	1	-0.835	-1.310	0.034
Prevention	10	183	0.279	0.450	0	0.224	1	1	0.979	-1.047	0.033

#########################################################################

Comparisons

We compare pre-means versus post-means

mydf=data.frame("Pre-test"=pre$mean, "Post-test"=post$mean)
rownames(mydf)=colnames(mydata[10:19])

mydf%>%kbl(caption="Comparison of Means")%>%kable_classic(full_width = F, html_font = "Cambria")

Comparison of Means
	Pre.test	Post.test
Texas	0.973	1.000
Cause	0.760	0.986
Saliva	0.361	0.634
Location	0.891	0.986
Prevalence	0.426	0.803
Onset	0.634	0.761
Symptoms	0.235	0.296
Diagnosis	0.585	0.507
AntiT_Drugs	0.694	0.887
Prevention	0.279	0.169

Simple Inferentials

Unpaired t-tests

#########################################################################

myf=function(x,y){
  
  myt=t.test(x, y)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),round(myt$estimate[2],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

p1=mydata[mydata$PrePost=="Pre",]
p2=mydata[mydata$PrePost=="Post",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean Pre","Mean Post","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()

	Mean Pre	Mean Post	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Texas	0.973	1.000	-0.051	-0.003	-2.261	182.000	0.025
Cause	0.760	0.986	-0.295	-0.158	-6.529	236.991	0.000
Saliva	0.361	0.634	-0.407	-0.139	-4.035	126.606	0.000
Location	0.891	0.986	-0.149	-0.042	-3.516	251.934	0.001
Prevalence	0.426	0.803	-0.495	-0.258	-6.272	156.631	0.000
Onset	0.634	0.761	-0.250	-0.004	-2.035	142.291	0.044
Symptoms	0.235	0.296	-0.185	0.064	-0.966	119.134	0.336
Diagnosis	0.585	0.507	-0.061	0.216	1.109	125.353	0.270
AntiT_Drugs	0.694	0.887	-0.294	-0.093	-3.795	183.890	0.000
Prevention	0.279	0.169	-0.001	0.220	1.966	150.715	0.051
Score	0.584	0.703	-0.162	-0.076	-5.407	184.400	0.000

p1=mydata[mydata$Profession=="CHW",]
p2=mydata[mydata$Profession!="CHW",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean CHW","Mean non-CHW","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()

	Mean CHW	Mean non-CHW	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Texas	0.957	0.994	-0.080	0.007	-1.662	109.813	0.099
Cause	0.723	0.881	-0.263	-0.053	-2.978	150.371	0.003
Saliva	0.468	0.419	-0.079	0.177	0.761	192.875	0.448
Location	0.915	0.919	-0.075	0.067	-0.107	191.351	0.915
Prevalence	0.500	0.550	-0.178	0.078	-0.767	193.866	0.444
Onset	0.404	0.825	-0.538	-0.304	-7.114	158.271	0.000
Symptoms	0.117	0.331	-0.313	-0.116	-4.281	246.116	0.000
Diagnosis	0.447	0.631	-0.311	-0.058	-2.873	189.961	0.005
AntiT_Drugs	0.755	0.744	-0.100	0.123	0.205	197.086	0.838
Prevention	0.160	0.300	-0.244	-0.037	-2.672	229.007	0.008
Score	0.545	0.659	-0.160	-0.068	-4.908	212.273	0.000

#########################################################################

Paired t-test (Small Subset)

prepost=read.csv("D:/PaulaEcho/prepost.csv")

myf2=function(x){
  
  myt=t.test(x)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 6*13), 13)
for (i in 1:13){a[i,1:6]=myf2(prepost[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(prepost)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="CHW Pre-Post")%>%kable_classic()

CHW Pre-Post
	Mean	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Knowledge	1.882	1.341	2.424	7.366	16	0.000
Recent	1.706	1.309	2.103	9.114	16	0.000
Texas	0.059	-0.066	0.184	1.000	16	0.332
Cause	0.412	0.151	0.673	3.347	16	0.004
Saliva	0.471	0.102	0.839	2.704	16	0.016
Location	0.176	-0.026	0.379	1.852	16	0.083
Prevalence	0.647	0.394	0.900	5.416	16	0.000
Onset	0.471	0.206	0.735	3.771	16	0.002
Symptoms	0.412	0.151	0.673	3.347	16	0.004
Diagnosis	-0.118	-0.519	0.284	-0.621	16	0.543
AntiT_Drugs	0.235	-0.054	0.524	1.725	16	0.104
Prevent	-0.294	-0.536	-0.053	-2.582	16	0.020
Score	0.247	0.146	0.348	5.165	16	0.000

Follow-Up Satisfaction

fu=read.csv("D:/PaulaEcho/followup1.csv")
describe(fu)%>%kbl(caption="CHW Follow-Up")%>%kable_classic(html_font = "Cambria")

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

CHW Follow-Up
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Well_Organized	1	20	4.55	0.5104178	5.0	4.5625	0.0000	4	5	1	-0.1861220	-2.061035	0.1141329
LOs_Met	2	20	4.60	0.5026247	5.0	4.6250	0.0000	4	5	1	-0.3780157	-1.947083	0.1123903
Accurate_Relevant	3	20	4.65	0.4893605	5.0	4.6875	0.0000	4	5	1	-0.5823928	-1.740467	0.1094243
As_Expected	4	20	4.50	0.5129892	4.5	4.5000	0.7413	4	5	1	0.0000000	-2.097500	0.1147079
Will_Apply	5	20	4.60	0.5026247	5.0	4.6250	0.0000	4	5	1	-0.3780157	-1.947083	0.1123903
Overall	6	20	4.85	0.3663475	5.0	4.9375	0.0000	4	5	1	-1.8152162	1.370931	0.0819178
Lecturers_Effective	7	20	4.55	0.5104178	5.0	4.5625	0.0000	4	5	1	-0.1861220	-2.061035	0.1141329
Encouraged_Feedback	8	20	4.55	0.5104178	5.0	4.5625	0.0000	4	5	1	-0.1861220	-2.061035	0.1141329
Overall_Lecturers	9	20	4.80	0.4103913	5.0	4.8750	0.0000	4	5	1	-1.3889182	-0.066875	0.0917663
Recommend	10	20	1.00	0.0000000	1.0	1.0000	0.0000	1	1	0	NaN	NaN	0.0000000
Missing_Content	11	20	0.20	0.4103913	0.0	0.1250	0.0000	0	1	1	1.3889182	-0.066875	0.0917663
X	12	0	NaN	NA	NA	NaN	NA	Inf	-Inf	-Inf	NA	NA	NA
X.1	13	0	NaN	NA	NA	NaN	NA	Inf	-Inf	-Inf	NA	NA	NA

Models

Regression

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

mydata$CHW=mydata$Profession
mydata$CHW=rep(0, nrow(mydata))
mydata$CHW[mydata$Profession=="CHW"]=1
mydata$CHW=as.numeric(mydata$CHW)
mydata$Female=as.numeric(mydata$Gender)
mydata$Female[mydata$Female==2]=0
mylm=lm(Score~Age+Female+Ethnicity+Knowledge+Recent+CHW+PrePost2, data=mydata)
summary(mylm)$coefficients%>%kbl()%>%kable_classic(html_font = 'Cambria')

	Estimate	Std. Error	t value	Pr(>\|t\|)
(Intercept)	0.2335087	0.0527751	4.4245992	0.0000147
Age25 to 34	0.2333747	0.0461218	5.0599674	0.0000008
Age35 to 44	0.1564692	0.0458485	3.4127409	0.0007547
Age45 to 64	0.1625478	0.0442242	3.6755411	0.0002928
Age65 and over	0.1740629	0.0568740	3.0605018	0.0024609
AgePrefer not to respond	0.1671223	0.0981488	1.7027435	0.0899108
Female	-0.0203290	0.0239566	-0.8485756	0.3969630
EthnicityNon-Hispanic	0.1081104	0.0266992	4.0492042	0.0000694
EthnicityNon -Hispanic	-0.0193866	0.1501385	-0.1291251	0.8973668
EthnicityPrefer not to respond	-0.0100277	0.0588353	-0.1704374	0.8648098
Knowledge	0.0475018	0.0132948	3.5729522	0.0004265
Recent	0.0151635	0.0151710	0.9995027	0.3185579
CHW	-0.0541472	0.0284608	-1.9025173	0.0583005
PrePost2	0.0918741	0.0258072	3.5600231	0.0004470

summary(mylm)

## 
## Call:
## lm(formula = Score ~ Age + Female + Ethnicity + Knowledge + Recent + 
##     CHW + PrePost2, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43167 -0.10528 -0.00337  0.10343  0.42416 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.23351    0.05278   4.425 1.47e-05 ***
## Age25 to 34                     0.23337    0.04612   5.060 8.34e-07 ***
## Age35 to 44                     0.15647    0.04585   3.413 0.000755 ***
## Age45 to 64                     0.16255    0.04422   3.676 0.000293 ***
## Age65 and over                  0.17406    0.05687   3.061 0.002461 ** 
## AgePrefer not to respond        0.16712    0.09815   1.703 0.089911 .  
## Female                         -0.02033    0.02396  -0.849 0.396963    
## EthnicityNon-Hispanic           0.10811    0.02670   4.049 6.94e-05 ***
## EthnicityNon -Hispanic         -0.01939    0.15014  -0.129 0.897367    
## EthnicityPrefer not to respond -0.01003    0.05884  -0.170 0.864810    
## Knowledge                       0.04750    0.01329   3.573 0.000427 ***
## Recent                          0.01516    0.01517   1.000 0.318558    
## CHW                            -0.05415    0.02846  -1.903 0.058301 .  
## PrePost2                        0.09187    0.02581   3.560 0.000447 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1473 on 240 degrees of freedom
## Multiple R-squared:  0.4404, Adjusted R-squared:  0.4101 
## F-statistic: 14.53 on 13 and 240 DF,  p-value: < 2.2e-16

hist(mylm$residuals, col="blue", main="Residuals")

MANCOVA for Subscores

Ethnicity, previous knowledge, and pre-post status are the primary predictors for the subscores.

attach(mydata)
myt=powerTransform(cbind(Texas, Cause, Saliva, Location, Prevalence, 
                        Onset, Symptoms, Diagnosis, AntiT_Drugs, Prevention)+.1)

mydata$nTexas=(mydata$Texas+.1)^myt$lambda[1]
mydata$nCause=(mydata$Cause+.1)^myt$lambda[2]
mydata$nSaliva=(mydata$Saliva+.1)^myt$lambda[3]
mydata$nLocation=(mydata$Location+.1)^myt$lambda[4]
mydata$nPrevalence=(mydata$Prevalence+.1)^myt$lambda[5]
mydata$nOnset=(mydata$Onset+.1)^myt$lambda[6]
mydata$nSymptoms=(mydata$Symptoms+.1)^myt$lambda[7]
mydata$nDiagnosis=(mydata$Diagnosis+.1)^myt$lambda[8]
mydata$nAntiT_Drugs=(mydata$AntiT_Drugs+.1)^myt$lambda[9]
mydata$nPrevention=(mydata$Prevention+.1)^myt$lambda[10]

res.man <- manova(cbind(nTexas, nCause, nSaliva, nLocation, nPrevalence, 
                        nOnset, nSymptoms, nDiagnosis, nAntiT_Drugs, nPrevention) ~
                    Gender+Ethnicity+PrePost, data = mydata)
summary(res.man, intercept=TRUE)

##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99248  3155.29     10    239 < 2.2e-16 ***
## Gender        1 0.09483     2.50     10    239  0.007072 ** 
## Ethnicity     3 0.27874     2.47     30    723 2.729e-05 ***
## PrePost       1 0.23546     7.36     10    239 3.692e-10 ***
## Residuals   248                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(res.man, intercept = TRUE)

##  Response nTexas :
##              Df  Sum Sq Mean Sq    F value  Pr(>F)    
## (Intercept)   1 13846.6 13846.6 12598.1919 < 2e-16 ***
## Gender        1     1.7     1.7     1.5032 0.22134    
## Ethnicity     3     0.7     0.2     0.2220 0.88108    
## PrePost       1     3.1     3.1     2.8066 0.09514 .  
## Residuals   248   272.6     1.1                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nCause :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 267.146 267.146 1326.832 < 2.2e-16 ***
## Gender        1   0.003   0.003    0.013   0.90944    
## Ethnicity     3   1.972   0.657    3.264   0.02206 *  
## PrePost       1   5.054   5.054   25.102 1.034e-06 ***
## Residuals   248  49.933   0.201                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nSaliva :
##              Df Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 646.67  646.67 2311.5183 < 2.2e-16 ***
## Gender        1   0.00    0.00    0.0118   0.91347    
## Ethnicity     3   2.14    0.71    2.5480   0.05645 .  
## PrePost       1   5.61    5.61   20.0618 1.145e-05 ***
## Residuals   248  69.38    0.28                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nLocation :
##              Df Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 559.03  559.03 2846.8935 < 2.2e-16 ***
## Gender        1   0.04    0.04    0.1882  0.664777    
## Ethnicity     3   0.21    0.07    0.3559  0.784879    
## PrePost       1   1.44    1.44    7.3278  0.007262 ** 
## Residuals   248  48.70    0.20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nPrevalence :
##              Df  Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 190.127 190.127 8971.8949 < 2.2e-16 ***
## Gender        1   0.040   0.040    1.8822  0.171323    
## Ethnicity     3   0.305   0.102    4.7968  0.002883 ** 
## PrePost       1   0.883   0.883   41.6567 5.676e-10 ***
## Residuals   248   5.255   0.021                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nOnset :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 150.814 150.814 866.8873 < 2.2e-16 ***
## Gender        1   3.110   3.110  17.8784 3.314e-05 ***
## Ethnicity     3   4.540   1.513   8.6988 1.650e-05 ***
## PrePost       1   1.950   1.950  11.2059 0.0009421 ***
## Residuals   248  43.145   0.174                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nSymptoms :
##              Df Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 133192  133192 939.8276 < 2.2e-16 ***
## Gender        1    562     562   3.9669   0.04750 *  
## Ethnicity     3   5185    1728  12.1944 1.805e-07 ***
## PrePost       1    634     634   4.4710   0.03547 *  
## Residuals   248  35147     142                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nDiagnosis :
##              Df  Sum Sq Mean Sq   F value  Pr(>F)    
## (Intercept)   1 158.650 158.650 2169.7643 < 2e-16 ***
## Gender        1   0.193   0.193    2.6451 0.10514    
## Ethnicity     3   0.565   0.188    2.5756 0.05446 .  
## PrePost       1   0.032   0.032    0.4375 0.50895    
## Residuals   248  18.133   0.073                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nAntiT_Drugs :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 192.167 192.167 853.5847 < 2.2e-16 ***
## Gender        1   0.159   0.159   0.7070 0.4012485    
## Ethnicity     3   1.119   0.373   1.6573 0.1768025    
## PrePost       1   2.804   2.804  12.4533 0.0004972 ***
## Residuals   248  55.832   0.225                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nPrevention :
##              Df Sum Sq Mean Sq  F value  Pr(>F)    
## (Intercept)   1 157077  157077 839.9246 < 2e-16 ***
## Gender        1      9       9   0.0459 0.83050    
## Ethnicity     3   1483     494   2.6439 0.04981 *  
## PrePost       1    396     396   2.1170 0.14693    
## Residuals   248  46379     187                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Echo Analysis

Doc Larry Fulton

7 November 2020

Exploratory Data Analysis

Load libraries and data

Check Missing

Descriptives

Gender

Ethnicity

Age

Profession

Specialization

Knowledge

Knowledge

Score Pre-Post

Subscores

Correlations

Pre-Test

Post-Test

Comparisons

Simple Inferentials

Unpaired t-tests

Paired t-test (Small Subset)

Follow-Up Satisfaction

Models

Regression

MANCOVA for Subscores