Exploratory Data Analysis

Load libraries and data

#####################Read and Pre-Clean the Data#######################
require(Amelia)
## Loading required package: Amelia
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(car)
## Loading required package: carData
library(corrplot)
## corrplot 0.84 loaded
library(ggcorrplot)
## Loading required package: ggplot2
library(heplots)
library(kableExtra)
library(MANOVA.RM)
library(MASS)
library(MVN)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## sROC 0.1-2 loaded
library(mvtnorm)
require(psych) #to describe
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:car':
## 
##     logit
require(ggplot2)
library(ggcorrplot)
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
require(ResourceSelection)
## Loading required package: ResourceSelection
## ResourceSelection 0.3-5   2019-07-22
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat
## The following object is masked from 'package:stats':
## 
##     filter
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()        masks ggplot2::%+%()
## x psych::alpha()      masks ggplot2::alpha()
## x dplyr::filter()     masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks rstatix::select(), MASS::select()
## x purrr::some()       masks car::some()
corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}

mydata=read.csv("D:/PaulaEcho/global.csv", stringsAsFactors = TRUE)
colnames(mydata)
##  [1] "PrePost"               "Duration..in.seconds." "Date"                 
##  [4] "Profession"            "Specialization"        "Practice"             
##  [7] "Years"                 "Knowledge"             "Recent"               
## [10] "Texas"                 "Cause"                 "Saliva"               
## [13] "Location"              "Prevalence"            "Onset"                
## [16] "Symptoms"              "Diagnosis"             "AntiT_Drugs"          
## [19] "Prevention"            "Score"                 "Age"                  
## [22] "Ethnicity"             "Gender"                "Screened"             
## [25] "Tested"                "MatchID"
#########################################################################

Check Missing

We have true missing for Quantitative Methods, as that section was recently added.

#########################################################################
missmap(mydata)

#########################################################################

Descriptives

Gender

#########################################################################
as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)
Gender %
Female 77.17
Male 22.83
#########################################################################

Ethnicity

#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%
  kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)
Ethnicity %
Hispanic 58.27
Non-Hispanic 38.19
Non -Hispanic 0.39
Prefer not to respond 3.15
#########################################################################

Age

#########################################################################
as.data.frame(100*round(table(mydata$Age)/length(mydata$Age),4))%>%
  kbl(col.names = c("Age", "%"))%>%kable_classic(full_width=F)
Age %
18 to 24 5.51
25 to 34 18.90
35 to 44 26.38
45 to 64 41.73
65 and over 6.30
Prefer not to respond 1.18
#########################################################################

Profession

#########################################################################
as.data.frame(100*round(table(mydata$Profession)/length(mydata$Profession),4))%>%
  kbl(col.names = c("Profession", "%"))%>%kable_classic(full_width=F)
Profession %
CHW 37.01
DVM 3.54
MD/DO 28.74
NP/PA 2.36
Other 28.35
#########################################################################

Specialization

#########################################################################
as.data.frame(100*round(table(mydata$Specialization)/length(mydata$Specialization),4))%>%
  kbl(col.names = c("Specialization", "%"))%>%kable_classic(full_width=F)
Specialization %
42.91
Air Force idmt 0.39
Air Force IDMT 0.39
Biomedicine 0.39
Community health 28.35
Community health,Other 0.39
DC 0.39
Does not apply 2.76
economist 0.39
Economist 0.39
Epidemiologist 1.18
FSC 0.39
Infectious disease,Cardiology,General practitioner/Family physician,Community health 0.79
LVN 0.39
Microbiologist 0.39
MLS 0.39
Other 1.18
Other / Does not apply 4.33
Other,Does not apply 0.39
Ph.D. / Post-Grad / Faculty 4.33
PharmD / MPharm / Pharm Student 4.72
PT, MHA, PhD 0.39
Public Health 1.57
R&D Industry 0.39
RCS 0.39
Student 1.57
Zoonosis Control Program Specialist 0.39
#########################################################################

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Knowledge)
myt=myt/rowSums(myt)
colnames(myt)=c("None", "Very Limited", "Limited", "Good", "Excellent")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)
None Very Limited Limited Good Excellent
Pre 12.5683 19.1257 32.2404 30.0546 6.0109
Post 0.0000 5.6338 21.1268 57.7465 15.4930
fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.010514
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################

Knowledge

#########################################################################

myt=table(mydata$PrePost,mydata$Recent)
myt=myt/rowSums(myt)
colnames(myt)=c("Not at all Confident", "Somewhat Confident", "Confident", "Very Confident")
row.names(myt)=c("Pre","Post")
myt=round(100*myt, 4)
myt%>%
  kbl()%>%kable_classic(full_width=F)
Not at all Confident Somewhat Confident Confident Very Confident
Pre 40.9836 34.4262 17.4863 7.1038
Post 1.4085 16.9014 54.9296 26.7606
fisher.test(myt, simulate.p.value = TRUE)
## Warning in fisher.test(myt, simulate.p.value = TRUE): 'x' has been rounded to
## integer: Mean relative difference: 0.009248
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  myt
## p-value = 0.0004998
## alternative hypothesis: two.sided
#########################################################################

Score Pre-Post

mydata$PrePost2=mydata$PrePost
mydata$PrePost=as.factor(mydata$PrePost)
levels(mydata$PrePost)=c("Pre", "Post")
boxplot(mydata$Score~mydata$PrePost, main="Knowledge Score, Pre vs. Post",notch=TRUE, col=c("red", "dark green"), horizontal = TRUE)

Subscores

Correlations

Test questions are uncorrelated.

#########################################################################
mycorr=cor(mydata[, c(10:19)])
corfunction(mycorr)

#########################################################################

Pre-Test

We would expect poor performance on the pre-test scores. Students are likely to have only 3308 as the basis for knowledge.

#########################################################################
pre=round(describe(mydata[mydata$PrePost=="Pre",10:19]), 3)

pre%>%kbl(caption="Pre-Test")%>%kable_classic(full_width = F, html_font = "Cambria")
Pre-Test
vars n mean sd median trimmed mad min max range skew kurtosis se
Texas 1 183 0.973 0.163 1 1.000 0 0 1 1 -5.752 31.251 0.012
Cause 2 183 0.760 0.429 1 0.823 0 0 1 1 -1.205 -0.551 0.032
Saliva 3 183 0.361 0.482 0 0.327 0 0 1 1 0.576 -1.678 0.036
Location 4 183 0.891 0.313 1 0.986 0 0 1 1 -2.484 4.193 0.023
Prevalence 5 183 0.426 0.496 0 0.408 0 0 1 1 0.296 -1.923 0.037
Onset 6 183 0.634 0.483 1 0.667 0 0 1 1 -0.551 -1.705 0.036
Symptoms 7 183 0.235 0.425 0 0.170 0 0 1 1 1.240 -0.465 0.031
Diagnosis 8 183 0.585 0.494 1 0.605 0 0 1 1 -0.341 -1.894 0.037
AntiT_Drugs 9 183 0.694 0.462 1 0.741 0 0 1 1 -0.835 -1.310 0.034
Prevention 10 183 0.279 0.450 0 0.224 0 0 1 1 0.979 -1.047 0.033
#########################################################################

Post-Test

We would hope that our work teaching the students resulted in better scores. These are the raw descriptives. We will look at pre-post later.

#########################################################################
post=round(describe(mydata[mydata$PrePost=="Post",10:19]), 3)

pre%>%kbl(caption="Post-Test")%>%kable_classic(full_width = F, html_font = "Cambria")
Post-Test
vars n mean sd median trimmed mad min max range skew kurtosis se
Texas 1 183 0.973 0.163 1 1.000 0 0 1 1 -5.752 31.251 0.012
Cause 2 183 0.760 0.429 1 0.823 0 0 1 1 -1.205 -0.551 0.032
Saliva 3 183 0.361 0.482 0 0.327 0 0 1 1 0.576 -1.678 0.036
Location 4 183 0.891 0.313 1 0.986 0 0 1 1 -2.484 4.193 0.023
Prevalence 5 183 0.426 0.496 0 0.408 0 0 1 1 0.296 -1.923 0.037
Onset 6 183 0.634 0.483 1 0.667 0 0 1 1 -0.551 -1.705 0.036
Symptoms 7 183 0.235 0.425 0 0.170 0 0 1 1 1.240 -0.465 0.031
Diagnosis 8 183 0.585 0.494 1 0.605 0 0 1 1 -0.341 -1.894 0.037
AntiT_Drugs 9 183 0.694 0.462 1 0.741 0 0 1 1 -0.835 -1.310 0.034
Prevention 10 183 0.279 0.450 0 0.224 0 0 1 1 0.979 -1.047 0.033
#########################################################################

Comparisons

We compare pre-means versus post-means

mydf=data.frame("Pre-test"=pre$mean, "Post-test"=post$mean)
rownames(mydf)=colnames(mydata[10:19])

mydf%>%kbl(caption="Comparison of Means")%>%kable_classic(full_width = F, html_font = "Cambria")
Comparison of Means
Pre.test Post.test
Texas 0.973 1.000
Cause 0.760 0.986
Saliva 0.361 0.634
Location 0.891 0.986
Prevalence 0.426 0.803
Onset 0.634 0.761
Symptoms 0.235 0.296
Diagnosis 0.585 0.507
AntiT_Drugs 0.694 0.887
Prevention 0.279 0.169

Simple Inferentials

Unpaired t-tests

#########################################################################

myf=function(x,y){
  
  myt=t.test(x, y)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),round(myt$estimate[2],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

p1=mydata[mydata$PrePost=="Pre",]
p2=mydata[mydata$PrePost=="Post",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean Pre","Mean Post","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()
Mean Pre Mean Post Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Texas 0.973 1.000 -0.051 -0.003 -2.261 182.000 0.025
Cause 0.760 0.986 -0.295 -0.158 -6.529 236.991 0.000
Saliva 0.361 0.634 -0.407 -0.139 -4.035 126.606 0.000
Location 0.891 0.986 -0.149 -0.042 -3.516 251.934 0.001
Prevalence 0.426 0.803 -0.495 -0.258 -6.272 156.631 0.000
Onset 0.634 0.761 -0.250 -0.004 -2.035 142.291 0.044
Symptoms 0.235 0.296 -0.185 0.064 -0.966 119.134 0.336
Diagnosis 0.585 0.507 -0.061 0.216 1.109 125.353 0.270
AntiT_Drugs 0.694 0.887 -0.294 -0.093 -3.795 183.890 0.000
Prevention 0.279 0.169 -0.001 0.220 1.966 150.715 0.051
Score 0.584 0.703 -0.162 -0.076 -5.407 184.400 0.000
p1=mydata[mydata$Profession=="CHW",]
p2=mydata[mydata$Profession!="CHW",]
a=matrix(rep(NA, 7*11), 11)
for (i in 1:11){a[i,1:7]=myf(p1[,i+9], p2[,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(mydata[10:20])
colnames(a)=c("Mean CHW","Mean non-CHW","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()
Mean CHW Mean non-CHW Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Texas 0.957 0.994 -0.080 0.007 -1.662 109.813 0.099
Cause 0.723 0.881 -0.263 -0.053 -2.978 150.371 0.003
Saliva 0.468 0.419 -0.079 0.177 0.761 192.875 0.448
Location 0.915 0.919 -0.075 0.067 -0.107 191.351 0.915
Prevalence 0.500 0.550 -0.178 0.078 -0.767 193.866 0.444
Onset 0.404 0.825 -0.538 -0.304 -7.114 158.271 0.000
Symptoms 0.117 0.331 -0.313 -0.116 -4.281 246.116 0.000
Diagnosis 0.447 0.631 -0.311 -0.058 -2.873 189.961 0.005
AntiT_Drugs 0.755 0.744 -0.100 0.123 0.205 197.086 0.838
Prevention 0.160 0.300 -0.244 -0.037 -2.672 229.007 0.008
Score 0.545 0.659 -0.160 -0.068 -4.908 212.273 0.000
#########################################################################

Paired t-test (Small Subset)

prepost=read.csv("D:/PaulaEcho/prepost.csv")

myf2=function(x){
  
  myt=t.test(x)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 6*13), 13)
for (i in 1:13){a[i,1:6]=myf2(prepost[,i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=colnames(prepost)
colnames(a)=c("Mean","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl(caption="CHW Pre-Post")%>%kable_classic()
CHW Pre-Post
Mean Lower 95% CI Upper 95% CI t-Value df Holm-Adjusted p
Knowledge 1.882 1.341 2.424 7.366 16 0.000
Recent 1.706 1.309 2.103 9.114 16 0.000
Texas 0.059 -0.066 0.184 1.000 16 0.332
Cause 0.412 0.151 0.673 3.347 16 0.004
Saliva 0.471 0.102 0.839 2.704 16 0.016
Location 0.176 -0.026 0.379 1.852 16 0.083
Prevalence 0.647 0.394 0.900 5.416 16 0.000
Onset 0.471 0.206 0.735 3.771 16 0.002
Symptoms 0.412 0.151 0.673 3.347 16 0.004
Diagnosis -0.118 -0.519 0.284 -0.621 16 0.543
AntiT_Drugs 0.235 -0.054 0.524 1.725 16 0.104
Prevent -0.294 -0.536 -0.053 -2.582 16 0.020
Score 0.247 0.146 0.348 5.165 16 0.000

Follow-Up Satisfaction

fu=read.csv("D:/PaulaEcho/followup1.csv")
describe(fu)%>%kbl(caption="CHW Follow-Up")%>%kable_classic(html_font = "Cambria")
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
CHW Follow-Up
vars n mean sd median trimmed mad min max range skew kurtosis se
Well_Organized 1 20 4.55 0.5104178 5.0 4.5625 0.0000 4 5 1 -0.1861220 -2.061035 0.1141329
LOs_Met 2 20 4.60 0.5026247 5.0 4.6250 0.0000 4 5 1 -0.3780157 -1.947083 0.1123903
Accurate_Relevant 3 20 4.65 0.4893605 5.0 4.6875 0.0000 4 5 1 -0.5823928 -1.740467 0.1094243
As_Expected 4 20 4.50 0.5129892 4.5 4.5000 0.7413 4 5 1 0.0000000 -2.097500 0.1147079
Will_Apply 5 20 4.60 0.5026247 5.0 4.6250 0.0000 4 5 1 -0.3780157 -1.947083 0.1123903
Overall 6 20 4.85 0.3663475 5.0 4.9375 0.0000 4 5 1 -1.8152162 1.370931 0.0819178
Lecturers_Effective 7 20 4.55 0.5104178 5.0 4.5625 0.0000 4 5 1 -0.1861220 -2.061035 0.1141329
Encouraged_Feedback 8 20 4.55 0.5104178 5.0 4.5625 0.0000 4 5 1 -0.1861220 -2.061035 0.1141329
Overall_Lecturers 9 20 4.80 0.4103913 5.0 4.8750 0.0000 4 5 1 -1.3889182 -0.066875 0.0917663
Recommend 10 20 1.00 0.0000000 1.0 1.0000 0.0000 1 1 0 NaN NaN 0.0000000
Missing_Content 11 20 0.20 0.4103913 0.0 0.1250 0.0000 0 1 1 1.3889182 -0.066875 0.0917663
X 12 0 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA
X.1 13 0 NaN NA NA NaN NA Inf -Inf -Inf NA NA NA

Models

Regression

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
mydata$CHW=mydata$Profession
mydata$CHW=rep(0, nrow(mydata))
mydata$CHW[mydata$Profession=="CHW"]=1
mydata$CHW=as.numeric(mydata$CHW)
mydata$Female=as.numeric(mydata$Gender)
mydata$Female[mydata$Female==2]=0
mylm=lm(Score~Age+Female+Ethnicity+Knowledge+Recent+CHW+PrePost2, data=mydata)
summary(mylm)$coefficients%>%kbl()%>%kable_classic(html_font = 'Cambria')
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2335087 0.0527751 4.4245992 0.0000147
Age25 to 34 0.2333747 0.0461218 5.0599674 0.0000008
Age35 to 44 0.1564692 0.0458485 3.4127409 0.0007547
Age45 to 64 0.1625478 0.0442242 3.6755411 0.0002928
Age65 and over 0.1740629 0.0568740 3.0605018 0.0024609
AgePrefer not to respond 0.1671223 0.0981488 1.7027435 0.0899108
Female -0.0203290 0.0239566 -0.8485756 0.3969630
EthnicityNon-Hispanic 0.1081104 0.0266992 4.0492042 0.0000694
EthnicityNon -Hispanic -0.0193866 0.1501385 -0.1291251 0.8973668
EthnicityPrefer not to respond -0.0100277 0.0588353 -0.1704374 0.8648098
Knowledge 0.0475018 0.0132948 3.5729522 0.0004265
Recent 0.0151635 0.0151710 0.9995027 0.3185579
CHW -0.0541472 0.0284608 -1.9025173 0.0583005
PrePost2 0.0918741 0.0258072 3.5600231 0.0004470
summary(mylm)
## 
## Call:
## lm(formula = Score ~ Age + Female + Ethnicity + Knowledge + Recent + 
##     CHW + PrePost2, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43167 -0.10528 -0.00337  0.10343  0.42416 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.23351    0.05278   4.425 1.47e-05 ***
## Age25 to 34                     0.23337    0.04612   5.060 8.34e-07 ***
## Age35 to 44                     0.15647    0.04585   3.413 0.000755 ***
## Age45 to 64                     0.16255    0.04422   3.676 0.000293 ***
## Age65 and over                  0.17406    0.05687   3.061 0.002461 ** 
## AgePrefer not to respond        0.16712    0.09815   1.703 0.089911 .  
## Female                         -0.02033    0.02396  -0.849 0.396963    
## EthnicityNon-Hispanic           0.10811    0.02670   4.049 6.94e-05 ***
## EthnicityNon -Hispanic         -0.01939    0.15014  -0.129 0.897367    
## EthnicityPrefer not to respond -0.01003    0.05884  -0.170 0.864810    
## Knowledge                       0.04750    0.01329   3.573 0.000427 ***
## Recent                          0.01516    0.01517   1.000 0.318558    
## CHW                            -0.05415    0.02846  -1.903 0.058301 .  
## PrePost2                        0.09187    0.02581   3.560 0.000447 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1473 on 240 degrees of freedom
## Multiple R-squared:  0.4404, Adjusted R-squared:  0.4101 
## F-statistic: 14.53 on 13 and 240 DF,  p-value: < 2.2e-16
hist(mylm$residuals, col="blue", main="Residuals")

MANCOVA for Subscores

Ethnicity, previous knowledge, and pre-post status are the primary predictors for the subscores.

attach(mydata)
myt=powerTransform(cbind(Texas, Cause, Saliva, Location, Prevalence, 
                        Onset, Symptoms, Diagnosis, AntiT_Drugs, Prevention)+.1)

mydata$nTexas=(mydata$Texas+.1)^myt$lambda[1]
mydata$nCause=(mydata$Cause+.1)^myt$lambda[2]
mydata$nSaliva=(mydata$Saliva+.1)^myt$lambda[3]
mydata$nLocation=(mydata$Location+.1)^myt$lambda[4]
mydata$nPrevalence=(mydata$Prevalence+.1)^myt$lambda[5]
mydata$nOnset=(mydata$Onset+.1)^myt$lambda[6]
mydata$nSymptoms=(mydata$Symptoms+.1)^myt$lambda[7]
mydata$nDiagnosis=(mydata$Diagnosis+.1)^myt$lambda[8]
mydata$nAntiT_Drugs=(mydata$AntiT_Drugs+.1)^myt$lambda[9]
mydata$nPrevention=(mydata$Prevention+.1)^myt$lambda[10]

res.man <- manova(cbind(nTexas, nCause, nSaliva, nLocation, nPrevalence, 
                        nOnset, nSymptoms, nDiagnosis, nAntiT_Drugs, nPrevention) ~
                    Gender+Ethnicity+PrePost, data = mydata)
summary(res.man, intercept=TRUE)
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99248  3155.29     10    239 < 2.2e-16 ***
## Gender        1 0.09483     2.50     10    239  0.007072 ** 
## Ethnicity     3 0.27874     2.47     30    723 2.729e-05 ***
## PrePost       1 0.23546     7.36     10    239 3.692e-10 ***
## Residuals   248                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(res.man, intercept = TRUE)
##  Response nTexas :
##              Df  Sum Sq Mean Sq    F value  Pr(>F)    
## (Intercept)   1 13846.6 13846.6 12598.1919 < 2e-16 ***
## Gender        1     1.7     1.7     1.5032 0.22134    
## Ethnicity     3     0.7     0.2     0.2220 0.88108    
## PrePost       1     3.1     3.1     2.8066 0.09514 .  
## Residuals   248   272.6     1.1                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nCause :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 267.146 267.146 1326.832 < 2.2e-16 ***
## Gender        1   0.003   0.003    0.013   0.90944    
## Ethnicity     3   1.972   0.657    3.264   0.02206 *  
## PrePost       1   5.054   5.054   25.102 1.034e-06 ***
## Residuals   248  49.933   0.201                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nSaliva :
##              Df Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 646.67  646.67 2311.5183 < 2.2e-16 ***
## Gender        1   0.00    0.00    0.0118   0.91347    
## Ethnicity     3   2.14    0.71    2.5480   0.05645 .  
## PrePost       1   5.61    5.61   20.0618 1.145e-05 ***
## Residuals   248  69.38    0.28                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nLocation :
##              Df Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 559.03  559.03 2846.8935 < 2.2e-16 ***
## Gender        1   0.04    0.04    0.1882  0.664777    
## Ethnicity     3   0.21    0.07    0.3559  0.784879    
## PrePost       1   1.44    1.44    7.3278  0.007262 ** 
## Residuals   248  48.70    0.20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nPrevalence :
##              Df  Sum Sq Mean Sq   F value    Pr(>F)    
## (Intercept)   1 190.127 190.127 8971.8949 < 2.2e-16 ***
## Gender        1   0.040   0.040    1.8822  0.171323    
## Ethnicity     3   0.305   0.102    4.7968  0.002883 ** 
## PrePost       1   0.883   0.883   41.6567 5.676e-10 ***
## Residuals   248   5.255   0.021                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nOnset :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 150.814 150.814 866.8873 < 2.2e-16 ***
## Gender        1   3.110   3.110  17.8784 3.314e-05 ***
## Ethnicity     3   4.540   1.513   8.6988 1.650e-05 ***
## PrePost       1   1.950   1.950  11.2059 0.0009421 ***
## Residuals   248  43.145   0.174                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nSymptoms :
##              Df Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 133192  133192 939.8276 < 2.2e-16 ***
## Gender        1    562     562   3.9669   0.04750 *  
## Ethnicity     3   5185    1728  12.1944 1.805e-07 ***
## PrePost       1    634     634   4.4710   0.03547 *  
## Residuals   248  35147     142                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nDiagnosis :
##              Df  Sum Sq Mean Sq   F value  Pr(>F)    
## (Intercept)   1 158.650 158.650 2169.7643 < 2e-16 ***
## Gender        1   0.193   0.193    2.6451 0.10514    
## Ethnicity     3   0.565   0.188    2.5756 0.05446 .  
## PrePost       1   0.032   0.032    0.4375 0.50895    
## Residuals   248  18.133   0.073                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nAntiT_Drugs :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## (Intercept)   1 192.167 192.167 853.5847 < 2.2e-16 ***
## Gender        1   0.159   0.159   0.7070 0.4012485    
## Ethnicity     3   1.119   0.373   1.6573 0.1768025    
## PrePost       1   2.804   2.804  12.4533 0.0004972 ***
## Residuals   248  55.832   0.225                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nPrevention :
##              Df Sum Sq Mean Sq  F value  Pr(>F)    
## (Intercept)   1 157077  157077 839.9246 < 2e-16 ***
## Gender        1      9       9   0.0459 0.83050    
## Ethnicity     3   1483     494   2.6439 0.04981 *  
## PrePost       1    396     396   2.1170 0.14693    
## Residuals   248  46379     187                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1