Exploratory Data Analysis

Load libraries and data

#####################Read and Pre-Clean the Data#######################
require(Amelia)

## Loading required package: Amelia

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

library(car)

## Loading required package: carData

library(corrplot)

## corrplot 0.84 loaded

library(ggcorrplot)

## Loading required package: ggplot2

library(heplots)
library(kableExtra)
library(MANOVA.RM)

## Warning: package 'MANOVA.RM' was built under R version 4.0.3

library(MASS)
library(MVN)

## Warning: package 'MVN' was built under R version 4.0.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## sROC 0.1-2 loaded

library(mvtnorm)
require(psych) #to describe

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:car':
## 
##     logit

require(ggplot2)
library(ggcorrplot)
library(qcc)

## Warning: package 'qcc' was built under R version 4.0.3

## Package 'qcc' version 2.7

## Type 'citation("qcc")' for citing this R package in publications.

require(reticulate) #to use Python in R as well

## Loading required package: reticulate

require(ResourceSelection)

## Loading required package: ResourceSelection

## ResourceSelection 0.3-5   2019-07-22

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.0.3

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:MASS':
## 
##     select

## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat

## The following object is masked from 'package:stats':
## 
##     filter

library(tidyverse)

## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --

## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## v purrr   0.3.4

## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x psych::%+%()        masks ggplot2::%+%()
## x psych::alpha()      masks ggplot2::alpha()
## x dplyr::filter()     masks rstatix::filter(), stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x dplyr::recode()     masks car::recode()
## x dplyr::select()     masks rstatix::select(), MASS::select()
## x purrr::some()       masks car::some()

corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}

mydata=read.csv("C:/Users/lfult/Desktop/Education/2020FALL.csv", stringsAsFactors = TRUE)
colnames(mydata)

##  [1] "Subject"     "Gender"      "Ethnicity"   "Age"         "Yr"         
##  [6] "DaysBetween" "GPA"         "T_GPA"       "PrePost"     "Fin"        
## [11] "Mgt"         "HR"          "Sys"         "HIM"         "Ldr"        
## [16] "Chg"         "Cli"         "QI"          "QM"          "Strat"      
## [21] "Com"         "Lgl"         "Score"       "Rank"        "Time"

#########################################################################

Check Missing

We have true missing for Quantitative Methods, as that section was recently added.

#########################################################################
missmap(mydata)

#########################################################################

Descriptives

Gender

#########################################################################
as.data.frame(100*round(table(mydata$Gender)/length(mydata$Gender),4))%>%
  kbl(col.names = c("Gender", "%"))%>%kable_classic(full_width=F)

Gender	%
F	80.95
M	19.05

#########################################################################

Ethnicity

#########################################################################
as.data.frame(100*round(table(mydata$Ethnicity)/length(mydata$Ethnicity),4))%>%
  kbl(col.names = c("Ethnicity", "%"))%>%kable_classic(full_width=F)

Ethnicity	%
A	5.95
B	16.67
C	41.07
H	36.31

#########################################################################

Age

#########################################################################
par(mfrow=c(1,3))
boxplot(mydata$Age, horizontal=TRUE, main="Age", col="blue")
boxplot(mydata$Age~mydata$Gender, horizontal=TRUE, main="Age~Gender", col="red")
boxplot(mydata$Age~mydata$Ethnicity, horizontal=TRUE, main="Age~Ethnicity", col="orange")

kable(t(round(describe(mydata$Age),3)), col.names="GPA")%>%kable_classic(full_width=F)

	GPA
vars	1.000
n	168.000
mean	22.679
sd	3.016
median	22.000
trimmed	22.074
mad	1.483
min	20.000
max	42.000
range	22.000
skew	3.720
kurtosis	18.561
se	0.233

#########################################################################

GPA

#########################################################################
par(mfrow=c(1,3))
boxplot(mydata$GPA, horizontal=TRUE, main="GPA", col="blue")
boxplot(mydata$GPA~mydata$Gender, horizontal=TRUE, main="GPA~Gender", col="red")
boxplot(mydata$GPA~mydata$Ethnicity, horizontal=TRUE, main="GPA~Ethnicity", col="orange")

kable(t(round(describe(mydata$GPA),3)), col.names="GPA")%>%kable_classic(full_width=F)

	GPA
vars	1.000
n	168.000
mean	3.238
sd	0.254
median	3.200
trimmed	3.223
mad	0.289
min	2.850
max	3.790
range	0.940
skew	0.444
kurtosis	-0.864
se	0.020

#########################################################################

Inter-Test Days

#########################################################################
par(mfrow=c(1,3))
boxplot(mydata$DaysBetween, horizontal=TRUE, main="Days Between Test", col="blue")
boxplot(mydata$DaysBetween~mydata$Gender, horizontal=TRUE, main="Days Between Tests~Gender", col="red")
boxplot(mydata$DaysBetween~mydata$Ethnicity, horizontal=TRUE, main="Days Between Tests~Ethnicity", col="orange")

kable(t(round(describe(mydata$DaysBetween),3)), col.names="Days Between Tests")%>%kable_classic(full_width=F)

	Days Between Tests
vars	1.000
n	168.000
mean	443.786
sd	71.626
median	418.000
trimmed	428.456
mad	25.204
min	333.000
max	654.000
range	321.000
skew	1.957
kurtosis	2.989
se	5.526

#########################################################################

Correlations

We look at hierarchically-clustered correlations to see which competency evaluations are related and in what directions. An “X” indicates no statistically significant correlation. Obviously “score” (the final score) and “rank” (the percentile rank) must be colinear. Interestingly, the student scores on the strategy area are highly correlated to their overall performance. This reinforces the use of strategy as a capstone course.

#########################################################################
mycorr=cor(mydata[, -c(1:3,5,9, 19)])
corfunction(mycorr)

#########################################################################

Kernel Density Estimates

#########################################################################
mysub=subset(mydata, select=c(Age, GPA, Score)) #get a subset for plotting
colnames(mysub)=c("Age", "GPA", "Score") #set column names
kdepairs(mysub) #plot

as.data.frame(cor(mydata$Score[1:84], mydata$Score[85:168]))%>%kbl(col.names="Pre-Post Test Score Correlation")%>%kable_classic()

Pre-Post Test Score Correlation
0.2187299

#########################################################################

Pre-Test

We would expect poor performance on the pre-test scores. Students are likely to have only 3308 as the basis for knowledge.

#########################################################################
pre=round(describe(mydata[1:84,10:25]),3)
post=round(describe(mydata[85:168, 10:25]),3)
mydelta=mydata[85:168, 10:25]-mydata[1:84, 10:25]
delta=round(describe(mydelta),3)
mynames=c("Financial Management", "General Management", "Healthcare Personnel", "Healthcare Systems & Organizations", "Information Management", "Leadership Skills and Behavior", "Managing Change", "Organizational Climate and Culture", "Quality Improvement", "Quantitative Management", "Strategic Planning and Marketing", "Community and the Environment", "Legal Environment of Healthcare Administration", "Final Score", "Percentile Rank", "Time" )
row.names(pre)=row.names(post)=row.names(delta)=mynames

pre%>%kbl(caption="Pre-Test")%>%kable_classic(full_width = F, html_font = "Cambria")

Pre-Test
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Financial Management	1	84	54.762	18.068	50.000	54.706	14.826	10.00	100.00	90.00	0.044	0.011	1.971
General Management	2	84	59.643	16.894	60.000	59.412	14.826	30.00	100.00	70.00	0.100	-0.631	1.843
Healthcare Personnel	3	84	65.119	15.010	60.000	65.294	14.826	30.00	100.00	70.00	-0.097	-0.210	1.638
Healthcare Systems & Organizations	4	84	54.405	19.160	50.000	54.118	14.826	10.00	100.00	90.00	0.131	-0.450	2.091
Information Management	5	84	56.905	17.697	60.000	57.500	14.826	10.00	90.00	80.00	-0.267	-0.757	1.931
Leadership Skills and Behavior	6	84	60.952	15.953	60.000	61.912	14.826	20.00	90.00	70.00	-0.435	-0.234	1.741
Managing Change	7	84	62.024	14.626	60.000	62.647	14.826	20.00	90.00	70.00	-0.417	-0.009	1.596
Organizational Climate and Culture	8	84	65.476	16.531	70.000	66.029	14.826	30.00	100.00	70.00	-0.233	-0.350	1.804
Quality Improvement	9	84	58.214	16.218	60.000	58.824	14.826	20.00	90.00	70.00	-0.300	-0.408	1.770
Quantitative Management	10	24	48.333	16.061	50.000	49.000	14.826	20.00	70.00	50.00	-0.103	-1.226	3.279
Strategic Planning and Marketing	11	84	56.905	18.496	60.000	57.353	14.826	10.00	90.00	80.00	-0.225	-0.653	2.018
Community and the Environment	12	84	55.000	16.827	60.000	54.853	14.826	20.00	100.00	80.00	0.097	-0.273	1.836
Legal Environment of Healthcare Administration	13	84	60.238	16.353	60.000	59.853	14.826	30.00	100.00	70.00	0.175	-0.544	1.784
Final Score	14	84	58.863	7.849	60.000	59.043	7.976	39.16	78.33	39.17	-0.134	-0.125	0.856
Percentile Rank	15	84	56.214	23.138	58.000	57.191	22.239	6.00	98.00	92.00	-0.289	-0.762	2.525
Time	16	84	49.089	23.322	44.075	46.158	20.334	13.75	134.30	120.55	1.329	2.002	2.545

Post-Test

We would hope that our work teaching the students resulted in better scores. These are the raw descriptives. We will look at pre-post later.

post[order(-post$mean),]%>%kbl(caption="Post-Test")%>%kable_classic(full_width = F, html_font = "Cambria")

Post-Test
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Time	16	84	79.362	22.091	76.135	78.713	20.527	34.77	146.1	111.33	0.376	-0.157	2.410
Percentile Rank	15	84	77.310	16.067	82.000	79.515	10.378	0.00	97.0	97.00	-1.944	5.608	1.753
Healthcare Personnel	3	84	76.786	14.740	80.000	78.382	14.826	30.00	100.0	70.00	-0.849	0.417	1.608
Organizational Climate and Culture	8	84	75.952	15.532	80.000	76.471	14.826	30.00	100.0	70.00	-0.404	-0.372	1.695
Legal Environment of Healthcare Administration	13	84	75.595	16.454	80.000	76.324	14.826	30.00	100.0	70.00	-0.409	-0.474	1.795
General Management	2	84	75.476	12.650	80.000	76.176	14.826	50.00	100.0	50.00	-0.317	-0.518	1.380
Strategic Planning and Marketing	11	84	75.357	14.009	80.000	75.882	14.826	30.00	100.0	70.00	-0.475	0.235	1.529
Leadership Skills and Behavior	6	84	74.762	16.093	80.000	75.294	14.826	40.00	100.0	60.00	-0.292	-0.657	1.756
Financial Management	1	84	73.095	14.055	70.000	73.529	14.826	40.00	100.0	60.00	-0.243	-0.208	1.533
Quality Improvement	9	84	72.738	14.172	70.000	73.382	14.826	40.00	100.0	60.00	-0.308	-0.455	1.546
Final Score	14	84	70.801	5.297	71.530	71.002	4.566	54.61	82.3	27.69	-0.418	-0.003	0.578
Healthcare Systems & Organizations	4	84	69.524	16.640	70.000	70.000	14.826	30.00	100.0	70.00	-0.174	-0.767	1.816
Managing Change	7	84	69.524	15.042	70.000	70.441	14.826	20.00	100.0	80.00	-0.592	0.202	1.641
Community and the Environment	12	84	67.619	15.415	70.000	68.235	14.826	0.00	90.0	90.00	-1.027	2.797	1.682
Information Management	5	84	65.000	13.928	70.000	65.147	14.826	30.00	90.0	60.00	-0.238	-0.433	1.520
Quantitative Management	10	30	50.333	19.025	50.000	50.000	22.239	20.00	90.0	70.00	0.157	-0.944	3.473

Difference Scores

And here is the punchline. All areas improved overall. But the post-test counts, so the time is longer by 30 minutes. In other words, the improvement might be directly attributed to the level of effort on the examination (caring). I see no way of incentivizing students to do their best on the pre-test. Any help here would be appreciated.

delta[order(-delta$mean),]%>%kbl(caption="Difference, Post Minus Pre")%>%kable_classic(full_width = F, html_font = "Cambria")

Difference, Post Minus Pre
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Time	16	84	30.273	20.349	30.615	30.216	20.512	-32.88	77.88	110.76	-0.143	0.356	2.220
Percentile Rank	15	84	21.095	25.069	22.000	20.515	25.204	-52.00	82.00	134.00	-0.025	0.294	2.735
Strategic Planning and Marketing	11	84	18.452	22.681	20.000	17.647	29.652	-30.00	80.00	110.00	0.330	-0.148	2.475
Financial Management	1	84	18.333	22.217	20.000	18.529	29.652	-30.00	60.00	90.00	-0.072	-0.796	2.424
General Management	2	84	15.833	22.345	20.000	16.029	29.652	-40.00	70.00	110.00	-0.048	-0.488	2.438
Legal Environment of Healthcare Administration	13	84	15.357	22.732	10.000	15.000	22.239	-40.00	70.00	110.00	0.124	-0.434	2.480
Healthcare Systems & Organizations	4	84	15.119	26.090	20.000	16.029	29.652	-40.00	70.00	110.00	-0.243	-0.499	2.847
Quality Improvement	9	84	14.524	19.843	20.000	14.706	14.826	-40.00	60.00	100.00	-0.191	0.002	2.165
Leadership Skills and Behavior	6	84	13.810	21.222	10.000	13.676	14.826	-40.00	70.00	110.00	0.062	0.180	2.316
Community and the Environment	12	84	12.619	22.178	10.000	12.941	14.826	-40.00	70.00	110.00	-0.072	-0.335	2.420
Final Score	14	84	11.938	8.455	12.210	11.640	8.888	-12.31	35.45	47.76	0.207	0.104	0.922
Healthcare Personnel	3	84	11.667	20.874	10.000	12.059	14.826	-40.00	60.00	100.00	-0.109	-0.217	2.278
Organizational Climate and Culture	8	84	10.476	19.506	10.000	10.294	14.826	-30.00	60.00	90.00	0.184	-0.472	2.128
Information Management	5	84	8.095	22.467	10.000	8.235	29.652	-40.00	50.00	90.00	0.027	-0.655	2.451
Managing Change	7	84	7.500	20.877	10.000	7.794	14.826	-60.00	60.00	120.00	-0.267	0.702	2.278
Quantitative Management	10	24	2.500	26.251	0.000	3.000	29.652	-40.00	40.00	80.00	0.043	-1.265	5.359

#########################################################################

Fall 2020 Analysis

For the most recent cohort, the problematic areas are Quantitative Methods, Change Management, and Leadership. We need to do better in these areas at improving our students. I recommend the following.

Require Healthcare Statistics as a pre-requisite. Remove any other statistics option. It’s hard. I get it. But we need these students to have the skills of business students at a minimum.
Add an LO regarding managing change to the supervisory management course and use a secondary review of this skill. There is a great set of resources from Kotter and HBR on this.
Change supervisory management to be Leadership & Management, similar to the courses offered by business schools.

The best improvements we see are in finance, healthcare law, healthcare systems (all classes post 3308), quality, strategy, and general management. These areas are seeing a 10%+ jump on average with se ~= +/- 4. Kudos to the Team.

I want to specifically point out that HIM (Ms. Brooks) has moved our students’ performance from the bottom up FOUR notches, and not all of our students took her. I am impressed.

mydelta2=-mydata[55:84,10:22]+mydata[139:168, 10:22]
delta2=describe(mydelta2)
delta2=delta2[order(-delta2$mean),]
delta2%>%kbl(caption="Difference, Post Minus Pre, Fall 2020 Only")%>%kable_classic(full_width = F, html_font = "Cambria")

Difference, Post Minus Pre, Fall 2020 Only
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Fin	1	30	19.666667	21.89053	20	20.416667	14.826	-30	60	90	-0.3022042	0.0039762	3.996646
Lgl	13	30	18.666667	20.63364	15	17.500000	22.239	-10	70	80	0.4912237	-0.5066014	3.767170
Sys	4	30	16.000000	26.07681	20	17.083333	29.652	-40	70	110	-0.3523527	-0.3442791	4.760952
QI	9	30	13.666667	19.02509	20	15.833333	14.826	-40	40	80	-0.9775203	0.6230282	3.473490
Strat	11	30	13.333333	26.69539	10	11.250000	14.826	-30	80	110	0.6942389	0.0792910	4.873888
Mgt	2	30	12.000000	24.83046	10	12.916667	29.652	-40	60	100	-0.1687867	-0.9313931	4.533401
Com	12	30	8.333333	20.52473	5	7.916667	22.239	-30	50	80	0.0779607	-0.9336668	3.747285
Cli	8	30	8.000000	19.72221	10	7.916667	22.239	-30	50	80	0.0865568	-0.8434596	3.600766
HIM	5	30	7.666667	23.73464	10	8.333333	29.652	-40	50	90	-0.1203200	-0.5856939	4.333333
HR	3	30	6.000000	19.40494	10	6.666667	22.239	-40	40	80	-0.3514456	-0.7592968	3.542841
Ldr	6	30	5.666667	19.41974	10	7.083333	14.826	-40	40	80	-0.6025290	-0.0720909	3.545544
Chg	7	30	4.666667	23.15366	10	5.000000	29.652	-60	50	110	-0.3617384	0.3599314	4.227261
QM	10	24	2.500000	26.25129	0	3.000000	29.652	-40	40	80	0.0431855	-1.2647964	5.358523

Results by Time / Area

This is the “money” table, as it provides the mean pre-test scores by area (PreXbar), the mean post-test scores by area (PostXbar), and the mean differences scores by area (DeltaXbar). We would expect that our curriculum would result in improved performance (DeltaXbar >0). Medians and standard deviations are shown as well.

a=pre[1:14,3]; b=post[1:14,3]; g=delta[1:14,3]
m1=pre[1:14,5 ]; m2=post[1:14,5]; m3=delta[1:14,5]
s1=pre[1:14,4 ]; s2=post[1:14,4]; s3=delta[1:14,4]
mydf=data.frame(PreXbar=a, PostXbar=b, DeltaXbar=g,
                PreMed=m1, PostMed=m2, DeltaMed=m3, 
                PreSD=s1, PostSD=s2, DeltaSD=s3)
row.names(mydf)=mynames[1:14]
mydf%>%kbl()%>%kable_classic(full_width=F, html_font="Calibri")

	PreXbar	PostXbar	DeltaXbar	PreMed	PostMed	DeltaMed	PreSD	PostSD	DeltaSD
Financial Management	54.762	73.095	18.333	50	70.00	20.00	18.068	14.055	22.217
General Management	59.643	75.476	15.833	60	80.00	20.00	16.894	12.650	22.345
Healthcare Personnel	65.119	76.786	11.667	60	80.00	10.00	15.010	14.740	20.874
Healthcare Systems & Organizations	54.405	69.524	15.119	50	70.00	20.00	19.160	16.640	26.090
Information Management	56.905	65.000	8.095	60	70.00	10.00	17.697	13.928	22.467
Leadership Skills and Behavior	60.952	74.762	13.810	60	80.00	10.00	15.953	16.093	21.222
Managing Change	62.024	69.524	7.500	60	70.00	10.00	14.626	15.042	20.877
Organizational Climate and Culture	65.476	75.952	10.476	70	80.00	10.00	16.531	15.532	19.506
Quality Improvement	58.214	72.738	14.524	60	70.00	20.00	16.218	14.172	19.843
Quantitative Management	48.333	50.333	2.500	50	50.00	0.00	16.061	19.025	26.251
Strategic Planning and Marketing	56.905	75.357	18.452	60	80.00	20.00	18.496	14.009	22.681
Community and the Environment	55.000	67.619	12.619	60	70.00	10.00	16.827	15.415	22.178
Legal Environment of Healthcare Administration	60.238	75.595	15.357	60	80.00	10.00	16.353	16.454	22.732
Final Score	58.863	70.801	11.938	60	71.53	12.21	7.849	5.297	8.455

Ethnicity / Race

#########################################################################
levels(mydata$Ethnicity)=c("Asian", "African American", "Caucasian", "Any Hispanic") #Assign new names to factor levels
ethtable=as.data.frame(table(mydata$Ethnicity[1:84],mydata$Gender[1:84])/length(mydata$Ethnicity[1:84])) #get a cross-tabs table for race and gender
colnames(ethtable)=c("Race","Gender", "Percentage")#set the colum names for the table
mylab=paste0(round(ethtable$Percentage,2)*100,"%, ", ethtable$Percentage*84, " students") #Get the labels for the graph

ggplot(data=ethtable, aes(x=Race,y=Percentage, fill=Gender))+
  geom_bar(stat="identity")+
  geom_text(aes( label = mylab,  vjust = 1))

#########################################################################

Age~Race & Gender

Analyze age as a function of race and gender

#########################################################################
describe(mydata$Age[1:84]) #get the descriptive statistics

##    vars  n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 84 22.69 3.03     22   22.09 1.48  20  42    22 3.67    18.21 0.33

ggplot(data=mydata[1:84,], aes(x=Age,color=Gender))+
  geom_histogram(fill="white", binwidth=1)+
  ylab("Frequency")  #plot Age ~Gender

ggplot(data=mydata[1:84,], aes(x=Age,color=Ethnicity))+
  geom_histogram(fill="white", binwidth=1)+
  ylab("Frequency")  #plot Age~ Race

#########################################################################

GPA

#########################################################################
describe(mydata$GPA[1:84]) #Get descriptives

##    vars  n mean   sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 84 3.24 0.26    3.2    3.22 0.28 2.85 3.79  0.94 0.45     -0.9 0.03

ggplot(mydata[1:84, ], aes(x = Ethnicity, y = GPA, fill=Ethnicity))+
  geom_boxplot(aes(color=Gender))+
  coord_flip()  #plot based on ethnicity and gender

#########################################################################

GPA~Race+Gender

Evaluating GPA as a function of race and gender

#########################################################################
newdata1=aggregate(mydata$GPA[1:84], b=list(mydata$Ethnicity[1:84],mydata$Gender[1:84]), FUN=mean) #aggregate the data based on race and gender, get the means by those groups

newdata2=aggregate(mydata$GPA[1:84],b=list(mydata$Ethnicity[1:84],mydata$Gender[1:84]),function(x){qnorm(.975)*sd(x)/sqrt(length(x))}) #get the CI adjustment

newdata=cbind(newdata1,newdata2$x)  #bind the data together by columns
colnames(newdata)=c("Race", "Gender", "GPA", "SD") #name the columns
newdata$RaceGender=as.factor(paste0(newdata$Race,", ",newdata$Gender)) #make a new variable that has both race and gender
ggplot(   #plot
  newdata, 
  aes(x = GPA, y = RaceGender, xmin = GPA-SD, xmax = GPA+SD)
  ) +
  geom_point(aes(color = Gender)) +
  geom_errorbarh(aes(color = Gender), height=.2)+
  theme_light()+
  xlab("95% Confidence Interval for GPA")

## Warning: Removed 1 rows containing missing values (geom_errorbarh).

#########################################################################

Days between Tests

Analyze the time between pretests and posttests

#########################################################################
describe(mydata$DaysBetween[1:84])  #describe the days between

##    vars  n   mean    sd median trimmed  mad min max range skew kurtosis   se
## X1    1 84 442.31 70.01    417  427.32 25.2 333 654   321    2     3.24 7.64

ggplot(data=mydata[1:84,], aes(x=DaysBetween,color=Gender))+  #plot it
  geom_histogram(fill="white", binwidth=30)+
  ylab("Frequency")

#########################################################################

Delta Score Boxplots

Plot the Delta scores.

#########################################################################
par(mfrow=c(1,1))

myscores=as.data.frame(mydelta[1:13]) #Variables for plotting
mybars=error.bars(myscores)

plot(error.bars(myscores),ylab="Variable", xlab="Delta Score", 
     xlim=c(.5, 13.5), ylim=c(0,length(myscores)),lty="dashed", las=2)

#########################################################################

Simple Inferentials

Paired t-tests

#########################################################################
myf=function(x){
  myt=t.test(x)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate,3),round(myt$conf.int[1],3),
      round(myt$conf.int[2],3),round(myt$statistic,3), round(myt$parameter,3),
      round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 6*16), 16)
for (i in 1:16){a[i,1:6]=myf(mydelta[i])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=c("Financial Management", "General Management", "Healthcare Personnel", "Healthcare Systems and Organizations", "Information Management", "Leadership Skills and Behavior", "Managing Change","Organizational Climate and Culture", "Quality Improvement", "Quantitative Methods","Strategic Planning and Marketing","The Community and the Environment","The Legal Environment", "Total Score", "Percentile Rank", "Test Time")
colnames(a)=c("Estimate","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()

	Estimate	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Financial Management	18.333	13.512	23.155	7.563	83	0.000
General Management	15.833	10.984	20.682	6.494	83	0.000
Healthcare Personnel	11.667	7.137	16.197	5.122	83	0.000
Healthcare Systems and Organizations	15.119	9.457	20.781	5.311	83	0.000
Information Management	8.095	3.220	12.971	3.302	83	0.001
Leadership Skills and Behavior	13.810	9.204	18.415	5.964	83	0.000
Managing Change	7.500	2.969	12.031	3.293	83	0.001
Organizational Climate and Culture	10.476	6.243	14.709	4.922	83	0.000
Quality Improvement	14.524	10.218	18.830	6.708	83	0.000
Quantitative Methods	2.500	-8.585	13.585	0.467	23	0.645
Strategic Planning and Marketing	18.452	13.530	23.375	7.456	83	0.000
The Community and the Environment	12.619	7.806	17.432	5.215	83	0.000
The Legal Environment	15.357	10.424	20.290	6.192	83	0.000
Total Score	11.938	10.103	13.772	12.941	83	0.000
Percentile Rank	21.095	15.655	26.536	7.712	83	0.000
Test Time	30.273	25.857	34.689	13.635	83	0.000

#########################################################################

Unpaired t-tests

#########################################################################

myf=function(x,y){
  
  myt=t.test(x, y)
  newp=round(p.adjust(myt$p.value),3)
  a=c(round(myt$estimate[1],3),round(myt$estimate[2],3),
      round(myt$conf.int[1],3),   
      round(myt$conf.int[2],3),round(myt$statistic,3), 
      round(myt$parameter,3),round(newp,3))
  return(a)
  
}

a=matrix(rep(NA, 7*16), 16)
for (i in 1:16){a[i,1:7]=myf(mydata[1:84,i+9], mydata[85:168,i+9])}
a=as.data.frame(a)
for (i in 1:6){a[,i]=as.numeric(a[,i])}
rownames(a)=c("Financial Management", "General Management", "Healthcare Personnel", "Healthcare Systems and Organizations", "Information Management", "Leadership Skills and Behavior", "Managing Change","Organizational Climate and Culture", "Quality Improvement", "Quantitative Methods","Strategic Planning and Marketing","The Community and the Environment","The Legal Environment", "Total Score", "Percentile Rank", "Test Time")
colnames(a)=c("Mean Pre","Mean Post","Lower 95% CI", "Upper 95% CI", "t-Value", "df", "Holm-Adjusted p")
a%>%kbl()%>%kable_classic()

	Mean Pre	Mean Post	Lower 95% CI	Upper 95% CI	t-Value	df	Holm-Adjusted p
Financial Management	54.762	73.095	-23.267	-13.400	-7.340	156.525	0.000
General Management	59.643	75.476	-20.382	-11.284	-6.876	153.809	0.000
Healthcare Personnel	65.119	76.786	-16.198	-7.135	-5.083	165.945	0.000
Healthcare Systems and Organizations	54.405	69.524	-20.587	-9.652	-5.460	162.804	0.000
Information Management	56.905	65.000	-12.949	-3.242	-3.295	157.308	0.001
Leadership Skills and Behavior	60.952	74.762	-18.691	-8.928	-5.585	165.987	0.000
Managing Change	62.024	69.524	-12.020	-2.980	-3.276	165.869	0.001
Organizational Climate and Culture	65.476	75.952	-15.363	-5.590	-4.233	165.360	0.000
Quality Improvement	58.214	72.738	-19.164	-9.883	-6.180	163.069	0.000
Quantitative Methods	48.333	50.333	-11.585	7.585	-0.419	51.825	0.677
Strategic Planning and Marketing	56.905	75.357	-23.453	-13.451	-7.289	154.649	0.000
The Community and the Environment	55.000	67.619	-17.535	-7.703	-5.068	164.743	0.000
The Legal Environment	60.238	75.595	-20.354	-10.360	-6.067	165.994	0.000
Total Score	58.863	70.801	-13.980	-9.896	-11.554	145.613	0.000
Percentile Rank	56.214	77.310	-27.169	-15.022	-6.863	147.942	0.000
Test Time	49.089	79.362	-37.193	-23.353	-8.637	165.514	0.000

#########################################################################

Models

MANOVA Prep

#########################################################################

Fin=-mydata$Fin[1:84]+mydata$Fin[85:168]
Mgt=-mydata$Mgt[1:84]+mydata$Mgt[85:168]
HR=-mydata$HR[1:84]+mydata$HR[85:168]
Sys=-mydata$Sys[1:84]+mydata$Sys[85:168]
HIM=-mydata$HIM[1:84]+mydata$HIM[85:168]
Ldr=-mydata$Ldr[1:84]+mydata$Ldr[85:168]
Chg=-mydata$Chg[1:84]+mydata$Chg[85:168]
Cli=-mydata$Cli[1:84]+mydata$Cli[85:168]
QI=-mydata$QI[1:84]+mydata$QI[85:168]
Strat=-mydata$Strat[1:84]+mydata$Strat[85:168]
Com=-mydata$Com[1:84]+mydata$Com[85:168]
Lgl=-mydata$Lgl[1:84]+mydata$Lgl[85:168]

newdata=mydata[1:84,]

newdata$Fin=Fin;newdata$Mgt=Mgt;newdata$HR=HR;newdata$Sys=Sys
newdata$HIM=HIM;newdata$Ldr=Ldr;newdata$Chg=Chg;newdata$Cli=Cli
newdata$QI=QI;newdata$Strat=Strat;newdata$Com=Com;newdata$Lgl=Lgl

corcheck=cor(cbind(Fin, Mgt, HR, Sys, HIM, Ldr,
          Chg, Cli, QI, Strat, Com, Lgl)) #no significant correlations


mvn(newdata[, 10:22])$multivariateNormality #multivariate normal, Mardia

##              Test         Statistic            p value Result
## 1 Mardia Skewness  391.067558137444  0.986281965831057    YES
## 2 Mardia Kurtosis -2.03197441794094 0.0421562458328015     NO
## 3             MVN              <NA>               <NA>     NO

myt=powerTransform(as.matrix(newdata[, 10:22]+100)~1)#Likelihood after location shift
testTransform(myt)%>%kbl()%>%kable_classic(full_width = F, html_font = "Cambria")

	LRT	df	pval
LR test, lambda = (1 1 1 1 1 1 1 1 1 1 1 1 1)	15.12201	13	0.2998

corfunction(corcheck)

#########################################################################

MANOVA Prep2

#########################################################################
temp=matrix(rep(NA, 12*3), nrow=12)
mynames=colnames(newdata[, 10:21])
newdata$GPA2=newdata$GPA
newdata$GPA2[newdata$GPA2<3]=0
newdata$GPA2[newdata$GPA2>=3.0]=1
newdata$GPA2=as.factor(newdata$GPA2)
for (i in 1:12){temp[i,1]=leveneTest(newdata[,i+9], newdata$Gender)$`Pr(>F)`[1]
  temp[i,2]=leveneTest(newdata[,i+9], newdata$Ethnicity)$`Pr(>F)`[1]
  temp[i,3]=leveneTest(newdata[,i+9], newdata$GPA2)$`Pr(>F)`[1]}
row.names(temp)=mynames
colnames(temp)=c("Gender", "Ethnicity", "GPA>=3.0")
temp%>%kbl(caption="Levene's Test p-values")%>%kable_classic()

Levene’s Test p-values
	Gender	Ethnicity	GPA>=3.0
Fin	0.3958432	0.6950813	0.5854971
Mgt	0.1657677	0.2148585	0.5584298
HR	0.1160341	0.4572789	0.9223638
Sys	0.2748534	0.9758254	0.0727096
HIM	0.5796980	0.6842153	0.9080817
Ldr	0.0317125	0.8891262	0.2531923
Chg	0.7254359	0.0920259	0.9530268
Cli	0.1203360	0.8907364	0.2904372
QI	0.6465339	0.7577285	0.2227968
QM	0.2969722	0.9211924	0.2177867
Strat	0.8080935	0.0142457	0.9453991
Com	0.7137436	0.4234616	0.0161944

#########################################################################

MANOVA 1

The MANOVA simultaneously tests all delta variables against the intercept (representing the pre-post change), gender, ethnicity, and discrete-coded GPA above 3.0. Only the pre-post delta makes a difference for all test categories. By fitting to the delta scores, we need not do a repeated measures analysis.

#########################################################################
res.man <- manova(cbind(Fin, Mgt, HR, Sys, HIM, Ldr,
                        Chg, Cli, QI, Strat, Com, Lgl) ~
                    1+Gender+Ethnicity+GPA2, data = newdata)
summary(res.man, intercept=TRUE)

##             Df  Pillai approx F num Df den Df Pr(>F)    
## (Intercept)  1 0.78698  20.6268     12     67 <2e-16 ***
## Gender       1 0.09669   0.5976     12     67 0.8365    
## Ethnicity    3 0.47675   1.0864     36    207 0.3495    
## GPA2         1 0.11668   0.7375     12     67 0.7101    
## Residuals   78                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(res.man, intercept = TRUE)

##  Response Fin :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  28233 28233.3 60.1534 2.792e-11 ***
## Gender       1    248   247.9  0.5282    0.4695    
## Ethnicity    3   3006  1002.0  2.1349    0.1025    
## GPA2         1   1103  1102.9  2.3497    0.1293    
## Residuals   78  36610   469.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Mgt :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  21058 21058.3 44.1410 3.719e-09 ***
## Gender       1    580   579.9  1.2155   0.27363    
## Ethnicity    3   3567  1189.1  2.4925   0.06621 .  
## GPA2         1     83    83.1  0.1741   0.67760    
## Residuals   78  37211   477.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response HR :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  11433 11433.3 26.2056 2.154e-06 ***
## Gender       1    248   247.9  0.5682    0.4532    
## Ethnicity    3   1770   589.9  1.3521    0.2637    
## GPA2         1    118   118.0  0.2705    0.6045    
## Residuals   78  34031   436.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Sys :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  19201 19201.2 27.2250 1.455e-06 ***
## Gender       1     25    25.3  0.0358    0.8503    
## Ethnicity    3   1309   436.3  0.6187    0.6050    
## GPA2         1    153   152.9  0.2168    0.6428    
## Residuals   78  55012   705.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response HIM :
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## (Intercept)  1   5505  5504.8 10.8306 0.001501 **
## Gender       1    488   488.3  0.9606 0.330057   
## Ethnicity    3   1664   554.8  1.0916 0.357751   
## GPA2         1     98    98.2  0.1933 0.661438   
## Residuals   78  39644   508.3                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Ldr :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  16019 16019.0 35.8938 6.058e-08 ***
## Gender       1     74    74.0  0.1657    0.6850    
## Ethnicity    3   1647   548.9  1.2300    0.3045    
## GPA2         1    850   849.6  1.9037    0.1716    
## Residuals   78  34811   446.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Chg :
##             Df Sum Sq Mean Sq F value   Pr(>F)   
## (Intercept)  1   4725  4725.0 10.8485 0.001488 **
## Gender       1    124   123.5  0.2836 0.595851   
## Ethnicity    3   1623   541.2  1.2425 0.300066   
## GPA2         1    456   455.7  1.0462 0.309542   
## Residuals   78  33972   435.5                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Cli :
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  9219.0  9219.0 23.5596 6.094e-06 ***
## Gender       1   735.7   735.7  1.8802    0.1742    
## Ethnicity    3   316.7   105.6  0.2698    0.8470    
## GPA2         1     6.6     6.6  0.0168    0.8971    
## Residuals   78 30522.0   391.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response QI :
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1 17719.0 17719.0 45.1342 2.694e-09 ***
## Gender       1    38.7    38.7  0.0985    0.7545    
## Ethnicity    3  1987.8   662.6  1.6877    0.1765    
## GPA2         1    32.8    32.8  0.0836    0.7732    
## Residuals   78 30621.7   392.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Strat :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  28601 28601.2 55.7052 1.021e-10 ***
## Gender       1      2     1.8  0.0034    0.9536    
## Ethnicity    3   2515   838.4  1.6330    0.1885    
## GPA2         1    134   133.6  0.2602    0.6114    
## Residuals   78  40048   513.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Com :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  13376 13376.2 26.8564 1.676e-06 ***
## Gender       1      0     0.3  0.0006    0.9811    
## Ethnicity    3   1565   521.7  1.0474    0.3765    
## GPA2         1    410   409.7  0.8226    0.3672    
## Residuals   78  38849   498.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Lgl :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## (Intercept)  1  19811 19810.7 38.2416 2.679e-08 ***
## Gender       1    686   686.3  1.3249    0.2532    
## Ethnicity    3    560   186.8  0.3605    0.7816    
## GPA2         1   1235  1235.4  2.3848    0.1266    
## Residuals   78  40407   518.0                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#########################################################################

MANCOVA

A second test models continuous covariates of age, GPA, gender and qualitative covariate ethnicity to explain differences. These models explain nothing, further indicating that the delta is based solely on the curriculum intervention.

#########################################################################
newdata$Gender=as.integer(newdata$Gender)
mymancova=jmv::mancova(
  data=newdata, 
  deps=vars(Fin, Mgt, HR, Sys, HIM, Ldr,
          Chg, Cli, QI, Strat, Com, Lgl), 
  factors=c(Ethnicity), 
  covs=c(Age, GPA, Gender), 
  multivar = list("pillai", "wilks", "hotel", "roy"),
)

options(digits=3)
a=mymancova$multivar
a

## 
##  Multivariate Tests                                                            
##  ----------------------------------------------------------------------------- 
##                                       value     F        df1    df2    p       
##  ----------------------------------------------------------------------------- 
##    Ethnicity    Pillai's Trace        0.5009    1.136     36    204    0.287   
##                 Wilks' Lambda          0.563    1.165     36    196    0.254   
##                 Hotelling's Trace     0.6642    1.193     36    194    0.224   
##                 Roy's Largest Root    0.4406    2.497     12     68    0.009   
##                                                                                
##    Age          Pillai's Trace        0.1245    0.782     12     66    0.666   
##                 Wilks' Lambda          0.875    0.782     12     66    0.666   
##                 Hotelling's Trace     0.1422    0.782     12     66    0.666   
##                 Roy's Largest Root    0.1422    0.782     12     66    0.666   
##                                                                                
##    GPA          Pillai's Trace        0.1792    1.200     12     66    0.302   
##                 Wilks' Lambda          0.821    1.200     12     66    0.302   
##                 Hotelling's Trace     0.2183    1.200     12     66    0.302   
##                 Roy's Largest Root    0.2183    1.200     12     66    0.302   
##                                                                                
##    Gender       Pillai's Trace        0.0824    0.494     12     66    0.911   
##                 Wilks' Lambda          0.918    0.494     12     66    0.911   
##                 Hotelling's Trace     0.0898    0.494     12     66    0.911   
##                 Roy's Largest Root    0.0898    0.494     12     66    0.911   
##  -----------------------------------------------------------------------------

#########################################################################

Appendix A

Program Outcomes (PO) and Student Learning Objectives (SLO)

Teaching

T1. All faculty evaluations (100%) each term have a median of 4.0 or greater out of 5.0 on the question, “Instructor provided the opportunity to learn.” {1=Strongly Disagree,…5=Strongly Agree} PO
T2. 100% of faculty members support opportunities for learning professional behavior as evidenced by in class learning activities and activities in SOHA- sponsored functions. PO
T3. BHA majors will successfully complete (with a grade of 80% or better) a case study/project in HA 3376 (Financial Management). SLO
T4. BHA majors will successfully complete (with a grade of 80% or better) a final exam in HA 3375 (Financial Accounting). SLO
T5. 100% of students will successfully complete the field experience (HA 4848) with a passing evaluation by their preceptor. A preceptor analysis will be used to determine application of the above referenced skills needing improvement. SLO
T6. 100% of students will successfully complete the final project requirements in the field experience (HA 4848) with a passing evaluation by their preceptor. SLO
T7. BHA majors will successfully complete (with a grade of 80% or better) a capstone case study in HA 4325 (strategic management) including an internal audit of strategic assessment; an external environmental assessment; and an assessment of a healthcare organization strategic plan. SLO
T8. 90% of students will demonstrate success in the writing intensive courses by scoring B’s or better on the final writing intensive (WI) assignments in HA3324. SLO
T9. 90% of students will demonstrate success in oral communication by achieving B’s or better on HA3344 group presentations. SLO
T10. 80% of our students will attain IISE Lean Six Sigma Green Belt Certification. SLO
T11. 80% of our students will attain Excel Basic MOS Certification. SLO
T12. 80% of our students will attain Excel Expert MOS Certification. SLO
T13. 80% of our students will attain QuickBooks Certification. SLO
T14. 100% of students will successfully complete the comprehensive exam with greater than median (nationwide) comparative scores. SLO
T15. 100% of students will have higher post-test scores than pre-test scores on the Peregrine exit exam. SLO

Research

R1. 100% of faculty members are engaged in research activities that support the body of knowledge in their respective fields as evidenced by at least one peer-reviewed publication each year. PO
R2. 50% of faculty acquire research funding for the University and the Department (stretch goal). PO
R3. 50% of students present a paper, poster or other research outcome at a professional conference or research symposium. SLO
R4. 10% of students are a named author on an article submitted for publication in a peer- reviewed journal. SLO
R5. 100% of faculty will collaborate either internally or externally for research purposes to build the research portfolio of our program. PO

Service

S1. 100% of faculty members will have demonstrated professional service each year as evidenced by review of CV’s. PO
S2. 100% of faculty members will have demonstrated community service each year as evidenced by review of CV’s. PO
S3. 100% of faculty provide service at the University, College, School, or Department level. PO

Student Success and Admissions

SS1. 50% of students seeking employment will have jobs within 1 month after graduation. SLO
SS2. 100% of students seeking employment will have jobs within 9 months after graduation. SLO
SS3. 80% or more of available seats will be filled with highly qualified students each term. PO
SS4. 70% of original cohort graduate together. SLO
SS5. 25% or more of admitted students will be of minority status. SLO

Program Success

P1. The program will maintain AUPHA certification (binary). PO
P2. One or more students will receive national awards. PO
P3. The BHA budget remains static or grows but does not shrink. PO
P4. 100% of vacant positions are filled. PO
P5. Alumni will serve as preceptors and speakers for the program as well as board members. PO
P6. In-building student computer facilities increase (binary). PO

BHA Examination Results, Pre-Post

Doc Larry Fulton

7 November 2020